atlas-schema 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atlas_schema/_version.py CHANGED
@@ -1,7 +1,14 @@
1
1
  # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
3
 
4
- __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
5
12
 
6
13
  TYPE_CHECKING = False
7
14
  if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
9
16
  from typing import Union
10
17
 
11
18
  VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
12
20
  else:
13
21
  VERSION_TUPLE = object
22
+ COMMIT_ID = object
14
23
 
15
24
  version: str
16
25
  __version__: str
17
26
  __version_tuple__: VERSION_TUPLE
18
27
  version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
19
30
 
20
- __version__ = version = '0.3.0'
21
- __version_tuple__ = version_tuple = (0, 3, 0)
31
+ __version__ = version = '0.4.0'
32
+ __version_tuple__ = version_tuple = (0, 4, 0)
33
+
34
+ __commit_id__ = commit_id = None
atlas_schema/enums.py CHANGED
@@ -18,7 +18,7 @@ class MultipleEnumAccessMeta(EnumType):
18
18
  Enum Metaclass to provide a way to access multiple values all at once.
19
19
  """
20
20
 
21
- def __getitem__(self: type[_E], key: str | tuple[str]) -> _E | list[_E]: # type:ignore[misc,override]
21
+ def __getitem__(cls: type[_E], key: str | tuple[str]) -> _E | list[_E]: # type:ignore[misc,override]
22
22
  getitem = cast(Callable[[str], _E], super().__getitem__) # type:ignore[misc]
23
23
  if isinstance(key, tuple):
24
24
  return [getitem(name) for name in key]
atlas_schema/methods.py CHANGED
@@ -18,22 +18,127 @@ behavior.update(base.behavior)
18
18
  behavior.update(candidate.behavior)
19
19
 
20
20
 
21
+ def _set_repr_name(classname):
22
+ def namefcn(_self):
23
+ return classname
24
+
25
+ behavior[("__typestr__", classname)] = classname[0].lower() + classname[1:]
26
+ behavior[classname].__repr__ = namefcn
27
+
28
+
21
29
  class NtupleEvents(behavior["NanoEvents"]): # type: ignore[misc, valid-type, name-defined]
30
+ """Individual systematic variation of events."""
31
+
22
32
  def __repr__(self):
23
- return f"<event {getattr(self, 'runNumber', '??')}:\
24
- {getattr(self, 'eventNumber', '??')}:\
25
- {getattr(self, 'mcChannelNumber', '??')}>"
33
+ return f"<event {getattr(self, 'runNumber', '??')}:{getattr(self, 'eventNumber', '??')}:{getattr(self, 'mcChannelNumber', '??')}>"
26
34
 
35
+ def __getitem__(self, key):
36
+ """Support accessing systematic variations via bracket notation.
27
37
 
28
- behavior["NanoEvents"] = NtupleEvents
38
+ Args:
39
+ key: The systematic variation name. "NOSYS" returns the nominal events.
29
40
 
41
+ Returns:
42
+ The requested systematic variation or nominal events for "NOSYS".
43
+ """
44
+ if key == "NOSYS":
45
+ return self
46
+ return super().__getitem__(key)
30
47
 
31
- def _set_repr_name(classname):
32
- def namefcn(_self):
33
- return classname
48
+ @property
49
+ def systematic(self):
50
+ """Get the systematic variation name for this event collection."""
51
+ return "nominal"
34
52
 
35
- behavior[("__typestr__", classname)] = classname[0].lower() + classname[1:]
36
- behavior[classname].__repr__ = namefcn
53
+ @property
54
+ def systematic_names(self):
55
+ """Get all systematic variations available in this event collection.
56
+
57
+ Returns a list of systematic variation names, including 'NOSYS' for nominal.
58
+ """
59
+ # Get systematics from metadata stored during schema building
60
+ systematics = self.metadata.get("systematics", [])
61
+ return ["NOSYS", *systematics]
62
+
63
+ @property
64
+ def systematics(self):
65
+ """Get all systematic variations available in this event collection.
66
+
67
+ Returns a list of the systematic variations themselves (looked up by name via getattr), excluding the nominal 'NOSYS'.
68
+ """
69
+ # Get systematics from metadata stored during schema building
70
+ return [
71
+ getattr(self, systematic)
72
+ for systematic in self.systematic_names
73
+ if systematic != "NOSYS"
74
+ ]
75
+
76
+
77
+ behavior["NtupleEvents"] = NtupleEvents
78
+
79
+
80
+ class NtupleEventsArray(behavior[("*", "NanoEvents")]): # type: ignore[misc, valid-type, name-defined]
81
+ """Collection of NtupleEvents objects, one for each systematic variation."""
82
+
83
+ def __getitem__(self, key):
84
+ """Support accessing systematic variations via bracket notation.
85
+
86
+ Args:
87
+ key: The systematic variation name. "NOSYS" returns the nominal events.
88
+
89
+ Returns:
90
+ The requested systematic variation or nominal events for "NOSYS".
91
+ """
92
+ if key == "NOSYS":
93
+ return self
94
+ return super().__getitem__(key)
95
+
96
+ @property
97
+ def systematic_names(self):
98
+ """Get all systematic variations available in this event collection.
99
+
100
+ Returns a list of systematic variation names, including 'NOSYS' for nominal.
101
+ """
102
+ # Get systematics from metadata stored during schema building
103
+ systematics = self.metadata.get("systematics", [])
104
+ return ["NOSYS", *systematics]
105
+
106
+ @property
107
+ def systematics(self):
108
+ """Get all systematic variations available in this event collection.
109
+
110
+ Returns a list of the systematic variations themselves (looked up by name via getattr), excluding the nominal 'NOSYS'.
111
+ """
112
+ # Get systematics from metadata stored during schema building
113
+ return [
114
+ getattr(self, systematic)
115
+ for systematic in self.systematic_names
116
+ if systematic != "NOSYS"
117
+ ]
118
+
119
+
120
+ behavior[("*", "NtupleEvents")] = NtupleEventsArray
121
+
122
+
123
+ @awkward.mixin_class(behavior)
124
+ class Systematic(base.NanoCollection, base.Systematic):
125
+ """Base class for systematic variations."""
126
+
127
+ @property
128
+ def metadata(self):
129
+ """Arbitrary metadata"""
130
+ return self.layout.purelist_parameter("metadata") # pylint: disable=no-member
131
+
132
+ @property
133
+ def systematic(self):
134
+ """Get the systematic variation name for this event collection."""
135
+ return self.metadata["systematic"]
136
+
137
+ def __repr__(self):
138
+ return f"<event {self.systematic}>"
139
+
140
+
141
+ _set_repr_name("Systematic")
37
142
 
38
143
 
39
144
  @awkward.mixin_class(behavior)
@@ -50,7 +155,7 @@ class Pass(base.NanoCollection, base.Systematic): ...
50
155
  _set_repr_name("Pass")
51
156
 
52
157
  behavior.update(
53
- awkward._util.copy_behaviors("PtEtaPhiMLorentzVector", "Particle", behavior)
158
+ awkward._util.copy_behaviors("PtEtaPhiMLorentzVector", "Particle", behavior) # pylint: disable=protected-access
54
159
  )
55
160
 
56
161
 
@@ -88,17 +193,19 @@ class Particle(vector.PtEtaPhiMLorentzVector):
88
193
 
89
194
  _set_repr_name("Particle")
90
195
 
91
- ParticleArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
92
- ParticleArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
93
- ParticleArray.ProjectionClass4D = ParticleArray # noqa: F821
94
- ParticleArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
196
+ ParticleArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
197
+ ParticleArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
198
+ ParticleArray.ProjectionClass4D = ParticleArray # noqa: F821 # pylint: disable=undefined-variable
199
+ ParticleArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
95
200
 
96
201
 
97
- behavior.update(awkward._util.copy_behaviors("PolarTwoVector", "MissingET", behavior))
202
+ behavior.update(awkward._util.copy_behaviors("PolarTwoVector", "MissingET", behavior)) # pylint: disable=protected-access
98
203
 
99
204
 
100
205
  @awkward.mixin_class(behavior)
101
206
  class MissingET(vector.PolarTwoVector, base.NanoCollection, base.Systematic):
207
+ """Missing transverse energy collection."""
208
+
102
209
  @property
103
210
  def r(self):
104
211
  """Distance from origin in XY plane"""
@@ -107,16 +214,18 @@ class MissingET(vector.PolarTwoVector, base.NanoCollection, base.Systematic):
107
214
 
108
215
  _set_repr_name("MissingET")
109
216
 
110
- MissingETArray.ProjectionClass2D = MissingETArray # noqa: F821
111
- MissingETArray.ProjectionClass3D = vector.SphericalThreeVectorArray # noqa: F821
112
- MissingETArray.ProjectionClass4D = vector.LorentzVectorArray # noqa: F821
113
- MissingETArray.MomentumClass = MissingETArray # noqa: F821
217
+ MissingETArray.ProjectionClass2D = MissingETArray # noqa: F821 # pylint: disable=undefined-variable
218
+ MissingETArray.ProjectionClass3D = vector.SphericalThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
219
+ MissingETArray.ProjectionClass4D = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
220
+ MissingETArray.MomentumClass = MissingETArray # noqa: F821 # pylint: disable=undefined-variable
114
221
 
115
- behavior.update(awkward._util.copy_behaviors("Particle", "Photon", behavior))
222
+ behavior.update(awkward._util.copy_behaviors("Particle", "Photon", behavior)) # pylint: disable=protected-access
116
223
 
117
224
 
118
225
  @awkward.mixin_class(behavior)
119
226
  class Photon(Particle, base.NanoCollection, base.Systematic):
227
+ """Photon particle collection."""
228
+
120
229
  @property
121
230
  def mass(self):
122
231
  """Return zero mass for photon."""
@@ -129,82 +238,90 @@ class Photon(Particle, base.NanoCollection, base.Systematic):
129
238
 
130
239
  @property
131
240
  def isEM(self):
132
- return self.isEM_syst.NOSYS == 0
241
+ return self.isEM_syst.NOSYS == 0 # pylint: disable=no-member
133
242
 
134
243
  def pass_isEM(self, words: list[PhotonID]):
135
244
  # 0 is pass, 1 is fail
136
245
  return (
137
- self.isEM_syst.NOSYS & reduce(ior, (1 << word.value for word in words))
246
+ self.isEM_syst.NOSYS & reduce(ior, (1 << word.value for word in words)) # pylint: disable=no-member
138
247
  ) == 0
139
248
 
140
249
 
141
250
  _set_repr_name("Photon")
142
251
 
143
- PhotonArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
144
- PhotonArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
145
- PhotonArray.ProjectionClass4D = PhotonArray # noqa: F821
146
- PhotonArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
252
+ PhotonArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
253
+ PhotonArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
254
+ PhotonArray.ProjectionClass4D = PhotonArray # noqa: F821 # pylint: disable=undefined-variable
255
+ PhotonArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
147
256
 
148
- behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior))
257
+ behavior.update(awkward._util.copy_behaviors("Particle", "Electron", behavior)) # pylint: disable=protected-access
149
258
 
150
259
 
151
260
  @awkward.mixin_class(behavior)
152
261
  class Electron(Particle, base.NanoCollection, base.Systematic):
262
+ """Electron particle collection."""
263
+
153
264
  @property
154
265
  def mass(self):
155
266
  """Electron mass in MeV"""
156
- return awkward.ones_like(self.pt) * particle.literals.e_minus.mass
267
+ return awkward.ones_like(self.pt) * particle.literals.e_minus.mass # pylint: disable=no-member
157
268
 
158
269
 
159
270
  _set_repr_name("Electron")
160
271
 
161
- ElectronArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
162
- ElectronArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
163
- ElectronArray.ProjectionClass4D = ElectronArray # noqa: F821
164
- ElectronArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
272
+ ElectronArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
273
+ ElectronArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
274
+ ElectronArray.ProjectionClass4D = ElectronArray # noqa: F821 # pylint: disable=undefined-variable
275
+ ElectronArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
165
276
 
166
- behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior))
277
+ behavior.update(awkward._util.copy_behaviors("Particle", "Muon", behavior)) # pylint: disable=protected-access
167
278
 
168
279
 
169
280
  @awkward.mixin_class(behavior)
170
281
  class Muon(Particle, base.NanoCollection, base.Systematic):
282
+ """Muon particle collection."""
283
+
171
284
  @property
172
285
  def mass(self):
173
286
  """Muon mass in MeV"""
174
- return awkward.ones_like(self.pt) * particle.literals.mu_minus.mass
287
+ return awkward.ones_like(self.pt) * particle.literals.mu_minus.mass # pylint: disable=no-member
175
288
 
176
289
 
177
290
  _set_repr_name("Muon")
178
291
 
179
- MuonArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
180
- MuonArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
181
- MuonArray.ProjectionClass4D = MuonArray # noqa: F821
182
- MuonArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
292
+ MuonArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
293
+ MuonArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
294
+ MuonArray.ProjectionClass4D = MuonArray # noqa: F821 # pylint: disable=undefined-variable
295
+ MuonArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
183
296
 
184
- behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior))
297
+ behavior.update(awkward._util.copy_behaviors("Particle", "Tau", behavior)) # pylint: disable=protected-access
185
298
 
186
299
 
187
300
  @awkward.mixin_class(behavior)
188
301
  class Tau(Particle, base.NanoCollection, base.Systematic):
302
+ """Tau particle collection."""
303
+
189
304
  @property
190
305
  def mass(self):
191
306
  """Tau mass in MeV"""
192
- return awkward.ones_like(self.pt) * particle.literals.tau_minus.mass
307
+ return awkward.ones_like(self.pt) * particle.literals.tau_minus.mass # pylint: disable=no-member
193
308
 
194
309
 
195
310
  _set_repr_name("Tau")
196
311
 
197
- TauArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
198
- TauArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
199
- TauArray.ProjectionClass4D = TauArray # noqa: F821
200
- TauArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
312
+ TauArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
313
+ TauArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
314
+ TauArray.ProjectionClass4D = TauArray # noqa: F821 # pylint: disable=undefined-variable
315
+ TauArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
201
316
 
202
317
 
203
- behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior))
318
+ behavior.update(awkward._util.copy_behaviors("Particle", "Jet", behavior)) # pylint: disable=protected-access
204
319
 
205
320
 
206
321
  @awkward.mixin_class(behavior)
207
322
  class Jet(Particle, base.NanoCollection, base.Systematic):
323
+ """Jet particle collection."""
324
+
208
325
  @property
209
326
  def mass(self):
210
327
  r"""Invariant mass (+, -, -, -)
@@ -216,31 +333,31 @@ class Jet(Particle, base.NanoCollection, base.Systematic):
216
333
 
217
334
  _set_repr_name("Jet")
218
335
 
219
- JetArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821
220
- JetArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821
221
- JetArray.ProjectionClass4D = JetArray # noqa: F821
222
- JetArray.MomentumClass = vector.LorentzVectorArray # noqa: F821
336
+ JetArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
337
+ JetArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
338
+ JetArray.ProjectionClass4D = JetArray # noqa: F821 # pylint: disable=undefined-variable
339
+ JetArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 # pylint: disable=undefined-variable,no-member
223
340
 
224
341
  __all__ = [
225
342
  "Electron",
226
- "ElectronArray", # noqa: F822
227
- "ElectronRecord", # noqa: F822
343
+ "ElectronArray", # noqa: F822 # pylint: disable=undefined-all-variable
344
+ "ElectronRecord", # noqa: F822 # pylint: disable=undefined-all-variable
228
345
  "Jet",
229
- "JetArray", # noqa: F822
230
- "JetRecord", # noqa: F822
346
+ "JetArray", # noqa: F822 # pylint: disable=undefined-all-variable
347
+ "JetRecord", # noqa: F822 # pylint: disable=undefined-all-variable
231
348
  "MissingET",
232
- "MissingETArray", # noqa: F822
233
- "MissingETRecord", # noqa: F822
349
+ "MissingETArray", # noqa: F822 # pylint: disable=undefined-all-variable
350
+ "MissingETRecord", # noqa: F822 # pylint: disable=undefined-all-variable
234
351
  "Muon",
235
- "MuonArray", # noqa: F822
236
- "MuonRecord", # noqa: F822
352
+ "MuonArray", # noqa: F822 # pylint: disable=undefined-all-variable
353
+ "MuonRecord", # noqa: F822 # pylint: disable=undefined-all-variable
237
354
  "NtupleEvents",
238
355
  "Particle",
239
- "ParticleArray", # noqa: F822
240
- "ParticleRecord", # noqa: F822
356
+ "ParticleArray", # noqa: F822 # pylint: disable=undefined-all-variable
357
+ "ParticleRecord", # noqa: F822 # pylint: disable=undefined-all-variable
241
358
  "Pass",
242
359
  "Photon",
243
- "PhotonArray", # noqa: F822
244
- "PhotonRecord", # noqa: F822
360
+ "PhotonArray", # noqa: F822 # pylint: disable=undefined-all-variable
361
+ "PhotonRecord", # noqa: F822 # pylint: disable=undefined-all-variable
245
362
  "Weight",
246
363
  ]
atlas_schema/schema.py CHANGED
@@ -171,10 +171,12 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
171
171
  pass
172
172
  else:
173
173
  pass
174
- self._form["fields"], self._form["contents"] = self._build_collections(
175
- self._form["fields"], self._form["contents"]
174
+ self._form["fields"], self._form["contents"], discovered_systematics = (
175
+ self._build_collections(self._form["fields"], self._form["contents"])
176
176
  )
177
177
  self._form["parameters"]["metadata"]["version"] = self._version
178
+ self._form["parameters"]["metadata"]["systematics"] = discovered_systematics
179
+ self._form["parameters"]["__record__"] = "NtupleEvents"
178
180
 
179
181
  @classmethod
180
182
  def v1(cls, base_form: dict[str, Any]) -> Self:
@@ -187,7 +189,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
187
189
 
188
190
  def _build_collections(
189
191
  self, field_names: list[str], input_contents: list[Any]
190
- ) -> tuple[KeysView[str], ValuesView[dict[str, Any]]]:
192
+ ) -> tuple[KeysView[str], ValuesView[dict[str, Any]], list[str]]:
191
193
  branch_forms = dict(zip(field_names, input_contents))
192
194
 
193
195
  # parse into high-level records (collections, list collections, and singletons)
@@ -240,6 +242,19 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
240
242
  msg = "One of the branches does not follow the assumed pattern for this schema. [invalid-branch-name]"
241
243
  raise RuntimeError(msg) from exc
242
244
 
245
+ all_systematics = self._discover_systematics(
246
+ branch_forms, collections, subcollections
247
+ )
248
+
249
+ # Pre-compute systematic branch patterns for O(1) lookups
250
+ # This replaces the expensive O(m*s) nested condition checks
251
+ systematic_branch_patterns = set()
252
+ for collection in collections:
253
+ for subcoll in subcollections:
254
+ for sys in all_systematics:
255
+ if sys != "NOSYS":
256
+ systematic_branch_patterns.add(f"{collection}_{subcoll}_{sys}")
257
+
243
258
  # Check the presence of the event_ids
244
259
  missing_event_ids = [
245
260
  event_id for event_id in self.event_ids if event_id not in branch_forms
@@ -285,91 +300,206 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
285
300
 
286
301
  output[name] = branch_forms[name]
287
302
 
288
- # next, go through and start grouping up collections
289
- for name in collections:
290
- content = {}
303
+ # First, build nominal collections the traditional way
304
+ nominal_collections = {}
305
+ for collection_name in collections:
306
+ collection_content = {}
291
307
  used = set()
292
308
 
309
+ # Process subcollections with NOSYS variations
293
310
  for subname in subcollections:
294
- prefix = f"{name}_{subname}_"
295
- used.update({k for k in branch_forms if k.startswith(prefix)})
296
- subcontent = {
297
- k[len(prefix) :]: branch_forms[k]
298
- for k in branch_forms
299
- if k.startswith(prefix)
300
- }
301
- if subcontent:
302
- # create the nominal version
303
- content[subname] = branch_forms[f"{prefix}NOSYS"]
304
- # create a collection of the systematic variations for the given variable
305
- content[f"{subname}_syst"] = zip_forms(
306
- subcontent, f"{name}_syst", record_name="NanoCollection"
311
+ prefix = f"{collection_name}_{subname}_"
312
+ nosys_branch = f"{prefix}NOSYS"
313
+
314
+ if nosys_branch in branch_forms:
315
+ collection_content[subname] = branch_forms[nosys_branch]
316
+ used.add(nosys_branch)
317
+
318
+ # Add non-systematic branches (like eta, phi that don't vary)
319
+ for k, form in branch_forms.items():
320
+ if (
321
+ k.startswith(collection_name + "_")
322
+ and k not in used
323
+ and "_NOSYS" not in k
324
+ and k
325
+ not in systematic_branch_patterns # O(1) lookup instead of O(m*s)
326
+ ):
327
+ field_name = k[len(collection_name) + 1 :]
328
+ if field_name not in collection_content:
329
+ collection_content[field_name] = form
330
+
331
+ if collection_content:
332
+ behavior = self.mixins.get(collection_name, "")
333
+ if not behavior:
334
+ behavior = self.suggested_behavior(collection_name)
335
+ warnings.warn(
336
+ f"I found a collection with no defined mixin: '{collection_name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
337
+ RuntimeWarning,
338
+ stacklevel=2,
307
339
  )
340
+ nominal_collections[collection_name] = zip_forms(
341
+ collection_content, collection_name, record_name=behavior
342
+ )
343
+ nominal_collections[collection_name].setdefault("parameters", {})
344
+ nominal_collections[collection_name]["parameters"].update(
345
+ {"collection_name": collection_name}
346
+ )
308
347
 
309
- content.update(
310
- {
311
- k[len(name) + 1 :]: branch_forms[k]
312
- for k in branch_forms
313
- if k.startswith(name + "_") and k not in used
348
+ # Add nominal collections to output
349
+ output.update(nominal_collections)
350
+
351
+ # Now build systematic event structures
352
+ for systematic in all_systematics:
353
+ if systematic == "NOSYS":
354
+ continue
355
+
356
+ # Check which collections actually have this systematic variation
357
+ systematic_collections = {}
358
+
359
+ for collection_name in collections:
360
+ # Check if this collection has any systematic branches for this systematic
361
+ has_systematic_data = False
362
+ collection_content = {}
363
+ used = set()
364
+
365
+ # Process subcollections with systematic variations
366
+ for subname in subcollections:
367
+ prefix = f"{collection_name}_{subname}_"
368
+ target_branch = f"{prefix}{systematic}"
369
+ fallback_branch = f"{prefix}NOSYS"
370
+
371
+ if target_branch in branch_forms:
372
+ # Use the systematic variation
373
+ collection_content[subname] = branch_forms[target_branch]
374
+ used.add(target_branch)
375
+ has_systematic_data = True
376
+ elif fallback_branch in branch_forms:
377
+ # Fall back to nominal
378
+ collection_content[subname] = branch_forms[fallback_branch]
379
+ used.add(fallback_branch)
380
+
381
+ # Add non-systematic branches
382
+ for k, form in branch_forms.items():
383
+ if (
384
+ k.startswith(collection_name + "_")
385
+ and k not in used
386
+ and "_NOSYS" not in k
387
+ and k
388
+ not in systematic_branch_patterns # O(1) lookup instead of O(m*s)
389
+ ):
390
+ field_name = k[len(collection_name) + 1 :]
391
+ if field_name not in collection_content:
392
+ collection_content[field_name] = form
393
+
394
+ # If this collection has systematic data or fallback data, include it
395
+ if collection_content:
396
+ behavior = self.mixins.get(collection_name, "")
397
+ if not behavior:
398
+ behavior = self.suggested_behavior(collection_name)
399
+ # Only warn once (for nominal collections)
400
+
401
+ # If no systematic data, use the nominal collection directly
402
+ if (
403
+ not has_systematic_data
404
+ and collection_name in nominal_collections
405
+ ):
406
+ systematic_collections[collection_name] = nominal_collections[
407
+ collection_name
408
+ ]
409
+ else:
410
+ # Build the systematic collection
411
+ systematic_collections[collection_name] = zip_forms(
412
+ collection_content, collection_name, record_name=behavior
413
+ )
414
+ systematic_collections[collection_name].setdefault(
415
+ "parameters", {}
416
+ )
417
+ systematic_collections[collection_name]["parameters"].update(
418
+ {"collection_name": collection_name}
419
+ )
420
+
421
+ # Only create systematic event if there are collections for it
422
+ if systematic_collections:
423
+ output[systematic] = {
424
+ "class": "RecordArray",
425
+ "contents": list(systematic_collections.values()),
426
+ "fields": list(systematic_collections.keys()),
427
+ "form_key": f"%21invalid%2C{systematic}",
428
+ "parameters": {
429
+ "__record__": "Systematic",
430
+ "metadata": {"systematic": systematic},
431
+ },
314
432
  }
315
- )
316
433
 
317
- if not used and not content:
434
+ # Handle any remaining unrecognized branches as singletons
435
+ processed_branches = set()
436
+ # Add event IDs and explicit singletons
437
+ processed_branches.update(self.event_ids)
438
+ processed_branches.update(self.singletons)
439
+ # Add collection-related branches
440
+ for collection_name in collections:
441
+ for branch_name in branch_forms:
442
+ if branch_name.startswith(collection_name + "_"):
443
+ processed_branches.add(branch_name)
444
+
445
+ # Find unrecognized branches
446
+ for branch_name, form in branch_forms.items():
447
+ if branch_name not in processed_branches:
448
+ # This is an unrecognized branch - treat as singleton with warning
318
449
  warnings.warn(
319
- f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly. [singleton-undefined]",
450
+ f"I identified a branch that likely does not have any leaves: '{branch_name}'. I will treat this as a 'singleton'. To suppress this warning, add this branch to the singletons set. [singleton-undefined]",
320
451
  RuntimeWarning,
321
452
  stacklevel=2,
322
453
  )
323
- self.singletons.add(name)
324
- output[name] = branch_forms[name]
454
+ output[branch_name] = form
325
455
 
326
- else:
327
- behavior = self.mixins.get(name, "")
328
- if not behavior:
329
- behavior = self.suggested_behavior(name)
330
- warnings.warn(
331
- f"I found a collection with no defined mixin: '{name}'. I will assume behavior: '{behavior}'. To suppress this warning next time, please define mixins for your custom collections. [mixin-undefined]",
332
- RuntimeWarning,
333
- stacklevel=2,
334
- )
456
+ # Return discovered systematics (excluding NOSYS/nominal)
457
+ discovered_systematics = sorted([s for s in all_systematics if s != "NOSYS"])
335
458
 
336
- output[name] = zip_forms(content, name, record_name=behavior)
337
-
338
- output[name].setdefault("parameters", {})
339
- output[name]["parameters"].update({"collection_name": name})
340
-
341
- if output[name]["class"] == "ListOffsetArray":
342
- if output[name]["class"] == "RecordArray":
343
- parameters = output[name]["content"]["fields"]
344
- contents = output[name]["content"]["contents"]
345
- else:
346
- # these are also singletons of another kind that we just pass through
347
- continue
348
- elif output[name]["class"] == "RecordArray":
349
- parameters = output[name]["fields"]
350
- contents = output[name]["contents"]
351
- elif output[name]["class"] == "NumpyArray":
352
- # these are singletons that we just pass through
353
- continue
354
- else:
355
- msg = f"Unhandled class {output[name]['class']}"
356
- raise RuntimeError(msg)
459
+ return output.keys(), output.values(), discovered_systematics
357
460
 
358
- # update docstrings as needed
359
- # NB: must be before flattening for easier logic
360
- for index, parameter in enumerate(parameters):
361
- if "parameters" not in contents[index]:
362
- continue
363
-
364
- parsed_name = parameter.replace("_NOSYS", "")
365
- contents[index]["parameters"]["__doc__"] = self.docstrings.get(
366
- parsed_name,
367
- contents[index]["parameters"].get(
368
- "__doc__", "no docstring available"
369
- ),
370
- )
461
+ def _discover_systematics(
462
+ self,
463
+ branch_forms: dict[str, Any],
464
+ collections: set[str],
465
+ subcollections: set[str],
466
+ ) -> set[str]:
467
+ """Extract systematic variations from branch names.
468
+
469
+ Returns:
470
+ set: All systematic variation names found in the branch names, plus "NOSYS" (always included)
471
+ """
472
+ # Precompute the "<subcollection>_" prefixes once so they are not rebuilt
473
+ # for every branch; each branch is still checked against every prefix below
474
+ subcoll_patterns = {f"{subcoll}_" for subcoll in subcollections}
475
+
476
+ all_systematics = set()
477
+ for k in branch_forms:
478
+ if not ("_" in k and k not in self.singletons):
479
+ continue
480
+ # Handle the pattern: collection_subcollection_systematic
481
+ # where systematic can contain double underscores like "JET_EnergyResolution__1up"
482
+ parts = k.split("_")
483
+ if len(parts) < 3:
484
+ continue
485
+ # Find the collection and subcollection parts
486
+ collection = parts[0]
487
+ if collection not in collections:
488
+ continue
489
+ # Find where the subcollection ends by looking for a known pattern
490
+ # The systematic starts after the subcollection
491
+ remaining = "_".join(parts[1:])
492
+ # Check the remainder against each known "<subcollection>_" prefix
493
+ for pattern in subcoll_patterns:
494
+ if remaining.startswith(pattern):
495
+ systematic = remaining[len(pattern) :]
496
+ if systematic and systematic != "NOSYS":
497
+ all_systematics.add(systematic)
498
+ break
371
499
 
372
- return output.keys(), output.values()
500
+ # Always include NOSYS as the nominal case
501
+ all_systematics.add("NOSYS")
502
+ return all_systematics
373
503
 
374
504
  @classmethod
375
505
  def behavior(cls) -> Behavior:
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlas-schema
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Helper python package for ATLAS Common NTuple Analysis work.
5
5
  Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
6
6
  Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
7
7
  Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
8
- Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.3.0/
8
+ Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.4.0/
9
9
  Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
10
10
  Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
11
11
  Author-email: Giordon Stark <kratsg@gmail.com>
@@ -251,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
251
251
  Requires-Dist: twine; extra == 'test'
252
252
  Description-Content-Type: text/markdown
253
253
 
254
- # atlas-schema v0.3.0
254
+ # atlas-schema v0.4.0
255
255
 
256
256
  [![Actions Status][actions-badge]][actions-link]
257
257
  [![Documentation Status][rtd-badge]][rtd-link]
@@ -399,6 +399,57 @@ which produces
399
399
 
400
400
  <img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
401
401
 
402
+ ## Processing with Systematic Variations
403
+
404
+ For analyses requiring systematic uncertainty evaluation, you can iterate over
405
+ all systematic variations with the new `systematic_names` property and select
406
+ each one via `events[variation]`, including the nominal `events["NOSYS"]` alias:
407
+
408
+ ```python
409
+ import awkward as ak
410
+ from hist import Hist
411
+ from coffea import processor
412
+ from atlas_schema.schema import NtupleSchema
413
+
414
+
415
+ class SystematicsProcessor(processor.ProcessorABC):
416
+ def __init__(self):
417
+ self.h = (
418
+ Hist.new.StrCat([], name="variation", growth=True)
419
+ .Regular(50, 0.0, 500.0, name="jet_pt", label="Leading Jet $p_T$ [GeV]")
420
+ .Int64()
421
+ )
422
+
423
+ def process(self, events):
424
+ dsid = events.metadata["dataset"]
425
+
426
+ # Process all systematic variations including nominal ("NOSYS")
427
+ for variation in events.systematic_names:
428
+ event_view = events[variation]
429
+
430
+ # Fill histogram with leading jet pT for this systematic variation
431
+ leading_jet_pt = event_view.jet.pt[:, 0] / 1_000 # Convert MeV to GeV
432
+ weights = (
433
+ event_view.weight.mc
434
+ if hasattr(event_view, "weight")
435
+ else ak.ones_like(leading_jet_pt)
436
+ )
437
+
438
+ self.h.fill(variation=variation, jet_pt=leading_jet_pt, weight=weights)
439
+
440
+ return {
441
+ "hist": self.h,
442
+ "meta": {"sumw": {dsid: {(events.metadata["fileuuid"], ak.sum(weights))}}},
443
+ }
444
+
445
+ def postprocess(self, accumulator):
446
+ return accumulator
447
+ ```
448
+
449
+ This approach allows you to seamlessly process both nominal and systematic
450
+ variations in a single loop, eliminating the need for special-case handling of
451
+ the nominal variation.
452
+
402
453
  <!-- SPHINX-END -->
403
454
 
404
455
  ## Developer Notes
@@ -0,0 +1,13 @@
1
+ atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
+ atlas_schema/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
3
+ atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
+ atlas_schema/enums.py,sha256=GDDKSBZY-L8X5W41Kwi0G5Yd4Vu4Kiga-ttSbztEXEM,3687
5
+ atlas_schema/methods.py,sha256=rQRQgD26ndCzwpxAuAeEbXIHd8v64cK2rP5A5GxvBn8,12934
6
+ atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ atlas_schema/schema.py,sha256=TbNxekA0DRwS5Mye0frHo7p1K7LW7FBXrkEwmPUQ8MA,27884
8
+ atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
9
+ atlas_schema/utils.py,sha256=E3jCka-pf_0h_r3OO0hMLlbF6dQKoxr2T1Gd18-aJ4U,2034
10
+ atlas_schema-0.4.0.dist-info/METADATA,sha256=9d1QN2OaZ0i68ZDAR3GBKUNw9Lm4l1nAJycCb9jSmqg,21755
11
+ atlas_schema-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ atlas_schema-0.4.0.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
+ atlas_schema-0.4.0.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- atlas_schema/__init__.py,sha256=ebY-rTiwSGnfvt1yWATze2GE7K3fVgJj6fT64Sl4sH8,469
2
- atlas_schema/_version.py,sha256=AGmG_Lx0-9ztFw_7d9mYbaYuC-2abxE1oXOUNAY29YY,511
3
- atlas_schema/_version.pyi,sha256=j5kbzfm6lOn8BzASXWjGIA1yT0OlHTWqlbyZ8Si_o0E,118
4
- atlas_schema/enums.py,sha256=hwgOvFBmITNxL0MQkrNpbiPv9VMezFoE-eyGgjzem8E,3688
5
- atlas_schema/methods.py,sha256=DPeEFofeD5_bCk7V3KudJaE_sAUMpBIh-gPnM4kWDe8,7124
6
- atlas_schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- atlas_schema/schema.py,sha256=44i7ri-7OP4SK6_D_3JAGvoiIY-IcPBp1p6MlKfWK5I,21675
8
- atlas_schema/typing_compat.py,sha256=3G8h4WfLoDmrtWZvtYKLCwEpCQ_O4Fwygb2WlDRSE4E,488
9
- atlas_schema/utils.py,sha256=E3jCka-pf_0h_r3OO0hMLlbF6dQKoxr2T1Gd18-aJ4U,2034
10
- atlas_schema-0.3.0.dist-info/METADATA,sha256=NCkA4ydLhlTHYJOTipTgZlR7_yhDsDe7zpa0jbnqw00,20069
11
- atlas_schema-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- atlas_schema-0.3.0.dist-info/licenses/LICENSE,sha256=snem82NV8fgAi4DKaaUIfReaM5RqIWbH5OOXOvy40_w,11344
13
- atlas_schema-0.3.0.dist-info/RECORD,,