atlas-schema 0.2.1__tar.gz → 0.2.3__tar.gz
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/PKG-INFO +128 -3
- atlas_schema-0.2.3/README.md +160 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/pyproject.toml +20 -3
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/__init__.py +2 -1
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/_version.py +2 -2
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/enums.py +26 -4
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/schema.py +17 -9
- atlas_schema-0.2.3/src/atlas_schema/utils.py +49 -0
- atlas_schema-0.2.1/README.md +0 -37
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/.gitignore +0 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/LICENSE +0 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/_version.pyi +0 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/methods.py +0 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/py.typed +0 -0
- {atlas_schema-0.2.1 → atlas_schema-0.2.3}/src/atlas_schema/typing_compat.py +0 -0
````diff
--- atlas_schema-0.2.1/PKG-INFO
+++ atlas_schema-0.2.3/PKG-INFO
@@ -1,11 +1,13 @@
 Metadata-Version: 2.4
 Name: atlas-schema
-Version: 0.2.1
+Version: 0.2.3
 Summary: Helper python package for ATLAS Common NTuple Analysis work.
 Project-URL: Homepage, https://github.com/scipp-atlas/atlas-schema
 Project-URL: Bug Tracker, https://github.com/scipp-atlas/atlas-schema/issues
 Project-URL: Discussions, https://github.com/scipp-atlas/atlas-schema/discussions
-Project-URL:
+Project-URL: Documentation, https://atlas-schema.readthedocs.io/en/v0.2.3/
+Project-URL: Releases, https://github.com/scipp-atlas/atlas-schema/releases
+Project-URL: Release Notes, https://atlas-schema.readthedocs.io/en/latest/history.html
 Author-email: Giordon Stark <kratsg@gmail.com>
 License:
         Apache License
@@ -249,7 +251,7 @@ Requires-Dist: tbump>=6.7.0; extra == 'test'
 Requires-Dist: twine; extra == 'test'
 Description-Content-Type: text/markdown
 
-# atlas-schema v0.2.1
+# atlas-schema v0.2.3
 
 [![Actions Status][actions-badge]][actions-link]
 [![Documentation Status][rtd-badge]][rtd-link]
@@ -277,6 +279,129 @@ Description-Content-Type: text/markdown
 
 <!-- prettier-ignore-end -->
 
+This is the python package containing schemas and helper functions enabling
+analyzers to work with ATLAS datasets (Monte Carlo and Data), using
+[coffea](https://coffea-hep.readthedocs.io/en/latest/).
+
+## Hello World
+
+The simplest example is to just get started processing the file as expected:
+
+```python
+from atlas_schema.schema import NtupleSchema
+from coffea import dataset_tools
+import awkward as ak
+
+fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
+samples, report = dataset_tools.preprocess(fileset)
+
+
+def noop(events):
+    return ak.fields(events)
+
+
+fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
+print(fields)
+```
+
+which produces something similar to
+
+```python
+{
+    "ttbar": [
+        "dataTakingYear",
+        "mcChannelNumber",
+        "runNumber",
+        "eventNumber",
+        "lumiBlock",
+        "actualInteractionsPerCrossing",
+        "averageInteractionsPerCrossing",
+        "truthjet",
+        "PileupWeight",
+        "RandomRunNumber",
+        "met",
+        "recojet",
+        "truth",
+        "generatorWeight",
+        "beamSpotWeight",
+        "trigPassed",
+        "jvt",
+    ]
+}
+```
+
+However, a more involved example to apply a selection and fill a histogram looks
+like below:
+
+```python
+import awkward as ak
+import dask
+import hist.dask as had
+import matplotlib.pyplot as plt
+from coffea import processor
+from coffea.nanoevents import NanoEventsFactory
+from distributed import Client
+
+from atlas_schema.schema import NtupleSchema
+
+
+class MyFirstProcessor(processor.ProcessorABC):
+    def __init__(self):
+        pass
+
+    def process(self, events):
+        dataset = events.metadata["dataset"]
+        h_ph_pt = (
+            had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
+            .Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
+            .Int64()
+        )
+
+        cut = ak.all(events.ph.isEM, axis=1)
+        h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
+        h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
+        h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))
+
+        return {
+            dataset: {
+                "entries": ak.num(events, axis=0),
+                "ph_pt": h_ph_pt,
+            }
+        }
+
+    def postprocess(self, accumulator):
+        pass
+
+
+if __name__ == "__main__":
+    client = Client()
+
+    fname = "ntuple.root"
+    events = NanoEventsFactory.from_root(
+        {fname: "analysis"},
+        schemaclass=NtupleSchema,
+        metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
+    ).events()
+
+    p = MyFirstProcessor()
+    out = p.process(events)
+    (computed,) = dask.compute(out)
+    print(computed)
+
+    fig, ax = plt.subplots()
+    computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
+    ax.set_xscale("log")
+    ax.legend(title="Photon pT for Zqqgamma")
+
+    fig.savefig("ph_pt.pdf")
+```
+
+which produces
+
+<img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
+
+<!-- SPHINX-END -->
+
 ## Developer Notes
 
 ### Converting Enums from C++ to Python
````
````diff
--- /dev/null
+++ atlas_schema-0.2.3/README.md
@@ -0,0 +1,160 @@
+# atlas-schema v0.2.3
+
+[![Actions Status][actions-badge]][actions-link]
+[![Documentation Status][rtd-badge]][rtd-link]
+
+[![PyPI version][pypi-version]][pypi-link]
+[![Conda-Forge][conda-badge]][conda-link]
+[![PyPI platforms][pypi-platforms]][pypi-link]
+
+[![GitHub Discussion][github-discussions-badge]][github-discussions-link]
+
+<!-- SPHINX-START -->
+
+<!-- prettier-ignore-start -->
+[actions-badge]: https://github.com/scipp-atlas/atlas-schema/workflows/CI/badge.svg
+[actions-link]: https://github.com/scipp-atlas/atlas-schema/actions
+[conda-badge]: https://img.shields.io/conda/vn/conda-forge/atlas-schema
+[conda-link]: https://github.com/conda-forge/atlas-schema-feedstock
+[github-discussions-badge]: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github
+[github-discussions-link]: https://github.com/scipp-atlas/atlas-schema/discussions
+[pypi-link]: https://pypi.org/project/atlas-schema/
+[pypi-platforms]: https://img.shields.io/pypi/pyversions/atlas-schema
+[pypi-version]: https://img.shields.io/pypi/v/atlas-schema
+[rtd-badge]: https://readthedocs.org/projects/atlas-schema/badge/?version=latest
+[rtd-link]: https://atlas-schema.readthedocs.io/en/latest/?badge=latest
+
+<!-- prettier-ignore-end -->
+
+This is the python package containing schemas and helper functions enabling
+analyzers to work with ATLAS datasets (Monte Carlo and Data), using
+[coffea](https://coffea-hep.readthedocs.io/en/latest/).
+
+## Hello World
+
+The simplest example is to just get started processing the file as expected:
+
+```python
+from atlas_schema.schema import NtupleSchema
+from coffea import dataset_tools
+import awkward as ak
+
+fileset = {"ttbar": {"files": {"path/to/ttbar.root": "tree_name"}}}
+samples, report = dataset_tools.preprocess(fileset)
+
+
+def noop(events):
+    return ak.fields(events)
+
+
+fields = dataset_tools.apply_to_fileset(noop, samples, schemaclass=NtupleSchema)
+print(fields)
+```
+
+which produces something similar to
+
+```python
+{
+    "ttbar": [
+        "dataTakingYear",
+        "mcChannelNumber",
+        "runNumber",
+        "eventNumber",
+        "lumiBlock",
+        "actualInteractionsPerCrossing",
+        "averageInteractionsPerCrossing",
+        "truthjet",
+        "PileupWeight",
+        "RandomRunNumber",
+        "met",
+        "recojet",
+        "truth",
+        "generatorWeight",
+        "beamSpotWeight",
+        "trigPassed",
+        "jvt",
+    ]
+}
+```
+
+However, a more involved example to apply a selection and fill a histogram looks
+like below:
+
+```python
+import awkward as ak
+import dask
+import hist.dask as had
+import matplotlib.pyplot as plt
+from coffea import processor
+from coffea.nanoevents import NanoEventsFactory
+from distributed import Client
+
+from atlas_schema.schema import NtupleSchema
+
+
+class MyFirstProcessor(processor.ProcessorABC):
+    def __init__(self):
+        pass
+
+    def process(self, events):
+        dataset = events.metadata["dataset"]
+        h_ph_pt = (
+            had.Hist.new.StrCat(["all", "pass", "fail"], name="isEM")
+            .Regular(200, 0.0, 2000.0, name="pt", label="$pt_{\gamma}$ [GeV]")
+            .Int64()
+        )
+
+        cut = ak.all(events.ph.isEM, axis=1)
+        h_ph_pt.fill(isEM="all", pt=ak.firsts(events.ph.pt / 1.0e3))
+        h_ph_pt.fill(isEM="pass", pt=ak.firsts(events[cut].ph.pt / 1.0e3))
+        h_ph_pt.fill(isEM="fail", pt=ak.firsts(events[~cut].ph.pt / 1.0e3))
+
+        return {
+            dataset: {
+                "entries": ak.num(events, axis=0),
+                "ph_pt": h_ph_pt,
+            }
+        }
+
+    def postprocess(self, accumulator):
+        pass
+
+
+if __name__ == "__main__":
+    client = Client()
+
+    fname = "ntuple.root"
+    events = NanoEventsFactory.from_root(
+        {fname: "analysis"},
+        schemaclass=NtupleSchema,
+        metadata={"dataset": "700352.Zqqgamma.mc20d.v1"},
+    ).events()
+
+    p = MyFirstProcessor()
+    out = p.process(events)
+    (computed,) = dask.compute(out)
+    print(computed)
+
+    fig, ax = plt.subplots()
+    computed["700352.Zqqgamma.mc20d.v1"]["ph_pt"].plot1d(ax=ax)
+    ax.set_xscale("log")
+    ax.legend(title="Photon pT for Zqqgamma")
+
+    fig.savefig("ph_pt.pdf")
+```
+
+which produces
+
+<img src="https://raw.githubusercontent.com/scipp-atlas/atlas-schema/main/docs/_static/img/ph_pt.png" alt="three stacked histograms of photon pT, with each stack corresponding to: no selection, requiring the isEM flag, and inverting the isEM requirement" width="500" style="display: block; margin-left: auto; margin-right: auto;">
+
+<!-- SPHINX-END -->
+
+## Developer Notes
+
+### Converting Enums from C++ to Python
+
+This useful `vim` substitution helps:
+
+```
+%s/ \([A-Za-z]\+\)\s\+= \(\d\+\),\?/ \1: Annotated[int, "\1"] = \2
+```
````
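The `vim` substitution at the end of the new README rewrites a C++ `TruthClasses.h`-style enum entry into the `Annotated` member form used in `enums.py`. Purely as an illustration, here is the same rewrite expressed with Python's `re` module; the C++ input line is hypothetical, with its value borrowed from the `ParticleOrigin` hunk further down:

```python
import re

# hypothetical C++ enum entry (ParticleOrigin's QCD = 45 appears in the enums.py diff below)
cpp_line = "  QCD            = 45,"

# Python translation of the vim pattern above
py_line = re.sub(
    r" ([A-Za-z]+)\s+= (\d+),?",
    r' \1: Annotated[int, "\1"] = \2',
    cpp_line,
)
print(py_line)  # prints:   QCD: Annotated[int, "QCD"] = 45
```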
````diff
--- atlas_schema-0.2.1/pyproject.toml
+++ atlas_schema-0.2.3/pyproject.toml
@@ -60,7 +60,9 @@ docs = [
 Homepage = "https://github.com/scipp-atlas/atlas-schema"
 "Bug Tracker" = "https://github.com/scipp-atlas/atlas-schema/issues"
 Discussions = "https://github.com/scipp-atlas/atlas-schema/discussions"
-
+Documentation = "https://atlas-schema.readthedocs.io/en/v0.2.3/"
+Releases = "https://github.com/scipp-atlas/atlas-schema/releases"
+"Release Notes" = "https://atlas-schema.readthedocs.io/en/latest/history.html"
 
 
 [tool.hatch]
@@ -94,17 +96,30 @@ packages = ["src/atlas_schema"]
 
 [tool.pytest.ini_options]
 minversion = "6.0"
-addopts = [
+addopts = [
+  "-ra",
+  "--showlocals",
+  "--strict-markers",
+  "--strict-config",
+  "--doctest-modules",
+  "--doctest-glob=*.rst",
+  "--cov",
+  "--cov-report=xml",
+  "--cov-report=term",
+  "--durations=20",
+  "--ignore=docs/conf.py",
+]
 xfail_strict = true
 filterwarnings = [
   "error",
 ]
 log_cli_level = "INFO"
 testpaths = [
+  "src",
   "tests",
+  "docs",
 ]
 
-
 [tool.coverage]
 run.source = ["atlas_schema"]
 report.exclude_also = [
@@ -113,6 +128,7 @@ report.exclude_also = [
 ]
 
 [tool.mypy]
+mypy_path = ["src"]
 files = ["src", "tests"]
 python_version = "3.9"
 warn_unused_configs = true
@@ -133,6 +149,7 @@ disallow_incomplete_defs = true
 module = [
   'awkward.*',
   'coffea.*',
+  'dask_awkward.*',
 ]
 ignore_missing_imports = true
 
````
````diff
--- atlas_schema-0.2.1/src/atlas_schema/__init__.py
+++ atlas_schema-0.2.3/src/atlas_schema/__init__.py
@@ -10,7 +10,8 @@ import warnings
 
 from atlas_schema._version import version as __version__
 from atlas_schema.enums import ParticleOrigin, PhotonID
+from atlas_schema.utils import isin
 
 warnings.filterwarnings("ignore", module="coffea.*")
 
-__all__ = ["ParticleOrigin", "PhotonID", "__version__"]
+__all__ = ["ParticleOrigin", "PhotonID", "__version__", "isin"]
````
````diff
--- atlas_schema-0.2.1/src/atlas_schema/enums.py
+++ atlas_schema-0.2.3/src/atlas_schema/enums.py
@@ -1,9 +1,31 @@
 from __future__ import annotations
 
-from enum import IntEnum
+import sys
+from enum import Enum, IntEnum
 
+if sys.version_info >= (3, 11):
+    from enum import EnumType
+else:
+    from enum import EnumMeta as EnumType
 
-class ParticleType(IntEnum):
+from typing import Callable, TypeVar, cast
+
+_E = TypeVar("_E", bound=Enum)
+
+
+class MultipleEnumAccessMeta(EnumType):
+    """
+    Enum Metaclass to provide a way to access multiple values all at once.
+    """
+
+    def __getitem__(self: type[_E], key: str | tuple[str]) -> _E | list[_E]:  # type:ignore[misc,override]
+        getitem = cast(Callable[[str], _E], super().__getitem__)  # type:ignore[misc]
+        if isinstance(key, tuple):
+            return [getitem(name) for name in key]
+        return getitem(key)
+
+
+class ParticleType(IntEnum, metaclass=MultipleEnumAccessMeta):
     """
     Taken from `ATLAS Truth Utilities for ParticleType <https://gitlab.cern.ch/atlas/athena/-/blob/74f43ff0910edb2a2bd3778880ccbdad648dc037/Generators/TruthUtils/TruthUtils/TruthClasses.h#L8-49>`_.
     """
@@ -50,7 +72,7 @@ class ParticleType(IntEnum):
     UnknownJet = 38
 
 
-class ParticleOrigin(IntEnum):
+class ParticleOrigin(IntEnum, metaclass=MultipleEnumAccessMeta):
     """
     Taken from `ATLAS Truth Utilities for ParticleOrigin <https://gitlab.cern.ch/atlas/athena/-/blob/74f43ff0910edb2a2bd3778880ccbdad648dc037/Generators/TruthUtils/TruthUtils/TruthClasses.h#L51-103>`_.
     """
@@ -105,7 +127,7 @@ class ParticleOrigin(IntEnum):
     QCD = 45
 
 
-class PhotonID(IntEnum):
+class PhotonID(IntEnum, metaclass=MultipleEnumAccessMeta):
     """
     Taken from the `EGamma Identification CP group's twiki <https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/EGammaIdentificationRun2#Photon_isEM_word>`_.
     """
````
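The `MultipleEnumAccessMeta` metaclass added above lets an enum class be indexed with a tuple of names, returning the corresponding members as a list, while single-name lookup behaves as before. A minimal sketch with a throwaway enum (the member names below are made up for illustration; in the package the metaclass is applied to `ParticleType`, `ParticleOrigin`, and `PhotonID`):

```python
from enum import IntEnum

from atlas_schema.enums import MultipleEnumAccessMeta


class Flavour(IntEnum, metaclass=MultipleEnumAccessMeta):
    # illustrative members only, not values from the package
    UP = 1
    DOWN = 2
    STRANGE = 3


# single-name lookup is unchanged
assert Flavour["UP"] is Flavour.UP

# a tuple of names returns a list of members
assert Flavour["UP", "STRANGE"] == [Flavour.UP, Flavour.STRANGE]
```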
````diff
--- atlas_schema-0.2.1/src/atlas_schema/schema.py
+++ atlas_schema-0.2.3/src/atlas_schema/schema.py
@@ -49,7 +49,7 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
     }
 
     # These are stored as length-1 vectors unnecessarily
-    singletons: ClassVar[
+    singletons: ClassVar[set[str]] = set()
 
     docstrings: ClassVar[dict[str, str]] = {
         "charge": "charge",
@@ -127,8 +127,8 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
 
         output = {}
 
-        # first, register
-        for name in self.event_ids:
+        # first, register singletons (event-level, others)
+        for name in {*self.event_ids, *self.singletons}:
             if name in missing_event_ids:
                 continue
             output[name] = branch_forms[name]
@@ -163,7 +163,17 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
                 }
            )
 
-            output[name] = zip_forms(content, name, record_name=mixin)
+            if not used and not content:
+                warnings.warn(
+                    f"I identified a branch that likely does not have any leaves: '{name}'. I will treat this as a 'singleton'. To suppress this warning next time, please define your singletons explicitly.",
+                    RuntimeWarning,
+                    stacklevel=2,
+                )
+                self.singletons.add(name)
+                output[name] = branch_forms[name]
+
+            else:
+                output[name] = zip_forms(content, name, record_name=mixin)
 
             output[name].setdefault("parameters", {})
             output[name]["parameters"].update({"collection_name": name})
@@ -174,6 +184,9 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
             elif output[name]["class"] == "RecordArray":
                 parameters = output[name]["fields"]
                 contents = output[name]["contents"]
+            elif output[name]["class"] == "NumpyArray":
+                # these are singletons that we just pass through
+                continue
             else:
                 msg = f"Unhandled class {output[name]['class']}"
                 raise RuntimeError(msg)
@@ -191,11 +204,6 @@ class NtupleSchema(BaseSchema): # type: ignore[misc]
                 ),
             )
 
-            if name in self.singletons:
-                # flatten! this 'promotes' the content of an inner dimension
-                # upwards, effectively hiding one nested dimension
-                output[name] = output[name]["content"]
-
         return output.keys(), output.values()
 
     @classmethod
````
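With this change `NtupleSchema.singletons` starts out empty, leaf-less branches are detected at runtime and registered with the `RuntimeWarning` shown above, and their `NumpyArray` forms are passed through untouched. The warning asks you to declare singletons explicitly; one way to do that, sketched here with hypothetical branch names, is to subclass the schema and pass the subclass wherever `schemaclass=NtupleSchema` appears in the README examples:

```python
from atlas_schema.schema import NtupleSchema


class MyNtupleSchema(NtupleSchema):
    # hypothetical branch names: list whatever leaf-less branches the
    # RuntimeWarning reports for your own ntuples
    singletons = {"someCounter", "someFlag"}
```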
````diff
--- /dev/null
+++ atlas_schema-0.2.3/src/atlas_schema/utils.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import TypeVar, Union, cast
+
+import awkward as ak
+import dask_awkward as dak
+
+Array = TypeVar("Array", bound=Union[dak.Array, ak.Array])
+_E = TypeVar("_E", bound=Enum)
+
+
+def isin(element: Array, test_elements: dak.Array | ak.Array, axis: int = -1) -> Array:
+    """
+    Find test_elements in element. Similar in API as :func:`numpy.isin`.
+
+    Calculates `element in test_elements`, broadcasting over *element elements only*. Returns a boolean array of the same shape as *element* that is `True` where an element of *element* is in *test_elements* and `False` otherwise.
+
+    This works by first transforming *test_elements* to an array with one more
+    dimension than the *element*, placing the *test_elements* at *axis*, and then doing a
+    comparison.
+
+    Args:
+        element (dak.Array or ak.Array): input array of values.
+        test_elements (dak.Array or ak.Array): one-dimensional set of values against which to test each value of *element*.
+        axis (int): the axis along which the comparison is performed
+
+    Returns:
+        dak.Array or ak.Array: result of comparison for test_elements in *element*
+
+    Example:
+        >>> import awkward as ak
+        >>> import atlas_schema as ats
+        >>> truth_origins = ak.Array([[1, 2, 3], [4], [5, 6, 7], [1]])
+        >>> prompt_origins = ak.Array([1, 2, 7])
+        >>> ats.isin(truth_origins, prompt_origins).to_list()
+        [[True, True, False], [False], [False, False, True], [True]]
+    """
+    assert test_elements.ndim == 1, "test_elements must be one-dimensional"
+    assert axis >= -1, "axis must be -1 or positive-valued"
+    assert axis < element.ndim + 1, "axis too large for the element"
+
+    # First, build up the transformation, with slice(None) indicating where to stick the test_elements
+    reshaper: list[None | slice] = [None] * element.ndim
+    axis = element.ndim if axis == -1 else axis
+    reshaper.insert(axis, slice(None))
+
+    # Note: reshaper needs to be a tuple for indexing purposes
+    return cast(Array, ak.any(element == test_elements[tuple(reshaper)], axis=-1))
````
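For the doctest inputs above, the reshaping described in the docstring reduces to a plain broadcast comparison. A small eager sketch of what `isin` computes for that case, using only the arrays from the doctest:

```python
import awkward as ak

truth_origins = ak.Array([[1, 2, 3], [4], [5, 6, 7], [1]])
prompt_origins = ak.Array([1, 2, 7])

# for a 2-dimensional element and axis=-1, reshaper becomes (None, None, slice(None)):
# the 1-d test_elements gain an extra trailing axis, the comparison broadcasts,
# and ak.any collapses that axis again
manual = ak.any(truth_origins == prompt_origins[None, None, :], axis=-1)
print(manual.to_list())
# [[True, True, False], [False], [False, False, True], [True]]
```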
````diff
--- atlas_schema-0.2.1/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# atlas-schema v0.2.1
-
-[![Actions Status][actions-badge]][actions-link]
-[![Documentation Status][rtd-badge]][rtd-link]
-
-[![PyPI version][pypi-version]][pypi-link]
-[![Conda-Forge][conda-badge]][conda-link]
-[![PyPI platforms][pypi-platforms]][pypi-link]
-
-[![GitHub Discussion][github-discussions-badge]][github-discussions-link]
-
-<!-- SPHINX-START -->
-
-<!-- prettier-ignore-start -->
-[actions-badge]: https://github.com/scipp-atlas/atlas-schema/workflows/CI/badge.svg
-[actions-link]: https://github.com/scipp-atlas/atlas-schema/actions
-[conda-badge]: https://img.shields.io/conda/vn/conda-forge/atlas-schema
-[conda-link]: https://github.com/conda-forge/atlas-schema-feedstock
-[github-discussions-badge]: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github
-[github-discussions-link]: https://github.com/scipp-atlas/atlas-schema/discussions
-[pypi-link]: https://pypi.org/project/atlas-schema/
-[pypi-platforms]: https://img.shields.io/pypi/pyversions/atlas-schema
-[pypi-version]: https://img.shields.io/pypi/v/atlas-schema
-[rtd-badge]: https://readthedocs.org/projects/atlas-schema/badge/?version=latest
-[rtd-link]: https://atlas-schema.readthedocs.io/en/latest/?badge=latest
-
-<!-- prettier-ignore-end -->
-
-## Developer Notes
-
-### Converting Enums from C++ to Python
-
-This useful `vim` substitution helps:
-
-```
-%s/ \([A-Za-z]\+\)\s\+= \(\d\+\),\?/ \1: Annotated[int, "\1"] = \2
-```
````
The remaining files (`.gitignore`, `LICENSE`, `src/atlas_schema/_version.pyi`, `src/atlas_schema/methods.py`, `src/atlas_schema/py.typed`, `src/atlas_schema/typing_compat.py`) are unchanged between the two versions.