func-adl-local 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ .venv
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: func_adl_local
3
+ Version: 0.1.0
4
+ Summary: Run FuncADL queries on local files without ServiceX
5
+ Project-URL: Homepage, https://github.com/RogerJanusiak/func_adl_local
6
+ Author-email: Roger Janusiak <rogerjanusiak@gmail.com>
7
+ License: MIT
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Physics
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: func-adl-servicex-xaodr21
20
+ Requires-Dist: func-adl-servicex-xaodr22
21
+ Requires-Dist: func-adl-servicex-xaodr25
22
+ Requires-Dist: func-adl-uproot
23
+ Requires-Dist: func-adl>=3.2
24
+ Requires-Dist: jinja2
25
+ Requires-Dist: servicex-analysis-utils
26
+ Requires-Dist: servicex-local
27
+ Provides-Extra: test
28
+ Requires-Dist: pytest; extra == 'test'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # func_adl_local
32
+
33
+ Run [FuncADL](https://github.com/iris-hep/func_adl) queries on local files without ServiceX.
34
+
35
+ [FuncADL](https://github.com/iris-hep/func_adl) (Functional Analysis Description Language) is a declarative query language for HEP data analysis. Normally it is used with [ServiceX](https://github.com/ssl-hep/ServiceX) to query remote datasets. `func_adl_local` brings that same interface to files on your local machine — no ServiceX deployment required.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install func_adl_local
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ### xAOD files
46
+
47
+ Use `xAODConfig` to configure the ATLAS release and runtime platform, then call `get_data` with your dataset path and FuncADL query:
48
+
49
+ ```python
50
+ from func_adl_local import xAODConfig, get_data
51
+
52
+ config = xAODConfig(
53
+ release=22, # ATLAS release: 21, 22, or 25
54
+ platform="docker", # "docker", "singularity", or "wsl2"
55
+ )
56
+
57
+ # Build a query using the release-appropriate FuncADL dataset type
58
+ query = config.FuncADLQueryPHYS().SelectMany(lambda e: e.Jets("AntiKt4EMTopoJets")).Select(
59
+ lambda j: {"pt": j.pt(), "eta": j.eta()}
60
+ )
61
+
62
+ result = get_data("path/to/file.root", query, config)
63
+ ```
64
+
65
+ To get the result as an [awkward-array](https://awkward-array.org/), set `awk=True`:
66
+
67
+ ```python
68
+ config = xAODConfig(release=22, awk=True)
69
+ result = get_data("path/to/file.root", query, config)
70
+ # result is now an awkward array
71
+ ```
72
+
73
+ **Available query types:**
74
+
75
+ | Method | Releases |
76
+ |---|---|
77
+ | `config.FuncADLQueryPHYS()` | 21, 22, 25 |
78
+ | `config.FuncADLQueryPHYSLITE()` | 22, 25 |
79
+
80
+ **Platforms:**
81
+
82
+ | Value | Description |
83
+ |---|---|
84
+ | `"docker"` | Run the transformer in Docker (default) |
85
+ | `"singularity"` | Run in Singularity/Apptainer |
86
+ | `"wsl2"` | Run in WSL2 |
87
+
88
+ **Docker image versions:**
89
+
90
+ `xAODConfig` automatically selects the latest image for the chosen release. You can inspect available versions or pin to a specific one:
91
+
92
+ ```python
93
+ config = xAODConfig(release=22)
94
+ print(config.available_versions) # all available tags
95
+ print(config.latest_r22_version) # e.g. "22.2.110"
96
+
97
+ config_pinned = xAODConfig(release=22, version="22.2.107")
98
+ ```
99
+
100
+ ### Uproot / columnar files
101
+
102
+ For ROOT files and other columnar formats, use the re-exported `UprootDataset` from [func_adl_uproot](https://github.com/iris-hep/func_adl_uproot):
103
+
104
+ ```python
105
+ from func_adl_local import UprootDataset
106
+
107
+ ds = UprootDataset("path/to/file.root", "treename")
108
+ result = ds.Select(lambda e: {"pt": e["pt"]}).AsAwkwardArray().value()
109
+ ```
110
+
111
+ ## API Reference
112
+
113
+ ### `xAODConfig`
114
+
115
+ ```python
116
+ @dataclass
117
+ class xAODConfig:
118
+ release: int = 21 # ATLAS release number: 21, 22, or 25
119
+ version: str = "latest" # Docker image tag, or "latest" to auto-select
120
+ platform: str = "docker" # "docker", "singularity", or "wsl2"
121
+ ignore_cache: bool = False # Bypass the local ServiceX cache
122
+ awk: bool = False # Return results as awkward arrays
123
+ ```
124
+
125
+ ### `get_data(ds_name, query, config)`
126
+
127
+ Runs a FuncADL query against a local xAOD file.
128
+
129
+ - `ds_name` — path to the local dataset
130
+ - `query` — a FuncADL `ObjectStream` built from `config.FuncADLQueryPHYS()` or `config.FuncADLQueryPHYSLITE()`
131
+ - `config` — an `xAODConfig` instance
132
+
133
+ Returns a dict of arrays (or an awkward array if `config.awk=True`).
134
+
135
+ ## Related Projects
136
+
137
+ - [func_adl](https://github.com/iris-hep/func_adl) — core FuncADL library
138
+ - [func_adl_uproot](https://github.com/iris-hep/func_adl_uproot) — Uproot backend for FuncADL
139
+ - [servicex_local](https://github.com/iris-hep/servicex_local) — local ServiceX runtime
140
+ - [ServiceX](https://github.com/ssl-hep/ServiceX) — remote data delivery service
141
+ - [func_adl_servicex](https://github.com/iris-hep/func_adl_servicex) — FuncADL ServiceX backend
142
+
143
+ ## Contributing
144
+
145
+ Contributions are welcome! Please open an issue or pull request on [GitHub](https://github.com/RogerJanusiak/func_adl_local).
146
+
147
+ ## License
148
+
149
+ MIT License
@@ -0,0 +1,119 @@
1
+ # func_adl_local
2
+
3
+ Run [FuncADL](https://github.com/iris-hep/func_adl) queries on local files without ServiceX.
4
+
5
+ [FuncADL](https://github.com/iris-hep/func_adl) (Functional Analysis Description Language) is a declarative query language for HEP data analysis. Normally it is used with [ServiceX](https://github.com/ssl-hep/ServiceX) to query remote datasets. `func_adl_local` brings that same interface to files on your local machine — no ServiceX deployment required.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install func_adl_local
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ### xAOD files
16
+
17
+ Use `xAODConfig` to configure the ATLAS release and runtime platform, then call `get_data` with your dataset path and FuncADL query:
18
+
19
+ ```python
20
+ from func_adl_local import xAODConfig, get_data
21
+
22
+ config = xAODConfig(
23
+ release=22, # ATLAS release: 21, 22, or 25
24
+ platform="docker", # "docker", "singularity", or "wsl2"
25
+ )
26
+
27
+ # Build a query using the release-appropriate FuncADL dataset type
28
+ query = config.FuncADLQueryPHYS().SelectMany(lambda e: e.Jets("AntiKt4EMTopoJets")).Select(
29
+ lambda j: {"pt": j.pt(), "eta": j.eta()}
30
+ )
31
+
32
+ result = get_data("path/to/file.root", query, config)
33
+ ```
34
+
35
+ To get the result as an [awkward-array](https://awkward-array.org/), set `awk=True`:
36
+
37
+ ```python
38
+ config = xAODConfig(release=22, awk=True)
39
+ result = get_data("path/to/file.root", query, config)
40
+ # result is now an awkward array
41
+ ```
42
+
43
+ **Available query types:**
44
+
45
+ | Method | Releases |
46
+ |---|---|
47
+ | `config.FuncADLQueryPHYS()` | 21, 22, 25 |
48
+ | `config.FuncADLQueryPHYSLITE()` | 22, 25 |
49
+
50
+ **Platforms:**
51
+
52
+ | Value | Description |
53
+ |---|---|
54
+ | `"docker"` | Run the transformer in Docker (default) |
55
+ | `"singularity"` | Run in Singularity/Apptainer |
56
+ | `"wsl2"` | Run in WSL2 |
57
+
58
+ **Docker image versions:**
59
+
60
+ `xAODConfig` automatically selects the latest image for the chosen release. You can inspect available versions or pin to a specific one:
61
+
62
+ ```python
63
+ config = xAODConfig(release=22)
64
+ print(config.available_versions) # all available tags
65
+ print(config.latest_r22_version) # e.g. "22.2.110"
66
+
67
+ config_pinned = xAODConfig(release=22, version="22.2.107")
68
+ ```
69
+
70
+ ### Uproot / columnar files
71
+
72
+ For ROOT files and other columnar formats, use the re-exported `UprootDataset` from [func_adl_uproot](https://github.com/iris-hep/func_adl_uproot):
73
+
74
+ ```python
75
+ from func_adl_local import UprootDataset
76
+
77
+ ds = UprootDataset("path/to/file.root", "treename")
78
+ result = ds.Select(lambda e: {"pt": e["pt"]}).AsAwkwardArray().value()
79
+ ```
80
+
81
+ ## API Reference
82
+
83
+ ### `xAODConfig`
84
+
85
+ ```python
86
+ @dataclass
87
+ class xAODConfig:
88
+ release: int = 21 # ATLAS release number: 21, 22, or 25
89
+ version: str = "latest" # Docker image tag, or "latest" to auto-select
90
+ platform: str = "docker" # "docker", "singularity", or "wsl2"
91
+ ignore_cache: bool = False # Bypass the local ServiceX cache
92
+ awk: bool = False # Return results as awkward arrays
93
+ ```
94
+
95
+ ### `get_data(ds_name, query, config)`
96
+
97
+ Runs a FuncADL query against a local xAOD file.
98
+
99
+ - `ds_name` — path to the local dataset
100
+ - `query` — a FuncADL `ObjectStream` built from `config.FuncADLQueryPHYS()` or `config.FuncADLQueryPHYSLITE()`
101
+ - `config` — an `xAODConfig` instance
102
+
103
+ Returns a dict of arrays (or an awkward array if `config.awk=True`).
104
+
105
+ ## Related Projects
106
+
107
+ - [func_adl](https://github.com/iris-hep/func_adl) — core FuncADL library
108
+ - [func_adl_uproot](https://github.com/iris-hep/func_adl_uproot) — Uproot backend for FuncADL
109
+ - [servicex_local](https://github.com/iris-hep/servicex_local) — local ServiceX runtime
110
+ - [ServiceX](https://github.com/ssl-hep/ServiceX) — remote data delivery service
111
+ - [func_adl_servicex](https://github.com/iris-hep/func_adl_servicex) — FuncADL ServiceX backend
112
+
113
+ ## Contributing
114
+
115
+ Contributions are welcome! Please open an issue or pull request on [GitHub](https://github.com/RogerJanusiak/func_adl_local).
116
+
117
+ ## License
118
+
119
+ MIT License
@@ -0,0 +1,8 @@
1
+ # func_adl_local — run FuncADL queries on local files without ServiceX
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ # Re-export UprootDataset so it can be imported directly from func_adl_local
6
+ from func_adl_uproot import UprootDataset
7
+
8
+ from .functions import xAODConfig, get_data
@@ -0,0 +1,140 @@
1
+ import importlib
2
+ import json
3
+ import urllib.request
4
+ from dataclasses import dataclass
5
+ from typing import TYPE_CHECKING, Union
6
+
7
+ from func_adl import ObjectStream
8
+ from servicex import deliver
9
+ from servicex.databinder_models import Sample, ServiceXSpec
10
+ from servicex_analysis_utils import to_awk
11
+
12
+ from enum import Enum
13
+
14
+
15
class Platform(Enum):
    """Options for which platform to use for the runtime environment."""

    docker = "docker"
    singularity = "singularity"
    wsl2 = "wsl2"


if TYPE_CHECKING:
    # Needed only for the return annotations of the query factory methods;
    # at runtime the release package is imported by xAODConfig._release_module.
    from func_adl_servicex_xaodr21.sx_dataset import FuncADLQueryPHYS as _PHYS21
    from func_adl_servicex_xaodr22.sx_dataset import FuncADLQueryPHYS as _PHYS22
    from func_adl_servicex_xaodr22.sx_dataset import FuncADLQueryPHYSLITE as _PHYSLITE22
    from func_adl_servicex_xaodr25.sx_dataset import FuncADLQueryPHYS as _PHYS25
    from func_adl_servicex_xaodr25.sx_dataset import FuncADLQueryPHYSLITE as _PHYSLITE25

# Docker Hub repository that hosts the xAOD transformer images.
_DOCKER_IMAGE = "sslhep/servicex_func_adl_xaod_transformer"

# Release numbers accepted by xAODConfig.
_VALID_RELEASES = (21, 22, 25)

# Upper bound, in seconds, for each Docker Hub request so that constructing a
# config can never block forever on an unresponsive network.
_HTTP_TIMEOUT_SECONDS = 30


@dataclass
class xAODConfig:
    """Configuration for xAOD datasets.

    Fields:
        release: ATLAS release number; must be one of ``_VALID_RELEASES``.
        version: Tag of the transformer Docker image. The default ``"latest"``
            is replaced in ``__post_init__`` by the highest tag published for
            ``release`` (this queries Docker Hub).
        platform: A ``Platform`` member or the name of one; strings are
            converted to ``Platform`` in ``__post_init__``.
        ignore_cache: Forwarded by ``get_data`` as ``ignore_local_cache``.
        awk: When true, ``get_data`` converts the result with ``to_awk``.

    Raises:
        ValueError: If ``release`` is not supported, or if ``version`` is
            ``"latest"`` and no usable tag exists for the release.
        KeyError: If ``platform`` is a string that is not a ``Platform`` name.
    """

    release: int = 21
    version: str = "latest"
    platform: Union[Platform, str] = "docker"
    ignore_cache: bool = False
    awk: bool = False

    def __post_init__(self):
        # Validate the purely local inputs first so that bad arguments fail
        # immediately instead of after a round-trip to Docker Hub.
        if self.release not in _VALID_RELEASES:
            raise ValueError(f"release must be one of {_VALID_RELEASES}, got {self.release}")
        if isinstance(self.platform, str):
            # Lookup is by member name; an unknown name raises KeyError.
            self.platform = Platform[self.platform]
        if self.version == "latest":
            self.version = self._latest_for_release(self.release)

    @staticmethod
    def _version_key(tag: str):
        """Return ``tag`` as a tuple of ints, or ``None`` if it is not numeric.

        Tags with a non-integer component (for example ``"22.2.110-rc1"``)
        cannot be ordered numerically and yield ``None``.
        """
        try:
            return tuple(int(part) for part in tag.split("."))
        except ValueError:
            return None

    @staticmethod
    def _select_latest(tags, release: int) -> str:
        """Return the numerically highest tag in ``tags`` for ``release``.

        Only tags starting with ``"<release>."`` are considered, and tags that
        are not purely numeric dotted versions are skipped rather than
        aborting the whole selection.

        Raises:
            ValueError: If no usable tag for ``release`` is present.
        """
        prefix = f"{release}."
        candidates = []
        for tag in tags:
            if not tag.startswith(prefix):
                continue
            key = xAODConfig._version_key(tag)
            if key is not None:
                candidates.append((key, tag))
        if not candidates:
            raise ValueError(f"No versions found for release {release}")
        # Compare on the numeric key only; on ties the first tag seen wins.
        return max(candidates, key=lambda pair: pair[0])[1]

    def _latest_for_release(self, release: int) -> str:
        """Return the highest published image tag for ``release``."""
        return self._select_latest(self.available_versions, release)

    @property
    def available_versions(self) -> list[str]:
        """Return available versions (21.x, 22.x, 25.x) of the xAOD transformer Docker image.

        Every access queries the Docker Hub tags API and follows the ``next``
        links until all pages have been read; nothing is cached.
        """
        tags = []
        url = f"https://hub.docker.com/v2/repositories/{_DOCKER_IMAGE}/tags?page_size=100"
        while url:
            with urllib.request.urlopen(url, timeout=_HTTP_TIMEOUT_SECONDS) as response:
                data = json.loads(response.read())
            tags.extend(result["name"] for result in data["results"])
            url = data.get("next")
        release_prefixes = tuple(f"{release}." for release in _VALID_RELEASES)
        return [t for t in tags if t.startswith(release_prefixes)]

    @property
    def latest_r21_version(self) -> str:
        """Return the latest 21.x version of the xAOD transformer Docker image."""
        return self._latest_for_release(21)

    @property
    def latest_r22_version(self) -> str:
        """Return the latest 22.x version of the xAOD transformer Docker image."""
        return self._latest_for_release(22)

    @property
    def latest_r25_version(self) -> str:
        """Return the latest 25.x version of the xAOD transformer Docker image."""
        return self._latest_for_release(25)

    def _release_module(self):
        """Import and return the ``func_adl_servicex_xaodr<release>`` package."""
        return importlib.import_module(f"func_adl_servicex_xaodr{self.release}")

    def FuncADLQueryPHYS(self) -> "Union[_PHYS21, _PHYS22, _PHYS25]":
        """Return a new ``FuncADLQueryPHYS`` from the configured release's package."""
        return self._release_module().FuncADLQueryPHYS()

    def FuncADLQueryPHYSLITE(self) -> "Union[_PHYSLITE22, _PHYSLITE25]":
        """Return a new ``FuncADLQueryPHYSLITE`` from the configured release's package.

        NOTE(review): the annotations only cover releases 22 and 25, so the
        release 21 package presumably has no PHYSLITE query type -- confirm.
        """
        return self._release_module().FuncADLQueryPHYSLITE()
94
+
95
+
96
def build_sx_spec(query, ds_name: str, config: xAODConfig):
    """Build a ServiceX spec and local adaptor for running ``query`` on ``ds_name``.

    Args:
        query: Query placed in the sample's ``Query`` field.
        ds_name: Dataset identifier handed to ``find_dataset``.
        config: Supplies the transformer image tag (``config.version``) and
            the runtime platform (``config.platform``).

    Returns:
        A ``(spec, adaptor)`` tuple: a ``ServiceXSpec`` holding one sample
        named ``"MySample"`` and the adaptor returned by ``install_sx_local``.

    Raises:
        ValueError: If ``find_dataset`` reports the dataset cannot be used
            locally.
    """
    from servicex_local.utils import Platform as _SxPlatform
    from servicex_local.utils import find_dataset, install_sx_local

    dataset, use_local = find_dataset(ds_name, prefer_local=True)
    if not use_local:
        raise ValueError(f"Unable to run dataset {ds_name} locally.")

    # Translate this package's Platform member into servicex_local's enum via
    # the shared string value, and point at the configured image tag.
    codegen_name, adaptor = install_sx_local(
        f"docker://{_DOCKER_IMAGE}:{config.version}",
        _SxPlatform(config.platform.value),
    )

    sample = Sample(
        Name="MySample",
        Dataset=dataset,
        Query=query,
        Codegen=codegen_name,
    )
    return ServiceXSpec(Sample=[sample]), adaptor
122
+
123
+
124
def get_data(
    ds_name: str,
    query: ObjectStream,
    config: xAODConfig,
):
    """Run ``query`` against the local dataset ``ds_name`` and return its result.

    The spec and adaptor come from ``build_sx_spec``, which raises
    ``ValueError`` when the dataset cannot be used locally, and the request is
    executed with ``servicex_local``'s ``deliver``.

    Args:
        ds_name: Dataset identifier passed through to ``build_sx_spec``.
        query: FuncADL query to execute.
        config: Controls the image and platform, whether the local cache is
            bypassed (``ignore_cache``) and whether the output is converted
            with ``to_awk`` (``awk``).

    Returns:
        The ``"MySample"`` entry of the delivered result, taken from the
        ``to_awk`` conversion when ``config.awk`` is true and from the raw
        delivery result otherwise.
    """
    from servicex_local.deliver import deliver as local_deliver

    spec, adaptor = build_sx_spec(query, ds_name, config)
    delivered = local_deliver(spec, adaptor=adaptor, ignore_local_cache=config.ignore_cache)

    # Both result forms are keyed by the sample name set in build_sx_spec.
    results = to_awk(delivered) if config.awk else delivered
    return results["MySample"]
@@ -0,0 +1,61 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "func_adl_local"
7
+ description = "Run FuncADL queries on local files without ServiceX"
8
+ readme = "README.md"
9
+ authors = [{ name = "Roger Janusiak", email = "rogerjanusiak@gmail.com" }]
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.9"
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "Intended Audience :: Science/Research",
16
+ "Programming Language :: Python",
17
+ "Programming Language :: Python :: 3.9",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Scientific/Engineering :: Physics",
23
+ ]
24
+ dependencies = [
25
+ "func_adl>=3.2",
26
+ "func_adl_uproot",
27
+ "servicex_local",
28
+ "func_adl_servicex_xaodr21",
29
+ "func_adl_servicex_xaodr22",
30
+ "func_adl_servicex_xaodr25",
31
+ "jinja2",
32
+ "servicex_analysis_utils",
33
+ ]
34
+ dynamic = ["version"]
35
+
36
+ [project.optional-dependencies]
37
+ test = ["pytest"]
38
+
39
+ [tool.hatch.version]
40
+ path = "func_adl_local/__init__.py"
41
+
42
+ [tool.hatch.build.targets.sdist]
43
+ include = ["README.md", "/func_adl_local"]
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ include = ["README.md", "/func_adl_local"]
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/RogerJanusiak/func_adl_local"
50
+
51
+ [tool.black]
52
+ line-length = 99
53
+
54
+ [tool.isort]
55
+ profile = "black"
56
+
57
+ [tool.flake8]
58
+ max-line-length = 99
59
+
60
+ [tool.ruff]
61
+ line-length = 99