LbExec 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LbExec/__init__.py +38 -0
- LbExec/__main__.py +39 -0
- LbExec/cli_utils.py +381 -0
- LbExec/options.py +166 -0
- LbExec/workflows.py +248 -0
- LbExec-0.2.0.dist-info/LICENSE +674 -0
- LbExec-0.2.0.dist-info/METADATA +33 -0
- LbExec-0.2.0.dist-info/RECORD +11 -0
- LbExec-0.2.0.dist-info/WHEEL +5 -0
- LbExec-0.2.0.dist-info/entry_points.txt +2 -0
- LbExec-0.2.0.dist-info/top_level.txt +1 -0
LbExec/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# (c) Copyright 2022-2023 CERN for the benefit of the LHCb Collaboration #
|
|
3
|
+
# #
|
|
4
|
+
# This software is distributed under the terms of the GNU General Public #
|
|
5
|
+
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
|
|
6
|
+
# #
|
|
7
|
+
# In applying this licence, CERN does not waive the privileges and immunities #
|
|
8
|
+
# granted to it by virtue of its status as an Intergovernmental Organization #
|
|
9
|
+
# or submit itself to any jurisdiction. #
|
|
10
|
+
###############################################################################
|
|
11
|
+
__all__ = ("Options", "main", "skim_and_merge")
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
|
|
15
|
+
from .options import Options
|
|
16
|
+
from .workflows import skim_and_merge
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main(function, options, extra_args):
    """Run a job with lbexec.

    Args:
        function (callable): A callable that will return the Gaudi configuration
        options (Options): An initialised APP.Options object
        extra_args (list of str): extra strings passed as additional
            positional arguments in the call to ``function``

    Returns:
        return_code (int): always ``0``; whatever ``function`` returns is
            discarded here
    """
    function(options, *extra_args)

    # Push out anything the user provided function printed before returning.
    # Otherwise non-interactive jobs such as tests end up showing that print
    # out in the middle of the Gaudi application log.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()

    return 0
|
LbExec/__main__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# (c) Copyright 2024 CERN for the benefit of the LHCb Collaboration #
|
|
3
|
+
# #
|
|
4
|
+
# This software is distributed under the terms of the GNU General Public #
|
|
5
|
+
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
|
|
6
|
+
# #
|
|
7
|
+
# In applying this licence, CERN does not waive the privileges and immunities #
|
|
8
|
+
# granted to it by virtue of its status as an Intergovernmental Organization #
|
|
9
|
+
# or submit itself to any jurisdiction. #
|
|
10
|
+
###############################################################################
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from . import main
|
|
15
|
+
from .cli_utils import FunctionLoader, OptionsLoader # type: ignore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_args():
    """Parse the ``lbexec`` command line and run the requested job.

    The ``function`` positional is converted by ``FunctionLoader`` while
    argparse parses it; the ``options`` positional is converted afterwards by
    ``OptionsLoader`` because that call also needs the loaded function.

    Returns:
        The value returned by ``main``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "function",
        type=FunctionLoader,
        help=(
            "Function to call with the options that will return the configuration. "
            "Given in the form 'my_module:function_name'."
        ),
    )
    parser.add_argument(
        "options",
        help=(
            "YAML data to populate the Application.Options object with. "
            "Multiple files can merged using 'file1.yaml+file2.yaml'."
        ),
    )
    parser.add_argument("extra_args", nargs="*")

    namespace = parser.parse_args()
    loaded_options = OptionsLoader(namespace.function, namespace.options)
    return main(
        function=namespace.function,
        options=loaded_options,
        extra_args=namespace.extra_args,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Script entry point: when run as ``__main__`` the process exits with whatever
# parse_args() returns.
if __name__ == "__main__":
    sys.exit(parse_args())
|
LbExec/cli_utils.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
# type: ignore
|
|
2
|
+
# pylint: skip-file
|
|
3
|
+
###############################################################################
|
|
4
|
+
# (c) Copyright 2022-2023 CERN for the benefit of the LHCb Collaboration #
|
|
5
|
+
# #
|
|
6
|
+
# This software is distributed under the terms of the GNU General Public #
|
|
7
|
+
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
|
|
8
|
+
# #
|
|
9
|
+
# In applying this licence, CERN does not waive the privileges and immunities #
|
|
10
|
+
# granted to it by virtue of its status as an Intergovernmental Organization #
|
|
11
|
+
# or submit itself to any jurisdiction. #
|
|
12
|
+
###############################################################################
|
|
13
|
+
"""Utilities for parsing the positional arguments to ``lbexec``.
|
|
14
|
+
|
|
15
|
+
This module provides two callable objects that can be used as types with
|
|
16
|
+
``argparse``. The majority of the code is for providing hints to the user about
|
|
17
|
+
what might be wrong in the case of errors.
|
|
18
|
+
|
|
19
|
+
``FunctionLoader``
|
|
20
|
+
------------------
|
|
21
|
+
|
|
22
|
+
Wrapper class which takes a function spec of the form ``module.name:callable``.
|
|
23
|
+
In the event of errors a best effort is made to advise the user of how to
|
|
24
|
+
correct the error. In the event the module to import or function raises an
|
|
25
|
+
exception tracebacks are rewritten to hide the implementation details of
|
|
26
|
+
``lbexec``.
|
|
27
|
+
|
|
28
|
+
``OptionsLoader``
|
|
29
|
+
------------------
|
|
30
|
+
|
|
31
|
+
Converts a '+' separated list of YAML file paths into an ``Application.Options``
|
|
32
|
+
object. The current application is discovered using the ``GAUDIAPPNAME``
|
|
33
|
+
environment variable. If required ``OVERRIDE_LBEXEC_APP`` can be passed to
|
|
34
|
+
override which application is loaded. This is used by projects created by
|
|
35
|
+
lb-dev where the value of ``GAUDIAPPNAME`` is ``${PROJECT_NAME}Dev``.
|
|
36
|
+
"""
|
|
37
|
+
import ast
|
|
38
|
+
import difflib
|
|
39
|
+
import inspect
|
|
40
|
+
import os
|
|
41
|
+
import re
|
|
42
|
+
import shlex
|
|
43
|
+
import sys
|
|
44
|
+
import traceback
|
|
45
|
+
from importlib import import_module
|
|
46
|
+
from importlib.machinery import SourceFileLoader
|
|
47
|
+
from pathlib import Path
|
|
48
|
+
from types import ModuleType
|
|
49
|
+
from typing import Callable, Optional, get_type_hints
|
|
50
|
+
|
|
51
|
+
import click
|
|
52
|
+
|
|
53
|
+
# with warnings.catch_warnings():
|
|
54
|
+
# warnings.simplefilter("ignore", category=SyntaxWarning)
|
|
55
|
+
import pydantic
|
|
56
|
+
import yaml
|
|
57
|
+
|
|
58
|
+
from .options import Options as OptionsBase
|
|
59
|
+
|
|
60
|
+
# Workaround for https://gitlab.cern.ch/lhcb/LHCb/-/issues/292
|
|
61
|
+
# import warnings
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class FunctionLoader:
    """Class for parsing the function_spec argument to lbexec

    A spec has the form ``module:name``, where ``module`` is either an
    importable module name or the path of a ``.py`` file. Constructing an
    instance imports the module and looks up ``name``; any failure is reported
    to the user and terminates the process with exit code 1.

    Attributes:
        spec: the spec string exactly as given.
        module_name: the module part of the spec (replaced by the file's stem
            when a ``.py`` file is loaded).
        func_name: the attribute name looked up in the module.
    """

    def __init__(self, spec: str):
        self.spec = spec
        try:
            self.module_name, self.func_name = spec.split(":", 1)
        except ValueError:
            # No ":" in the spec at all
            _suggest_spec_fix(spec, spec)
            sys.exit(1)

        # Import the module, ensuring sys.path behaves the same way as
        # "python my_script.py" and is always restored to its original value.
        # __load_module rebinds sys.path to a new list rather than mutating it,
        # so keeping a reference to the original list is sufficient here.
        path_backup = sys.path
        try:
            module = self.__load_module()
        except Exception as e:
            if isinstance(e, ModuleNotFoundError) and Path(self.module_name).is_file():
                _suggest_spec_fix(spec, self.module_name, self.func_name)
                sys.exit(1)
            action_msg = f"import {self.module_name!r}"
            # Prints the traceback and exits, so ``module`` is always bound below
            _raise_user_exception(e, action_msg, self)
        finally:
            sys.path = path_backup

        # Get the function
        try:
            self._function = getattr(module, self.func_name)
        except AttributeError:
            function_names = _guess_function_names(self.module_name, self.func_name)
            _suggest_module_fix(self.spec, self.module_name, function_names)
            sys.exit(1)

    def __load_module(self) -> ModuleType:
        """Import ``self.module_name`` from a ``.py`` file path or by module name."""
        if self.module_name.endswith(".py"):
            module_path = Path(self.module_name)
            if module_path.is_file():
                self.module_name = module_path.with_suffix("").name
                # sys.path entries must be strings: the import system ignores
                # any other type, which would stop the script from importing
                # modules that live next to it.
                sys.path = [str(module_path.parent)] + sys.path[1:]
                return SourceFileLoader(
                    self.module_name, str(module_path)
                ).load_module()
            if "/" in self.module_name:
                log_error(
                    f"{self.module_name} looks like a filename but it doesn't exist"
                )
                sys.exit(1)
            log_warn(f"{self.module_name} doesn't exist, assuming it's a Python module")
        sys.path = [os.getcwd()] + sys.path[1:]
        return import_module(self.module_name)

    def __call__(self, options: OptionsBase, *extra_args: str):
        """Run the user provided function and return its result.

        Any exception raised by the function is reported through
        ``_raise_user_exception``, which prints the traceback and exits.
        """

        try:
            config = self._function(options, *extra_args)
        except Exception as e:
            args = ", ".join(["options"] + [repr(x) for x in extra_args])
            action_msg = "call " + click.style(f"{self.spec}({args})", fg="green")
            _raise_user_exception(e, action_msg, self)

        # if not isinstance(config, ComponentConfig):
        #     log_error(f"{self._function!r} returned {type(config)}, "
        #               f"expected {ComponentConfig}")
        #     sys.exit(1)

        return config

    @property
    def OptionsClass(self) -> type[OptionsBase]:
        """Return the Options class used by the function

        The class is taken from the type hint of the function's first
        positional parameter.

        Raises:
            TypeError: if the function accepts no positional parameters.

        Exits with code 1 (after logging an error) if the parameter has no
        type hint or the hint is not a subclass of ``OptionsBase``.
        """
        valid_types = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        positional_param_names = [
            n
            for n, p in inspect.signature(self._function).parameters.items()
            if p.kind in valid_types
        ]
        if not positional_param_names:
            raise TypeError(
                f"{self.spec} must accept one or more positional argument(s)"
            )
        type_hints = get_type_hints(self._function)
        options_arg = positional_param_names[0]
        if options_arg not in type_hints:
            log_error(f"Failed to find an options type hint for {self.spec}")
            _make_type_hint_suggestion(self._function, options_arg)
            sys.exit(1)
        OptionsClass = type_hints[options_arg]
        # issubclass() raises TypeError for hints that are not classes
        # (e.g. Optional[...]), so check that first and report it the same way.
        if not (isinstance(OptionsClass, type) and issubclass(OptionsClass, OptionsBase)):
            log_error(f"OptionsClass for {self.spec} should inherit from OptionsBase")
            sys.exit(1)
        return OptionsClass
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def OptionsLoader(function: FunctionLoader, options_spec: str) -> OptionsBase:
    """Convert a '+' separated list of paths to an Application.Options object.

    Two forms of ``options_spec`` are accepted:

    * One or more file paths joined with ``+`` where the spec ends in
      ``.yaml``, ``.yml`` or ``.json``. Each file must contain a mapping; the
      mappings are merged in order with later files overriding earlier keys.
      A literal ``+`` in a path can be written as ``\\+``.
    * ``module:name`` (or ``:name`` to reuse the module part of the function
      spec), where ``name`` must refer to a ``dict``.

    Args:
        function: the loaded function, whose ``OptionsClass`` validates the data.
        options_spec: the options argument given on the command line.

    Returns:
        An instance of ``function.OptionsClass``.

    Raises:
        NotImplementedError: if ``options_spec`` matches neither form.

    All other problems are logged and terminate the process with exit code 1.
    """
    if options_spec.endswith((".yaml", ".yml", ".json")):
        # Load and merge the various input YAML files
        options = {}
        for raw_path in re.split(r"(?<!\\)\+", options_spec):
            # The split leaves escaped separators untouched, so drop the
            # escaping backslash to recover the real file name.
            options_yaml = Path(raw_path.replace("\\+", "+"))
            if not options_yaml.is_file():
                log_error(f"{options_yaml} does not exist")
                sys.exit(1)
            options_data = yaml.safe_load(options_yaml.read_text())
            if not isinstance(options_data, dict):
                log_error(
                    f"{options_yaml} should contain a mapping but got {options_data!r}"
                )
                sys.exit(1)
            options.update(options_data)
    elif ":" in options_spec:
        if options_spec.startswith(":"):
            options_spec = function.spec.split(":", 1)[0] + options_spec
        # HACK: Abuse the FunctionLoader class to load the options
        options = FunctionLoader(options_spec)._function
        if not isinstance(options, dict):
            log_error(f"{options_spec} should point to a mapping but got {options!r}")
            sys.exit(1)
    else:
        raise NotImplementedError(f"Unrecognised {options_spec!r}")

    # Parse the merged data. The package's options module requires pydantic 2
    # (it imports ``model_validator``), so use the v2 API rather than the
    # deprecated ``parse_obj``/``schema`` aliases.
    options_class = function.OptionsClass
    try:
        return options_class.model_validate(options)
    except pydantic.ValidationError as e:
        errors = e.errors()
        log_error(f"Failed to validate options! Found {len(errors)} errors:")
        for error in errors:
            extra_msg = ""
            if error["type"].startswith("extra") and len(error["loc"]) == 1:
                # Unknown top-level key: suggest the closest known field name
                suggestions = difflib.get_close_matches(
                    error["loc"][0],
                    options_class.model_json_schema()["properties"],
                    n=1,
                )
                if suggestions:
                    extra_msg = click.style(
                        f"Did you mean {suggestions[0]!r}?", fg="green"
                    )
            locs = ", ".join(map(repr, error["loc"]))
            # Model-level errors carry an empty location in pydantic 2 (and
            # '__root__' in pydantic 1); there is no field to name for them.
            if not error["loc"] or locs == "'__root__'":
                click.echo(f"  * {error['msg']}", err=True)
            else:
                click.echo(
                    f"  * {click.style(error['msg'], fg='red')}: {locs} {extra_msg}",
                    err=True,
                )
        sys.exit(1)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def log_info(message):
    """Echo ``message`` to stderr behind a green ``INFO: `` prefix."""
    prefix = click.style("INFO: ", fg="green")
    click.echo(prefix + message, err=True)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def log_warn(message):
    """Echo ``message`` to stderr behind a yellow ``WARN: `` prefix."""
    prefix = click.style("WARN: ", fg="yellow")
    click.echo(prefix + message, err=True)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def log_error(message):
    """Echo ``message`` to stderr behind a red ``ERROR: `` prefix."""
    prefix = click.style("ERROR: ", fg="red")
    click.echo(prefix + message, err=True)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _make_type_hint_suggestion(function: Callable, options_arg: str) -> None:
    """Log a suggestion for adding an ``Options`` type hint to ``function``.

    The first line of the function's source is rewritten with ``options_arg``
    annotated as ``Options`` and both versions are logged as errors, followed
    by the source file and line number of the function.

    Args:
        function: the user's function that is missing the type hint.
        options_arg: name of the parameter that should be annotated.

    Returns:
        None. All output goes through ``log_error``.
    """
    # Try to guess what the application name should be
    app_name = os.environ.get("GAUDIAPPNAME", "GenericPython")
    if app_name == "GenericPython":
        app_name = "LbExec"

    try:
        original = inspect.getsource(function).split("\n")[0]
    except (OSError, TypeError):
        # The source is unavailable (e.g. builtins or dynamically created
        # callables), so there is nothing to base a suggestion on.
        log_error("Failed to generate corrected Python code")
        return

    # Try to guess how the Python code should look
    sig = inspect.signature(function)
    parameter_fixed = sig.parameters[options_arg].replace(annotation="to-be-replaced!")
    sig_fixed = sig.replace(
        parameters=[parameter_fixed] + list(sig.parameters.values())[1:]
    )
    # The placeholder annotation is rendered with quotes by str(); swapping it
    # for the bare name gives ``options: Options``.
    fixed = original.replace(
        str(sig), str(sig_fixed).replace("'to-be-replaced!'", "Options")
    )
    # If the code is unchanged something went wrong
    if fixed == original:
        log_error("Failed to generate corrected Python code")
    else:
        log_error("*** You probably need to replace:")
        log_error(original)
        log_error("*** with:")
        log_error(f"from {app_name} import Options")
        log_error("")
        log_error(fixed)
    source_file = inspect.getsourcefile(function)
    _, lineno = inspect.getsourcelines(function)
    log_error(f"*** in {source_file}:{lineno}")
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _suggest_spec_fix(spec, module_name: str, func_name: Optional[str] = None):
    """Suggest a corrected function spec for a spec that could not be used.

    If ``module_name`` is an existing file below the current working
    directory it is converted to a dotted module name (dropping a trailing
    ``.__init__``). Candidate function names are then passed to
    ``_suggest_module_fix``, which prints the suggestions.

    Args:
        spec: the spec exactly as the user gave it.
        module_name: module name or file path taken from the spec.
        func_name: function name from the spec, if one was given.
    """
    func_names = None
    if os.path.isfile(module_name):
        try:
            filename = Path(module_name).absolute().relative_to(Path.cwd())
        except ValueError:
            # The file is outside the working directory, so it cannot be
            # written as a module name relative to it; fall through to the
            # generic guess instead of crashing while printing a hint.
            pass
        else:
            module_name = str(filename.with_suffix("")).replace(os.sep, ".")
            if module_name.endswith(".__init__"):
                module_name = module_name[: -len(".__init__")]
            # If given, assume the user's function name is correct
            if func_name:
                func_names = [func_name]
    if func_names is None:
        func_names = _guess_function_names(module_name)
    _suggest_module_fix(spec, module_name, func_names)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _guess_function_names(
    module_name: str, function_name: Optional[str] = None
) -> list[str]:
    """List module-level functions whose first positional parameter is ``options``.

    The module's source is parsed with ``ast`` rather than imported.

    Args:
        module_name: dotted name of the module to inspect.
        function_name: if given, the result is ordered by similarity to this
            name (closest first).

    Returns:
        The matching function names, or an empty list if the module's source
        cannot be located, read or parsed.
    """
    module_path = _guess_module_path(module_name)
    if not module_path:
        return []
    try:
        module_ast = ast.parse(module_path.read_text())
    except (OSError, SyntaxError, ValueError):
        # This only feeds a help message; an unreadable or unparsable module
        # simply means no suggestions (ValueError covers decoding errors).
        return []

    function_names = []
    for node in ast.iter_child_nodes(module_ast):
        if not isinstance(node, ast.FunctionDef):
            continue
        # Positional-only parameters come before the regular ones
        positional = node.args.posonlyargs + node.args.args
        if positional and positional[0].arg == "options":
            function_names.append(node.name)

    if function_name:
        # cutoff=0 keeps every candidate and only sorts them by similarity
        function_names = difflib.get_close_matches(
            function_name, function_names, cutoff=0
        )
    return function_names
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _suggest_module_fix(spec: str, module_name: str, function_names: list[str]):
    """Print suggested replacements for a broken function spec.

    For every candidate function a coloured diff is printed between the
    command line as typed and the command line with ``spec`` replaced by
    ``module_name:function``.

    Args:
        spec: the spec that failed.
        module_name: module name to use in the suggestions.
        function_names: candidate function names; may be empty.
    """
    log_error("There seems to be an issue with your function specification.")
    if function_names:
        click.echo("Did you mean one of these:\n", err=True)
        argv = [Path(sys.argv[0]).name] + sys.argv[1:]
        original = shlex.join(argv)
        # The spec is not always a literal command line argument (it can be
        # built from other arguments), so its position may not exist.
        try:
            spec_index = sys.argv.index(spec)
        except ValueError:
            spec_index = None
        for maybe_function in function_names:
            suggestion = f"{module_name}:{maybe_function}"
            if spec_index is None:
                # Fall back to showing the change to the spec on its own
                _print_diff(spec, suggestion)
            else:
                corrected_argv = list(argv)
                corrected_argv[spec_index] = suggestion
                _print_diff(original, shlex.join(corrected_argv))
            click.echo(err=True)
    elif spec[-4:] == ".qmt":
        log_error(
            "Is it possible you are trying to run a .qmt test? If so, use qmtexec."
        )
    else:
        log_error("Failed to find a suggested fix")
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _print_diff(original: str, corrected: str):
    """Echo ``original`` and ``corrected`` to stderr with differences coloured.

    Characters removed from or replaced in ``original`` are shown in red;
    characters inserted into or replacing in ``corrected`` are shown in green.
    """
    removed_colour = {"replace": "red", "delete": "red"}
    added_colour = {"replace": "green", "insert": "green"}

    matcher = difflib.SequenceMatcher(None, original, corrected)
    before_parts = []
    after_parts = []
    for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
        # Tags missing from a table (e.g. "equal") map to None: no colour
        before_parts.append(
            click.style(matcher.a[a_lo:a_hi], fg=removed_colour.get(tag))
        )
        after_parts.append(
            click.style(matcher.b[b_lo:b_hi], fg=added_colour.get(tag))
        )

    click.echo(click.style(" Original: ", fg="red") + "".join(before_parts), err=True)
    click.echo(click.style("Corrected: ", fg="green") + "".join(after_parts), err=True)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _raise_user_exception(e, action_msg, spec):
    """Print the exception currently being handled, log an error and exit(1).

    The traceback is shortened so that frames before the first one located in
    the user's module file are left out. Must be called from inside an
    ``except`` block because the exception is read from ``sys.exc_info()``.

    Args:
        e: the exception being handled (not used directly).
        action_msg: description of what failed, inserted into the error line.
        spec: object with a ``module_name`` attribute naming the user's module.
    """
    module_path = str(_guess_module_path(spec.module_name))
    exc_type, exc_value, exc_tb = sys.exc_info()

    # Make a new trace back with everything above module_path removed
    frame_files = [
        frame.f_code.co_filename for frame, _ in traceback.walk_tb(exc_tb)
    ]
    total_frames = len(frame_files)
    to_ignore = frame_files.index(module_path) if module_path in frame_files else None
    # A negative limit keeps only the last abs(limit) frames; None keeps all
    if to_ignore:
        limit = to_ignore - total_frames
    else:
        limit = None
    traceback.print_exception(exc_type, exc_value, exc_tb, limit=limit)

    log_error(f"Failed to {action_msg}, see above for more information.")
    sys.exit(1)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _guess_module_path(name: str) -> Optional[Path]:
|
|
347
|
+
"""Static implementation of ``importlib.util.find_spec``
|
|
348
|
+
|
|
349
|
+
When calling ``find_spec("foo.bar")`` Python will execute ``foo/__init__.py``.
|
|
350
|
+
This is required for correctness however when showing help messages we should
|
|
351
|
+
avoid running any extra code so this function attempts to guess the path to
|
|
352
|
+
the module's source.
|
|
353
|
+
"""
|
|
354
|
+
from importlib.machinery import SourceFileLoader
|
|
355
|
+
from importlib.util import find_spec
|
|
356
|
+
|
|
357
|
+
top_module, *child_modules = name.split(".")
|
|
358
|
+
|
|
359
|
+
sys.path.insert(0, os.getcwd())
|
|
360
|
+
try:
|
|
361
|
+
module_spec = find_spec(top_module)
|
|
362
|
+
finally:
|
|
363
|
+
sys.path.pop(0)
|
|
364
|
+
|
|
365
|
+
if not (module_spec and child_modules):
|
|
366
|
+
if isinstance(getattr(module_spec, "loader", None), SourceFileLoader):
|
|
367
|
+
return Path(module_spec.loader.get_filename())
|
|
368
|
+
return None
|
|
369
|
+
|
|
370
|
+
if not module_spec.submodule_search_locations:
|
|
371
|
+
return None
|
|
372
|
+
module_path = Path(module_spec.submodule_search_locations[0])
|
|
373
|
+
|
|
374
|
+
for name in child_modules:
|
|
375
|
+
module_path = module_path / name
|
|
376
|
+
if module_path.is_dir():
|
|
377
|
+
module_path = module_path / "__init__.py"
|
|
378
|
+
else:
|
|
379
|
+
module_path = module_path.parent / f"{module_path.name}.py"
|
|
380
|
+
|
|
381
|
+
return module_path if module_path.is_file() else None
|
LbExec/options.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# (c) Copyright 2022-2024 CERN for the benefit of the LHCb Collaboration #
|
|
3
|
+
# #
|
|
4
|
+
# This software is distributed under the terms of the GNU General Public #
|
|
5
|
+
# Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". #
|
|
6
|
+
# #
|
|
7
|
+
# In applying this licence, CERN does not waive the privileges and immunities #
|
|
8
|
+
# granted to it by virtue of its status as an Intergovernmental Organization #
|
|
9
|
+
# or submit itself to any jurisdiction. #
|
|
10
|
+
###############################################################################
|
|
11
|
+
import glob
|
|
12
|
+
import re
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from itertools import product
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel, Field, model_validator, validator
|
|
18
|
+
from typing_extensions import Annotated
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CompressionAlgs(str, Enum):
    """ROOT compression algorithms."""

    # Members also subclass ``str``; each member's value is its own name.
    ZLIB = "ZLIB"
    LZMA = "LZMA"
    LZ4 = "LZ4"
    ZSTD = "ZSTD"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class CompressionSettings(BaseModel):
    """Compression configuration settings."""

    # Compression algorithm; defaults to ZSTD.
    algorithm: CompressionAlgs = CompressionAlgs.ZSTD
    # Integer compression level; defaults to 4.
    level: int = 4
    # Defaults to True.
    # NOTE(review): presumably toggles basket optimisation in the output
    # file - confirm against the code that consumes this setting.
    optimise_baskets: bool = True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Options(BaseModel):
    """Job options: input files, output destination and processing settings.

    Instances are immutable and unknown keys are rejected (see ``Config``).
    ``input_files`` may be given as a list of strings or as a single string;
    a single string undergoes bash-like brace expansion and, for patterns
    containing ``*``, globbing.
    """

    # Input
    input_files: list[str]
    xml_file_catalog: Optional[str] = None
    # Output: populated from the ``output_file`` key; the raw value may
    # contain a ``{stream}`` placeholder, hence the accessor methods below.
    output_file_: Annotated[str, Field(alias="output_file")]
    compression: Optional[CompressionSettings] = None
    xml_summary_file: Optional[str] = None
    # Processing
    n_threads: int = 1

    @property
    def output_file(self) -> str:
        """The output file name, for options without a ``{stream}`` placeholder.

        Raises:
            ValueError: if the configured name contains ``{stream}``; use
                ``get_output_file`` in that case.
        """
        if "{stream}" in self.output_file_:
            raise ValueError(
                "output_file contains {stream} and get_output_file must be used instead."
            )
        # Return the underlying field; returning ``self.output_file`` here
        # would re-enter this property and recurse without end.
        return self.output_file_

    def get_output_file(self, stream: str) -> str:
        """Return the output file name with ``{stream}`` replaced by ``stream``."""
        return self.output_file_.format(stream=stream)

    @validator("input_files", pre=True)
    def glob_input_files(cls, input_files):
        """Expand a single string into a list of input files.

        Non-string values are returned unchanged. For a string, each brace
        expansion without ``*`` is kept as is; each one with ``*`` is globbed
        (recursively) and must match at least one file.

        Raises:
            NotImplementedError: for ``*`` patterns starting with ``root://``.
            ValueError: if a ``*`` pattern matches nothing.
        """
        if isinstance(input_files, str):
            resolved_input_files = []
            for pattern in _expand_braces(input_files):
                if "*" not in pattern:
                    resolved_input_files.append(pattern)
                    continue
                if pattern.startswith("root://"):
                    raise NotImplementedError("Cannot glob with XRootD URLs")
                matches = glob.glob(pattern, recursive=True)
                if not matches:
                    raise ValueError(f"No input files found matching {pattern!r}")
                resolved_input_files += matches
            return resolved_input_files
        return input_files

    @model_validator(mode="before")
    @classmethod
    def validate_input(cls, values):
        """Reject raw input data with a missing or empty ``input_files``."""
        # "before" validators receive the raw input, which need not be a dict;
        # anything else is left for the normal validation to reject.
        if isinstance(values, dict) and not values.get("input_files"):
            raise ValueError("'input_files' is required.")
        return values

    class Config:
        use_enum_values = True
        frozen = True
        extra = "forbid"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _expand_braces(text):
    """Perform bash-like brace expansion

    See: https://www.gnu.org/software/bash/manual/html_node/Brace-Expansion.html

    There are two notable deviations from the bash behaviour:
     * Duplicates are removed from the output
     * The order of the returned results can differ

    This is a generator; empty results are not yielded.
    """
    already_seen = set()
    # HACK: Characters from a reserved unicode page stand in for patterns like
    # {abc} that contain no comma, so that their curly braces survive into the
    # output. The first entry maps the separator character to nothing; further
    # entries are added by the implementation while it runs.
    placeholders = {"\uE000": ""}
    for expanded in _expand_braces_impl(text, already_seen, placeholders):
        # Undo the placeholders, most recently registered first
        for marker, literal in reversed(placeholders.items()):
            expanded = expanded.replace(marker, literal)
        if not expanded:
            continue
        yield expanded
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _expand_braces_impl(text, seen, substitutions):
    """Recursive generator behind brace expansion.

    Finds every ``{...}`` group in ``text`` that matches one of the patterns
    below, builds the list of alternatives for each group, and recurses on
    every combination of alternatives until no group is left.

    Args:
        text: the string to expand.
        seen: set of fully expanded strings already yielded; updated in place
            and used to suppress duplicates.
        substitutions: dict mapping placeholder characters (from U+E000
            upwards) to the literal text they stand for; updated in place.

    Yields:
        Expanded strings that still contain placeholder characters.
    """
    # NOTE(review): both numeric parts of this pattern accept an optional
    # decimal fraction, but the matched values are passed to int() below,
    # which raises ValueError for strings such as "1.5" - confirm intent.
    int_range_pattern = r"[\-\+]?[0-9]+(\.[0-9]+)?(\.\.[\-\+]?[0-9]+(\.[0-9]+)?){1,2}"
    char_range_pattern = r"([a-z]\.\.[a-z]|[A-Z]\.\.[A-Z])(\.\.[\-\+]?[0-9]+)?"
    # Alternatives for what may appear between a pair of braces
    patterns = [
        ",",
        r"([^{}]|{})*,([^{}]|{})+",
        r"([^{}]|{})+,([^{}]|{})*",
        int_range_pattern,
        char_range_pattern,
        r"([^{},]|{})+",
    ]
    # Spans are kept in reverse order (last match first); they are replaced in
    # that order further down, so earlier start/stop indices stay valid.
    spans = [m.span() for m in re.finditer(rf"{{({'|'.join(patterns)})}}", text)][::-1]
    if len(spans) == 0:
        # Nothing left to expand: yield the text once
        if text not in seen:
            yield text
            seen.add(text)
        return

    alts = []
    for start, stop in spans:
        alt_full = text[start:stop]
        # Default: the comma separated items between the braces
        alt = alt_full[1:-1].split(",")
        is_int_range = re.fullmatch(rf"{{{int_range_pattern}}}", alt_full)
        is_char_range = re.fullmatch(rf"{{{char_range_pattern}}}", alt_full)
        if is_int_range or is_char_range:
            range_args = alt[0].split("..")
            # If either bound is written with a leading zero, pad every value
            # to the width of the longer bound
            leading_zeros = 0
            if any(
                len(x) > 1 and x.strip("-")[0] == "0" and x.strip("-") != "0"
                for x in range_args[:2]
            ):
                leading_zeros = max(map(len, range_args[:2]))
            start, stop = map(int if is_int_range else ord, range_args[:2])
            # A missing or zero step counts as 1; the sign follows the direction
            step = int(range_args[2]) if len(range_args) == 3 else 0
            step = 1 if step == 0 else abs(int(step))
            if stop < start:
                step = -step
            # Move stop one past the bound so that range() includes it
            stop = stop + int(step / abs(step))
            alt = [
                f"{s:0{leading_zeros}d}" if is_int_range else chr(s)
                for s in range(start, stop, step)
            ]
        elif len(alt) == 1:
            # No comma and not a range: keep the group, braces included, by
            # swapping it for a fresh placeholder character
            substitution = chr(0xE000 + len(substitutions))
            substitutions[substitution] = alt_full
            alt = [substitution]
        alts.append(alt)

    for combo in product(*alts):
        replaced = list(text)
        for (start, stop), replacement in zip(spans, combo):
            # Add dummy characters to prevent brace expansion being applied recursively
            # i.e. "{{0..1}2}" should become "{02}" "{12}" not "02" "12"
            replaced[start:stop] = f"\uE000{replacement}\uE000"

        yield from _expand_braces_impl("".join(replaced), seen, substitutions)
|