groundhog-hpc 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff shows the changes between publicly available package versions as released to a supported registry; it is provided for informational purposes only and reflects the packages as they appear in their public registry.
- groundhog_hpc/__init__.py +4 -0
- groundhog_hpc/app/add.py +74 -0
- groundhog_hpc/app/init.py +54 -10
- groundhog_hpc/app/remove.py +91 -16
- groundhog_hpc/app/run.py +14 -2
- groundhog_hpc/compute.py +23 -8
- groundhog_hpc/configuration/defaults.py +1 -0
- groundhog_hpc/configuration/endpoints.py +38 -171
- groundhog_hpc/configuration/models.py +26 -3
- groundhog_hpc/configuration/pep723.py +279 -3
- groundhog_hpc/configuration/resolver.py +36 -8
- groundhog_hpc/console.py +1 -1
- groundhog_hpc/decorators.py +26 -24
- groundhog_hpc/function.py +63 -36
- groundhog_hpc/future.py +48 -10
- groundhog_hpc/logging.py +51 -0
- groundhog_hpc/serialization.py +22 -2
- groundhog_hpc/templates/init_script.py.jinja +3 -5
- groundhog_hpc/templates/shell_command.sh.jinja +15 -1
- groundhog_hpc/templating.py +17 -0
- {groundhog_hpc-0.5.5.dist-info → groundhog_hpc-0.5.7.dist-info}/METADATA +12 -6
- groundhog_hpc-0.5.7.dist-info/RECORD +34 -0
- groundhog_hpc-0.5.5.dist-info/RECORD +0 -33
- {groundhog_hpc-0.5.5.dist-info → groundhog_hpc-0.5.7.dist-info}/WHEEL +0 -0
- {groundhog_hpc-0.5.5.dist-info → groundhog_hpc-0.5.7.dist-info}/entry_points.txt +0 -0
- {groundhog_hpc-0.5.5.dist-info → groundhog_hpc-0.5.7.dist-info}/licenses/LICENSE +0 -0
@@ -19,6 +19,7 @@ PEP 723 metadata sets sharable defaults, decorators customize per-function,
 and call-time overrides allow runtime changes.
 """
 
+import logging
 from pathlib import Path
 from typing import Any
 
@@ -26,14 +27,16 @@ from groundhog_hpc.configuration.defaults import DEFAULT_USER_CONFIG
 from groundhog_hpc.configuration.models import EndpointConfig, EndpointVariant
 from groundhog_hpc.configuration.pep723 import read_pep723
 
+logger = logging.getLogger(__name__)
+
 
 def _merge_endpoint_configs(
     base_endpoint_config: dict, override_config: dict | None = None
 ) -> dict:
-    """Merge endpoint configurations, ensuring worker_init commands are combined.
+    """Merge endpoint configurations, ensuring worker_init and endpoint_setup commands are combined.
 
-    The worker_init
-    concatenated with the base's
+    The worker_init and endpoint_setup fields are special-cased: if both configs provide them, they are
+    concatenated with the base's commands executed first, followed by the override's.
     All other fields from override_config simply replace fields from base_endpoint_config.
 
     Args:
@@ -44,10 +47,10 @@ def _merge_endpoint_configs(
         A new merged configuration dict
 
     Example:
-        >>> base = {"worker_init": "pip install uv"}
-        >>> override = {"worker_init": "module load
+        >>> base = {"worker_init": "pip install uv", "endpoint_setup": "module load gcc"}
+        >>> override = {"worker_init": "module load python", "endpoint_setup": "module load cuda", "cores": 4}
        >>> _merge_endpoint_configs(base, override)
-        {'worker_init': 'pip install uv\\nmodule load gcc', 'cores': 4}
+        {'worker_init': 'pip install uv\\nmodule load python', 'endpoint_setup': 'module load gcc\\nmodule load cuda', 'cores': 4}
     """
     if not override_config:
         return base_endpoint_config.copy()
@@ -62,6 +65,13 @@ def _merge_endpoint_configs(
         override_init = override_config.pop("worker_init")
         merged["worker_init"] = f"{base_init.strip()}\n{override_init.strip()}\n"
 
+    # Special handling for endpoint_setup: prepend base to override
+    if "endpoint_setup" in override_config and "endpoint_setup" in base_endpoint_config:
+        base_setup = base_endpoint_config["endpoint_setup"]
+        # pop endpoint_setup so update doesn't clobber concatenated value
+        override_setup = override_config.pop("endpoint_setup")
+        merged["endpoint_setup"] = f"{base_setup.strip()}\n{override_setup.strip()}\n"
+
     merged.update(override_config)
     return merged
 
@@ -80,7 +90,7 @@ class ConfigResolver:
     5. Call-time config (.remote(user_endpoint_config={...}))
 
     Special handling:
-    - worker_init commands are concatenated (not replaced) across all layers
+    - worker_init and endpoint_setup commands are concatenated (not replaced) across all layers
     - endpoint field in PEP 723 config can override the endpoint UUID
     - Variants inherit from their base configuration
 
@@ -132,9 +142,12 @@ class ConfigResolver:
            ValidationError: If any config level has invalid fields (e.g., negative walltime)
        """
 
+        logger.debug(f"Resolving config for endpoint: {endpoint_name}")
+
        # Layer 1: Start with DEFAULT_USER_CONFIG
        config = DEFAULT_USER_CONFIG.copy()
        base_name, *variant_path = endpoint_name.split(".")
+        logger.debug(f"Starting with DEFAULT_USER_CONFIG: {config}")
 
        # Layer 2-3: walk base[.variant[.sub]] path hierarchically
        metadata: dict = self._load_pep723_metadata()
@@ -142,8 +155,11 @@ class ConfigResolver:
            metadata.get("tool", {}).get("hog", {}).get(base_name, {}).copy()
        )
        if base_variant:
+            logger.debug(f"Found base config for '{base_name}': {base_variant}")
            EndpointConfig.model_validate(base_variant)
            config["endpoint"] = base_variant.pop("endpoint")
+        else:
+            logger.debug(f"No PEP 723 config found for '{base_name}'")
 
        def _merge_variant_path(
            variant_names: list[str], current_variant: dict, accumulated_config: dict
@@ -164,12 +180,18 @@ class ConfigResolver:
                + variant_path[: len(variant_path) - len(remaining_names)]
            )
            if next_variant is None:
+                logger.error(f"Variant '{next_name}' not found in '{path_so_far}'")
                raise ValueError(f"Variant {next_name} not found in {path_so_far}")
            else:
+                logger.error(
+                    f"Variant '{next_name}' in '{path_so_far}' is not a valid variant "
+                    f"(expected dict, got {type(next_variant).__name__})"
+                )
                raise ValueError(
                    f"Variant {next_name} in {path_so_far} is not a valid variant "
                    f"(expected dict, got {type(next_variant).__name__})"
                )
+            logger.debug(f"Merging variant '{next_name}' config: {next_variant}")
            return _merge_variant_path(
                remaining_names, next_variant, accumulated_config
            )
@@ -177,16 +199,22 @@ class ConfigResolver:
        config = _merge_variant_path(variant_path, base_variant, config)
 
        # Layer 4: Merge decorator config
+        if decorator_config:
+            logger.debug(f"Merging decorator config: {decorator_config}")
        config = _merge_endpoint_configs(config, decorator_config)
 
        # Layer 5: Call-time overrides
+        if call_time_config:
+            logger.debug(f"Merging call-time config: {call_time_config}")
        config = _merge_endpoint_configs(config, call_time_config)
 
        # Layer 5 1/2: we want to ensure uv is installed *after* any user
        # worker_init, e.g. activating a conda env, which might impact the
        # templated shell command's ability to `uv.find_uv_bin()`
-        uv_init_config = {"worker_init": "pip show -qq uv || pip install uv"}
+        uv_init_config = {"worker_init": "pip show -qq uv || pip install uv || true"}
        config = _merge_endpoint_configs(config, uv_init_config)
+
+        logger.debug(f"Final resolved config: {config}")
        return config
 
    def _load_pep723_metadata(self) -> dict[str, Any]:
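
Note: the resolver hunks above special-case worker_init and endpoint_setup during merging. A minimal standalone sketch of that merge rule (the helper name below is illustrative, not the package's code verbatim):

    def merge_endpoint_configs(base: dict, override: dict | None = None) -> dict:
        # Sketch of the rule shown in the diff: worker_init and endpoint_setup are
        # concatenated base-first; every other override field replaces the base value.
        if not override:
            return base.copy()
        merged, override = base.copy(), override.copy()
        for key in ("worker_init", "endpoint_setup"):
            if key in base and key in override:
                # pop so the update() below doesn't clobber the concatenated value
                merged[key] = f"{base[key].strip()}\n{override.pop(key).strip()}\n"
        merged.update(override)
        return merged

    base = {"worker_init": "pip install uv", "endpoint_setup": "module load gcc"}
    override = {"worker_init": "module load python", "endpoint_setup": "module load cuda", "cores": 4}
    merge_endpoint_configs(base, override)
    # {'worker_init': 'pip install uv\nmodule load python\n',
    #  'endpoint_setup': 'module load gcc\nmodule load cuda\n', 'cores': 4}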
groundhog_hpc/console.py CHANGED
@@ -173,7 +173,7 @@ def _get_status_display(
     display.append(")", style="dim")
 
     display.append(" | ", style="dim")
-    display.append(spinner.render(current_time))
+    display.append(spinner.render(current_time))  # type: ignore[arg-type]
 
     return display
 
groundhog_hpc/decorators.py CHANGED
@@ -7,21 +7,24 @@ orchestration.
 
 import functools
 import inspect
-import
+import logging
 from types import FunctionType
 from typing import Any, Callable
 
 from groundhog_hpc.function import Function, Method
 from groundhog_hpc.harness import Harness
 
+logger = logging.getLogger(__name__)
+
 
 def harness() -> Callable[[FunctionType], Harness]:
     """Decorator to mark a function as a local orchestrator harness.
 
     Harness functions:
+
     - Must be called via the CLI: `hog run script.py harness_name`
     - Cannot accept any arguments
-    - Can call
+    - Can call `.remote()` or `.submit()` on `@hog.function`-decorated functions
 
     Returns:
         A decorator function that wraps the harness
@@ -45,28 +48,29 @@ def harness() -> Callable[[FunctionType], Harness]:
 
 def function(
     endpoint: str | None = None,
-    walltime: int | None = None,
     **user_endpoint_config: Any,
 ) -> Callable[[FunctionType], Function]:
     """Decorator to mark a function for remote execution on Globus Compute.
 
     Decorated functions can be:
-
-    - Called
-    -
+
+    - Called locally: `func(args)`
+    - Called remotely (blocking): `func.remote(args)`
+    - Submitted asynchronously: `func.submit(args)`
+    - Called locally in an isolated environment: `func.local(args)`
 
     Args:
-        endpoint: Globus Compute endpoint UUID
-
+        endpoint: Globus Compute endpoint UUID or named endpoint from
+            `[tool.hog.<name>]` PEP 723 metadata
         **user_endpoint_config: Options to pass through to the Executor as
-            user_endpoint_config (e.g. account, partition, etc)
+            user_endpoint_config (e.g. account, partition, walltime, etc)
 
     Returns:
         A decorator function that wraps the function as a Function instance
 
     Example:
         ```python
-        @hog.function(endpoint="my-remote-endpoint-uuid",
+        @hog.function(endpoint="my-remote-endpoint-uuid", account='my-account')
         def train_model(data):
            # This runs on the remote HPC cluster
            model = train(data)
@@ -81,7 +85,7 @@ def function(
     """
 
     def decorator(func: FunctionType) -> Function:
-        wrapper = Function(func, endpoint,
+        wrapper = Function(func, endpoint, **user_endpoint_config)
        functools.update_wrapper(wrapper, func)
        return wrapper
 
@@ -90,24 +94,24 @@ def function(
 
 def method(
     endpoint: str | None = None,
-    walltime: int | None = None,
     **user_endpoint_config: Any,
 ) -> Callable[[FunctionType], Method]:
     """Decorator to mark a class method for remote execution on Globus Compute.
 
-
-    staticmethod-like semantics - the decorated method does not receive self.
+    Analogous to `@hog.function()` but for use with class methods. Provides
+    staticmethod-like semantics - the decorated method does not receive self or cls.
 
     Decorated methods can be:
-
-    - Called
-    -
+
+    - Called locally: `MyClass.method(args)` or `obj.method(args)`
+    - Called remotely (blocking): `MyClass.method.remote(args)`
+    - Submitted asynchronously: `MyClass.method.submit(args)`
+    - Called locally in an isolated environment: `MyClass.method.local(args)`
 
     Args:
         endpoint: Globus Compute endpoint UUID
-        walltime: Maximum execution time in seconds
         **user_endpoint_config: Options to pass through to the Executor as
-            user_endpoint_config (e.g. account, partition, etc)
+            user_endpoint_config (e.g. account, partition, walltime, etc)
 
     Returns:
         A decorator function that wraps the function as a Method instance
@@ -129,15 +133,13 @@ def method(
        sig = inspect.signature(func)
        params = list(sig.parameters.keys())
        if params and params[0] in ("self", "cls"):
-
+            logger.warning(
                f"Method '{func.__name__}' has first parameter '{params[0]}', "
                f"but @hog.method provides staticmethod-like semantics and will not "
-                f"pass the instance or class. Consider removing '{params[0]}' from the signature."
-                UserWarning,
-                stacklevel=2,
+                f"pass the instance or class. Consider removing '{params[0]}' from the signature."
            )
 
-        wrapper = Method(func, endpoint,
+        wrapper = Method(func, endpoint, **user_endpoint_config)
        functools.update_wrapper(wrapper, func)
        return wrapper
 
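
Note: with the walltime parameter removed from both decorators, walltime now travels through **user_endpoint_config like any other endpoint option. A usage sketch based on the docstrings above (the import alias and config values are assumptions, not taken from the package):

    import groundhog_hpc as hog  # assumed import alias; the docstrings only show `hog.`

    @hog.function(endpoint="my-remote-endpoint-uuid", account="my-account", walltime=600)
    def train_model(data):
        # runs on the remote HPC cluster when invoked via .remote()/.submit()
        return sum(data)

    @hog.harness()
    def main():
        # run via the CLI: `hog run script.py main`
        print(train_model.remote([1, 2, 3]))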
groundhog_hpc/function.py CHANGED
@@ -10,6 +10,7 @@ as defaults but overridden when calling .remote() or .submit().
 """
 
 import inspect
+import logging
 import os
 import sys
 import tempfile
@@ -30,6 +31,8 @@ from groundhog_hpc.future import GroundhogFuture
 from groundhog_hpc.serialization import deserialize_stdout, serialize
 from groundhog_hpc.utils import prefix_output
 
+logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     import globus_compute_sdk
 
@@ -44,22 +47,22 @@ class Function:
     """Wrapper that enables a Python function to be executed remotely on Globus Compute.
 
     Decorated functions can be called in four ways:
-
-
-
-
+
+    1. Direct call: `func(*args)` - executes locally (regular python call)
+    2. Remote call: `func.remote(*args)` - executes remotely and blocks until complete
+    3. Async submit: `func.submit(*args)` - executes remotely and returns a GroundhogFuture
+    4. Local subprocess: `func.local(*args)` - executes locally in a separate process
 
     Attributes:
-        endpoint: Default Globus Compute endpoint UUID or
-
-        default_user_endpoint_config: Default endpoint configuration (e.g., worker_init)
+        endpoint: Default Globus Compute endpoint UUID or named endpoint from
+            `[tool.hog.<name>]` PEP 723 metadata, or None to use resolved config
+        default_user_endpoint_config: Default endpoint configuration (e.g., worker_init, walltime)
     """
 
     def __init__(
        self,
        func: FunctionType,
        endpoint: str | None = None,
-        walltime: int | None = None,
        **user_endpoint_config: Any,
     ) -> None:
        """Initialize a Function wrapper.
@@ -67,19 +70,22 @@ class Function:
        Args:
            func: The Python function to wrap
            endpoint: Globus Compute endpoint UUID or named endpoint from `[tool.hog.<name>]` PEP 723
-            walltime: Maximum execution time in seconds (can also be set in config)
            **user_endpoint_config: Additional endpoint configuration to pass to
-                Globus Compute Executor (e.g., worker_init commands)
+                Globus Compute Executor (e.g., worker_init commands, walltime)
        """
        self._script_path: str | None = None
        self.endpoint: str | None = endpoint
-        self.walltime: int | None = walltime
        self.default_user_endpoint_config: dict[str, Any] = user_endpoint_config
 
-
+        # ShellFunction walltime - always None here to prevent conflicts with a
+        # 'walltime' endpoint config, but the attribute exists as an escape
+        # hatch if users need to set it after the function's been created
+        self.walltime: int | float | None = None
+
+        self._wrapped_function: FunctionType = func
        self._config_resolver: ConfigResolver | None = None
 
-    def __call__(self, *args, **kwargs) -> Any:
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the function locally (not remotely).
 
        Args:
@@ -89,7 +95,7 @@ class Function:
        Returns:
            The result of the local function execution
        """
-        return self.
+        return self._wrapped_function(*args, **kwargs)
 
    def _get_available_endpoints_from_pep723(self) -> list[str]:
        """Get list of endpoint names defined in PEP 723 [tool.hog.*] sections."""
@@ -103,7 +109,6 @@ class Function:
        self,
        *args: Any,
        endpoint: str | None = None,
-        walltime: int | None = None,
        user_endpoint_config: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> GroundhogFuture:
@@ -111,8 +116,8 @@ class Function:
 
        Args:
            *args: Positional arguments to pass to the function
-            endpoint: Globus Compute endpoint UUID (
-
+            endpoint: Globus Compute endpoint UUID (or named endpoint from
+                `[tool.hog.<name>]` PEP 723 metadata). Replaces decorator default.
            user_endpoint_config: Endpoint configuration dict (merged with decorator default)
            **kwargs: Keyword arguments to pass to the function
 
@@ -125,21 +130,22 @@ class Function:
            PayloadTooLargeError: If serialized arguments exceed 10MB
        """
        # Check if module has been marked as safe for .remote() calls
-        module = sys.modules.get(self.
+        module = sys.modules.get(self._wrapped_function.__module__)
        if not getattr(module, "__groundhog_imported__", False):
+            logger.error(
+                f"Import safety check failed for module '{self._wrapped_function.__module__}'"
+            )
            raise ModuleImportError(
-                self.
+                self._wrapped_function.__name__,
+                "submit",
+                self._wrapped_function.__module__,
            )
 
+        logger.debug(f"Preparing to submit function '{self.name}'")
        endpoint = endpoint or self.endpoint
 
        decorator_config = self.default_user_endpoint_config.copy()
-        if self.walltime is not None:
-            decorator_config["walltime"] = self.walltime
-
        call_time_config = user_endpoint_config.copy() if user_endpoint_config else {}
-        if walltime is not None:
-            call_time_config["walltime"] = walltime
 
        # merge all config sources
        config = self.config_resolver.resolve(
@@ -158,15 +164,22 @@ class Function:
            available_endpoints = self._get_available_endpoints_from_pep723()
            if available_endpoints:
                endpoints_str = ", ".join(f"'{e}'" for e in available_endpoints)
+                logger.error(f"No endpoint specified. Available: {endpoints_str}")
                raise ValueError(
                    f"No endpoint specified. Available endpoints found in config: {endpoints_str}. "
                    f"Call with endpoint=<name>, or specify a function default endpoint in decorator."
                )
            else:
+                logger.error("No endpoint specified and none found in config")
                raise ValueError("No endpoint specified")
 
+        logger.debug(
+            f"Serializing {len(args)} args and {len(kwargs)} kwargs for '{self.name}'"
+        )
        payload = serialize((args, kwargs), use_proxy=False, proxy_threshold_mb=None)
-        shell_function = script_to_submittable(
+        shell_function = script_to_submittable(
+            self.script_path, self.name, payload, walltime=self.walltime
+        )
 
        future: GroundhogFuture = submit_to_executor(
            UUID(endpoint),
@@ -182,7 +195,6 @@ class Function:
        self,
        *args: Any,
        endpoint: str | None = None,
-        walltime: int | None = None,
        user_endpoint_config: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Any:
@@ -193,8 +205,8 @@ class Function:
 
        Args:
            *args: Positional arguments to pass to the function
-            endpoint: Globus Compute endpoint UUID (
-
+            endpoint: Globus Compute endpoint UUID (or named endpoint from
+                `[tool.hog.<name>]` PEP 723 metadata). Replaces decorator default.
            user_endpoint_config: Endpoint configuration dict (merged with decorator default)
            **kwargs: Keyword arguments to pass to the function
 
@@ -207,15 +219,17 @@ class Function:
            PayloadTooLargeError: If serialized arguments exceed 10MB
            RemoteExecutionError: If remote execution fails (non-zero exit code)
        """
+        logger.debug(f"Calling remote execution for '{self.name}'")
        future = self.submit(
            *args,
            endpoint=endpoint,
-            walltime=walltime,
            user_endpoint_config=user_endpoint_config,
            **kwargs,
        )
        display_task_status(future)
-
+        result = future.result()
+        logger.debug(f"Remote execution of '{self.name}' completed successfully")
+        return result
 
    def local(self, *args: Any, **kwargs: Any) -> Any:
        """Execute the function locally in an isolated subprocess.
@@ -233,12 +247,18 @@ class Function:
            LocalExecutionError: If local execution fails (non-zero exit code)
        """
        # Check if module has been marked as safe for .local() calls
-        module = sys.modules.get(self.
+        module = sys.modules.get(self._wrapped_function.__module__)
        if not getattr(module, "__groundhog_imported__", False):
+            logger.error(
+                f"Import safety check failed for module '{self._wrapped_function.__module__}'"
+            )
            raise ModuleImportError(
-                self.
+                self._wrapped_function.__name__,
+                "local",
+                self._wrapped_function.__module__,
            )
 
+        logger.debug(f"Executing function '{self.name}' in local subprocess")
        with prefix_output(prefix="[local]", prefix_color="blue"):
            # Create ShellFunction just like we do for remote execution
            payload = serialize((args, kwargs), proxy_threshold_mb=1.0)
@@ -254,6 +274,9 @@ class Function:
            assert not isinstance(result, dict)
 
            if result.returncode != 0:
+                logger.error(
+                    f"Local subprocess failed with exit code {result.returncode}"
+                )
                if result.stderr:
                    print(result.stderr, file=sys.stderr)
                if result.stdout:
@@ -266,12 +289,16 @@ class Function:
            try:
                user_stdout, deserialized_result = deserialize_stdout(result.stdout)
            except DeserializationError as e:
+                logger.error(f"Failed to deserialize local result: {e}")
                if result.stderr:
                    print(result.stderr, file=sys.stderr)
                if e.user_output:
                    print(e.user_output)
                raise
            else:
+                logger.debug(
+                    f"Local execution of '{self.name}' completed successfully"
+                )
                if result.stderr:
                    print(result.stderr, file=sys.stderr)
                if user_stdout:
@@ -297,7 +324,7 @@ class Function:
            return self._script_path
 
        try:
-            source_file = inspect.getfile(self.
+            source_file = inspect.getfile(self._wrapped_function)
            self._script_path = str(Path(source_file).resolve())
            return self._script_path
        except (TypeError, OSError) as e:
@@ -315,13 +342,13 @@ class Function:
 
    @property
    def name(self) -> str:
-        return self.
+        return self._wrapped_function.__qualname__
 
 
 class Method(Function):
-    """
+    """Minimal descriptor variant of Function for use as class methods.
 
-    Provides staticmethod-like semantics (no self) with remote execution.
+    Provides staticmethod-like semantics (no `self`/`cls`) with remote execution.
     """
 
     def __get__(self, obj, objtype=None):
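
Note: the Function docstring above lists four call styles; a brief sketch of how they differ (reusing the hypothetical train_model from the earlier example):

    result = train_model([1, 2, 3])         # 1. direct call: plain local Python execution
    result = train_model.remote([1, 2, 3])  # 2. remote call: blocks until the task completes
    future = train_model.submit([1, 2, 3])  # 3. async submit: returns a GroundhogFuture
    result = future.result()
    result = train_model.local([1, 2, 3])   # 4. local run in an isolated subprocess

    # per-call overrides replace the decorator's endpoint and merge with its config
    future = train_model.submit(
        [1, 2, 3],
        endpoint="gpu",  # hypothetical named endpoint from [tool.hog.gpu] metadata
        user_endpoint_config={"partition": "gpu", "walltime": 1200},
    )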
groundhog_hpc/future.py CHANGED
@@ -22,16 +22,16 @@ else:
 
 class GroundhogFuture(Future):
     """A Future that deserializes stdout for its .result(), but still allows
-    access to the raw ShellResult
+    access to the raw `ShellResult`.
 
     This future automatically deserializes the payload when .result() is called,
-    but preserves access to the original ShellResult (with stdout, stderr, returncode)
+    but preserves access to the original `ShellResult` (with stdout, stderr, returncode)
     via the .shell_result property.
 
     Attributes:
         task_id: Globus Compute task ID (set when the future completes)
-        endpoint:
-        user_endpoint_config:
+        endpoint: The endpoint where the task was submitted
+        user_endpoint_config: Resolved configuration dict used for the endpoint
         function_name: Name of the function being executed
     """
 
@@ -48,9 +48,9 @@ class GroundhogFuture(Future):
        self._user_stdout: str | None = None
 
        # set after created in Function.submit, useful for invocation logs etc
-        self.
-        self.
-        self.
+        self._endpoint: str | None = None
+        self._user_endpoint_config: dict[str, Any] | None = None
+        self._function_name: str | None = None
 
        def callback(fut: Future) -> None:
            try:
@@ -69,7 +69,7 @@ class GroundhogFuture(Future):
 
    @property
    def shell_result(self) -> ShellResult:
-        """Access the raw ShellResult with stdout, stderr, returncode.
+        """Access the raw Globus Compute `ShellResult` with stdout, stderr, returncode.
 
        This property provides access to the underlying shell execution metadata,
        which can be useful for debugging, logging, or inspecting stderr output
@@ -93,7 +93,45 @@ class GroundhogFuture(Future):
 
    @property
    def task_id(self) -> str | None:
-
+        """The Globus Compute task ID for this future.
+
+        Returns the task ID from the underlying Globus Compute future, which may
+        not be populated immediately.
+        """
+        return self._original_future.task_id  # type: ignore[attr-defined]
+
+    @property
+    def endpoint(self) -> str | None:
+        """The endpoint where this task was submitted."""
+        return self._endpoint
+
+    @endpoint.setter
+    def endpoint(self, value: str | None) -> None:
+        self._endpoint = value
+
+    @property
+    def user_endpoint_config(self) -> dict[str, Any] | None:
+        """The endpoint configuration used for this task submission.
+
+        Set by `Function.submit()` when the task is created. Contains
+        configuration like account, partition, walltime, etc. Useful for
+        debugging, since this is the final resolved config that was actually
+        passed to the `Executor`.
+        """
+        return self._user_endpoint_config
+
+    @user_endpoint_config.setter
+    def user_endpoint_config(self, value: dict[str, Any] | None) -> None:
+        self._user_endpoint_config = value
+
+    @property
+    def function_name(self) -> str | None:
+        """The name of the function being executed."""
+        return self._function_name
+
+    @function_name.setter
+    def function_name(self, value: str | None) -> None:
+        self._function_name = value
 
 
 def _truncate_payload_in_cmd(cmd: str, max_length: int = 100) -> str:
@@ -123,7 +161,7 @@ def _truncate_payload_in_cmd(cmd: str, max_length: int = 100) -> str:
 
 
 def _process_shell_result(shell_result: ShellResult) -> tuple[str | None, Any]:
-    """Process a ShellResult by checking for errors and deserializing the result payload.
+    """Process a `ShellResult` by checking for errors and deserializing the result payload.
 
     The stdout contains two parts separated by "__GROUNDHOG_RESULT__":
     1. User output (from the .stdout file) - returned as first element of tuple