groundhog-hpc 0.5.6__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundhog_hpc/__init__.py +4 -0
- groundhog_hpc/app/add.py +74 -0
- groundhog_hpc/app/init.py +54 -10
- groundhog_hpc/app/main.py +5 -1
- groundhog_hpc/app/remove.py +91 -16
- groundhog_hpc/app/run.py +70 -2
- groundhog_hpc/compute.py +16 -1
- groundhog_hpc/configuration/defaults.py +1 -0
- groundhog_hpc/configuration/endpoints.py +38 -171
- groundhog_hpc/configuration/models.py +26 -0
- groundhog_hpc/configuration/pep723.py +278 -2
- groundhog_hpc/configuration/resolver.py +36 -8
- groundhog_hpc/console.py +1 -1
- groundhog_hpc/decorators.py +35 -16
- groundhog_hpc/function.py +53 -19
- groundhog_hpc/future.py +48 -10
- groundhog_hpc/harness.py +15 -19
- groundhog_hpc/logging.py +51 -0
- groundhog_hpc/serialization.py +22 -2
- groundhog_hpc/templates/init_script.py.jinja +4 -5
- groundhog_hpc/templates/shell_command.sh.jinja +15 -1
- groundhog_hpc/templating.py +17 -0
- {groundhog_hpc-0.5.6.dist-info → groundhog_hpc-0.7.0.dist-info}/METADATA +12 -6
- groundhog_hpc-0.7.0.dist-info/RECORD +34 -0
- groundhog_hpc-0.5.6.dist-info/RECORD +0 -33
- {groundhog_hpc-0.5.6.dist-info → groundhog_hpc-0.7.0.dist-info}/WHEEL +0 -0
- {groundhog_hpc-0.5.6.dist-info → groundhog_hpc-0.7.0.dist-info}/entry_points.txt +0 -0
- {groundhog_hpc-0.5.6.dist-info → groundhog_hpc-0.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -19,6 +19,7 @@ PEP 723 metadata sets sharable defaults, decorators customize per-function,
|
|
|
19
19
|
and call-time overrides allow runtime changes.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
+
import logging
|
|
22
23
|
from pathlib import Path
|
|
23
24
|
from typing import Any
|
|
24
25
|
|
|
@@ -26,14 +27,16 @@ from groundhog_hpc.configuration.defaults import DEFAULT_USER_CONFIG
|
|
|
26
27
|
from groundhog_hpc.configuration.models import EndpointConfig, EndpointVariant
|
|
27
28
|
from groundhog_hpc.configuration.pep723 import read_pep723
|
|
28
29
|
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
29
32
|
|
|
30
33
|
def _merge_endpoint_configs(
|
|
31
34
|
base_endpoint_config: dict, override_config: dict | None = None
|
|
32
35
|
) -> dict:
|
|
33
|
-
"""Merge endpoint configurations, ensuring worker_init commands are combined.
|
|
36
|
+
"""Merge endpoint configurations, ensuring worker_init and endpoint_setup commands are combined.
|
|
34
37
|
|
|
35
|
-
The worker_init
|
|
36
|
-
concatenated with the base's
|
|
38
|
+
The worker_init and endpoint_setup fields are special-cased: if both configs provide them, they are
|
|
39
|
+
concatenated with the base's commands executed first, followed by the override's.
|
|
37
40
|
All other fields from override_config simply replace fields from base_endpoint_config.
|
|
38
41
|
|
|
39
42
|
Args:
|
|
@@ -44,10 +47,10 @@ def _merge_endpoint_configs(
|
|
|
44
47
|
A new merged configuration dict
|
|
45
48
|
|
|
46
49
|
Example:
|
|
47
|
-
>>> base = {"worker_init": "pip install uv"}
|
|
48
|
-
>>> override = {"worker_init": "module load
|
|
50
|
+
>>> base = {"worker_init": "pip install uv", "endpoint_setup": "module load gcc"}
|
|
51
|
+
>>> override = {"worker_init": "module load python", "endpoint_setup": "module load cuda", "cores": 4}
|
|
49
52
|
>>> _merge_endpoint_configs(base, override)
|
|
50
|
-
{'worker_init': 'pip install uv\\nmodule load gcc', 'cores': 4}
|
|
53
|
+
{'worker_init': 'pip install uv\\nmodule load python\\n', 'endpoint_setup': 'module load gcc\\nmodule load cuda\\n', 'cores': 4}
|
|
51
54
|
"""
|
|
52
55
|
if not override_config:
|
|
53
56
|
return base_endpoint_config.copy()
|
|
@@ -62,6 +65,13 @@ def _merge_endpoint_configs(
|
|
|
62
65
|
override_init = override_config.pop("worker_init")
|
|
63
66
|
merged["worker_init"] = f"{base_init.strip()}\n{override_init.strip()}\n"
|
|
64
67
|
|
|
68
|
+
# Special handling for endpoint_setup: prepend base to override
|
|
69
|
+
if "endpoint_setup" in override_config and "endpoint_setup" in base_endpoint_config:
|
|
70
|
+
base_setup = base_endpoint_config["endpoint_setup"]
|
|
71
|
+
# pop endpoint_setup so update doesn't clobber concatenated value
|
|
72
|
+
override_setup = override_config.pop("endpoint_setup")
|
|
73
|
+
merged["endpoint_setup"] = f"{base_setup.strip()}\n{override_setup.strip()}\n"
|
|
74
|
+
|
|
65
75
|
merged.update(override_config)
|
|
66
76
|
return merged
|
|
67
77
|
|
|
@@ -80,7 +90,7 @@ class ConfigResolver:
|
|
|
80
90
|
5. Call-time config (.remote(user_endpoint_config={...}))
|
|
81
91
|
|
|
82
92
|
Special handling:
|
|
83
|
-
- worker_init commands are concatenated (not replaced) across all layers
|
|
93
|
+
- worker_init and endpoint_setup commands are concatenated (not replaced) across all layers
|
|
84
94
|
- endpoint field in PEP 723 config can override the endpoint UUID
|
|
85
95
|
- Variants inherit from their base configuration
|
|
86
96
|
|
|
@@ -132,9 +142,12 @@ class ConfigResolver:
|
|
|
132
142
|
ValidationError: If any config level has invalid fields (e.g., negative walltime)
|
|
133
143
|
"""
|
|
134
144
|
|
|
145
|
+
logger.debug(f"Resolving config for endpoint: {endpoint_name}")
|
|
146
|
+
|
|
135
147
|
# Layer 1: Start with DEFAULT_USER_CONFIG
|
|
136
148
|
config = DEFAULT_USER_CONFIG.copy()
|
|
137
149
|
base_name, *variant_path = endpoint_name.split(".")
|
|
150
|
+
logger.debug(f"Starting with DEFAULT_USER_CONFIG: {config}")
|
|
138
151
|
|
|
139
152
|
# Layer 2-3: walk base[.variant[.sub]] path hierarchically
|
|
140
153
|
metadata: dict = self._load_pep723_metadata()
|
|
@@ -142,8 +155,11 @@ class ConfigResolver:
|
|
|
142
155
|
metadata.get("tool", {}).get("hog", {}).get(base_name, {}).copy()
|
|
143
156
|
)
|
|
144
157
|
if base_variant:
|
|
158
|
+
logger.debug(f"Found base config for '{base_name}': {base_variant}")
|
|
145
159
|
EndpointConfig.model_validate(base_variant)
|
|
146
160
|
config["endpoint"] = base_variant.pop("endpoint")
|
|
161
|
+
else:
|
|
162
|
+
logger.debug(f"No PEP 723 config found for '{base_name}'")
|
|
147
163
|
|
|
148
164
|
def _merge_variant_path(
|
|
149
165
|
variant_names: list[str], current_variant: dict, accumulated_config: dict
|
|
@@ -164,12 +180,18 @@ class ConfigResolver:
|
|
|
164
180
|
+ variant_path[: len(variant_path) - len(remaining_names)]
|
|
165
181
|
)
|
|
166
182
|
if next_variant is None:
|
|
183
|
+
logger.error(f"Variant '{next_name}' not found in '{path_so_far}'")
|
|
167
184
|
raise ValueError(f"Variant {next_name} not found in {path_so_far}")
|
|
168
185
|
else:
|
|
186
|
+
logger.error(
|
|
187
|
+
f"Variant '{next_name}' in '{path_so_far}' is not a valid variant "
|
|
188
|
+
f"(expected dict, got {type(next_variant).__name__})"
|
|
189
|
+
)
|
|
169
190
|
raise ValueError(
|
|
170
191
|
f"Variant {next_name} in {path_so_far} is not a valid variant "
|
|
171
192
|
f"(expected dict, got {type(next_variant).__name__})"
|
|
172
193
|
)
|
|
194
|
+
logger.debug(f"Merging variant '{next_name}' config: {next_variant}")
|
|
173
195
|
return _merge_variant_path(
|
|
174
196
|
remaining_names, next_variant, accumulated_config
|
|
175
197
|
)
|
|
@@ -177,16 +199,22 @@ class ConfigResolver:
|
|
|
177
199
|
config = _merge_variant_path(variant_path, base_variant, config)
|
|
178
200
|
|
|
179
201
|
# Layer 4: Merge decorator config
|
|
202
|
+
if decorator_config:
|
|
203
|
+
logger.debug(f"Merging decorator config: {decorator_config}")
|
|
180
204
|
config = _merge_endpoint_configs(config, decorator_config)
|
|
181
205
|
|
|
182
206
|
# Layer 5: Call-time overrides
|
|
207
|
+
if call_time_config:
|
|
208
|
+
logger.debug(f"Merging call-time config: {call_time_config}")
|
|
183
209
|
config = _merge_endpoint_configs(config, call_time_config)
|
|
184
210
|
|
|
185
211
|
# Layer 5 1/2: we want to ensure uv is installed *after* any user
|
|
186
212
|
# worker_init, e.g. activating a conda env, which might impact the
|
|
187
213
|
# templated shell command's ability to `uv.find_uv_bin()`
|
|
188
|
-
uv_init_config = {"worker_init": "pip show -qq uv || pip install uv"}
|
|
214
|
+
uv_init_config = {"worker_init": "pip show -qq uv || pip install uv || true"}
|
|
189
215
|
config = _merge_endpoint_configs(config, uv_init_config)
|
|
216
|
+
|
|
217
|
+
logger.debug(f"Final resolved config: {config}")
|
|
190
218
|
return config
|
|
191
219
|
|
|
192
220
|
def _load_pep723_metadata(self) -> dict[str, Any]:
|
groundhog_hpc/console.py
CHANGED
|
@@ -173,7 +173,7 @@ def _get_status_display(
|
|
|
173
173
|
display.append(")", style="dim")
|
|
174
174
|
|
|
175
175
|
display.append(" | ", style="dim")
|
|
176
|
-
display.append(spinner.render(current_time))
|
|
176
|
+
display.append(spinner.render(current_time)) # type: ignore[arg-type]
|
|
177
177
|
|
|
178
178
|
return display
|
|
179
179
|
|
groundhog_hpc/decorators.py
CHANGED
|
@@ -7,32 +7,49 @@ orchestration.
|
|
|
7
7
|
|
|
8
8
|
import functools
|
|
9
9
|
import inspect
|
|
10
|
-
import
|
|
10
|
+
import logging
|
|
11
11
|
from types import FunctionType
|
|
12
12
|
from typing import Any, Callable
|
|
13
13
|
|
|
14
14
|
from groundhog_hpc.function import Function, Method
|
|
15
15
|
from groundhog_hpc.harness import Harness
|
|
16
16
|
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
17
19
|
|
|
18
20
|
def harness() -> Callable[[FunctionType], Harness]:
|
|
19
21
|
"""Decorator to mark a function as a local orchestrator harness.
|
|
20
22
|
|
|
23
|
+
Harness functions are entry points that coordinate remote function calls.
|
|
24
|
+
They run locally and can accept parameters passed as CLI arguments.
|
|
25
|
+
|
|
21
26
|
Harness functions:
|
|
22
|
-
|
|
23
|
-
-
|
|
24
|
-
- Can
|
|
27
|
+
|
|
28
|
+
- Are invoked via the CLI: `hog run script.py [harness_name]`
|
|
29
|
+
- Can accept parameters, which map to CLI arguments
|
|
30
|
+
- Can call `.remote()` or `.submit()` on `@hog.function`-decorated functions
|
|
25
31
|
|
|
26
32
|
Returns:
|
|
27
33
|
A decorator function that wraps the harness
|
|
28
34
|
|
|
29
35
|
Example:
|
|
36
|
+
Zero-argument harness:
|
|
30
37
|
```python
|
|
31
38
|
@hog.harness()
|
|
32
39
|
def main():
|
|
33
40
|
result = my_function.remote("far out, man!")
|
|
34
41
|
return result
|
|
35
42
|
```
|
|
43
|
+
|
|
44
|
+
Parameterized harness:
|
|
45
|
+
```python
|
|
46
|
+
@hog.harness()
|
|
47
|
+
def train(dataset: str, epochs: int = 10):
|
|
48
|
+
result = train_model.remote(dataset, epochs)
|
|
49
|
+
return result
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Run with: `hog run script.py train -- my_data --epochs=20`
|
|
36
53
|
"""
|
|
37
54
|
|
|
38
55
|
def decorator(func: FunctionType) -> Harness:
|
|
@@ -50,9 +67,11 @@ def function(
|
|
|
50
67
|
"""Decorator to mark a function for remote execution on Globus Compute.
|
|
51
68
|
|
|
52
69
|
Decorated functions can be:
|
|
53
|
-
|
|
54
|
-
- Called
|
|
55
|
-
-
|
|
70
|
+
|
|
71
|
+
- Called locally: `func(args)`
|
|
72
|
+
- Called remotely (blocking): `func.remote(args)`
|
|
73
|
+
- Submitted asynchronously: `func.submit(args)`
|
|
74
|
+
- Called locally in an isolated environment: `func.local(args)`
|
|
56
75
|
|
|
57
76
|
Args:
|
|
58
77
|
endpoint: Globus Compute endpoint UUID or named endpoint from
|
|
@@ -93,13 +112,15 @@ def method(
|
|
|
93
112
|
) -> Callable[[FunctionType], Method]:
|
|
94
113
|
"""Decorator to mark a class method for remote execution on Globus Compute.
|
|
95
114
|
|
|
96
|
-
|
|
97
|
-
staticmethod-like semantics - the decorated method does not receive self.
|
|
115
|
+
Analogous to `@hog.function()` but for use with class methods. Provides
|
|
116
|
+
staticmethod-like semantics - the decorated method does not receive self or cls.
|
|
98
117
|
|
|
99
118
|
Decorated methods can be:
|
|
100
|
-
|
|
101
|
-
- Called
|
|
102
|
-
-
|
|
119
|
+
|
|
120
|
+
- Called locally: `MyClass.method(args)` or `obj.method(args)`
|
|
121
|
+
- Called remotely (blocking): `MyClass.method.remote(args)`
|
|
122
|
+
- Submitted asynchronously: `MyClass.method.submit(args)`
|
|
123
|
+
- Called locally in an isolated environment: `MyClass.method.local(args)`
|
|
103
124
|
|
|
104
125
|
Args:
|
|
105
126
|
endpoint: Globus Compute endpoint UUID
|
|
@@ -126,12 +147,10 @@ def method(
|
|
|
126
147
|
sig = inspect.signature(func)
|
|
127
148
|
params = list(sig.parameters.keys())
|
|
128
149
|
if params and params[0] in ("self", "cls"):
|
|
129
|
-
|
|
150
|
+
logger.warning(
|
|
130
151
|
f"Method '{func.__name__}' has first parameter '{params[0]}', "
|
|
131
152
|
f"but @hog.method provides staticmethod-like semantics and will not "
|
|
132
|
-
f"pass the instance or class. Consider removing '{params[0]}' from the signature."
|
|
133
|
-
UserWarning,
|
|
134
|
-
stacklevel=2,
|
|
153
|
+
f"pass the instance or class. Consider removing '{params[0]}' from the signature."
|
|
135
154
|
)
|
|
136
155
|
|
|
137
156
|
wrapper = Method(func, endpoint, **user_endpoint_config)
|
groundhog_hpc/function.py
CHANGED
|
@@ -10,6 +10,7 @@ as defaults but overridden when calling .remote() or .submit().
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import inspect
|
|
13
|
+
import logging
|
|
13
14
|
import os
|
|
14
15
|
import sys
|
|
15
16
|
import tempfile
|
|
@@ -30,6 +31,8 @@ from groundhog_hpc.future import GroundhogFuture
|
|
|
30
31
|
from groundhog_hpc.serialization import deserialize_stdout, serialize
|
|
31
32
|
from groundhog_hpc.utils import prefix_output
|
|
32
33
|
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
33
36
|
if TYPE_CHECKING:
|
|
34
37
|
import globus_compute_sdk
|
|
35
38
|
|
|
@@ -44,13 +47,15 @@ class Function:
|
|
|
44
47
|
"""Wrapper that enables a Python function to be executed remotely on Globus Compute.
|
|
45
48
|
|
|
46
49
|
Decorated functions can be called in four ways:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
50
|
+
|
|
51
|
+
1. Direct call: `func(*args)` - executes locally (regular python call)
|
|
52
|
+
2. Remote call: `func.remote(*args)` - executes remotely and blocks until complete
|
|
53
|
+
3. Async submit: `func.submit(*args)` - executes remotely and returns a GroundhogFuture
|
|
54
|
+
4. Local subprocess: `func.local(*args)` - executes locally in a separate process
|
|
51
55
|
|
|
52
56
|
Attributes:
|
|
53
|
-
endpoint: Default Globus Compute endpoint UUID or
|
|
57
|
+
endpoint: Default Globus Compute endpoint UUID or named endpoint from
|
|
58
|
+
`[tool.hog.<name>]` PEP 723 metadata, or None to use resolved config
|
|
54
59
|
default_user_endpoint_config: Default endpoint configuration (e.g., worker_init, walltime)
|
|
55
60
|
"""
|
|
56
61
|
|
|
@@ -77,10 +82,10 @@ class Function:
|
|
|
77
82
|
# hatch if users need to set it after the function's been created
|
|
78
83
|
self.walltime: int | float | None = None
|
|
79
84
|
|
|
80
|
-
self.
|
|
85
|
+
self._wrapped_function: FunctionType = func
|
|
81
86
|
self._config_resolver: ConfigResolver | None = None
|
|
82
87
|
|
|
83
|
-
def __call__(self, *args, **kwargs) -> Any:
|
|
88
|
+
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
84
89
|
"""Execute the function locally (not remotely).
|
|
85
90
|
|
|
86
91
|
Args:
|
|
@@ -90,7 +95,7 @@ class Function:
|
|
|
90
95
|
Returns:
|
|
91
96
|
The result of the local function execution
|
|
92
97
|
"""
|
|
93
|
-
return self.
|
|
98
|
+
return self._wrapped_function(*args, **kwargs)
|
|
94
99
|
|
|
95
100
|
def _get_available_endpoints_from_pep723(self) -> list[str]:
|
|
96
101
|
"""Get list of endpoint names defined in PEP 723 [tool.hog.*] sections."""
|
|
@@ -111,7 +116,8 @@ class Function:
|
|
|
111
116
|
|
|
112
117
|
Args:
|
|
113
118
|
*args: Positional arguments to pass to the function
|
|
114
|
-
endpoint: Globus Compute endpoint UUID (
|
|
119
|
+
endpoint: Globus Compute endpoint UUID (or named endpoint from
|
|
120
|
+
`[tool.hog.<name>]` PEP 723 metadata). Replaces decorator default.
|
|
115
121
|
user_endpoint_config: Endpoint configuration dict (merged with decorator default)
|
|
116
122
|
**kwargs: Keyword arguments to pass to the function
|
|
117
123
|
|
|
@@ -124,12 +130,18 @@ class Function:
|
|
|
124
130
|
PayloadTooLargeError: If serialized arguments exceed 10MB
|
|
125
131
|
"""
|
|
126
132
|
# Check if module has been marked as safe for .remote() calls
|
|
127
|
-
module = sys.modules.get(self.
|
|
133
|
+
module = sys.modules.get(self._wrapped_function.__module__)
|
|
128
134
|
if not getattr(module, "__groundhog_imported__", False):
|
|
135
|
+
logger.error(
|
|
136
|
+
f"Import safety check failed for module '{self._wrapped_function.__module__}'"
|
|
137
|
+
)
|
|
129
138
|
raise ModuleImportError(
|
|
130
|
-
self.
|
|
139
|
+
self._wrapped_function.__name__,
|
|
140
|
+
"submit",
|
|
141
|
+
self._wrapped_function.__module__,
|
|
131
142
|
)
|
|
132
143
|
|
|
144
|
+
logger.debug(f"Preparing to submit function '{self.name}'")
|
|
133
145
|
endpoint = endpoint or self.endpoint
|
|
134
146
|
|
|
135
147
|
decorator_config = self.default_user_endpoint_config.copy()
|
|
@@ -152,13 +164,18 @@ class Function:
|
|
|
152
164
|
available_endpoints = self._get_available_endpoints_from_pep723()
|
|
153
165
|
if available_endpoints:
|
|
154
166
|
endpoints_str = ", ".join(f"'{e}'" for e in available_endpoints)
|
|
167
|
+
logger.error(f"No endpoint specified. Available: {endpoints_str}")
|
|
155
168
|
raise ValueError(
|
|
156
169
|
f"No endpoint specified. Available endpoints found in config: {endpoints_str}. "
|
|
157
170
|
f"Call with endpoint=<name>, or specify a function default endpoint in decorator."
|
|
158
171
|
)
|
|
159
172
|
else:
|
|
173
|
+
logger.error("No endpoint specified and none found in config")
|
|
160
174
|
raise ValueError("No endpoint specified")
|
|
161
175
|
|
|
176
|
+
logger.debug(
|
|
177
|
+
f"Serializing {len(args)} args and {len(kwargs)} kwargs for '{self.name}'"
|
|
178
|
+
)
|
|
162
179
|
payload = serialize((args, kwargs), use_proxy=False, proxy_threshold_mb=None)
|
|
163
180
|
shell_function = script_to_submittable(
|
|
164
181
|
self.script_path, self.name, payload, walltime=self.walltime
|
|
@@ -188,7 +205,8 @@ class Function:
|
|
|
188
205
|
|
|
189
206
|
Args:
|
|
190
207
|
*args: Positional arguments to pass to the function
|
|
191
|
-
endpoint: Globus Compute endpoint UUID (
|
|
208
|
+
endpoint: Globus Compute endpoint UUID (or named endpoint from
|
|
209
|
+
`[tool.hog.<name>]` PEP 723 metadata). Replaces decorator default.
|
|
192
210
|
user_endpoint_config: Endpoint configuration dict (merged with decorator default)
|
|
193
211
|
**kwargs: Keyword arguments to pass to the function
|
|
194
212
|
|
|
@@ -201,6 +219,7 @@ class Function:
|
|
|
201
219
|
PayloadTooLargeError: If serialized arguments exceed 10MB
|
|
202
220
|
RemoteExecutionError: If remote execution fails (non-zero exit code)
|
|
203
221
|
"""
|
|
222
|
+
logger.debug(f"Calling remote execution for '{self.name}'")
|
|
204
223
|
future = self.submit(
|
|
205
224
|
*args,
|
|
206
225
|
endpoint=endpoint,
|
|
@@ -208,7 +227,9 @@ class Function:
|
|
|
208
227
|
**kwargs,
|
|
209
228
|
)
|
|
210
229
|
display_task_status(future)
|
|
211
|
-
|
|
230
|
+
result = future.result()
|
|
231
|
+
logger.debug(f"Remote execution of '{self.name}' completed successfully")
|
|
232
|
+
return result
|
|
212
233
|
|
|
213
234
|
def local(self, *args: Any, **kwargs: Any) -> Any:
|
|
214
235
|
"""Execute the function locally in an isolated subprocess.
|
|
@@ -226,12 +247,18 @@ class Function:
|
|
|
226
247
|
LocalExecutionError: If local execution fails (non-zero exit code)
|
|
227
248
|
"""
|
|
228
249
|
# Check if module has been marked as safe for .local() calls
|
|
229
|
-
module = sys.modules.get(self.
|
|
250
|
+
module = sys.modules.get(self._wrapped_function.__module__)
|
|
230
251
|
if not getattr(module, "__groundhog_imported__", False):
|
|
252
|
+
logger.error(
|
|
253
|
+
f"Import safety check failed for module '{self._wrapped_function.__module__}'"
|
|
254
|
+
)
|
|
231
255
|
raise ModuleImportError(
|
|
232
|
-
self.
|
|
256
|
+
self._wrapped_function.__name__,
|
|
257
|
+
"local",
|
|
258
|
+
self._wrapped_function.__module__,
|
|
233
259
|
)
|
|
234
260
|
|
|
261
|
+
logger.debug(f"Executing function '{self.name}' in local subprocess")
|
|
235
262
|
with prefix_output(prefix="[local]", prefix_color="blue"):
|
|
236
263
|
# Create ShellFunction just like we do for remote execution
|
|
237
264
|
payload = serialize((args, kwargs), proxy_threshold_mb=1.0)
|
|
@@ -247,6 +274,9 @@ class Function:
|
|
|
247
274
|
assert not isinstance(result, dict)
|
|
248
275
|
|
|
249
276
|
if result.returncode != 0:
|
|
277
|
+
logger.error(
|
|
278
|
+
f"Local subprocess failed with exit code {result.returncode}"
|
|
279
|
+
)
|
|
250
280
|
if result.stderr:
|
|
251
281
|
print(result.stderr, file=sys.stderr)
|
|
252
282
|
if result.stdout:
|
|
@@ -259,12 +289,16 @@ class Function:
|
|
|
259
289
|
try:
|
|
260
290
|
user_stdout, deserialized_result = deserialize_stdout(result.stdout)
|
|
261
291
|
except DeserializationError as e:
|
|
292
|
+
logger.error(f"Failed to deserialize local result: {e}")
|
|
262
293
|
if result.stderr:
|
|
263
294
|
print(result.stderr, file=sys.stderr)
|
|
264
295
|
if e.user_output:
|
|
265
296
|
print(e.user_output)
|
|
266
297
|
raise
|
|
267
298
|
else:
|
|
299
|
+
logger.debug(
|
|
300
|
+
f"Local execution of '{self.name}' completed successfully"
|
|
301
|
+
)
|
|
268
302
|
if result.stderr:
|
|
269
303
|
print(result.stderr, file=sys.stderr)
|
|
270
304
|
if user_stdout:
|
|
@@ -290,7 +324,7 @@ class Function:
|
|
|
290
324
|
return self._script_path
|
|
291
325
|
|
|
292
326
|
try:
|
|
293
|
-
source_file = inspect.getfile(self.
|
|
327
|
+
source_file = inspect.getfile(self._wrapped_function)
|
|
294
328
|
self._script_path = str(Path(source_file).resolve())
|
|
295
329
|
return self._script_path
|
|
296
330
|
except (TypeError, OSError) as e:
|
|
@@ -308,13 +342,13 @@ class Function:
|
|
|
308
342
|
|
|
309
343
|
@property
|
|
310
344
|
def name(self) -> str:
|
|
311
|
-
return self.
|
|
345
|
+
return self._wrapped_function.__qualname__
|
|
312
346
|
|
|
313
347
|
|
|
314
348
|
class Method(Function):
|
|
315
|
-
"""
|
|
349
|
+
"""Minimal descriptor variant of Function for use as class methods.
|
|
316
350
|
|
|
317
|
-
Provides staticmethod-like semantics (no self) with remote execution.
|
|
351
|
+
Provides staticmethod-like semantics (no `self`/`cls`) with remote execution.
|
|
318
352
|
"""
|
|
319
353
|
|
|
320
354
|
def __get__(self, obj, objtype=None):
|
groundhog_hpc/future.py
CHANGED
|
@@ -22,16 +22,16 @@ else:
|
|
|
22
22
|
|
|
23
23
|
class GroundhogFuture(Future):
|
|
24
24
|
"""A Future that deserializes stdout for its .result(), but still allows
|
|
25
|
-
access to the raw ShellResult
|
|
25
|
+
access to the raw `ShellResult`.
|
|
26
26
|
|
|
27
27
|
This future automatically deserializes the payload when .result() is called,
|
|
28
|
-
but preserves access to the original ShellResult (with stdout, stderr, returncode)
|
|
28
|
+
but preserves access to the original `ShellResult` (with stdout, stderr, returncode)
|
|
29
29
|
via the .shell_result property.
|
|
30
30
|
|
|
31
31
|
Attributes:
|
|
32
32
|
task_id: Globus Compute task ID (set when the future completes)
|
|
33
|
-
endpoint:
|
|
34
|
-
user_endpoint_config:
|
|
33
|
+
endpoint: The endpoint where the task was submitted
|
|
34
|
+
user_endpoint_config: Resolved configuration dict used for the endpoint
|
|
35
35
|
function_name: Name of the function being executed
|
|
36
36
|
"""
|
|
37
37
|
|
|
@@ -48,9 +48,9 @@ class GroundhogFuture(Future):
|
|
|
48
48
|
self._user_stdout: str | None = None
|
|
49
49
|
|
|
50
50
|
# set after created in Function.submit, useful for invocation logs etc
|
|
51
|
-
self.
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
51
|
+
self._endpoint: str | None = None
|
|
52
|
+
self._user_endpoint_config: dict[str, Any] | None = None
|
|
53
|
+
self._function_name: str | None = None
|
|
54
54
|
|
|
55
55
|
def callback(fut: Future) -> None:
|
|
56
56
|
try:
|
|
@@ -69,7 +69,7 @@ class GroundhogFuture(Future):
|
|
|
69
69
|
|
|
70
70
|
@property
|
|
71
71
|
def shell_result(self) -> ShellResult:
|
|
72
|
-
"""Access the raw ShellResult with stdout, stderr, returncode.
|
|
72
|
+
"""Access the raw Globus Compute `ShellResult` with stdout, stderr, returncode.
|
|
73
73
|
|
|
74
74
|
This property provides access to the underlying shell execution metadata,
|
|
75
75
|
which can be useful for debugging, logging, or inspecting stderr output
|
|
@@ -93,7 +93,45 @@ class GroundhogFuture(Future):
|
|
|
93
93
|
|
|
94
94
|
@property
|
|
95
95
|
def task_id(self) -> str | None:
|
|
96
|
-
|
|
96
|
+
"""The Globus Compute task ID for this future.
|
|
97
|
+
|
|
98
|
+
Returns the task ID from the underlying Globus Compute future, which may
|
|
99
|
+
not be populated immediately.
|
|
100
|
+
"""
|
|
101
|
+
return self._original_future.task_id # type: ignore[attr-defined]
|
|
102
|
+
|
|
103
|
+
@property
|
|
104
|
+
def endpoint(self) -> str | None:
|
|
105
|
+
"""The endpoint where this task was submitted."""
|
|
106
|
+
return self._endpoint
|
|
107
|
+
|
|
108
|
+
@endpoint.setter
|
|
109
|
+
def endpoint(self, value: str | None) -> None:
|
|
110
|
+
self._endpoint = value
|
|
111
|
+
|
|
112
|
+
@property
|
|
113
|
+
def user_endpoint_config(self) -> dict[str, Any] | None:
|
|
114
|
+
"""The endpoint configuration used for this task submission.
|
|
115
|
+
|
|
116
|
+
Set by `Function.submit()` when the task is created. Contains
|
|
117
|
+
configuration like account, partition, walltime, etc. Useful for
|
|
118
|
+
debugging, since this is the final resolved config that was actually
|
|
119
|
+
passed to the `Executor`.
|
|
120
|
+
"""
|
|
121
|
+
return self._user_endpoint_config
|
|
122
|
+
|
|
123
|
+
@user_endpoint_config.setter
|
|
124
|
+
def user_endpoint_config(self, value: dict[str, Any] | None) -> None:
|
|
125
|
+
self._user_endpoint_config = value
|
|
126
|
+
|
|
127
|
+
@property
|
|
128
|
+
def function_name(self) -> str | None:
|
|
129
|
+
"""The name of the function being executed."""
|
|
130
|
+
return self._function_name
|
|
131
|
+
|
|
132
|
+
@function_name.setter
|
|
133
|
+
def function_name(self, value: str | None) -> None:
|
|
134
|
+
self._function_name = value
|
|
97
135
|
|
|
98
136
|
|
|
99
137
|
def _truncate_payload_in_cmd(cmd: str, max_length: int = 100) -> str:
|
|
@@ -123,7 +161,7 @@ def _truncate_payload_in_cmd(cmd: str, max_length: int = 100) -> str:
|
|
|
123
161
|
|
|
124
162
|
|
|
125
163
|
def _process_shell_result(shell_result: ShellResult) -> tuple[str | None, Any]:
|
|
126
|
-
"""Process a ShellResult by checking for errors and deserializing the result payload.
|
|
164
|
+
"""Process a `ShellResult` by checking for errors and deserializing the result payload.
|
|
127
165
|
|
|
128
166
|
The stdout contains two parts separated by "__GROUNDHOG_RESULT__":
|
|
129
167
|
1. User output (from the .stdout file) - returned as first element of tuple
|
groundhog_hpc/harness.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Harness wrapper for orchestrating remote function execution.
|
|
2
2
|
|
|
3
|
-
This module provides the Harness class, which wraps
|
|
4
|
-
|
|
3
|
+
This module provides the Harness class, which wraps entry point functions that
|
|
4
|
+
orchestrate calls to remote @hog.function decorated functions. Harnesses can
|
|
5
|
+
accept parameters which are parsed from CLI arguments via `hog run`.
|
|
5
6
|
"""
|
|
6
7
|
|
|
7
8
|
import inspect
|
|
@@ -10,13 +11,15 @@ from typing import Any
|
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class Harness:
|
|
13
|
-
"""Wrapper for
|
|
14
|
+
"""Wrapper for an orchestrator function.
|
|
14
15
|
|
|
15
16
|
Harness functions are entry points that typically coordinate calls to
|
|
16
|
-
@hog.function decorated functions. They
|
|
17
|
+
@hog.function decorated functions. They can accept parameters that are
|
|
18
|
+
parsed from CLI arguments when invoked via `hog run script.py -- args`.
|
|
17
19
|
|
|
18
20
|
Attributes:
|
|
19
21
|
func: The wrapped orchestrator function
|
|
22
|
+
signature: The function's signature for CLI argument parsing
|
|
20
23
|
"""
|
|
21
24
|
|
|
22
25
|
def __init__(self, func: FunctionType):
|
|
@@ -24,25 +27,18 @@ class Harness:
|
|
|
24
27
|
|
|
25
28
|
Args:
|
|
26
29
|
func: The orchestrator function to wrap
|
|
27
|
-
|
|
28
|
-
Raises:
|
|
29
|
-
TypeError: If the function accepts any arguments
|
|
30
30
|
"""
|
|
31
31
|
self.func: FunctionType = func
|
|
32
|
-
self.
|
|
32
|
+
self.signature: inspect.Signature = inspect.signature(func)
|
|
33
|
+
|
|
34
|
+
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
35
|
+
"""Execute the harness function with optional arguments.
|
|
33
36
|
|
|
34
|
-
|
|
35
|
-
|
|
37
|
+
Args:
|
|
38
|
+
*args: Positional arguments to pass to the harness function
|
|
39
|
+
**kwargs: Keyword arguments to pass to the harness function
|
|
36
40
|
|
|
37
41
|
Returns:
|
|
38
42
|
The result of the harness function execution
|
|
39
43
|
"""
|
|
40
|
-
return self.func()
|
|
41
|
-
|
|
42
|
-
def _validate_signature(self) -> None:
|
|
43
|
-
sig = inspect.signature(self.func)
|
|
44
|
-
if len(sig.parameters) > 0:
|
|
45
|
-
raise TypeError(
|
|
46
|
-
f"Harness function '{self.func.__qualname__}' must not accept any arguments, "
|
|
47
|
-
f"but has parameters: {list(sig.parameters.keys())}"
|
|
48
|
-
)
|
|
44
|
+
return self.func(*args, **kwargs)
|
groundhog_hpc/logging.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Logging configuration for Groundhog HPC.
|
|
2
|
+
|
|
3
|
+
This module provides centralized logging setup with support for:
|
|
4
|
+
- Hierarchical per-module loggers (groundhog_hpc.compute, groundhog_hpc.serialization, etc.)
|
|
5
|
+
- Environment variable configuration (GROUNDHOG_LOG_LEVEL)
|
|
6
|
+
- CLI flag overrides
|
|
7
|
+
- Remote log level propagation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def setup_logging() -> None:
    """Configure the root ``groundhog_hpc`` logger.

    Reads the log level from the GROUNDHOG_LOG_LEVEL environment variable
    (case-insensitive, surrounding whitespace ignored). Falls back to WARNING
    when the variable is unset or does not name a logging level.

    Valid log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL

    Can be called multiple times to reconfigure the log level; the stderr
    handler is only attached once.
    """
    level_name = os.getenv("GROUNDHOG_LOG_LEVEL", "WARNING").strip().upper()

    # Resolve the name against the logging module's level constants. The
    # lookup can also hit non-level attributes (e.g. BASIC_FORMAT is a format
    # string), so anything that is not an integer level falls back to WARNING
    # instead of making setLevel() raise.
    level = getattr(logging, level_name, None)
    if not isinstance(level, int):
        level = logging.WARNING

    # Configure root groundhog logger
    logger = logging.getLogger("groundhog_hpc")
    logger.setLevel(level)

    if not logger.handlers:
        # First call: attach a single stderr handler with a timestamped format
        handler = logging.StreamHandler(sys.stderr)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    else:
        # Reconfiguration: update level on existing handlers
        for handler in logger.handlers:
            handler.setLevel(level)

    # Allow propagation to parent loggers (enables pytest caplog capture)
    # This won't cause duplicate logs unless the root logger also has handlers,
    # which is rare in production but common in tests
    logger.propagate = True
|