pyworkflow-engine 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +10 -0
- pyworkflow/celery/singleton.py +370 -0
- pyworkflow/celery/tasks.py +125 -54
- pyworkflow/context/local.py +46 -0
- pyworkflow/core/step.py +8 -0
- pyworkflow/core/validation.py +112 -0
- pyworkflow/primitives/resume_hook.py +2 -1
- pyworkflow/runtime/base.py +4 -0
- pyworkflow/runtime/celery.py +12 -1
- pyworkflow/runtime/local.py +8 -0
- pyworkflow/storage/base.py +4 -1
- pyworkflow/storage/cassandra.py +30 -25
- pyworkflow/storage/dynamodb.py +32 -16
- pyworkflow/storage/file.py +39 -13
- pyworkflow/storage/memory.py +28 -11
- pyworkflow/storage/mysql.py +27 -11
- pyworkflow/storage/postgres.py +29 -12
- pyworkflow/storage/sqlite.py +29 -12
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/RECORD +25 -23
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.12.dist-info → pyworkflow_engine-0.1.14.dist-info}/top_level.txt +0 -0
pyworkflow/context/local.py
CHANGED
|
@@ -114,6 +114,31 @@ class LocalContext(WorkflowContext):
|
|
|
114
114
|
self._replay_events(event_log)
|
|
115
115
|
self._is_replaying = False
|
|
116
116
|
|
|
117
|
+
def _extract_counter_from_id(self, id_string: str) -> int:
|
|
118
|
+
"""Extract counter value from hook_id or sleep_id.
|
|
119
|
+
|
|
120
|
+
Formats:
|
|
121
|
+
- hook_{name}_{counter}
|
|
122
|
+
- sleep_{counter}_{duration}s
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
id_string: The hook_id or sleep_id string
|
|
126
|
+
|
|
127
|
+
Returns:
|
|
128
|
+
The counter value, or 0 if parsing fails
|
|
129
|
+
"""
|
|
130
|
+
try:
|
|
131
|
+
parts = id_string.split("_")
|
|
132
|
+
if id_string.startswith("hook_"):
|
|
133
|
+
# hook_{name}_{counter} - counter is last part
|
|
134
|
+
return int(parts[-1])
|
|
135
|
+
elif id_string.startswith("sleep_"):
|
|
136
|
+
# sleep_{counter}_{duration}s - counter is second part
|
|
137
|
+
return int(parts[1])
|
|
138
|
+
except (ValueError, IndexError):
|
|
139
|
+
pass
|
|
140
|
+
return 0
|
|
141
|
+
|
|
117
142
|
def _replay_events(self, events: list[Any]) -> None:
|
|
118
143
|
"""Replay events to restore state."""
|
|
119
144
|
from pyworkflow.engine.events import EventType
|
|
@@ -142,6 +167,12 @@ class LocalContext(WorkflowContext):
|
|
|
142
167
|
payload = deserialize(event.data.get("payload"))
|
|
143
168
|
self._hook_results[hook_id] = payload
|
|
144
169
|
|
|
170
|
+
elif event.type == EventType.HOOK_CREATED:
|
|
171
|
+
# Track pending hooks for re-suspension
|
|
172
|
+
hook_id = event.data.get("hook_id")
|
|
173
|
+
if hook_id:
|
|
174
|
+
self._pending_hooks[hook_id] = event.data
|
|
175
|
+
|
|
145
176
|
elif event.type == EventType.STEP_RETRYING:
|
|
146
177
|
step_id = event.data.get("step_id")
|
|
147
178
|
self._retry_states[step_id] = {
|
|
@@ -893,6 +924,21 @@ class LocalContext(WorkflowContext):
|
|
|
893
924
|
logger.debug(f"[replay] Hook {hook_id} already received")
|
|
894
925
|
return self._hook_results[hook_id]
|
|
895
926
|
|
|
927
|
+
# Check if already pending (created but not yet received - replay mode)
|
|
928
|
+
# This prevents duplicate hook creation when workflow resumes
|
|
929
|
+
if hook_id in self._pending_hooks:
|
|
930
|
+
logger.debug(f"[replay] Hook {hook_id} already pending, re-suspending")
|
|
931
|
+
pending_data = self._pending_hooks[hook_id]
|
|
932
|
+
actual_token = pending_data.get("token")
|
|
933
|
+
# Call on_created callback if provided
|
|
934
|
+
if on_created is not None:
|
|
935
|
+
await on_created(actual_token)
|
|
936
|
+
raise SuspensionSignal(
|
|
937
|
+
reason=f"hook:{hook_id}",
|
|
938
|
+
hook_id=hook_id,
|
|
939
|
+
token=actual_token,
|
|
940
|
+
)
|
|
941
|
+
|
|
896
942
|
# Generate composite token: run_id:hook_id
|
|
897
943
|
from pyworkflow.primitives.resume_hook import create_hook_token
|
|
898
944
|
|
pyworkflow/core/step.py
CHANGED
|
@@ -23,6 +23,7 @@ from loguru import logger
|
|
|
23
23
|
from pyworkflow.context import get_context, has_context
|
|
24
24
|
from pyworkflow.core.exceptions import FatalError, RetryableError
|
|
25
25
|
from pyworkflow.core.registry import register_step
|
|
26
|
+
from pyworkflow.core.validation import validate_step_parameters
|
|
26
27
|
from pyworkflow.engine.events import (
|
|
27
28
|
create_step_completed_event,
|
|
28
29
|
create_step_failed_event,
|
|
@@ -118,6 +119,8 @@ def step(
|
|
|
118
119
|
f"Step {step_name} in transient mode, executing directly",
|
|
119
120
|
run_id=ctx.run_id,
|
|
120
121
|
)
|
|
122
|
+
# Validate parameters before execution
|
|
123
|
+
validate_step_parameters(func, args, kwargs, step_name)
|
|
121
124
|
return await _execute_with_retries(
|
|
122
125
|
func, args, kwargs, step_name, max_retries, retry_delay
|
|
123
126
|
)
|
|
@@ -172,6 +175,8 @@ def step(
|
|
|
172
175
|
# When running in a distributed runtime (e.g., Celery), dispatch steps
|
|
173
176
|
# to step workers instead of executing inline.
|
|
174
177
|
if ctx.runtime == "celery":
|
|
178
|
+
# Validate parameters before dispatching to Celery
|
|
179
|
+
validate_step_parameters(func, args, kwargs, step_name)
|
|
175
180
|
return await _dispatch_step_to_celery(
|
|
176
181
|
ctx=ctx,
|
|
177
182
|
func=func,
|
|
@@ -240,6 +245,9 @@ def step(
|
|
|
240
245
|
# Check for cancellation before executing step
|
|
241
246
|
ctx.check_cancellation()
|
|
242
247
|
|
|
248
|
+
# Validate parameters before execution
|
|
249
|
+
validate_step_parameters(func, args, kwargs, step_name)
|
|
250
|
+
|
|
243
251
|
try:
|
|
244
252
|
# Execute step function
|
|
245
253
|
result = await func(*args, **kwargs)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic validation for step parameters.
|
|
3
|
+
|
|
4
|
+
Validates step function arguments against their type hints using Pydantic's
|
|
5
|
+
TypeAdapter for runtime type checking.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import inspect
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from typing import Any, get_type_hints
|
|
11
|
+
|
|
12
|
+
from pydantic import TypeAdapter, ValidationError
|
|
13
|
+
|
|
14
|
+
from pyworkflow.core.exceptions import FatalError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StepValidationError(FatalError):
    """
    Raised when step parameter validation fails.

    Subclasses FatalError so a validation failure fails the workflow
    immediately, without any retry attempts.
    """

    def __init__(
        self,
        step_name: str,
        param_name: str,
        expected_type: type,
        received_value: Any,
        validation_error: ValidationError,
    ) -> None:
        self.step_name = step_name
        self.param_name = param_name
        self.expected_type = expected_type
        self.received_value = received_value
        self.validation_error = validation_error

        # Compose a single, descriptive message for callers and logs.
        received_type_name = type(received_value).__name__
        message = (
            f"Step '{step_name}' parameter validation failed for '{param_name}': "
            f"expected {expected_type}, got {received_type_name} "
            f"with value {received_value!r}. Details: {validation_error}"
        )
        super().__init__(message)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def validate_step_parameters(
    func: Callable,
    args: tuple,
    kwargs: dict,
    step_name: str,
) -> None:
    """
    Validate step arguments against the function's type hints via Pydantic.

    Only parameters with type annotations are checked; anything without a
    type hint is left alone.

    Args:
        func: The original (unwrapped) step function.
        args: Positional arguments passed to the step.
        kwargs: Keyword arguments passed to the step.
        step_name: Step name used in error messages.

    Raises:
        StepValidationError: If an annotated parameter fails validation.
    """
    signature = inspect.signature(func)

    # Resolving hints can fail in edge cases (e.g. unresolvable forward
    # references); in that case validation is silently skipped.
    try:
        hints = get_type_hints(func)
    except Exception:
        return

    if not hints:
        # Nothing annotated, nothing to validate.
        return

    # If the arguments don't bind to the signature, the actual call will
    # raise its own TypeError -- don't pre-empt it here.
    try:
        bound_args = signature.bind(*args, **kwargs)
        bound_args.apply_defaults()
    except TypeError:
        return

    for name, value in bound_args.arguments.items():
        if name not in hints:
            # Unannotated parameter: skip.
            continue

        annotation = hints[name]
        try:
            # Pydantic's TypeAdapter performs the runtime type check.
            TypeAdapter(annotation).validate_python(value)
        except ValidationError as exc:
            raise StepValidationError(
                step_name=step_name,
                param_name=name,
                expected_type=annotation,
                received_value=value,
                validation_error=exc,
            )
|
|
@@ -185,6 +185,7 @@ async def resume_hook(
|
|
|
185
185
|
hook_id=hook_id,
|
|
186
186
|
status=HookStatus.RECEIVED,
|
|
187
187
|
payload=serialized_payload,
|
|
188
|
+
run_id=run_id,
|
|
188
189
|
)
|
|
189
190
|
|
|
190
191
|
# Schedule workflow resumption via configured runtime
|
|
@@ -195,7 +196,7 @@ async def resume_hook(
|
|
|
195
196
|
runtime = get_runtime(config.default_runtime)
|
|
196
197
|
|
|
197
198
|
try:
|
|
198
|
-
await runtime.schedule_resume(run_id, storage)
|
|
199
|
+
await runtime.schedule_resume(run_id, storage, triggered_by_hook_id=hook_id)
|
|
199
200
|
except Exception as e:
|
|
200
201
|
logger.warning(
|
|
201
202
|
f"Failed to schedule workflow resumption: {e}",
|
pyworkflow/runtime/base.py
CHANGED
|
@@ -97,6 +97,7 @@ class Runtime(ABC):
|
|
|
97
97
|
self,
|
|
98
98
|
run_id: str,
|
|
99
99
|
storage: "StorageBackend",
|
|
100
|
+
triggered_by_hook_id: str | None = None,
|
|
100
101
|
) -> None:
|
|
101
102
|
"""
|
|
102
103
|
Schedule a workflow to be resumed immediately.
|
|
@@ -109,6 +110,9 @@ class Runtime(ABC):
|
|
|
109
110
|
Args:
|
|
110
111
|
run_id: The run_id of the workflow to resume
|
|
111
112
|
storage: Storage backend
|
|
113
|
+
triggered_by_hook_id: Optional hook ID that triggered this resume.
|
|
114
|
+
Used by distributed runtimes to prevent
|
|
115
|
+
spurious resumes from duplicate calls.
|
|
112
116
|
"""
|
|
113
117
|
# Default implementation: no-op
|
|
114
118
|
# Subclasses override if they support async scheduling
|
pyworkflow/runtime/celery.py
CHANGED
|
@@ -202,25 +202,36 @@ class CeleryRuntime(Runtime):
|
|
|
202
202
|
self,
|
|
203
203
|
run_id: str,
|
|
204
204
|
storage: "StorageBackend",
|
|
205
|
+
triggered_by_hook_id: str | None = None,
|
|
205
206
|
) -> None:
|
|
206
207
|
"""
|
|
207
208
|
Schedule immediate workflow resumption via Celery task.
|
|
208
209
|
|
|
209
210
|
This is called by resume_hook() to trigger workflow resumption
|
|
210
211
|
after a hook event is received.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
run_id: The workflow run ID to resume
|
|
215
|
+
storage: Storage backend for configuration
|
|
216
|
+
triggered_by_hook_id: Optional hook ID that triggered this resume.
|
|
217
|
+
Used to prevent spurious resumes from duplicate calls.
|
|
211
218
|
"""
|
|
212
219
|
from pyworkflow.celery.tasks import resume_workflow_task
|
|
213
220
|
|
|
214
221
|
logger.info(
|
|
215
222
|
f"Scheduling workflow resume via Celery: {run_id}",
|
|
216
223
|
run_id=run_id,
|
|
224
|
+
triggered_by_hook_id=triggered_by_hook_id,
|
|
217
225
|
)
|
|
218
226
|
|
|
219
227
|
storage_config = self._get_storage_config(storage)
|
|
220
228
|
|
|
221
229
|
resume_workflow_task.apply_async(
|
|
222
230
|
args=[run_id],
|
|
223
|
-
kwargs={
|
|
231
|
+
kwargs={
|
|
232
|
+
"storage_config": storage_config,
|
|
233
|
+
"triggered_by_hook_id": triggered_by_hook_id,
|
|
234
|
+
},
|
|
224
235
|
)
|
|
225
236
|
|
|
226
237
|
logger.info(
|
pyworkflow/runtime/local.py
CHANGED
|
@@ -507,16 +507,24 @@ class LocalRuntime(Runtime):
|
|
|
507
507
|
self,
|
|
508
508
|
run_id: str,
|
|
509
509
|
storage: "StorageBackend",
|
|
510
|
+
triggered_by_hook_id: str | None = None,
|
|
510
511
|
) -> None:
|
|
511
512
|
"""
|
|
512
513
|
Schedule immediate workflow resumption.
|
|
513
514
|
|
|
514
515
|
For local runtime, this directly calls resume_workflow since
|
|
515
516
|
execution happens in-process.
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
run_id: The workflow run ID to resume
|
|
520
|
+
storage: Storage backend
|
|
521
|
+
triggered_by_hook_id: Optional hook ID that triggered this resume.
|
|
522
|
+
Not used in local runtime (no queueing).
|
|
516
523
|
"""
|
|
517
524
|
logger.info(
|
|
518
525
|
f"Scheduling immediate workflow resume: {run_id}",
|
|
519
526
|
run_id=run_id,
|
|
527
|
+
triggered_by_hook_id=triggered_by_hook_id,
|
|
520
528
|
)
|
|
521
529
|
|
|
522
530
|
try:
|
pyworkflow/storage/base.py
CHANGED
|
@@ -291,12 +291,13 @@ class StorageBackend(ABC):
|
|
|
291
291
|
pass
|
|
292
292
|
|
|
293
293
|
@abstractmethod
|
|
294
|
-
async def get_hook(self, hook_id: str) -> Hook | None:
|
|
294
|
+
async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
|
|
295
295
|
"""
|
|
296
296
|
Retrieve a hook by ID.
|
|
297
297
|
|
|
298
298
|
Args:
|
|
299
299
|
hook_id: Hook identifier
|
|
300
|
+
run_id: Run ID (required for composite key lookup in SQL backends)
|
|
300
301
|
|
|
301
302
|
Returns:
|
|
302
303
|
Hook if found, None otherwise
|
|
@@ -322,6 +323,7 @@ class StorageBackend(ABC):
|
|
|
322
323
|
hook_id: str,
|
|
323
324
|
status: HookStatus,
|
|
324
325
|
payload: str | None = None,
|
|
326
|
+
run_id: str | None = None,
|
|
325
327
|
) -> None:
|
|
326
328
|
"""
|
|
327
329
|
Update hook status and optionally payload.
|
|
@@ -330,6 +332,7 @@ class StorageBackend(ABC):
|
|
|
330
332
|
hook_id: Hook identifier
|
|
331
333
|
status: New status
|
|
332
334
|
payload: JSON serialized payload (if received)
|
|
335
|
+
run_id: Run ID (required for composite key lookup in SQL backends)
|
|
333
336
|
"""
|
|
334
337
|
pass
|
|
335
338
|
|
pyworkflow/storage/cassandra.py
CHANGED
|
@@ -1072,29 +1072,31 @@ class CassandraStorageBackend(StorageBackend):
|
|
|
1072
1072
|
|
|
1073
1073
|
session.execute(batch)
|
|
1074
1074
|
|
|
1075
|
-
async def get_hook(self, hook_id: str) -> Hook | None:
|
|
1076
|
-
"""Retrieve a hook by ID."""
|
|
1075
|
+
async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
|
|
1076
|
+
"""Retrieve a hook by ID (run_id allows skipping lookup table)."""
|
|
1077
1077
|
session = self._ensure_connected()
|
|
1078
1078
|
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1079
|
+
if not run_id:
|
|
1080
|
+
# First lookup run_id from lookup table
|
|
1081
|
+
lookup = session.execute(
|
|
1082
|
+
SimpleStatement(
|
|
1083
|
+
"SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
|
|
1084
|
+
consistency_level=self.read_consistency,
|
|
1085
|
+
),
|
|
1086
|
+
(hook_id,),
|
|
1087
|
+
).one()
|
|
1087
1088
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1089
|
+
if not lookup:
|
|
1090
|
+
return None
|
|
1091
|
+
run_id = lookup.run_id
|
|
1090
1092
|
|
|
1091
|
-
#
|
|
1093
|
+
# Get full hook
|
|
1092
1094
|
row = session.execute(
|
|
1093
1095
|
SimpleStatement(
|
|
1094
1096
|
"SELECT * FROM hooks WHERE run_id = %s AND hook_id = %s",
|
|
1095
1097
|
consistency_level=self.read_consistency,
|
|
1096
1098
|
),
|
|
1097
|
-
(
|
|
1099
|
+
(run_id, hook_id),
|
|
1098
1100
|
).one()
|
|
1099
1101
|
|
|
1100
1102
|
if not row:
|
|
@@ -1137,21 +1139,24 @@ class CassandraStorageBackend(StorageBackend):
|
|
|
1137
1139
|
hook_id: str,
|
|
1138
1140
|
status: HookStatus,
|
|
1139
1141
|
payload: str | None = None,
|
|
1142
|
+
run_id: str | None = None,
|
|
1140
1143
|
) -> None:
|
|
1141
1144
|
"""Update hook status and optionally payload."""
|
|
1142
1145
|
session = self._ensure_connected()
|
|
1143
1146
|
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1147
|
+
if not run_id:
|
|
1148
|
+
# First lookup run_id from lookup table
|
|
1149
|
+
lookup = session.execute(
|
|
1150
|
+
SimpleStatement(
|
|
1151
|
+
"SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
|
|
1152
|
+
consistency_level=self.read_consistency,
|
|
1153
|
+
),
|
|
1154
|
+
(hook_id,),
|
|
1155
|
+
).one()
|
|
1152
1156
|
|
|
1153
|
-
|
|
1154
|
-
|
|
1157
|
+
if not lookup:
|
|
1158
|
+
return
|
|
1159
|
+
run_id = lookup.run_id
|
|
1155
1160
|
|
|
1156
1161
|
received_at = datetime.now(UTC) if status == HookStatus.RECEIVED else None
|
|
1157
1162
|
|
|
@@ -1164,7 +1169,7 @@ class CassandraStorageBackend(StorageBackend):
|
|
|
1164
1169
|
""",
|
|
1165
1170
|
consistency_level=self.write_consistency,
|
|
1166
1171
|
),
|
|
1167
|
-
(status.value, payload, received_at,
|
|
1172
|
+
(status.value, payload, received_at, run_id, hook_id),
|
|
1168
1173
|
)
|
|
1169
1174
|
|
|
1170
1175
|
async def list_hooks(
|
pyworkflow/storage/dynamodb.py
CHANGED
|
@@ -722,9 +722,9 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
722
722
|
async def create_hook(self, hook: Hook) -> None:
|
|
723
723
|
"""Create a hook record."""
|
|
724
724
|
async with self._get_client() as client:
|
|
725
|
-
# Main hook item
|
|
725
|
+
# Main hook item (composite key: run_id + hook_id)
|
|
726
726
|
item = {
|
|
727
|
-
"PK": f"HOOK#{hook.hook_id}",
|
|
727
|
+
"PK": f"HOOK#{hook.run_id}#{hook.hook_id}",
|
|
728
728
|
"SK": "#METADATA",
|
|
729
729
|
"entity_type": "hook",
|
|
730
730
|
"hook_id": hook.hook_id,
|
|
@@ -741,12 +741,13 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
741
741
|
"GSI1SK": f"{hook.status.value}#{hook.created_at.isoformat()}",
|
|
742
742
|
}
|
|
743
743
|
|
|
744
|
-
# Token lookup item
|
|
744
|
+
# Token lookup item (stores run_id and hook_id for lookup)
|
|
745
745
|
token_item = {
|
|
746
746
|
"PK": f"TOKEN#{hook.token}",
|
|
747
|
-
"SK": f"HOOK#{hook.hook_id}",
|
|
747
|
+
"SK": f"HOOK#{hook.run_id}#{hook.hook_id}",
|
|
748
748
|
"entity_type": "hook_token",
|
|
749
749
|
"hook_id": hook.hook_id,
|
|
750
|
+
"run_id": hook.run_id,
|
|
750
751
|
}
|
|
751
752
|
|
|
752
753
|
# Write both items
|
|
@@ -759,16 +760,26 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
759
760
|
Item=self._dict_to_item(token_item),
|
|
760
761
|
)
|
|
761
762
|
|
|
762
|
-
async def get_hook(self, hook_id: str) -> Hook | None:
|
|
763
|
-
"""Retrieve a hook by ID."""
|
|
763
|
+
async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
|
|
764
|
+
"""Retrieve a hook by ID (requires run_id for composite key)."""
|
|
764
765
|
async with self._get_client() as client:
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
766
|
+
if run_id:
|
|
767
|
+
response = await client.get_item(
|
|
768
|
+
TableName=self.table_name,
|
|
769
|
+
Key={
|
|
770
|
+
"PK": {"S": f"HOOK#{run_id}#{hook_id}"},
|
|
771
|
+
"SK": {"S": "#METADATA"},
|
|
772
|
+
},
|
|
773
|
+
)
|
|
774
|
+
else:
|
|
775
|
+
# Fallback: try old format without run_id
|
|
776
|
+
response = await client.get_item(
|
|
777
|
+
TableName=self.table_name,
|
|
778
|
+
Key={
|
|
779
|
+
"PK": {"S": f"HOOK#{hook_id}"},
|
|
780
|
+
"SK": {"S": "#METADATA"},
|
|
781
|
+
},
|
|
782
|
+
)
|
|
772
783
|
|
|
773
784
|
item = response.get("Item")
|
|
774
785
|
if not item:
|
|
@@ -779,7 +790,7 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
779
790
|
async def get_hook_by_token(self, token: str) -> Hook | None:
|
|
780
791
|
"""Retrieve a hook by its token."""
|
|
781
792
|
async with self._get_client() as client:
|
|
782
|
-
# First get the hook_id from the token lookup item
|
|
793
|
+
# First get the hook_id and run_id from the token lookup item
|
|
783
794
|
response = await client.query(
|
|
784
795
|
TableName=self.table_name,
|
|
785
796
|
KeyConditionExpression="PK = :pk",
|
|
@@ -792,13 +803,16 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
792
803
|
return None
|
|
793
804
|
|
|
794
805
|
hook_id = self._deserialize_value(items[0]["hook_id"])
|
|
795
|
-
|
|
806
|
+
run_id_attr = items[0].get("run_id")
|
|
807
|
+
run_id = self._deserialize_value(run_id_attr) if run_id_attr else None
|
|
808
|
+
return await self.get_hook(hook_id, run_id)
|
|
796
809
|
|
|
797
810
|
async def update_hook_status(
|
|
798
811
|
self,
|
|
799
812
|
hook_id: str,
|
|
800
813
|
status: HookStatus,
|
|
801
814
|
payload: str | None = None,
|
|
815
|
+
run_id: str | None = None,
|
|
802
816
|
) -> None:
|
|
803
817
|
"""Update hook status and optionally payload."""
|
|
804
818
|
async with self._get_client() as client:
|
|
@@ -814,10 +828,12 @@ class DynamoDBStorageBackend(StorageBackend):
|
|
|
814
828
|
update_expr += ", received_at = :received_at"
|
|
815
829
|
expr_values[":received_at"] = {"S": datetime.now(UTC).isoformat()}
|
|
816
830
|
|
|
831
|
+
pk = f"HOOK#{run_id}#{hook_id}" if run_id else f"HOOK#{hook_id}"
|
|
832
|
+
|
|
817
833
|
await client.update_item(
|
|
818
834
|
TableName=self.table_name,
|
|
819
835
|
Key={
|
|
820
|
-
"PK": {"S":
|
|
836
|
+
"PK": {"S": pk},
|
|
821
837
|
"SK": {"S": "#METADATA"},
|
|
822
838
|
},
|
|
823
839
|
UpdateExpression=update_expr,
|
pyworkflow/storage/file.py
CHANGED
|
@@ -464,7 +464,8 @@ class FileStorageBackend(StorageBackend):
|
|
|
464
464
|
|
|
465
465
|
async def create_hook(self, hook: Hook) -> None:
|
|
466
466
|
"""Create a hook record."""
|
|
467
|
-
|
|
467
|
+
# Use composite filename: run_id__hook_id.json (double underscore separator)
|
|
468
|
+
hook_file = self.hooks_dir / f"{hook.run_id}__{hook.hook_id}.json"
|
|
468
469
|
lock_file = self.locks_dir / "token_index.lock"
|
|
469
470
|
lock = FileLock(str(lock_file))
|
|
470
471
|
|
|
@@ -473,16 +474,25 @@ class FileStorageBackend(StorageBackend):
|
|
|
473
474
|
def _write() -> None:
|
|
474
475
|
with lock:
|
|
475
476
|
hook_file.write_text(json.dumps(data, indent=2))
|
|
476
|
-
# Update token index
|
|
477
|
+
# Update token index (stores run_id:hook_id as value)
|
|
477
478
|
index = self._load_token_index()
|
|
478
|
-
index[hook.token] = hook.hook_id
|
|
479
|
+
index[hook.token] = f"{hook.run_id}:{hook.hook_id}"
|
|
479
480
|
self._save_token_index(index)
|
|
480
481
|
|
|
481
482
|
await asyncio.to_thread(_write)
|
|
482
483
|
|
|
483
|
-
async def get_hook(self, hook_id: str) -> Hook | None:
|
|
484
|
-
"""Retrieve a hook by ID."""
|
|
485
|
-
|
|
484
|
+
async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
|
|
485
|
+
"""Retrieve a hook by ID (requires run_id for composite filename)."""
|
|
486
|
+
if run_id:
|
|
487
|
+
hook_file = self.hooks_dir / f"{run_id}__{hook_id}.json"
|
|
488
|
+
else:
|
|
489
|
+
# Fallback: try old format for backwards compat
|
|
490
|
+
hook_file = self.hooks_dir / f"{hook_id}.json"
|
|
491
|
+
if not hook_file.exists():
|
|
492
|
+
# Search for any file with this hook_id
|
|
493
|
+
for f in self.hooks_dir.glob(f"*__{hook_id}.json"):
|
|
494
|
+
hook_file = f
|
|
495
|
+
break
|
|
486
496
|
|
|
487
497
|
if not hook_file.exists():
|
|
488
498
|
return None
|
|
@@ -496,13 +506,18 @@ class FileStorageBackend(StorageBackend):
|
|
|
496
506
|
async def get_hook_by_token(self, token: str) -> Hook | None:
|
|
497
507
|
"""Retrieve a hook by its token."""
|
|
498
508
|
|
|
499
|
-
def _lookup() -> str | None:
|
|
509
|
+
def _lookup() -> tuple[str, str] | None:
|
|
500
510
|
index = self._load_token_index()
|
|
501
|
-
|
|
511
|
+
value = index.get(token)
|
|
512
|
+
if value and ":" in value:
|
|
513
|
+
parts = value.split(":", 1)
|
|
514
|
+
return (parts[0], parts[1])
|
|
515
|
+
return None
|
|
502
516
|
|
|
503
|
-
|
|
504
|
-
if
|
|
505
|
-
|
|
517
|
+
result = await asyncio.to_thread(_lookup)
|
|
518
|
+
if result:
|
|
519
|
+
run_id, hook_id = result
|
|
520
|
+
return await self.get_hook(hook_id, run_id)
|
|
506
521
|
return None
|
|
507
522
|
|
|
508
523
|
async def update_hook_status(
|
|
@@ -510,14 +525,25 @@ class FileStorageBackend(StorageBackend):
|
|
|
510
525
|
hook_id: str,
|
|
511
526
|
status: HookStatus,
|
|
512
527
|
payload: str | None = None,
|
|
528
|
+
run_id: str | None = None,
|
|
513
529
|
) -> None:
|
|
514
530
|
"""Update hook status and optionally payload."""
|
|
515
|
-
|
|
531
|
+
if run_id:
|
|
532
|
+
hook_file = self.hooks_dir / f"{run_id}__{hook_id}.json"
|
|
533
|
+
else:
|
|
534
|
+
# Fallback: try old format
|
|
535
|
+
hook_file = self.hooks_dir / f"{hook_id}.json"
|
|
536
|
+
if not hook_file.exists():
|
|
537
|
+
# Search for any file with this hook_id
|
|
538
|
+
for f in self.hooks_dir.glob(f"*__{hook_id}.json"):
|
|
539
|
+
hook_file = f
|
|
540
|
+
break
|
|
516
541
|
|
|
517
542
|
if not hook_file.exists():
|
|
518
543
|
raise ValueError(f"Hook {hook_id} not found")
|
|
519
544
|
|
|
520
|
-
|
|
545
|
+
safe_hook_id = hook_id.replace("/", "_").replace(":", "_")
|
|
546
|
+
lock_file = self.locks_dir / f"hook_{safe_hook_id}.lock"
|
|
521
547
|
lock = FileLock(str(lock_file))
|
|
522
548
|
|
|
523
549
|
def _update() -> None:
|