horsies 0.1.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- horsies/__init__.py +117 -0
- horsies/core/__init__.py +0 -0
- horsies/core/app.py +552 -0
- horsies/core/banner.py +144 -0
- horsies/core/brokers/__init__.py +5 -0
- horsies/core/brokers/listener.py +444 -0
- horsies/core/brokers/postgres.py +993 -0
- horsies/core/cli.py +624 -0
- horsies/core/codec/serde.py +596 -0
- horsies/core/errors.py +535 -0
- horsies/core/logging.py +90 -0
- horsies/core/models/__init__.py +0 -0
- horsies/core/models/app.py +268 -0
- horsies/core/models/broker.py +79 -0
- horsies/core/models/queues.py +23 -0
- horsies/core/models/recovery.py +101 -0
- horsies/core/models/schedule.py +229 -0
- horsies/core/models/task_pg.py +307 -0
- horsies/core/models/tasks.py +358 -0
- horsies/core/models/workflow.py +1990 -0
- horsies/core/models/workflow_pg.py +245 -0
- horsies/core/registry/tasks.py +101 -0
- horsies/core/scheduler/__init__.py +26 -0
- horsies/core/scheduler/calculator.py +267 -0
- horsies/core/scheduler/service.py +569 -0
- horsies/core/scheduler/state.py +260 -0
- horsies/core/task_decorator.py +656 -0
- horsies/core/types/status.py +38 -0
- horsies/core/utils/imports.py +203 -0
- horsies/core/utils/loop_runner.py +44 -0
- horsies/core/worker/current.py +17 -0
- horsies/core/worker/worker.py +1967 -0
- horsies/core/workflows/__init__.py +23 -0
- horsies/core/workflows/engine.py +2344 -0
- horsies/core/workflows/recovery.py +501 -0
- horsies/core/workflows/registry.py +97 -0
- horsies/py.typed +0 -0
- horsies-0.1.0a4.dist-info/METADATA +35 -0
- horsies-0.1.0a4.dist-info/RECORD +42 -0
- horsies-0.1.0a4.dist-info/WHEEL +5 -0
- horsies-0.1.0a4.dist-info/entry_points.txt +2 -0
- horsies-0.1.0a4.dist-info/top_level.txt +1 -0

horsies/core/workflows/engine.py
@@ -0,0 +1,2344 @@
"""Workflow execution engine.

This module handles workflow lifecycle:
- Starting workflows
- Enqueuing tasks based on DAG dependencies
- Handling task completion and dependency resolution
- Workflow completion detection
"""

from __future__ import annotations

import uuid
from typing import TYPE_CHECKING, Any, cast

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

from horsies.core.codec.serde import dumps_json, loads_json, task_result_from_json
from horsies.core.logging import get_logger
from horsies.core.models.workflow import (
    WorkflowHandle,
    TaskNode,
    SubWorkflowNode,
    SubWorkflowSummary,
    WorkflowStatus,
    WorkflowTaskStatus,
    WorkflowDefinition,
    AnyNode,
    WORKFLOW_TASK_TERMINAL_STATES,
)
from horsies.core.errors import WorkflowValidationError, ErrorCode

logger = get_logger('workflow.engine')

_WF_TASK_TERMINAL_VALUES: list[str] = [s.value for s in WORKFLOW_TASK_TERMINAL_STATES]

if TYPE_CHECKING:
    from horsies.core.models.workflow import WorkflowSpec
    from horsies.core.brokers.postgres import PostgresBroker
    from horsies.core.models.tasks import TaskResult, TaskError


def _as_str_list(value: object) -> list[str]:
    if not isinstance(value, list):
        return []
    str_items: list[str] = []
    for item in cast(list[object], value):
        if not isinstance(item, str):
            return []
        str_items.append(item)
    return str_items


async def start_workflow_async(
    spec: 'WorkflowSpec',
    broker: 'PostgresBroker',
    workflow_id: str | None = None,
) -> WorkflowHandle:
    """
    Start a workflow asynchronously.

    Creates workflow and workflow_tasks records, then enqueues root tasks.

    Args:
        spec: The workflow specification
        broker: PostgreSQL broker for database operations
        workflow_id: Optional custom workflow ID

    Returns:
        WorkflowHandle for tracking and retrieving results

    Notes:
        If workflow_id is provided and already exists, returns existing handle
        (idempotent operation).

    Raises:
        WorkflowValidationError: If WorkflowSpec was not created via app.workflow()
            (missing queue/priority resolution).
    """
    # Validate that WorkflowSpec was created via app.workflow()
    # TaskNodes must have resolved queue and priority (SubWorkflowNodes don't have these)
    for node in spec.tasks:
        if isinstance(node, SubWorkflowNode):
            continue  # SubWorkflowNodes don't have queue/priority
        task = node
        if task.queue is None:
            raise WorkflowValidationError(
                message='TaskNode has unresolved queue',
                code=ErrorCode.WORKFLOW_UNRESOLVED_QUEUE,
                notes=[f"TaskNode '{task.name}' has queue=None"],
                help_text='use app.workflow() to create WorkflowSpec with proper validation',
            )
        if task.priority is None:
            raise WorkflowValidationError(
                message='TaskNode has unresolved priority',
                code=ErrorCode.WORKFLOW_UNRESOLVED_PRIORITY,
                notes=[f"TaskNode '{task.name}' has priority=None"],
                help_text='use app.workflow() to create WorkflowSpec with proper validation',
            )

    wf_id = workflow_id or str(uuid.uuid4())

    await broker.ensure_schema_initialized()

    async with broker.session_factory() as session:
        # Check if workflow already exists (idempotent start)
        if workflow_id:
            existing = await session.execute(
                text('SELECT id FROM horsies_workflows WHERE id = :wf_id'),
                {'wf_id': wf_id},
            )
            if existing.fetchone():
                logger.warning(
                    f'Workflow {wf_id} already exists, returning existing handle'
                )
                return WorkflowHandle(workflow_id=wf_id, broker=broker)

        # 1. Insert workflow record
        output_index = spec.output.index if spec.output else None

        # Serialize success_policy to index-based format
        success_policy_json: dict[str, Any] | None = None
        if spec.success_policy is not None:
            success_policy_json = {
                'cases': [
                    {
                        'required_indices': [
                            t.index for t in case.required if t.index is not None
                        ]
                    }
                    for case in spec.success_policy.cases
                ],
            }
            if spec.success_policy.optional:
                success_policy_json['optional_indices'] = [
                    t.index for t in spec.success_policy.optional if t.index is not None
                ]

        await session.execute(
            text("""
                INSERT INTO horsies_workflows (id, name, status, on_error, output_task_index,
                    success_policy, workflow_def_module, workflow_def_qualname,
                    depth, root_workflow_id,
                    created_at, started_at, updated_at)
                VALUES (:id, :name, 'RUNNING', :on_error, :output_idx,
                        :success_policy, :wf_module, :wf_qualname,
                        0, :id,
                        NOW(), NOW(), NOW())
            """),
            {
                'id': wf_id,
                'name': spec.name,
                'on_error': spec.on_error.value,
                'output_idx': output_index,
                'success_policy': dumps_json(success_policy_json)
                if success_policy_json
                else None,
                'wf_module': spec.workflow_def_module,
                'wf_qualname': spec.workflow_def_qualname,
            },
        )

        # 2. Insert all workflow_tasks
        for node in spec.tasks:
            dep_indices = [d.index for d in node.waits_for if d.index is not None]
            args_from_indices = {
                k: v.index for k, v in node.args_from.items() if v.index is not None
            }
            ctx_from_ids = (
                [n.node_id for n in node.workflow_ctx_from if n.node_id is not None]
                if node.workflow_ctx_from
                else None
            )

            wt_id = str(uuid.uuid4())

            if isinstance(node, SubWorkflowNode):
                # SubWorkflowNode: no fn, queue, priority, good_until
                await session.execute(
                    text("""
                        INSERT INTO horsies_workflow_tasks
                        (id, workflow_id, task_index, node_id, task_name, task_args, task_kwargs,
                         queue_name, priority, dependencies, args_from, workflow_ctx_from,
                         allow_failed_deps, join_type, min_success, task_options, status,
                         is_subworkflow, sub_workflow_name, sub_workflow_retry_mode,
                         sub_workflow_module, sub_workflow_qualname, created_at)
                        VALUES (:id, :wf_id, :idx, :node_id, :name, :args, :kwargs, :queue, :priority,
                                :deps, :args_from, :ctx_from, :allow_failed, :join_type, :min_success,
                                :task_options, :status, TRUE, :sub_wf_name, :sub_wf_retry_mode,
                                :sub_wf_module, :sub_wf_qualname, NOW())
                    """),
                    {
                        'id': wt_id,
                        'wf_id': wf_id,
                        'idx': node.index,
                        'node_id': node.node_id,
                        'name': node.name,
                        'args': dumps_json(node.args),
                        'kwargs': dumps_json(node.kwargs),
                        'queue': 'default',  # SubWorkflowNode doesn't have queue
                        'priority': 100,  # SubWorkflowNode doesn't have priority
                        'deps': dep_indices,
                        'args_from': dumps_json(args_from_indices)
                        if args_from_indices
                        else None,
                        'ctx_from': ctx_from_ids,
                        'allow_failed': node.allow_failed_deps,
                        'join_type': node.join,
                        'min_success': node.min_success,
                        'task_options': None,
                        'status': 'PENDING' if dep_indices else 'READY',
                        'sub_wf_name': node.workflow_def.name,
                        'sub_wf_retry_mode': node.retry_mode.value,
                        'sub_wf_module': node.workflow_def.__module__,
                        'sub_wf_qualname': node.workflow_def.__qualname__,
                    },
                )
            else:
                # TaskNode: has fn, queue, priority, good_until
                task = node

                # Get task_options_json from the task function (set by @task decorator)
                # and merge in TaskNode.good_until if set
                task_options_json: str | None = getattr(
                    task.fn, 'task_options_json', None
                )
                if task.good_until is not None:
                    # Merge good_until from TaskNode into task_options
                    base_options: dict[str, Any] = {}
                    if task_options_json:
                        parsed = loads_json(task_options_json)
                        if isinstance(parsed, dict):
                            base_options = parsed
                    base_options['good_until'] = task.good_until.isoformat()
                    task_options_json = dumps_json(base_options)

                await session.execute(
                    text("""
                        INSERT INTO horsies_workflow_tasks
                        (id, workflow_id, task_index, node_id, task_name, task_args, task_kwargs,
                         queue_name, priority, dependencies, args_from, workflow_ctx_from,
                         allow_failed_deps, join_type, min_success, task_options, status,
                         is_subworkflow, created_at)
                        VALUES (:id, :wf_id, :idx, :node_id, :name, :args, :kwargs, :queue, :priority,
                                :deps, :args_from, :ctx_from, :allow_failed, :join_type, :min_success,
                                :task_options, :status, FALSE, NOW())
                    """),
                    {
                        'id': wt_id,
                        'wf_id': wf_id,
                        'idx': task.index,
                        'node_id': task.node_id,
                        'name': task.name,
                        'args': dumps_json(task.args),
                        'kwargs': dumps_json(task.kwargs),
                        # Queue: use override, else task's declared queue, else "default"
                        'queue': task.queue
                        or getattr(task.fn, 'task_queue_name', None)
                        or 'default',
                        # Priority: use override, else default
                        'priority': task.priority if task.priority is not None else 100,
                        'deps': dep_indices,
                        'args_from': dumps_json(args_from_indices)
                        if args_from_indices
                        else None,
                        'ctx_from': ctx_from_ids,
                        'allow_failed': task.allow_failed_deps,
                        'join_type': task.join,
                        'min_success': task.min_success,
                        'task_options': task_options_json,
                        'status': 'PENDING' if dep_indices else 'READY',
                    },
                )

        # 3. Enqueue root tasks (no dependencies)
        root_nodes = [t for t in spec.tasks if not t.waits_for]
        for root_node in root_nodes:
            if root_node.index is not None:
                if isinstance(root_node, SubWorkflowNode):
                    # Start child workflow
                    await _enqueue_subworkflow_task(
                        session, broker, wf_id, root_node.index, {}, 0, wf_id
                    )
                else:
                    # Enqueue regular task
                    await _enqueue_workflow_task(session, wf_id, root_node.index, {})

        await session.commit()

    return WorkflowHandle(workflow_id=wf_id, broker=broker)


def start_workflow(
    spec: 'WorkflowSpec',
    broker: 'PostgresBroker',
    workflow_id: str | None = None,
) -> WorkflowHandle:
    """
    Start a workflow synchronously.

    Sync wrapper around start_workflow_async().

    Args:
        spec: The workflow specification
        broker: PostgreSQL broker for database operations
        workflow_id: Optional custom workflow ID

    Returns:
        WorkflowHandle for tracking and retrieving results
    """
    from horsies.core.utils.loop_runner import LoopRunner

    runner = LoopRunner()
    try:
        return runner.call(start_workflow_async, spec, broker, workflow_id)
    finally:
        runner.stop()

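# --------------------------------------------------------------------------
# Usage sketch (illustrative, not part of engine.py). Assumes a WorkflowSpec
# `spec` produced by `app.workflow()` and a broker from `app.get_broker()`;
# `app`, `spec`, and `broker` are placeholder names, not defined here.
#
#     handle = start_workflow(spec, broker)
#     # Passing an explicit workflow_id makes the call idempotent: if that
#     # workflow already exists, a handle to the existing one is returned.
#     same_handle = start_workflow(spec, broker, workflow_id=handle.workflow_id)
# --------------------------------------------------------------------------
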
async def pause_workflow(
    broker: 'PostgresBroker',
    workflow_id: str,
) -> bool:
    """
    Pause a running workflow.

    Transitions workflow from RUNNING to PAUSED state. Already-running tasks
    will continue to completion, but:
    - No new PENDING tasks will become READY
    - No READY tasks will be enqueued
    - Workflow completion check is skipped while PAUSED

    Also cascades pause to all running child workflows (iteratively, not recursively).

    Use resume_workflow() to continue execution.

    Args:
        broker: PostgreSQL broker for database operations
        workflow_id: The workflow ID to pause

    Returns:
        True if workflow was paused, False if not RUNNING (no-op)
    """
    async with broker.session_factory() as session:
        result = await session.execute(
            text("""
                UPDATE horsies_workflows
                SET status = 'PAUSED', updated_at = NOW()
                WHERE id = :wf_id AND status = 'RUNNING'
                RETURNING id
            """),
            {'wf_id': workflow_id},
        )
        row = result.fetchone()
        if row is None:
            return False

        # Cascade pause to running child workflows (iterative BFS)
        await _cascade_pause_to_children(session, workflow_id)

        # Notify clients of pause (so get() returns immediately with WORKFLOW_PAUSED)
        await session.execute(
            text("SELECT pg_notify('workflow_done', :wf_id)"),
            {'wf_id': workflow_id},
        )

        await session.commit()
        return True


async def _cascade_pause_to_children(
    session: AsyncSession,
    workflow_id: str,
) -> None:
    """
    Iteratively pause all running child workflows using BFS.
    Avoids deep recursion for deeply nested workflows.
    """
    queue = [workflow_id]

    while queue:
        current_id = queue.pop(0)

        # Find running child workflows
        children = await session.execute(
            text("""
                SELECT id FROM horsies_workflows
                WHERE parent_workflow_id = :wf_id AND status = 'RUNNING'
            """),
            {'wf_id': current_id},
        )

        for child_row in children.fetchall():
            child_id = child_row[0]

            # Pause child
            await session.execute(
                text("""
                    UPDATE horsies_workflows
                    SET status = 'PAUSED', updated_at = NOW()
                    WHERE id = :wf_id AND status = 'RUNNING'
                """),
                {'wf_id': child_id},
            )

            # Notify of child pause
            await session.execute(
                text("SELECT pg_notify('workflow_done', :wf_id)"),
                {'wf_id': child_id},
            )

            # Add to queue to pause its children
            queue.append(child_id)


def pause_workflow_sync(
    broker: 'PostgresBroker',
    workflow_id: str,
) -> bool:
    """
    Pause a running workflow synchronously.

    Sync wrapper around pause_workflow().

    Args:
        broker: PostgreSQL broker for database operations
        workflow_id: The workflow ID to pause

    Returns:
        True if workflow was paused, False if not RUNNING (no-op)
    """
    from horsies.core.utils.loop_runner import LoopRunner

    runner = LoopRunner()
    try:
        return runner.call(pause_workflow, broker, workflow_id)
    finally:
        runner.stop()


async def resume_workflow(
    broker: 'PostgresBroker',
    workflow_id: str,
) -> bool:
    """
    Resume a paused workflow.

    Re-evaluates all PENDING tasks (marks READY if deps are terminal) and
    enqueues all READY tasks. Only works if workflow is currently PAUSED.

    Also cascades resume to all paused child workflows (iteratively, not recursively).

    Args:
        broker: PostgreSQL broker for database operations
        workflow_id: The workflow ID to resume

    Returns:
        True if workflow was resumed, False if not PAUSED (no-op)
    """
    async with broker.session_factory() as session:
        # 1. Transition PAUSED → RUNNING (only if currently PAUSED)
        result = await session.execute(
            text("""
                UPDATE horsies_workflows
                SET status = 'RUNNING', updated_at = NOW()
                WHERE id = :wf_id AND status = 'PAUSED'
                RETURNING id, depth, root_workflow_id
            """),
            {'wf_id': workflow_id},
        )
        row = result.fetchone()
        if row is None:
            # Not PAUSED, no-op
            return False

        depth = row[1] or 0
        root_wf_id = row[2] or workflow_id

        # 2. Find all PENDING tasks and try to make them READY
        pending_result = await session.execute(
            text("""
                SELECT task_index FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id AND status = 'PENDING'
            """),
            {'wf_id': workflow_id},
        )
        pending_indices = [r[0] for r in pending_result.fetchall()]

        for task_index in pending_indices:
            await _try_make_ready_and_enqueue(
                session, broker, workflow_id, task_index, depth, root_wf_id
            )

        # 3. Find all READY tasks and enqueue them
        # (These may be tasks that were READY at pause time, or tasks that
        # couldn't be enqueued during step 2 due to failed deps check)
        ready_result = await session.execute(
            text("""
                SELECT task_index, dependencies, is_subworkflow FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id AND status = 'READY'
            """),
            {'wf_id': workflow_id},
        )
        ready_tasks = ready_result.fetchall()

        for task_index, dependencies, is_subworkflow in ready_tasks:
            # Fetch dependency results for this task
            dep_indices: list[int] = (
                cast(list[int], dependencies) if isinstance(dependencies, list) else []
            )
            dep_results = await _get_dependency_results(
                session, workflow_id, dep_indices
            )

            if is_subworkflow:
                await _enqueue_subworkflow_task(
                    session,
                    broker,
                    workflow_id,
                    task_index,
                    dep_results,
                    depth,
                    root_wf_id,
                )
            else:
                await _enqueue_workflow_task(
                    session, workflow_id, task_index, dep_results
                )

        # 4. Cascade resume to paused child workflows
        await _cascade_resume_to_children(session, broker, workflow_id)

        await session.commit()
        return True


async def _cascade_resume_to_children(
    session: AsyncSession,
    broker: 'PostgresBroker',
    workflow_id: str,
) -> None:
    """
    Iteratively resume all paused child workflows using BFS.
    Avoids deep recursion for deeply nested workflows.
    """
    queue = [workflow_id]

    while queue:
        current_id = queue.pop(0)

        # Find paused child workflows
        children = await session.execute(
            text("""
                SELECT id, depth, root_workflow_id FROM horsies_workflows
                WHERE parent_workflow_id = :wf_id AND status = 'PAUSED'
            """),
            {'wf_id': current_id},
        )

        for child_row in children.fetchall():
            child_id = child_row[0]
            child_depth = child_row[1] or 0
            child_root = child_row[2] or child_id

            # Resume child
            await session.execute(
                text("""
                    UPDATE horsies_workflows
                    SET status = 'RUNNING', updated_at = NOW()
                    WHERE id = :wf_id AND status = 'PAUSED'
                """),
                {'wf_id': child_id},
            )

            # Re-evaluate and enqueue child's PENDING/READY tasks
            child_pending = await session.execute(
                text("""
                    SELECT task_index FROM horsies_workflow_tasks
                    WHERE workflow_id = :wf_id AND status = 'PENDING'
                """),
                {'wf_id': child_id},
            )
            for pending_row in child_pending.fetchall():
                await _try_make_ready_and_enqueue(
                    session, broker, child_id, pending_row[0], child_depth, child_root
                )

            child_ready = await session.execute(
                text("""
                    SELECT task_index, dependencies, is_subworkflow FROM horsies_workflow_tasks
                    WHERE workflow_id = :wf_id AND status = 'READY'
                """),
                {'wf_id': child_id},
            )
            for ready_row in child_ready.fetchall():
                task_idx = ready_row[0]
                deps = ready_row[1]
                is_sub = ready_row[2]
                dep_indices: list[int] = (
                    cast(list[int], deps) if isinstance(deps, list) else []
                )
                dep_res = await _get_dependency_results(session, child_id, dep_indices)

                if is_sub:
                    await _enqueue_subworkflow_task(
                        session,
                        broker,
                        child_id,
                        task_idx,
                        dep_res,
                        child_depth,
                        child_root,
                    )
                else:
                    await _enqueue_workflow_task(session, child_id, task_idx, dep_res)

            # Add to queue to resume its children
            queue.append(child_id)


def resume_workflow_sync(
    broker: 'PostgresBroker',
    workflow_id: str,
) -> bool:
    """
    Resume a paused workflow synchronously.

    Sync wrapper around resume_workflow().

    Args:
        broker: PostgreSQL broker for database operations
        workflow_id: The workflow ID to resume

    Returns:
        True if workflow was resumed, False if not PAUSED (no-op)
    """
    from horsies.core.utils.loop_runner import LoopRunner

    runner = LoopRunner()
    try:
        return runner.call(resume_workflow, broker, workflow_id)
    finally:
        runner.stop()

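# --------------------------------------------------------------------------
# Pause/resume sketch (illustrative). Both operations are no-ops unless the
# workflow is in the expected state, so the returned booleans signal whether
# the transition actually happened; `broker` and `wf_id` are placeholders.
#
#     if pause_workflow_sync(broker, wf_id):         # True only if it was RUNNING
#         ...
#     resumed = resume_workflow_sync(broker, wf_id)  # re-evaluates PENDING/READY tasks
# --------------------------------------------------------------------------
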
async def _enqueue_workflow_task(
    session: AsyncSession,
    workflow_id: str,
    task_index: int,
    all_dep_results: dict[int, 'TaskResult[Any, TaskError]'],
) -> str | None:
    """
    Enqueue a single workflow task.

    Args:
        session: Database session
        workflow_id: Workflow ID
        task_index: Task index to enqueue
        all_dep_results: Results from ALL completed dependencies (by index)

    Returns:
        task_id if enqueued, None if already enqueued, not ready, or workflow not RUNNING.
    """
    # Atomic: READY → ENQUEUED only if still READY AND workflow is RUNNING
    # PAUSE guard: JOIN with workflows ensures we don't enqueue while paused
    result = await session.execute(
        text("""
            UPDATE horsies_workflow_tasks wt
            SET status = 'ENQUEUED', started_at = NOW()
            FROM horsies_workflows w
            WHERE wt.workflow_id = :wf_id
              AND wt.task_index = :idx
              AND wt.status = 'READY'
              AND w.id = wt.workflow_id
              AND w.status = 'RUNNING'
            RETURNING wt.id, wt.task_name, wt.task_args, wt.task_kwargs, wt.queue_name, wt.priority,
                      wt.args_from, wt.workflow_ctx_from, wt.task_options
        """),
        {'wf_id': workflow_id, 'idx': task_index},
    )

    row = result.fetchone()
    if row is None:
        return None  # Already enqueued, not ready, or workflow not RUNNING

    # Parse retry config and good_until from task_options (row[8])
    task_options_str: str | None = row[8]
    max_retries = 0
    good_until_str: str | None = None
    if task_options_str:
        try:
            options_data = loads_json(task_options_str)
            if isinstance(options_data, dict):
                retry_policy = options_data.get('retry_policy')
                if isinstance(retry_policy, dict):
                    max_retries = retry_policy.get('max_retries', 3)
                # Extract good_until if present
                good_until_raw = options_data.get('good_until')
                if good_until_raw is not None:
                    good_until_str = str(good_until_raw)
        except Exception:
            pass

    # Start with static kwargs
    raw_kwargs = loads_json(row[3])
    kwargs: dict[str, Any] = raw_kwargs if isinstance(raw_kwargs, dict) else {}

    # Inject args_from: map kwarg_name -> TaskResult from dependency
    if row[6]:  # args_from
        args_from_raw = row[6]
        # args_from is stored as JSONB, may come back as dict directly
        if isinstance(args_from_raw, str):
            args_from_map = loads_json(args_from_raw)
        else:
            args_from_map = args_from_raw

        if isinstance(args_from_map, dict):
            # Cast to proper type - args_from stores {kwarg_name: task_index}
            args_from_typed = cast(dict[str, int], args_from_map)
            for kwarg_name, dep_index in args_from_typed.items():
                dep_result = all_dep_results.get(dep_index)
                if dep_result is not None:
                    # Serialize TaskResult for transport
                    kwargs[kwarg_name] = {
                        '__horsies_taskresult__': True,
                        'data': dumps_json(dep_result),
                    }

    # Inject workflow_ctx if workflow_ctx_from is set
    if row[7]:  # workflow_ctx_from
        ctx_from_ids = cast(list[str], row[7])  # Already a list from PostgreSQL ARRAY
        ctx_data = await _build_workflow_context_data(
            session=session,
            workflow_id=workflow_id,
            task_index=task_index,
            task_name=row[1],
            ctx_from_ids=ctx_from_ids,
        )
        # Will be filtered out by worker if task doesn't declare workflow_ctx param
        kwargs['__horsies_workflow_ctx__'] = ctx_data

    # Create actual task in tasks table
    task_id = str(uuid.uuid4())
    await session.execute(
        text("""
            INSERT INTO horsies_tasks (id, task_name, queue_name, priority, args, kwargs, status,
                sent_at, created_at, updated_at, claimed, retry_count, max_retries,
                task_options, good_until)
            VALUES (:id, :name, :queue, :priority, :args, :kwargs, 'PENDING',
                    NOW(), NOW(), NOW(), FALSE, 0, :max_retries, :task_options, :good_until)
        """),
        {
            'id': task_id,
            'name': row[1],  # task_name
            'queue': row[4],  # queue_name
            'priority': row[5],  # priority
            'args': row[2],  # task_args (already JSON string)
            'kwargs': dumps_json(kwargs),
            'max_retries': max_retries,
            'task_options': task_options_str,
            'good_until': good_until_str,
        },
    )

    # Link workflow_task to actual task
    await session.execute(
        text("""
            UPDATE horsies_workflow_tasks SET task_id = :tid WHERE workflow_id = :wf_id AND task_index = :idx
        """),
        {'tid': task_id, 'wf_id': workflow_id, 'idx': task_index},
    )

    return task_id

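# --------------------------------------------------------------------------
# Shape of the args_from injection above (illustrative). Each dependency
# result is wrapped in an envelope so it survives JSON transport to the
# worker; 'report' is a placeholder kwarg name, and the worker-side
# unwrapping is assumed to live outside this module.
#
#     kwargs['report'] = {
#         '__horsies_taskresult__': True,
#         'data': dumps_json(dep_result),  # serialized TaskResult[Any, TaskError]
#     }
# --------------------------------------------------------------------------
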
async def _enqueue_subworkflow_task(
    session: AsyncSession,
    broker: 'PostgresBroker',
    workflow_id: str,
    task_index: int,
    all_dep_results: dict[int, 'TaskResult[Any, TaskError]'],
    parent_depth: int,
    root_workflow_id: str,
) -> str | None:
    """
    Start a child workflow for a SubWorkflowNode.

    Args:
        session: Database session
        broker: PostgreSQL broker for database operations
        workflow_id: Parent workflow ID
        task_index: Task index of the SubWorkflowNode in parent
        all_dep_results: Results from ALL completed dependencies (by index)
        parent_depth: Nesting depth of the parent workflow
        root_workflow_id: Root workflow ID (for efficient queries)

    Returns:
        child_workflow_id if started, None if already started or not ready.
    """
    # 1. Atomically mark parent node as ENQUEUED (with workflow RUNNING guard)
    result = await session.execute(
        text("""
            UPDATE horsies_workflow_tasks wt
            SET status = 'ENQUEUED', started_at = NOW()
            FROM horsies_workflows w
            WHERE wt.workflow_id = :wf_id
              AND wt.task_index = :idx
              AND wt.status = 'READY'
              AND wt.is_subworkflow = TRUE
              AND w.id = wt.workflow_id
              AND w.status = 'RUNNING'
            RETURNING wt.id, wt.sub_workflow_name, wt.task_args, wt.task_kwargs,
                      wt.args_from, wt.node_id, wt.sub_workflow_module,
                      wt.sub_workflow_qualname, wt.sub_workflow_retry_mode
        """),
        {'wf_id': workflow_id, 'idx': task_index},
    )

    row = result.fetchone()
    if row is None:
        return None  # Already enqueued, not ready, or workflow not RUNNING

    _wt_id = row[0]  # Unused but kept for row unpacking clarity
    _sub_workflow_name = row[1]  # Unused but kept for row unpacking clarity
    task_args_json = row[2]
    task_kwargs_json = row[3]
    args_from_raw = row[4]
    _node_id = row[5]  # Unused but kept for row unpacking clarity
    sub_workflow_module = row[6]
    sub_workflow_qualname = row[7]
    _sub_workflow_retry_mode = row[8]  # Currently unused (retry_mode not implemented)

    # 2. Try to get workflow_def from registry (fast path) or import fallback
    #
    # Registry lookup succeeds when:
    # - Parent workflow module is imported in worker
    # - WorkflowSpec.build(app) was called (registers nodes)
    # This typically works in tests but rarely in production workers.
    #
    # Fallback import path (sub_workflow_module/qualname stored in DB) handles
    # the common case where registry is empty.
    workflow_name_result = await session.execute(
        text('SELECT name FROM horsies_workflows WHERE id = :wf_id'),
        {'wf_id': workflow_id},
    )
    workflow_name_row = workflow_name_result.fetchone()
    if workflow_name_row is None:
        logger.error(f'Workflow {workflow_id} not found')
        return None

    workflow_name = workflow_name_row[0]

    from horsies.core.workflows.registry import get_subworkflow_node

    subworkflow_node = get_subworkflow_node(workflow_name, task_index)

    workflow_def: type[WorkflowDefinition[Any]] | None = None
    if subworkflow_node is not None:
        # Registry hit: use cached workflow_def
        workflow_def = subworkflow_node.workflow_def
    elif sub_workflow_module and sub_workflow_qualname:
        # Registry miss: load child workflow def via import path (stored in DB)
        workflow_def = _load_workflow_def_from_path(
            sub_workflow_module, sub_workflow_qualname
        )

    if workflow_def is None:
        # Critical: Revert ENQUEUED → FAILED to prevent stuck task
        # This can happen if the subworkflow module cannot be imported
        from horsies.core.models.tasks import TaskError, TaskResult

        error = TaskError(
            error_code='SUBWORKFLOW_LOAD_FAILED',
            message=f'Failed to load subworkflow definition for {workflow_name}:{task_index}',
            data={'module': sub_workflow_module, 'qualname': sub_workflow_qualname},
        )
        await session.execute(
            text("""
                UPDATE horsies_workflow_tasks
                SET status = 'FAILED', result = :result, completed_at = NOW()
                WHERE workflow_id = :wf_id AND task_index = :idx
            """),
            {
                'wf_id': workflow_id,
                'idx': task_index,
                'result': dumps_json(TaskResult(err=error)),
            },
        )
        logger.error(f'SubWorkflowNode load failed for {workflow_name}:{task_index}')

        # Handle failure and propagate to dependents
        failure_result: TaskResult[Any, TaskError] = TaskResult(err=error)
        should_continue = await _handle_workflow_task_failure(
            session, workflow_id, task_index, failure_result
        )
        if should_continue:
            await _process_dependents(session, workflow_id, task_index, broker)
            await _check_workflow_completion(session, workflow_id, broker)

        return None

    # 3. Parse static kwargs and merge args_from
    raw_args = loads_json(task_args_json) if task_args_json else []
    task_args: tuple[Any, ...] = ()
    if isinstance(raw_args, list):
        task_args = tuple(raw_args)
    raw_kwargs = loads_json(task_kwargs_json) if task_kwargs_json else {}
    kwargs: dict[str, Any] = raw_kwargs if isinstance(raw_kwargs, dict) else {}

    if args_from_raw:
        # args_from is stored as JSONB, may come back as dict directly
        if isinstance(args_from_raw, str):
            args_from_map = loads_json(args_from_raw)
        else:
            args_from_map = args_from_raw

        if isinstance(args_from_map, dict):
            args_from_typed = cast(dict[str, int], args_from_map)
            for kwarg_name, dep_index in args_from_typed.items():
                dep_result = all_dep_results.get(dep_index)
                if dep_result is not None:
                    # Serialize TaskResult for transport
                    kwargs[kwarg_name] = {
                        '__horsies_taskresult__': True,
                        'data': dumps_json(dep_result),
                    }

    # 4. Build child WorkflowSpec (parameterized)
    if broker.app is None:
        raise WorkflowValidationError(
            message='Broker missing app reference for subworkflow',
            code=ErrorCode.WORKFLOW_SUBWORKFLOW_APP_MISSING,
            notes=[
                'Subworkflows require a Horsies app instance to build the child spec',
                'Ensure broker.app is set (Horsies.get_broker() does this automatically)',
            ],
            help_text='use app.get_broker() or attach app to broker before starting workflows',
        )
    child_spec = workflow_def.build_with(broker.app, *task_args, **kwargs)

    # 5. Create child workflow with parent reference
    child_id = str(uuid.uuid4())

    # Serialize child's success_policy
    child_success_policy_json: dict[str, Any] | None = None
    if child_spec.success_policy is not None:
        child_success_policy_json = {
            'cases': [
                {
                    'required_indices': [
                        t.index for t in case.required if t.index is not None
                    ]
                }
                for case in child_spec.success_policy.cases
            ],
        }
        if child_spec.success_policy.optional:
            child_success_policy_json['optional_indices'] = [
                t.index
                for t in child_spec.success_policy.optional
                if t.index is not None
            ]

    child_output_index = child_spec.output.index if child_spec.output else None

    await session.execute(
        text("""
            INSERT INTO horsies_workflows
            (id, name, status, on_error, output_task_index, success_policy,
             workflow_def_module, workflow_def_qualname,
             parent_workflow_id, parent_task_index, depth, root_workflow_id,
             created_at, started_at, updated_at)
            VALUES (:id, :name, 'RUNNING', :on_error, :output_idx, :success_policy,
                    :wf_module, :wf_qualname,
                    :parent_wf_id, :parent_idx, :depth, :root_wf_id,
                    NOW(), NOW(), NOW())
        """),
        {
            'id': child_id,
            'name': child_spec.name,
            'on_error': child_spec.on_error.value,
            'output_idx': child_output_index,
            'success_policy': dumps_json(child_success_policy_json)
            if child_success_policy_json
            else None,
            'wf_module': child_spec.workflow_def_module,
            'wf_qualname': child_spec.workflow_def_qualname,
            'parent_wf_id': workflow_id,
            'parent_idx': task_index,
            'depth': parent_depth + 1,
            'root_wf_id': root_workflow_id,
        },
    )

    # 6. Insert child workflow_tasks
    for child_node in child_spec.tasks:
        child_dep_indices = [
            d.index for d in child_node.waits_for if d.index is not None
        ]
        child_args_from_indices = {
            k: v.index for k, v in child_node.args_from.items() if v.index is not None
        }
        child_ctx_from_ids = (
            [n.node_id for n in child_node.workflow_ctx_from if n.node_id is not None]
            if child_node.workflow_ctx_from
            else None
        )

        child_wt_id = str(uuid.uuid4())
        child_is_subworkflow = isinstance(child_node, SubWorkflowNode)

        if child_is_subworkflow:
            child_sub = child_node
            await session.execute(
                text("""
                    INSERT INTO horsies_workflow_tasks
                    (id, workflow_id, task_index, node_id, task_name, task_args, task_kwargs,
                     queue_name, priority, dependencies, args_from, workflow_ctx_from,
                     allow_failed_deps, join_type, min_success, task_options, status,
                     is_subworkflow, sub_workflow_name, sub_workflow_retry_mode,
                     sub_workflow_module, sub_workflow_qualname, created_at)
                    VALUES (:id, :wf_id, :idx, :node_id, :name, :args, :kwargs, :queue, :priority,
                            :deps, :args_from, :ctx_from, :allow_failed, :join_type, :min_success,
                            :task_options, :status, TRUE, :sub_wf_name, :sub_wf_retry_mode,
                            :sub_wf_module, :sub_wf_qualname, NOW())
                """),
                {
                    'id': child_wt_id,
                    'wf_id': child_id,
                    'idx': child_sub.index,
                    'node_id': child_sub.node_id,
                    'name': child_sub.name,
                    'args': dumps_json(child_sub.args),
                    'kwargs': dumps_json(child_sub.kwargs),
                    'queue': 'default',
                    'priority': 100,
                    'deps': child_dep_indices,
                    'args_from': dumps_json(child_args_from_indices)
                    if child_args_from_indices
                    else None,
                    'ctx_from': child_ctx_from_ids,
                    'allow_failed': child_sub.allow_failed_deps,
                    'join_type': child_sub.join,
                    'min_success': child_sub.min_success,
                    'task_options': None,
                    'status': 'PENDING' if child_dep_indices else 'READY',
                    'sub_wf_name': child_sub.workflow_def.name,
                    'sub_wf_retry_mode': child_sub.retry_mode.value,
                    'sub_wf_module': child_sub.workflow_def.__module__,
                    'sub_wf_qualname': child_sub.workflow_def.__qualname__,
                },
            )
        else:
            child_task = child_node
            child_task_options_json: str | None = getattr(
                child_task.fn, 'task_options_json', None
            )
            if child_task.good_until is not None:
                child_base_options: dict[str, Any] = {}
                if child_task_options_json:
                    parsed = loads_json(child_task_options_json)
                    if isinstance(parsed, dict):
                        child_base_options = parsed
                child_base_options['good_until'] = child_task.good_until.isoformat()
                child_task_options_json = dumps_json(child_base_options)

            await session.execute(
                text("""
                    INSERT INTO horsies_workflow_tasks
                    (id, workflow_id, task_index, node_id, task_name, task_args, task_kwargs,
                     queue_name, priority, dependencies, args_from, workflow_ctx_from,
                     allow_failed_deps, join_type, min_success, task_options, status,
                     is_subworkflow, created_at)
                    VALUES (:id, :wf_id, :idx, :node_id, :name, :args, :kwargs, :queue, :priority,
                            :deps, :args_from, :ctx_from, :allow_failed, :join_type, :min_success,
                            :task_options, :status, FALSE, NOW())
                """),
                {
                    'id': child_wt_id,
                    'wf_id': child_id,
                    'idx': child_task.index,
                    'node_id': child_task.node_id,
                    'name': child_task.name,
                    'args': dumps_json(child_task.args),
                    'kwargs': dumps_json(child_task.kwargs),
                    'queue': child_task.queue
                    or getattr(child_task.fn, 'task_queue_name', None)
                    or 'default',
                    'priority': child_task.priority
                    if child_task.priority is not None
                    else 100,
                    'deps': child_dep_indices,
                    'args_from': dumps_json(child_args_from_indices)
                    if child_args_from_indices
                    else None,
                    'ctx_from': child_ctx_from_ids,
                    'allow_failed': child_task.allow_failed_deps,
                    'join_type': child_task.join,
                    'min_success': child_task.min_success,
                    'task_options': child_task_options_json,
                    'status': 'PENDING' if child_dep_indices else 'READY',
                },
            )

    # 7. Update parent's workflow_task with child_workflow_id and mark RUNNING
    await session.execute(
        text("""
            UPDATE horsies_workflow_tasks
            SET sub_workflow_id = :child_id, status = 'RUNNING'
            WHERE workflow_id = :wf_id AND task_index = :idx
        """),
        {'child_id': child_id, 'wf_id': workflow_id, 'idx': task_index},
    )

    # 8. Enqueue child's root tasks
    child_root_nodes = [t for t in child_spec.tasks if not t.waits_for]
    for child_root in child_root_nodes:
        if child_root.index is not None:
            if isinstance(child_root, SubWorkflowNode):
                await _enqueue_subworkflow_task(
                    session,
                    broker,
                    child_id,
                    child_root.index,
                    {},
                    parent_depth + 1,
                    root_workflow_id,
                )
            else:
                await _enqueue_workflow_task(session, child_id, child_root.index, {})

    logger.info(f'Started child workflow {child_id} for {workflow_name}:{task_index}')
    return child_id


async def _build_workflow_context_data(
    session: AsyncSession,
    workflow_id: str,
    task_index: int,
    task_name: str,
    ctx_from_ids: list[str],
) -> dict[str, Any]:
    """
    Build serializable workflow context data.

    Returns a plain dict that can be JSON-serialized and reconstructed
    on the worker side into a WorkflowContext.

    Results are keyed by node_id for stable lookup in WorkflowContext.
    Also fetches SubWorkflowSummary for SubWorkflowNodes.
    """
    # Fetch results for the specified node_ids
    dep_results = await _get_dependency_results_with_names(
        session, workflow_id, ctx_from_ids
    )

    results_by_id: dict[str, str] = {}
    for node_id, result in dep_results.by_id.items():
        results_by_id[node_id] = dumps_json(result)

    # Fetch summaries for SubWorkflowNodes
    summaries_by_id: dict[str, str] = {}
    if ctx_from_ids:
        summary_result = await session.execute(
            text("""
                SELECT node_id, sub_workflow_summary
                FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id
                  AND node_id = ANY(:node_ids)
                  AND is_subworkflow = TRUE
                  AND sub_workflow_summary IS NOT NULL
            """),
            {'wf_id': workflow_id, 'node_ids': ctx_from_ids},
        )
        for row in summary_result.fetchall():
            node_id = row[0]
            summary_json = row[1]
            if node_id and summary_json:
                summaries_by_id[node_id] = summary_json

    return {
        'workflow_id': workflow_id,
        'task_index': task_index,
        'task_name': task_name,
        'results_by_id': results_by_id,
        'summaries_by_id': summaries_by_id,
    }

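# --------------------------------------------------------------------------
# Shape of the context payload built above (illustrative values only). The
# worker is expected to rebuild a WorkflowContext from this plain dict; the
# keys mirror the return statement of _build_workflow_context_data.
#
#     {
#         'workflow_id': '<uuid>',
#         'task_index': 3,
#         'task_name': 'aggregate_results',
#         'results_by_id': {'node-a': '<TaskResult JSON>'},
#         'summaries_by_id': {'node-b': '<SubWorkflowSummary JSON>'},
#     }
# --------------------------------------------------------------------------
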
1190
|
+
async def on_workflow_task_complete(
|
|
1191
|
+
session: AsyncSession,
|
|
1192
|
+
task_id: str,
|
|
1193
|
+
result: 'TaskResult[Any, TaskError]',
|
|
1194
|
+
broker: 'PostgresBroker | None' = None,
|
|
1195
|
+
) -> None:
|
|
1196
|
+
"""
|
|
1197
|
+
Called from worker._finalize_after when a task completes.
|
|
1198
|
+
Handles workflow task status update and dependency resolution.
|
|
1199
|
+
"""
|
|
1200
|
+
# 1. Find workflow_task by task_id
|
|
1201
|
+
wt_result = await session.execute(
|
|
1202
|
+
text("""
|
|
1203
|
+
SELECT workflow_id, task_index
|
|
1204
|
+
FROM horsies_workflow_tasks
|
|
1205
|
+
WHERE task_id = :tid
|
|
1206
|
+
"""),
|
|
1207
|
+
{'tid': task_id},
|
|
1208
|
+
)
|
|
1209
|
+
|
|
1210
|
+
row = wt_result.fetchone()
|
|
1211
|
+
if row is None:
|
|
1212
|
+
return # Not a workflow task
|
|
1213
|
+
|
|
1214
|
+
workflow_id = row[0]
|
|
1215
|
+
task_index = row[1]
|
|
1216
|
+
|
|
1217
|
+
# 2. Update workflow_task status and store result
|
|
1218
|
+
new_status = 'COMPLETED' if result.is_ok() else 'FAILED'
|
|
1219
|
+
await session.execute(
|
|
1220
|
+
text("""
|
|
1221
|
+
UPDATE horsies_workflow_tasks
|
|
1222
|
+
SET status = :status, result = :result, completed_at = NOW()
|
|
1223
|
+
WHERE workflow_id = :wf_id AND task_index = :idx
|
|
1224
|
+
"""),
|
|
1225
|
+
{
|
|
1226
|
+
'status': new_status,
|
|
1227
|
+
'result': dumps_json(result),
|
|
1228
|
+
'wf_id': workflow_id,
|
|
1229
|
+
'idx': task_index,
|
|
1230
|
+
},
|
|
1231
|
+
)
|
|
1232
|
+
|
|
1233
|
+
# 3. Handle failure based on on_error policy
|
|
1234
|
+
if result.is_err():
|
|
1235
|
+
should_continue = await _handle_workflow_task_failure(
|
|
1236
|
+
session, workflow_id, task_index, result
|
|
1237
|
+
)
|
|
1238
|
+
if not should_continue:
|
|
1239
|
+
# PAUSE mode - stop processing, don't propagate to dependents
|
|
1240
|
+
return
|
|
1241
|
+
|
|
1242
|
+
# 4. Check if workflow is PAUSED (may have been paused by another task)
|
|
1243
|
+
status_check = await session.execute(
|
|
1244
|
+
text('SELECT status FROM horsies_workflows WHERE id = :wf_id'),
|
|
1245
|
+
{'wf_id': workflow_id},
|
|
1246
|
+
)
|
|
1247
|
+
status_row = status_check.fetchone()
|
|
1248
|
+
if status_row and status_row[0] == 'PAUSED':
|
|
1249
|
+
return # Don't propagate - workflow is paused
|
|
1250
|
+
|
|
1251
|
+
# 5. Find and potentially enqueue dependent tasks
|
|
1252
|
+
await _process_dependents(session, workflow_id, task_index, broker)
|
|
1253
|
+
|
|
1254
|
+
# 6. Check if workflow is complete
|
|
1255
|
+
await _check_workflow_completion(session, workflow_id, broker)
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
async def _process_dependents(
|
|
1259
|
+
session: AsyncSession,
|
|
1260
|
+
workflow_id: str,
|
|
1261
|
+
completed_task_index: int,
|
|
1262
|
+
broker: 'PostgresBroker | None' = None,
|
|
1263
|
+
) -> None:
|
|
1264
|
+
"""
|
|
1265
|
+
Find tasks that depend on the completed task and enqueue if ready.
|
|
1266
|
+
|
|
1267
|
+
Args:
|
|
1268
|
+
session: Database session
|
|
1269
|
+
workflow_id: Workflow ID
|
|
1270
|
+
completed_task_index: Index of the task that just completed
|
|
1271
|
+
broker: PostgreSQL broker (required for SubWorkflowNode enqueue)
|
|
1272
|
+
"""
|
|
1273
|
+
# Find tasks that have completed_task_index in their dependencies
|
|
1274
|
+
dependents = await session.execute(
|
|
1275
|
+
text("""
|
|
1276
|
+
SELECT task_index FROM horsies_workflow_tasks
|
|
1277
|
+
WHERE workflow_id = :wf_id
|
|
1278
|
+
AND :completed_idx = ANY(dependencies)
|
|
1279
|
+
AND status = 'PENDING'
|
|
1280
|
+
"""),
|
|
1281
|
+
{'wf_id': workflow_id, 'completed_idx': completed_task_index},
|
|
1282
|
+
)
|
|
1283
|
+
|
|
1284
|
+
# Get workflow depth and root for subworkflow support
|
|
1285
|
+
wf_info = await session.execute(
|
|
1286
|
+
text('SELECT depth, root_workflow_id FROM horsies_workflows WHERE id = :wf_id'),
|
|
1287
|
+
{'wf_id': workflow_id},
|
|
1288
|
+
)
|
|
1289
|
+
wf_row = wf_info.fetchone()
|
|
1290
|
+
depth = wf_row[0] if wf_row else 0
|
|
1291
|
+
root_wf_id = wf_row[1] if wf_row else workflow_id
|
|
1292
|
+
|
|
1293
|
+
for row in dependents.fetchall():
|
|
1294
|
+
await _try_make_ready_and_enqueue(
|
|
1295
|
+
session, broker, workflow_id, row[0], depth, root_wf_id
|
|
1296
|
+
)
|
|
1297
|
+
|
|
1298
|
+
|
|
1299
|
+
async def _try_make_ready_and_enqueue(
    session: AsyncSession,
    broker: 'PostgresBroker | None',
    workflow_id: str,
    task_index: int,
    depth: int = 0,
    root_workflow_id: str | None = None,
) -> None:
    """
    Check if task's join condition is satisfied and handle accordingly.

    Supports three join modes:
    - "all": task runs when ALL dependencies are terminal (default)
    - "any": task runs when ANY dependency succeeds (COMPLETED)
    - "quorum": task runs when at least min_success dependencies succeed

    For any/quorum, task is SKIPPED if it becomes impossible to meet threshold.

    PAUSE guard: Only proceeds if workflow status is RUNNING.
    If workflow is PAUSED/CANCELLED/etc, task remains PENDING.

    Args:
        session: Database session
        broker: PostgreSQL broker (required for SubWorkflowNode enqueue)
        workflow_id: Workflow ID
        task_index: Task index to check
        depth: Current workflow nesting depth (for child workflows)
        root_workflow_id: Root workflow ID (for efficient queries)
    """
    # 1. Fetch task configuration
    config_result = await session.execute(
        text("""
            SELECT wt.status, wt.dependencies, wt.allow_failed_deps,
                   wt.join_type, wt.min_success, wt.workflow_ctx_from,
                   wt.is_subworkflow,
                   w.status as wf_status
            FROM horsies_workflow_tasks wt
            JOIN horsies_workflows w ON w.id = wt.workflow_id
            WHERE wt.workflow_id = :wf_id AND wt.task_index = :idx
        """),
        {'wf_id': workflow_id, 'idx': task_index},
    )
    config_row = config_result.fetchone()
    if config_row is None:
        return

    task_status = config_row[0]
    raw_deps = config_row[1]
    allow_failed_deps: bool = config_row[2] if config_row[2] is not None else False
    join_type: str = config_row[3] or 'all'
    min_success: int | None = config_row[4]
    raw_ctx_from = config_row[5]
    is_subworkflow: bool = config_row[6] if config_row[6] is not None else False
    wf_status = config_row[7]

    # Guard: only proceed if task is PENDING and workflow is RUNNING
    if task_status != 'PENDING' or wf_status != 'RUNNING':
        return

    dependencies: list[int] = (
        cast(list[int], raw_deps) if isinstance(raw_deps, list) else []
    )

    if not dependencies:
        # No dependencies - should already be READY (root task)
        return

    # 2. Get dependency status counts
    dep_status_result = await session.execute(
        text("""
            SELECT status, COUNT(*) as cnt
            FROM horsies_workflow_tasks
            WHERE workflow_id = :wf_id AND task_index = ANY(:deps)
            GROUP BY status
        """),
        {'wf_id': workflow_id, 'deps': dependencies},
    )
    status_counts: dict[str, int] = {
        row[0]: row[1] for row in dep_status_result.fetchall()
    }

    completed = status_counts.get(WorkflowTaskStatus.COMPLETED.value, 0)
    failed = status_counts.get(WorkflowTaskStatus.FAILED.value, 0)
    skipped = status_counts.get(WorkflowTaskStatus.SKIPPED.value, 0)
    terminal = completed + failed + skipped
    total_deps = len(dependencies)

    # 3. Determine readiness based on join type
    should_become_ready = False
    should_skip = False

    if join_type == 'all':
        # All deps must be terminal
        if terminal == total_deps:
            should_become_ready = True

    elif join_type == 'any':
        # At least one dep must be COMPLETED
        if completed >= 1:
            should_become_ready = True
        elif terminal == total_deps and completed == 0:
            # All deps terminal but none succeeded
            should_skip = True

    elif join_type == 'quorum':
        # At least min_success deps must be COMPLETED
        threshold = min_success or 1
        if completed >= threshold:
            should_become_ready = True
        else:
            # Check if it's impossible to reach threshold
            remaining = total_deps - terminal
            max_possible = completed + remaining
            if max_possible < threshold:
                should_skip = True

    if should_skip:
        # Mark task as SKIPPED (impossible to meet join condition)
        await session.execute(
            text("""
                UPDATE horsies_workflow_tasks
                SET status = 'SKIPPED'
                WHERE workflow_id = :wf_id AND task_index = :idx AND status = 'PENDING'
            """),
            {'wf_id': workflow_id, 'idx': task_index},
        )
        # Propagate SKIPPED to dependents
        await _process_dependents(session, workflow_id, task_index, broker)
        return

    if not should_become_ready:
        return  # Stay PENDING

    # Ensure workflow_ctx_from deps are terminal before enqueueing
    ctx_from_ids = _as_str_list(raw_ctx_from)
    if ctx_from_ids:
        ctx_terminal_result = await session.execute(
            text("""
                SELECT COUNT(*) as cnt
                FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id
                  AND node_id = ANY(:node_ids)
                  AND status = ANY(:wf_task_terminal_states)
            """),
            {
                'wf_id': workflow_id,
                'node_ids': ctx_from_ids,
                'wf_task_terminal_states': _WF_TASK_TERMINAL_VALUES,
            },
        )
        ctx_terminal = ctx_terminal_result.scalar_one()
        if ctx_terminal < len(ctx_from_ids):
            return  # Context deps not ready; stay PENDING

    # 4. Mark task as READY
    ready_result = await session.execute(
        text("""
            UPDATE horsies_workflow_tasks
            SET status = 'READY'
            WHERE workflow_id = :wf_id AND task_index = :idx AND status = 'PENDING'
            RETURNING task_index
        """),
        {'wf_id': workflow_id, 'idx': task_index},
    )
    if ready_result.fetchone() is None:
        return  # Already processed by another worker

    # 5. For join="all", check failed deps and skip if not allow_failed_deps
    # For any/quorum, we don't skip on failed deps (they're expected)
    if join_type == 'all':
        failed_or_skipped = failed + skipped
        if failed_or_skipped > 0 and not allow_failed_deps:
            await session.execute(
                text("""
                    UPDATE horsies_workflow_tasks
                    SET status = 'SKIPPED'
                    WHERE workflow_id = :wf_id AND task_index = :idx AND status = 'READY'
                """),
                {'wf_id': workflow_id, 'idx': task_index},
            )
            await _process_dependents(session, workflow_id, task_index, broker)
            return

    # 6. Evaluate conditions (run_when/skip_when) if set
    # Requires workflow name to look up TaskNode from registry
    workflow_name_result = await session.execute(
        text('SELECT name FROM horsies_workflows WHERE id = :wf_id'),
        {'wf_id': workflow_id},
    )
    workflow_name_row = workflow_name_result.fetchone()
    workflow_name = workflow_name_row[0] if workflow_name_row else None

    if workflow_name:
        should_skip_condition = await _evaluate_conditions(
            session, workflow_id, workflow_name, task_index, dependencies
        )
        if should_skip_condition:
            await session.execute(
                text("""
                    UPDATE horsies_workflow_tasks
                    SET status = 'SKIPPED'
                    WHERE workflow_id = :wf_id AND task_index = :idx AND status = 'READY'
                """),
                {'wf_id': workflow_id, 'idx': task_index},
            )
            await _process_dependents(session, workflow_id, task_index, broker)
            return

    # 7. Fetch dependency results and enqueue
    dep_results = await _get_dependency_results(session, workflow_id, dependencies)

    if is_subworkflow and broker is not None:
        # SubWorkflowNode: start child workflow
        actual_root = root_workflow_id or workflow_id
        await _enqueue_subworkflow_task(
            session, broker, workflow_id, task_index, dep_results, depth, actual_root
        )
    else:
        # Regular TaskNode: enqueue as task
        await _enqueue_workflow_task(session, workflow_id, task_index, dep_results)

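# Illustrative sketch: the join-readiness rule from step 3 above, isolated as a pure
# function. The helper name and string return values are made up; the arithmetic mirrors
# the counts derived from the dependency status query.
def _sketch_join_decision(
    join_type: str,
    completed: int,
    failed: int,
    skipped: int,
    total_deps: int,
    min_success: int | None,
) -> str:
    """Return 'ready', 'skip', or 'wait' for a PENDING task's dependencies."""
    terminal = completed + failed + skipped
    if join_type == 'all':
        return 'ready' if terminal == total_deps else 'wait'
    if join_type == 'any':
        if completed >= 1:
            return 'ready'
        return 'skip' if terminal == total_deps else 'wait'
    # 'quorum'
    threshold = min_success or 1
    if completed >= threshold:
        return 'ready'
    if completed + (total_deps - terminal) < threshold:
        return 'skip'  # the threshold can no longer be met
    return 'wait'
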
async def _evaluate_conditions(
    session: AsyncSession,
    workflow_id: str,
    workflow_name: str,
    task_index: int,
    dependencies: list[int],
) -> bool:
    """
    Evaluate run_when/skip_when conditions for a task.

    Returns True if task should be SKIPPED, False to proceed with enqueue.

    Conditions are evaluated in order:
    1. If skip_when returns True → skip
    2. Else if run_when returns False → skip
    3. Otherwise → proceed
    """
    from horsies.core.workflows.registry import get_task_node
    from horsies.core.models.workflow import WorkflowContext

    wf_def: type[WorkflowDefinition[Any]] | None = None
    node = get_task_node(workflow_name, task_index)
    if node is None:
        # Try to load workflow definition by import path (Option B.1)
        def_result = await session.execute(
            text("""
                SELECT workflow_def_module, workflow_def_qualname
                FROM horsies_workflows
                WHERE id = :wf_id
            """),
            {'wf_id': workflow_id},
        )
        def_row = def_result.fetchone()
        if def_row and def_row[0] and def_row[1]:
            wf_def = _load_workflow_def_from_path(def_row[0], def_row[1])
            if wf_def is not None:
                node = _node_from_workflow_def(wf_def, task_index)
    if node is None:
        # Node not registered (workflow module not imported in this process)
        # Proceed without condition evaluation
        return False

    node_any: AnyNode = node

    if node_any.skip_when is None and node_any.run_when is None:
        # No conditions to evaluate
        return False

    # Build WorkflowContext for condition evaluation
    # Use workflow_ctx_from if set, otherwise use all dependencies
    ctx_from_ids: list[str]
    if node_any.workflow_ctx_from:
        ctx_from_ids = [
            n.node_id for n in node_any.workflow_ctx_from if n.node_id is not None
        ]
    else:
        ctx_from_ids = []
        for dep_index in dependencies:
            dep_node = get_task_node(workflow_name, dep_index)
            if dep_node is None:
                if 'wf_def' in locals() and wf_def is not None:
                    dep_node = _node_from_workflow_def(wf_def, dep_index)
            if dep_node and dep_node.node_id is not None:
                ctx_from_ids.append(dep_node.node_id)

    # Fetch results for context
    dep_results = await _get_dependency_results_with_names(
        session, workflow_id, ctx_from_ids
    )

    # Fetch summaries for SubWorkflowNodes in context
    summaries_by_id: dict[str, SubWorkflowSummary[Any]] = {}
    if ctx_from_ids:
        summary_result = await session.execute(
            text("""
                SELECT node_id, sub_workflow_summary
                FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id
                  AND node_id = ANY(:node_ids)
                  AND is_subworkflow = TRUE
                  AND sub_workflow_summary IS NOT NULL
            """),
            {'wf_id': workflow_id, 'node_ids': ctx_from_ids},
        )
        for row in summary_result.fetchall():
            node_id = row[0]
            summary_json = row[1]
            if node_id and summary_json:
                try:
                    parsed = loads_json(summary_json)
                    if isinstance(parsed, dict):
                        summaries_by_id[node_id] = SubWorkflowSummary.from_json(parsed)
                except Exception:
                    continue

    # Get task name for context
    task_name = node_any.name

    # Build context
    ctx = WorkflowContext(
        workflow_id=workflow_id,
        task_index=task_index,
        task_name=task_name,
        results_by_id=dep_results.by_id,
        summaries_by_id=summaries_by_id,
    )

    # Evaluate conditions
    try:
        if node_any.skip_when is not None and node_any.skip_when(ctx):
            return True  # Skip
        if node_any.run_when is not None and not node_any.run_when(ctx):
            return True  # Skip (run_when returned False)
    except Exception as e:
        # Condition evaluation failed - log and proceed with skip
        logger.warning(f'Condition evaluation failed for task {task_name}: {e}')
        return True  # Skip on error (safer default)

    return False  # Proceed with enqueue

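# Illustrative sketch: what a run_when/skip_when predicate sees. It receives the
# WorkflowContext built above, so it can branch on upstream results by node_id.
# The node id 'fetch_data' is made up for illustration.
def _sketch_run_when(ctx: 'WorkflowContext') -> bool:
    upstream = ctx.results_by_id.get('fetch_data')
    return upstream is not None and upstream.is_ok()
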
class DependencyResults:
    """Container for dependency results with index, name, and node_id mappings."""

    def __init__(self) -> None:
        self.by_index: dict[int, 'TaskResult[Any, TaskError]'] = {}
        self.by_name: dict[str, 'TaskResult[Any, TaskError]'] = {}
        self.by_id: dict[str, 'TaskResult[Any, TaskError]'] = {}


async def _get_dependency_results(
    session: AsyncSession,
    workflow_id: str,
    dependency_indices: list[int],
) -> dict[int, 'TaskResult[Any, TaskError]']:
    """
    Fetch TaskResults for dependencies in terminal states.

    - COMPLETED/FAILED: returns actual TaskResult from stored result
    - SKIPPED: returns sentinel TaskResult with UPSTREAM_SKIPPED error
    """
    from horsies.core.models.tasks import LibraryErrorCode, TaskError, TaskResult

    if not dependency_indices:
        return {}

    result = await session.execute(
        text("""
            SELECT task_index, status, result
            FROM horsies_workflow_tasks
            WHERE workflow_id = :wf_id
              AND task_index = ANY(:indices)
              AND status = ANY(:wf_task_terminal_states)
        """),
        {
            'wf_id': workflow_id,
            'indices': dependency_indices,
            'wf_task_terminal_states': _WF_TASK_TERMINAL_VALUES,
        },
    )

    results: dict[int, TaskResult[Any, TaskError]] = {}
    for row in result.fetchall():
        task_index = row[0]
        status = row[1]
        stored_result = row[2]

        if status == WorkflowTaskStatus.SKIPPED.value:
            # Inject sentinel TaskResult for SKIPPED dependencies
            results[task_index] = TaskResult(
                err=TaskError(
                    error_code=LibraryErrorCode.UPSTREAM_SKIPPED,
                    message='Upstream dependency was SKIPPED',
                    data={'dependency_index': task_index},
                )
            )
        elif stored_result:
            results[task_index] = task_result_from_json(loads_json(stored_result))

    return results

async def _get_dependency_results_with_names(
    session: AsyncSession,
    workflow_id: str,
    dependency_node_ids: list[str],
) -> DependencyResults:
    """
    Fetch TaskResults with index, name, and node_id mappings.
    Used for building WorkflowContext.

    - COMPLETED/FAILED: returns actual TaskResult from stored result
    - SKIPPED: returns sentinel TaskResult with UPSTREAM_SKIPPED error
    """
    from horsies.core.models.tasks import TaskError, LibraryErrorCode, TaskResult

    dep_results = DependencyResults()

    if not dependency_node_ids:
        return dep_results

    result = await session.execute(
        text("""
            SELECT task_index, task_name, node_id, status, result
            FROM horsies_workflow_tasks
            WHERE workflow_id = :wf_id
              AND node_id = ANY(:node_ids)
              AND status = ANY(:wf_task_terminal_states)
        """),
        {
            'wf_id': workflow_id,
            'node_ids': dependency_node_ids,
            'wf_task_terminal_states': _WF_TASK_TERMINAL_VALUES,
        },
    )

    for row in result.fetchall():
        task_index = row[0]
        task_name = row[1]
        node_id = row[2]
        status = row[3]
        stored_result = row[4]

        if status == WorkflowTaskStatus.SKIPPED.value:
            # Inject sentinel TaskResult for SKIPPED dependencies
            task_result: TaskResult[Any, TaskError] = TaskResult(
                err=TaskError(
                    error_code=LibraryErrorCode.UPSTREAM_SKIPPED,
                    message='Upstream dependency was SKIPPED',
                    data={'dependency_index': task_index},
                )
            )
        elif stored_result:
            task_result = task_result_from_json(loads_json(stored_result))
        else:
            continue  # No result to include

        dep_results.by_index[task_index] = task_result
        if node_id is not None:
            dep_results.by_id[node_id] = task_result
        # Use unique key to avoid collisions when same task appears multiple times
        unique_key = f'{task_name}#{task_index}'
        dep_results.by_name[unique_key] = task_result

    return dep_results

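# Illustrative sketch: how a consumer of the results built above can tell a skipped
# upstream apart from a failed one by inspecting the sentinel error code. The branching
# and return strings are made up; the imports and attributes are the ones used above.
def _sketch_classify_dependency(dep: 'TaskResult[Any, TaskError]') -> str:
    from horsies.core.models.tasks import LibraryErrorCode

    if dep.is_ok():
        return 'use-value'
    if dep.err is not None and dep.err.error_code == LibraryErrorCode.UPSTREAM_SKIPPED:
        return 'upstream-skipped'
    return 'upstream-failed'
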
async def _check_workflow_completion(
    session: AsyncSession,
    workflow_id: str,
    broker: 'PostgresBroker | None' = None,
) -> None:
    """
    Check if all workflow tasks are complete and update workflow status.
    Handles cases where workflow may already be FAILED but DAG is still resolving.

    If success_policy is set, evaluates success cases instead of "any failure → FAILED".
    """
    # Get current workflow status, error, success_policy, and task counts
    result = await session.execute(
        text("""
            SELECT
                w.status,
                w.completed_at,
                w.error,
                w.success_policy,
                w.name,
                COUNT(*) FILTER (WHERE NOT (wt.status = ANY(:wf_task_terminal_states))) as incomplete,
                COUNT(*) FILTER (WHERE wt.status = 'FAILED') as failed,
                COUNT(*) FILTER (WHERE wt.status = 'COMPLETED') as completed,
                COUNT(*) as total
            FROM horsies_workflows w
            LEFT JOIN horsies_workflow_tasks wt ON wt.workflow_id = w.id
            WHERE w.id = :wf_id
            GROUP BY w.id, w.status, w.completed_at, w.error, w.success_policy, w.name
        """),
        {
            'wf_id': workflow_id,
            'wf_task_terminal_states': _WF_TASK_TERMINAL_VALUES,
        },
    )

    row = result.fetchone()
    if row is None:
        return

    current_status = row[0]
    already_completed = row[1] is not None
    has_error = row[2] is not None
    success_policy_data = row[3]  # JSONB, may be None
    workflow_name = row[4]
    incomplete = row[5] or 0
    failed = row[6] or 0
    completed = row[7] or 0
    total = row[8] or 0

    # Don't process if workflow is PAUSED (waiting for manual intervention)
    if current_status == 'PAUSED':
        return

    if incomplete > 0:
        return  # Still running

    if already_completed:
        return  # Already finalized

    # All tasks done - determine final result
    final_result = await _get_workflow_final_result(session, workflow_id)

    # Determine final status using success policy or default behavior
    workflow_succeeded = await _evaluate_workflow_success(
        session, workflow_id, success_policy_data, has_error, failed
    )

    if workflow_succeeded:
        await session.execute(
            text("""
                UPDATE horsies_workflows
                SET status = 'COMPLETED', result = :result, completed_at = NOW(), updated_at = NOW()
                WHERE id = :wf_id AND completed_at IS NULL
            """),
            {'wf_id': workflow_id, 'result': final_result},
        )
        logger.info(
            f"Workflow '{workflow_name}' ({workflow_id[:8]}) COMPLETED: "
            f"{completed}/{total} tasks succeeded, {failed} failed"
        )
    else:
        # Compute error based on success_policy semantics
        # With success_policy: always recompute to reflect required task failures
        # Without success_policy: use existing error if already set
        error_json: str | None = None
        if success_policy_data is not None or not has_error:
            error_json = await _get_workflow_failure_error(
                session, workflow_id, success_policy_data
            )

        await session.execute(
            text("""
                UPDATE horsies_workflows
                SET status = 'FAILED', result = :result,
                    error = COALESCE(:error, error),
                    completed_at = NOW(), updated_at = NOW()
                WHERE id = :wf_id AND completed_at IS NULL
            """),
            {'wf_id': workflow_id, 'result': final_result, 'error': error_json},
        )
        logger.info(
            f"Workflow '{workflow_name}' ({workflow_id[:8]}) FAILED: "
            f"{completed}/{total} tasks succeeded, {failed} failed"
        )

    # Send NOTIFY for workflow completion
    await session.execute(
        text("SELECT pg_notify('workflow_done', :wf_id)"),
        {'wf_id': workflow_id},
    )

    # If this is a child workflow, notify parent
    parent_result = await session.execute(
        text("""
            SELECT parent_workflow_id, parent_task_index
            FROM horsies_workflows WHERE id = :wf_id
        """),
        {'wf_id': workflow_id},
    )
    parent_row = parent_result.fetchone()
    if parent_row and parent_row[0] is not None:
        # This is a child workflow - notify parent
        await _on_subworkflow_complete(session, workflow_id, broker)

async def _on_subworkflow_complete(
    session: AsyncSession,
    child_workflow_id: str,
    broker: 'PostgresBroker | None' = None,
) -> None:
    """
    Called when a child workflow completes.
    Updates parent node status and propagates to parent DAG.
    """
    from horsies.core.models.tasks import TaskResult, SubWorkflowError

    # 1. Get child workflow info and task counts
    child_result = await session.execute(
        text("""
            SELECT w.status, w.result, w.error, w.parent_workflow_id, w.parent_task_index,
                   (SELECT COUNT(*) FROM horsies_workflow_tasks WHERE workflow_id = w.id) as total,
                   (SELECT COUNT(*) FROM horsies_workflow_tasks WHERE workflow_id = w.id AND status = 'COMPLETED') as completed,
                   (SELECT COUNT(*) FROM horsies_workflow_tasks WHERE workflow_id = w.id AND status = 'FAILED') as failed,
                   (SELECT COUNT(*) FROM horsies_workflow_tasks WHERE workflow_id = w.id AND status = 'SKIPPED') as skipped
            FROM horsies_workflows w
            WHERE w.id = :child_id
        """),
        {'child_id': child_workflow_id},
    )

    row = child_result.fetchone()
    if row is None:
        logger.error(f'Child workflow {child_workflow_id} not found')
        return

    child_status = row[0]
    child_result_json = row[1]
    child_error = row[2]
    parent_wf_id = row[3]
    parent_task_idx = row[4]
    total_tasks = row[5] or 0
    completed_tasks = row[6] or 0
    failed_tasks = row[7] or 0
    skipped_tasks = row[8] or 0

    if parent_wf_id is None:
        # Not a child workflow (or already detached)
        return

    # 2. Build SubWorkflowSummary
    child_output: Any = None
    if child_result_json:
        try:
            parsed_result = task_result_from_json(loads_json(child_result_json))
            if parsed_result.is_ok():
                child_output = parsed_result.ok
        except Exception:
            pass

    error_summary: str | None = None
    if child_error:
        try:
            error_data = loads_json(child_error)
            if isinstance(error_data, dict):
                msg = error_data.get('message')
                if isinstance(msg, str):
                    error_summary = msg
        except Exception:
            error_summary = str(child_error)[:200]

    child_summary = SubWorkflowSummary(
        status=WorkflowStatus(child_status),
        success_case=None,  # TODO: extract from child if success_policy matched
        output=child_output,
        total_tasks=total_tasks,
        completed_tasks=completed_tasks,
        failed_tasks=failed_tasks,
        skipped_tasks=skipped_tasks,
        error_summary=error_summary,
    )

    # 3. Determine parent node status and result
    if child_status == 'COMPLETED':
        parent_node_status = 'COMPLETED'
        # Pass through child's output as TaskResult
        parent_node_result = child_result_json
    else:
        parent_node_status = 'FAILED'
        # Create SubWorkflowError
        error = SubWorkflowError(
            error_code='SUBWORKFLOW_FAILED',
            message=f'Subworkflow {child_workflow_id} failed with status {child_status}',
            sub_workflow_id=child_workflow_id,
            sub_workflow_summary=child_summary,
        )
        parent_node_result = dumps_json(TaskResult(err=error))

    # 4. Update parent node
    await session.execute(
        text("""
            UPDATE horsies_workflow_tasks
            SET status = :status, result = :result, sub_workflow_summary = :summary, completed_at = NOW()
            WHERE workflow_id = :wf_id AND task_index = :idx
        """),
        {
            'status': parent_node_status,
            'result': parent_node_result,
            'summary': dumps_json(child_summary),
            'wf_id': parent_wf_id,
            'idx': parent_task_idx,
        },
    )

    # 5. Handle failure (same as task failure)
    if parent_node_status == 'FAILED':
        # Create a TaskResult for the failure handler
        failure_result: TaskResult[Any, TaskError] = TaskResult(
            err=SubWorkflowError(
                error_code='SUBWORKFLOW_FAILED',
                message=f'Subworkflow {child_workflow_id} failed',
                sub_workflow_id=child_workflow_id,
                sub_workflow_summary=child_summary,
            )
        )
        should_continue = await _handle_workflow_task_failure(
            session, parent_wf_id, parent_task_idx, failure_result
        )
        if not should_continue:
            # PAUSE mode - stop processing
            return

    # 6. Check if parent workflow is PAUSED
    parent_status_check = await session.execute(
        text('SELECT status FROM horsies_workflows WHERE id = :wf_id'),
        {'wf_id': parent_wf_id},
    )
    parent_status_row = parent_status_check.fetchone()
    if parent_status_row and parent_status_row[0] == 'PAUSED':
        return  # Don't propagate - parent is paused

    # 7. Process parent dependents
    await _process_dependents(session, parent_wf_id, parent_task_idx, broker)

    # 8. Check parent completion
    await _check_workflow_completion(session, parent_wf_id, broker)

async def _evaluate_workflow_success(
    session: AsyncSession,
    workflow_id: str,
    success_policy_data: dict[str, Any] | str | None,
    has_error: bool,
    failed: int,
) -> bool:
    """
    Evaluate whether workflow succeeded.

    If success_policy is None: default behavior (any failed → False)
    If success_policy is set: True if any SuccessCase is satisfied
    """
    if success_policy_data is None:
        # Default behavior: succeed only if no failures and no stored error
        return not has_error and failed == 0

    # Guard: JSONB may come back as string depending on driver
    policy: dict[str, Any]
    if isinstance(success_policy_data, str):
        policy = loads_json(success_policy_data)  # type: ignore[assignment]
    else:
        policy = success_policy_data

    # Build status map by task_index
    result = await session.execute(
        text("""
            SELECT task_index, status
            FROM horsies_workflow_tasks
            WHERE workflow_id = :wf_id
        """),
        {'wf_id': workflow_id},
    )

    status_by_index: dict[int, str] = {row[0]: row[1] for row in result.fetchall()}

    # Note: optional_indices from success_policy are ignored here because
    # optional tasks can fail without affecting whether a success case is satisfied.
    # We only check if required tasks in each case are COMPLETED.

    # Evaluate each success case
    cases = policy.get('cases', [])
    for case in cases:
        required_indices = case.get('required_indices', [])
        if not required_indices:
            continue

        # Case is satisfied if ALL required tasks are COMPLETED
        all_completed = all(
            status_by_index.get(idx) == 'COMPLETED' for idx in required_indices
        )
        if all_completed:
            return True

    # No case satisfied
    return False

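# Illustrative sketch of the success_policy shape consumed above, with made-up indices.
# A workflow carrying this policy is COMPLETED if either case's required tasks all reach
# COMPLETED, e.g. statuses {0: 'FAILED', 1: 'COMPLETED', 2: 'COMPLETED'} satisfy the
# second case and the workflow succeeds despite the failure.
_SKETCH_SUCCESS_POLICY: dict[str, Any] = {
    'cases': [
        {'required_indices': [0, 1]},  # primary path
        {'required_indices': [2]},     # fallback path
    ],
}
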
async def _get_workflow_failure_error(
    session: AsyncSession,
    workflow_id: str,
    success_policy_data: dict[str, Any] | str | None,
) -> str | None:
    """
    Get error for a failed workflow.

    If success_policy is set but no case satisfied, returns WORKFLOW_SUCCESS_CASE_NOT_MET.
    Otherwise, returns the first failed required task's error.
    """
    from horsies.core.models.tasks import LibraryErrorCode, TaskError

    if success_policy_data is None:
        # Default: get first failed task's error
        result = await session.execute(
            text("""
                SELECT result FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id AND status = 'FAILED'
                ORDER BY task_index ASC LIMIT 1
            """),
            {'wf_id': workflow_id},
        )
        row = result.fetchone()
        if row and row[0]:
            task_result = task_result_from_json(loads_json(row[0]))
            if task_result.is_err() and task_result.err:
                return dumps_json(task_result.err)
        return None

    # Guard: JSONB may come back as string depending on driver
    policy: dict[str, Any]
    if isinstance(success_policy_data, str):
        policy = loads_json(success_policy_data)  # type: ignore[assignment]
    else:
        policy = success_policy_data

    # With success_policy: find first failed required task or use sentinel error
    # Collect all required indices across all cases
    all_required: set[int] = set()
    for case in policy.get('cases', []):
        all_required.update(case.get('required_indices', []))

    if all_required:
        # Get first failed required task
        result = await session.execute(
            text("""
                SELECT result FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id
                  AND status = 'FAILED'
                  AND task_index = ANY(:required)
                ORDER BY task_index ASC LIMIT 1
            """),
            {'wf_id': workflow_id, 'required': list(all_required)},
        )
        row = result.fetchone()
        if row and row[0]:
            task_result = task_result_from_json(loads_json(row[0]))
            if task_result.is_err() and task_result.err:
                return dumps_json(task_result.err)

    # No required task failed, but no case was satisfied (all SKIPPED?)
    return dumps_json(
        TaskError(
            error_code=LibraryErrorCode.WORKFLOW_SUCCESS_CASE_NOT_MET,
            message='No success case was satisfied',
        )
    )

async def _get_workflow_final_result(
    session: AsyncSession,
    workflow_id: str,
) -> str:
    """
    Get the final workflow result.

    If output_task_index is set: return that task's result
    Otherwise: return dict of terminal task results (tasks with no dependents)
    """
    # Check for explicit output task
    wf_result = await session.execute(
        text('SELECT output_task_index FROM horsies_workflows WHERE id = :wf_id'),
        {'wf_id': workflow_id},
    )
    wf_row = wf_result.fetchone()

    if wf_row and wf_row[0] is not None:
        # Return explicit output task's result
        output_result = await session.execute(
            text("""
                SELECT result FROM horsies_workflow_tasks
                WHERE workflow_id = :wf_id AND task_index = :idx
            """),
            {'wf_id': workflow_id, 'idx': wf_row[0]},
        )
        output_row = output_result.fetchone()
        return output_row[0] if output_row and output_row[0] else dumps_json(None)

    # Find terminal tasks (not in any other task's dependencies)
    terminal_results = await session.execute(
        text("""
            SELECT wt.node_id, wt.task_index, wt.result
            FROM horsies_workflow_tasks wt
            WHERE wt.workflow_id = :wf_id
              AND NOT EXISTS (
                  SELECT 1 FROM horsies_workflow_tasks other
                  WHERE other.workflow_id = wt.workflow_id
                    AND wt.task_index = ANY(other.dependencies)
              )
        """),
        {'wf_id': workflow_id},
    )

    # Build dict of terminal results, keyed by node_id
    # This ensures WorkflowHandle.get() returns dict[str, TaskResult], not raw dicts
    results_dict: dict[str, Any] = {}
    for row in terminal_results.fetchall():
        node_id = row[0]
        if not isinstance(node_id, str):
            continue
        unique_key = node_id
        if row[2]:
            # Rehydrate to TaskResult and serialize back (will be parsed on get())
            results_dict[unique_key] = task_result_from_json(loads_json(row[2]))
        else:
            results_dict[unique_key] = None

    # Wrap in TaskResult so WorkflowHandle._get_result() can parse it
    from horsies.core.models.tasks import TaskResult

    wrapped_result: TaskResult[dict[str, Any], Any] = TaskResult(ok=results_dict)
    return dumps_json(wrapped_result)

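# Illustrative sketch of the fallback final-result shape produced above when no
# output_task_index is set: a TaskResult whose ok payload maps terminal node ids to
# their rehydrated TaskResults. Node ids and payloads below are made up.
def _sketch_final_result_payload() -> str:
    from horsies.core.models.tasks import TaskResult

    terminal = {
        'report': TaskResult(ok={'rows': 42}),
        'notify': TaskResult(ok='sent'),
    }
    return dumps_json(TaskResult(ok=terminal))
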
async def _handle_workflow_task_failure(
    session: AsyncSession,
    workflow_id: str,
    _task_index: int,  # Unused but kept for API consistency
    result: 'TaskResult[Any, TaskError]',
) -> bool:
    """
    Handle a failed workflow task based on on_error policy.

    Returns True if dependency propagation should continue, False to stop (PAUSE mode).

    Note: The failed task's result is already stored. Dependents will receive
    the TaskResult with is_err()=True if they have args_from pointing to this task.
    """
    # Get workflow's on_error policy
    wf_result = await session.execute(
        text('SELECT on_error FROM horsies_workflows WHERE id = :wf_id'),
        {'wf_id': workflow_id},
    )

    wf_row = wf_result.fetchone()
    if wf_row is None:
        return True

    on_error = wf_row[0]

    # Extract TaskError for storage (not the full TaskResult)
    error_payload = dumps_json(result.err) if result.is_err() and result.err else None

    if on_error == 'fail':
        # Store error but keep status RUNNING until DAG fully resolves
        # This allows allow_failed_deps tasks to run and produce meaningful final result
        # Status will be set to FAILED in _check_workflow_completion when all tasks are terminal
        await session.execute(
            text("""
                UPDATE horsies_workflows
                SET error = :error, updated_at = NOW()
                WHERE id = :wf_id AND status = 'RUNNING'
            """),
            {'wf_id': workflow_id, 'error': error_payload},
        )
        return True  # Continue dependency propagation

    elif on_error == 'pause':
        # Pause workflow for manual intervention - STOP all processing
        await session.execute(
            text("""
                UPDATE horsies_workflows
                SET status = 'PAUSED', error = :error, updated_at = NOW()
                WHERE id = :wf_id AND status = 'RUNNING'
            """),
            {'wf_id': workflow_id, 'error': error_payload},
        )

        # Notify of pause (so clients can react via get())
        await session.execute(
            text("SELECT pg_notify('workflow_done', :wf_id)"),
            {'wf_id': workflow_id},
        )

        return False  # Stop dependency propagation - pending tasks stay pending for resume

    return True  # Default: continue

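# Illustrative summary of the on_error policies handled above, reduced to the propagation
# decision they return: 'fail' records the error but lets the DAG finish resolving,
# 'pause' freezes the workflow for manual resume, and any other value continues.
def _sketch_on_error_continues(on_error: str) -> bool:
    """Return True when dependents should keep being processed after a failure."""
    return on_error != 'pause'
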
def _load_workflow_def_from_path(
    module_path: str,
    qualname: str,
) -> 'type[WorkflowDefinition[Any]] | None':
    """
    Load a WorkflowDefinition class via module-path fallback when the registry is missing it.
    Returns the WorkflowDefinition subclass if found, otherwise None.
    """
    try:
        import importlib

        module = importlib.import_module(module_path)
        obj: Any = module
        for attr in qualname.split('.'):
            obj = getattr(obj, attr, None)
            if obj is None:
                return None

        if isinstance(obj, type) and issubclass(obj, WorkflowDefinition):
            wf_def = cast('type[WorkflowDefinition[Any]]', obj)
            return wf_def
    except Exception as exc:
        logger.error(f'Failed to load subworkflow def {module_path}:{qualname}: {exc}')
        return None

    return None

def _node_from_workflow_def(
    workflow_def: type[WorkflowDefinition[Any]],
    task_index: int,
) -> 'AnyNode | None':
    """
    Reconstruct a node by index from a WorkflowDefinition class.

    Assigns indices and node_ids in definition order to mirror WorkflowSpec.
    """
    nodes = workflow_def.get_workflow_nodes()
    if not nodes:
        return None

    nodes_typed: list[tuple[str, AnyNode]] = nodes
    for idx, (attr_name, node) in enumerate(nodes_typed):
        if node.index is None:
            node.index = idx
        if node.node_id is None:
            node.node_id = attr_name
        if idx == task_index:
            return node
    return None
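
# Illustrative sketch: the lookup order used by _evaluate_conditions when resolving a
# node, the in-process registry first, then the import path stored on the workflow row.
# The module path 'myapp.workflows' and class name 'EtlWorkflow' are made up.
def _sketch_resolve_node(workflow_name: str, task_index: int) -> 'AnyNode | None':
    from horsies.core.workflows.registry import get_task_node

    node = get_task_node(workflow_name, task_index)
    if node is not None:
        return node
    wf_def = _load_workflow_def_from_path('myapp.workflows', 'EtlWorkflow')
    if wf_def is None:
        return None
    return _node_from_workflow_def(wf_def, task_index)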