horsies-0.1.0a4-py3-none-any.whl → horsies-0.1.0a5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- horsies/core/app.py +67 -47
- horsies/core/banner.py +27 -27
- horsies/core/brokers/postgres.py +315 -288
- horsies/core/cli.py +7 -2
- horsies/core/errors.py +3 -0
- horsies/core/models/app.py +87 -64
- horsies/core/models/recovery.py +30 -21
- horsies/core/models/schedule.py +30 -19
- horsies/core/models/tasks.py +1 -0
- horsies/core/models/workflow.py +489 -202
- horsies/core/models/workflow_pg.py +3 -1
- horsies/core/scheduler/service.py +5 -1
- horsies/core/scheduler/state.py +39 -27
- horsies/core/task_decorator.py +138 -0
- horsies/core/types/status.py +7 -5
- horsies/core/utils/imports.py +10 -10
- horsies/core/worker/worker.py +197 -139
- horsies/core/workflows/engine.py +487 -352
- horsies/core/workflows/recovery.py +148 -119
- {horsies-0.1.0a4.dist-info → horsies-0.1.0a5.dist-info}/METADATA +1 -1
- horsies-0.1.0a5.dist-info/RECORD +42 -0
- horsies-0.1.0a4.dist-info/RECORD +0 -42
- {horsies-0.1.0a4.dist-info → horsies-0.1.0a5.dist-info}/WHEEL +0 -0
- {horsies-0.1.0a4.dist-info → horsies-0.1.0a5.dist-info}/entry_points.txt +0 -0
- {horsies-0.1.0a4.dist-info → horsies-0.1.0a5.dist-info}/top_level.txt +0 -0
horsies/core/brokers/postgres.py
CHANGED
@@ -23,6 +23,289 @@ from horsies.core.logging import get_logger
 if TYPE_CHECKING:
     from horsies.core.models.tasks import TaskResult, TaskError
 
+# ---- Task notification trigger queries ----
+
+CREATE_TASK_NOTIFY_FUNCTION_SQL = text("""
+    CREATE OR REPLACE FUNCTION horsies_notify_task_changes()
+    RETURNS trigger AS $$
+    BEGIN
+        IF TG_OP = 'INSERT' AND NEW.status = 'PENDING' THEN
+            -- New task notifications: wake up workers
+            PERFORM pg_notify('task_new', NEW.id); -- Global worker notification
+            PERFORM pg_notify('task_queue_' || NEW.queue_name, NEW.id); -- Queue-specific notification
+        ELSIF TG_OP = 'UPDATE' AND OLD.status != NEW.status THEN
+            -- Task completion notifications: wake up result waiters
+            IF NEW.status IN ('COMPLETED', 'FAILED') THEN
+                PERFORM pg_notify('task_done', NEW.id); -- Send task_id as payload
+            END IF;
+        END IF;
+        RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql;
+""")
+
+CREATE_TASK_NOTIFY_TRIGGER_SQL = text("""
+    DROP TRIGGER IF EXISTS horsies_task_notify_trigger ON horsies_tasks;
+    CREATE TRIGGER horsies_task_notify_trigger
+    AFTER INSERT OR UPDATE ON horsies_tasks
+    FOR EACH ROW
+    EXECUTE FUNCTION horsies_notify_task_changes();
+""")
+
+# ---- Workflow schema queries ----
+
+CREATE_WORKFLOW_TASKS_DEPS_INDEX_SQL = text("""
+    CREATE INDEX IF NOT EXISTS idx_horsies_workflow_tasks_deps
+    ON horsies_workflow_tasks USING GIN(dependencies);
+""")
+
+# Schema migration queries
+
+ADD_TASK_OPTIONS_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS task_options TEXT;
+""")
+
+ADD_SUCCESS_POLICY_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS success_policy JSONB;
+""")
+
+ADD_JOIN_TYPE_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS join_type VARCHAR(10) NOT NULL DEFAULT 'all';
+""")
+
+ADD_MIN_SUCCESS_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS min_success INTEGER;
+""")
+
+ADD_NODE_ID_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS node_id VARCHAR(128);
+""")
+
+ALTER_WORKFLOW_CTX_FROM_TYPE_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ALTER COLUMN workflow_ctx_from
+    TYPE VARCHAR(128)[]
+    USING workflow_ctx_from::VARCHAR(128)[];
+""")
+
+ADD_PARENT_WORKFLOW_ID_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS parent_workflow_id VARCHAR(36);
+""")
+
+ADD_PARENT_TASK_INDEX_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS parent_task_index INTEGER;
+""")
+
+ADD_DEPTH_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS depth INTEGER NOT NULL DEFAULT 0;
+""")
+
+ADD_ROOT_WORKFLOW_ID_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS root_workflow_id VARCHAR(36);
+""")
+
+ADD_WORKFLOW_DEF_MODULE_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS workflow_def_module VARCHAR(512);
+""")
+
+ADD_WORKFLOW_DEF_QUALNAME_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflows
+    ADD COLUMN IF NOT EXISTS workflow_def_qualname VARCHAR(512);
+""")
+
+ADD_IS_SUBWORKFLOW_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS is_subworkflow BOOLEAN NOT NULL DEFAULT FALSE;
+""")
+
+ADD_SUB_WORKFLOW_ID_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_id VARCHAR(36);
+""")
+
+ADD_SUB_WORKFLOW_NAME_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_name VARCHAR(255);
+""")
+
+ADD_SUB_WORKFLOW_RETRY_MODE_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_retry_mode VARCHAR(50);
+""")
+
+ADD_SUB_WORKFLOW_SUMMARY_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_summary TEXT;
+""")
+
+ADD_SUB_WORKFLOW_MODULE_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_module VARCHAR(512);
+""")
+
+ADD_SUB_WORKFLOW_QUALNAME_COLUMN_SQL = text("""
+    ALTER TABLE horsies_workflow_tasks
+    ADD COLUMN IF NOT EXISTS sub_workflow_qualname VARCHAR(512);
+""")
+
+CREATE_WORKFLOW_NOTIFY_FUNCTION_SQL = text("""
+    CREATE OR REPLACE FUNCTION horsies_notify_workflow_changes()
+    RETURNS trigger AS $$
+    BEGIN
+        IF TG_OP = 'UPDATE' AND OLD.status != NEW.status THEN
+            -- Workflow completion notifications
+            IF NEW.status IN ('COMPLETED', 'FAILED', 'CANCELLED', 'PAUSED') THEN
+                PERFORM pg_notify('workflow_done', NEW.id);
+            END IF;
+        END IF;
+        RETURN NEW;
+    END;
+    $$ LANGUAGE plpgsql;
+""")
+
+CREATE_WORKFLOW_NOTIFY_TRIGGER_SQL = text("""
+    DROP TRIGGER IF EXISTS horsies_workflow_notify_trigger ON horsies_workflows;
+    CREATE TRIGGER horsies_workflow_notify_trigger
+    AFTER UPDATE ON horsies_workflows
+    FOR EACH ROW
+    EXECUTE FUNCTION horsies_notify_workflow_changes();
+""")
+
+# ---- Schema initialization queries ----
+
+SCHEMA_ADVISORY_LOCK_SQL = text("""
+    SELECT pg_advisory_xact_lock(CAST(:key AS BIGINT))
+""")
+
+# ---- Monitoring queries ----
+
+GET_STALE_TASKS_SQL = text("""
+    SELECT
+        t.id,
+        t.worker_hostname,
+        t.worker_pid,
+        t.worker_process_name,
+        hb.last_heartbeat,
+        t.started_at,
+        t.task_name
+    FROM horsies_tasks t
+    LEFT JOIN LATERAL (
+        SELECT sent_at AS last_heartbeat
+        FROM horsies_heartbeats h
+        WHERE h.task_id = t.id AND h.role = 'runner'
+        ORDER BY sent_at DESC
+        LIMIT 1
+    ) hb ON TRUE
+    WHERE t.status = 'RUNNING'
+      AND t.started_at IS NOT NULL
+      AND COALESCE(hb.last_heartbeat, t.started_at) < NOW() - CAST(:stale_threshold || ' minutes' AS INTERVAL)
+    ORDER BY hb.last_heartbeat NULLS FIRST
+""")
+
+GET_WORKER_STATS_SQL = text("""
+    SELECT
+        t.worker_hostname,
+        t.worker_pid,
+        t.worker_process_name,
+        COUNT(*) AS active_tasks,
+        MIN(t.started_at) AS oldest_task_start,
+        MAX(hb.last_heartbeat) AS latest_heartbeat
+    FROM horsies_tasks t
+    LEFT JOIN LATERAL (
+        SELECT sent_at AS last_heartbeat
+        FROM horsies_heartbeats h
+        WHERE h.task_id = t.id AND h.role = 'runner'
+        ORDER BY sent_at DESC
+        LIMIT 1
+    ) hb ON TRUE
+    WHERE t.status = 'RUNNING'
+      AND t.worker_hostname IS NOT NULL
+    GROUP BY t.worker_hostname, t.worker_pid, t.worker_process_name
+    ORDER BY active_tasks DESC
+""")
+
+GET_EXPIRED_TASKS_SQL = text("""
+    SELECT
+        id,
+        task_name,
+        queue_name,
+        priority,
+        sent_at,
+        good_until,
+        NOW() - good_until as expired_for
+    FROM horsies_tasks
+    WHERE status = 'PENDING'
+      AND good_until < NOW()
+    ORDER BY good_until ASC
+""")
+
+# ---- Cleanup queries ----
+
+SELECT_STALE_RUNNING_TASKS_SQL = text("""
+    SELECT t2.id, t2.worker_pid, t2.worker_hostname, t2.claimed_by_worker_id,
+           t2.started_at, hb.last_heartbeat
+    FROM horsies_tasks t2
+    LEFT JOIN LATERAL (
+        SELECT sent_at AS last_heartbeat
+        FROM horsies_heartbeats h
+        WHERE h.task_id = t2.id AND h.role = 'runner'
+        ORDER BY sent_at DESC
+        LIMIT 1
+    ) hb ON TRUE
+    WHERE t2.status = 'RUNNING'
+      AND t2.started_at IS NOT NULL
+      AND COALESCE(hb.last_heartbeat, t2.started_at) < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL)
+    FOR UPDATE OF t2 SKIP LOCKED
+""")
+
+MARK_STALE_TASK_FAILED_SQL = text("""
+    UPDATE horsies_tasks
+    SET status = 'FAILED',
+        failed_at = NOW(),
+        failed_reason = :failed_reason,
+        result = :result,
+        updated_at = NOW()
+    WHERE id = :task_id
+      AND status = 'RUNNING'
+""")
+
+REQUEUE_STALE_CLAIMED_SQL = text("""
+    UPDATE horsies_tasks AS t
+    SET status = 'PENDING',
+        claimed = FALSE,
+        claimed_at = NULL,
+        claimed_by_worker_id = NULL,
+        updated_at = NOW()
+    FROM (
+        SELECT t2.id, hb.last_heartbeat, t2.claimed_at
+        FROM horsies_tasks t2
+        LEFT JOIN LATERAL (
+            SELECT sent_at AS last_heartbeat
+            FROM horsies_heartbeats h
+            WHERE h.task_id = t2.id AND h.role = 'claimer'
+            ORDER BY sent_at DESC
+            LIMIT 1
+        ) hb ON TRUE
+        WHERE t2.status = 'CLAIMED'
+        FOR UPDATE OF t2 SKIP LOCKED
+    ) s
+    WHERE t.id = s.id
+      AND (
+          (s.last_heartbeat IS NULL AND s.claimed_at IS NOT NULL AND s.claimed_at < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL))
+          OR (s.last_heartbeat IS NOT NULL AND s.last_heartbeat < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL))
+      )
+""")
+
 
 class PostgresBroker:
     """
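Editor's aside (not part of the package diff): the trigger functions above publish on four LISTEN/NOTIFY channels (task_new, task_queue_<queue_name>, task_done, and workflow_done), sending the row id as the payload. A minimal consumer sketch using asyncpg is shown below; the DSN and the handler body are illustrative assumptions, not horsies API.

import asyncio
import asyncpg

async def watch_channels() -> None:
    # Placeholder DSN; point this at the same database the broker uses.
    conn = await asyncpg.connect('postgresql://localhost/horsies')

    def on_notify(connection, pid, channel, payload):
        # payload is the id passed to pg_notify(channel, NEW.id) by the triggers
        print(f'{channel}: {payload}')

    await conn.add_listener('task_new', on_notify)
    await conn.add_listener('task_done', on_notify)
    try:
        await asyncio.sleep(60)  # keep the connection open to receive notifications
    finally:
        await conn.close()

asyncio.run(watch_channels())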
@@ -93,37 +376,10 @@ class PostgresBroker:
         """
         async with self.async_engine.begin() as conn:
             # Create trigger function
-            await conn.execute(
-                text("""
-                    CREATE OR REPLACE FUNCTION horsies_notify_task_changes()
-                    RETURNS trigger AS $$
-                    BEGIN
-                        IF TG_OP = 'INSERT' AND NEW.status = 'PENDING' THEN
-                            -- New task notifications: wake up workers
-                            PERFORM pg_notify('task_new', NEW.id); -- Global worker notification
-                            PERFORM pg_notify('task_queue_' || NEW.queue_name, NEW.id); -- Queue-specific notification
-                        ELSIF TG_OP = 'UPDATE' AND OLD.status != NEW.status THEN
-                            -- Task completion notifications: wake up result waiters
-                            IF NEW.status IN ('COMPLETED', 'FAILED') THEN
-                                PERFORM pg_notify('task_done', NEW.id); -- Send task_id as payload
-                            END IF;
-                        END IF;
-                        RETURN NEW;
-                    END;
-                    $$ LANGUAGE plpgsql;
-                """)
-            )
+            await conn.execute(CREATE_TASK_NOTIFY_FUNCTION_SQL)
 
             # Create trigger
-            await conn.execute(
-                text("""
-                    DROP TRIGGER IF EXISTS horsies_task_notify_trigger ON horsies_tasks;
-                    CREATE TRIGGER horsies_task_notify_trigger
-                    AFTER INSERT OR UPDATE ON horsies_tasks
-                    FOR EACH ROW
-                    EXECUTE FUNCTION horsies_notify_task_changes();
-                """)
-            )
+            await conn.execute(CREATE_TASK_NOTIFY_TRIGGER_SQL)
 
     async def _create_workflow_schema(self) -> None:
         """
@@ -136,166 +392,41 @@ class PostgresBroker:
         """
         async with self.async_engine.begin() as conn:
             # GIN index for efficient dependency array lookups
-            await conn.execute(
-                text("""
-                    CREATE INDEX IF NOT EXISTS idx_horsies_workflow_tasks_deps
-                    ON horsies_workflow_tasks USING GIN(dependencies);
-                """)
-            )
+            await conn.execute(CREATE_WORKFLOW_TASKS_DEPS_INDEX_SQL)
 
             # Migration: add task_options column for existing installs
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS task_options TEXT;
-                """)
-            )
+            await conn.execute(ADD_TASK_OPTIONS_COLUMN_SQL)
 
             # Migration: add success_policy column for existing installs
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS success_policy JSONB;
-                """)
-            )
+            await conn.execute(ADD_SUCCESS_POLICY_COLUMN_SQL)
 
             # Migration: add join_type and min_success columns for existing installs
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS join_type VARCHAR(10) NOT NULL DEFAULT 'all';
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS min_success INTEGER;
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS node_id VARCHAR(128);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ALTER COLUMN workflow_ctx_from
-                    TYPE VARCHAR(128)[]
-                    USING workflow_ctx_from::VARCHAR(128)[];
-                """)
-            )
+            await conn.execute(ADD_JOIN_TYPE_COLUMN_SQL)
+            await conn.execute(ADD_MIN_SUCCESS_COLUMN_SQL)
+            await conn.execute(ADD_NODE_ID_COLUMN_SQL)
+            await conn.execute(ALTER_WORKFLOW_CTX_FROM_TYPE_SQL)
 
             # Subworkflow support columns
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS parent_workflow_id VARCHAR(36);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS parent_task_index INTEGER;
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS depth INTEGER NOT NULL DEFAULT 0;
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS root_workflow_id VARCHAR(36);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS workflow_def_module VARCHAR(512);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflows
-                    ADD COLUMN IF NOT EXISTS workflow_def_qualname VARCHAR(512);
-                """)
-            )
-
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS is_subworkflow BOOLEAN NOT NULL DEFAULT FALSE;
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_id VARCHAR(36);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_name VARCHAR(255);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_retry_mode VARCHAR(50);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_summary TEXT;
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_module VARCHAR(512);
-                """)
-            )
-            await conn.execute(
-                text("""
-                    ALTER TABLE horsies_workflow_tasks
-                    ADD COLUMN IF NOT EXISTS sub_workflow_qualname VARCHAR(512);
-                """)
-            )
+            await conn.execute(ADD_PARENT_WORKFLOW_ID_COLUMN_SQL)
+            await conn.execute(ADD_PARENT_TASK_INDEX_COLUMN_SQL)
+            await conn.execute(ADD_DEPTH_COLUMN_SQL)
+            await conn.execute(ADD_ROOT_WORKFLOW_ID_COLUMN_SQL)
+            await conn.execute(ADD_WORKFLOW_DEF_MODULE_COLUMN_SQL)
+            await conn.execute(ADD_WORKFLOW_DEF_QUALNAME_COLUMN_SQL)
+
+            await conn.execute(ADD_IS_SUBWORKFLOW_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_ID_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_NAME_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_RETRY_MODE_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_SUMMARY_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_MODULE_COLUMN_SQL)
+            await conn.execute(ADD_SUB_WORKFLOW_QUALNAME_COLUMN_SQL)
 
             # Workflow notification trigger function
-            await conn.execute(
-                text("""
-                    CREATE OR REPLACE FUNCTION horsies_notify_workflow_changes()
-                    RETURNS trigger AS $$
-                    BEGIN
-                        IF TG_OP = 'UPDATE' AND OLD.status != NEW.status THEN
-                            -- Workflow completion notifications
-                            IF NEW.status IN ('COMPLETED', 'FAILED', 'CANCELLED', 'PAUSED') THEN
-                                PERFORM pg_notify('workflow_done', NEW.id);
-                            END IF;
-                        END IF;
-                        RETURN NEW;
-                    END;
-                    $$ LANGUAGE plpgsql;
-                """)
-            )
+            await conn.execute(CREATE_WORKFLOW_NOTIFY_FUNCTION_SQL)
 
             # Create workflow trigger
-            await conn.execute(
-                text("""
-                    DROP TRIGGER IF EXISTS horsies_workflow_notify_trigger ON horsies_workflows;
-                    CREATE TRIGGER horsies_workflow_notify_trigger
-                    AFTER UPDATE ON horsies_workflows
-                    FOR EACH ROW
-                    EXECUTE FUNCTION horsies_notify_workflow_changes();
-                """)
-            )
+            await conn.execute(CREATE_WORKFLOW_NOTIFY_TRIGGER_SQL)
 
     async def _ensure_initialized(self) -> None:
         if self._initialized:
@@ -304,7 +435,7 @@ class PostgresBroker:
             # Take a short-lived, cluster-wide advisory lock to serialize
             # schema creation across workers and producers.
             await conn.execute(
-                text('SELECT pg_advisory_xact_lock(CAST(:key AS BIGINT))'),
+                SCHEMA_ADVISORY_LOCK_SQL,
                 {'key': self._schema_advisory_key()},
             )
             await conn.run_sync(Base.metadata.create_all)
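Editor's aside (not part of the package diff): the hunk above takes a transaction-scoped advisory lock before running DDL, so concurrent workers and producers serialize schema creation and the lock is released automatically when the transaction ends. A sketch of the same pattern with SQLAlchemy's async engine follows; the derive_key helper and the engine URL are illustrative assumptions, not horsies code.

import asyncio
import zlib

from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

def derive_key(name: str) -> int:
    # Hypothetical helper: fold a name into a stable 32-bit key, which fits
    # within the BIGINT argument expected by pg_advisory_xact_lock.
    return zlib.crc32(name.encode('utf-8'))

async def init_schema(engine) -> None:
    async with engine.begin() as conn:
        # Transaction-scoped advisory lock: only one initializer runs at a time;
        # it is released automatically at COMMIT or ROLLBACK.
        await conn.execute(
            text('SELECT pg_advisory_xact_lock(CAST(:key AS BIGINT))'),
            {'key': derive_key('horsies_schema')},
        )
        # ... CREATE TABLE / ALTER TABLE statements would run here ...

async def main() -> None:
    engine = create_async_engine('postgresql+asyncpg://localhost/horsies')  # placeholder URL
    await init_schema(engine)
    await engine.dispose()

asyncio.run(main())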
@@ -573,28 +704,7 @@ class PostgresBroker:
         """
         async with self.session_factory() as session:
             result = await session.execute(
-                text("""
-                    SELECT
-                        t.id,
-                        t.worker_hostname,
-                        t.worker_pid,
-                        t.worker_process_name,
-                        hb.last_heartbeat,
-                        t.started_at,
-                        t.task_name
-                    FROM horsies_tasks t
-                    LEFT JOIN LATERAL (
-                        SELECT sent_at AS last_heartbeat
-                        FROM horsies_heartbeats h
-                        WHERE h.task_id = t.id AND h.role = 'runner'
-                        ORDER BY sent_at DESC
-                        LIMIT 1
-                    ) hb ON TRUE
-                    WHERE t.status = 'RUNNING'
-                      AND t.started_at IS NOT NULL
-                      AND COALESCE(hb.last_heartbeat, t.started_at) < NOW() - CAST(:stale_threshold || ' minutes' AS INTERVAL)
-                    ORDER BY hb.last_heartbeat NULLS FIRST
-                """),
+                GET_STALE_TASKS_SQL,
                 {'stale_threshold': stale_threshold_minutes},
             )
             columns = result.keys()
@@ -611,29 +721,7 @@ class PostgresBroker:
             List of worker stats: worker_hostname, worker_pid, active_tasks, oldest_task_start
         """
         async with self.session_factory() as session:
-            result = await session.execute(
-                text("""
-                    SELECT
-                        t.worker_hostname,
-                        t.worker_pid,
-                        t.worker_process_name,
-                        COUNT(*) AS active_tasks,
-                        MIN(t.started_at) AS oldest_task_start,
-                        MAX(hb.last_heartbeat) AS latest_heartbeat
-                    FROM horsies_tasks t
-                    LEFT JOIN LATERAL (
-                        SELECT sent_at AS last_heartbeat
-                        FROM horsies_heartbeats h
-                        WHERE h.task_id = t.id AND h.role = 'runner'
-                        ORDER BY sent_at DESC
-                        LIMIT 1
-                    ) hb ON TRUE
-                    WHERE t.status = 'RUNNING'
-                      AND t.worker_hostname IS NOT NULL
-                    GROUP BY t.worker_hostname, t.worker_pid, t.worker_process_name
-                    ORDER BY active_tasks DESC
-                """)
-            )
+            result = await session.execute(GET_WORKER_STATS_SQL)
 
             columns = result.keys()
             return [dict(zip(columns, row)) for row in result.fetchall()]
@@ -649,22 +737,7 @@ class PostgresBroker:
             List of expired task info: id, task_name, queue_name, good_until, expired_for
         """
         async with self.session_factory() as session:
-            result = await session.execute(
-                text("""
-                    SELECT
-                        id,
-                        task_name,
-                        queue_name,
-                        priority,
-                        sent_at,
-                        good_until,
-                        NOW() - good_until as expired_for
-                    FROM horsies_tasks
-                    WHERE status = 'PENDING'
-                      AND good_until < NOW()
-                    ORDER BY good_until ASC
-                """)
-            )
+            result = await session.execute(GET_EXPIRED_TASKS_SQL)
 
             columns = result.keys()
             return [dict(zip(columns, row)) for row in result.fetchall()]
@@ -694,21 +767,7 @@ class PostgresBroker:
         async with self.session_factory() as session:
             # First, find stale tasks and get their metadata
             stale_tasks_result = await session.execute(
-                text("""
-                    SELECT t2.id, t2.worker_pid, t2.worker_hostname, t2.claimed_by_worker_id,
-                           t2.started_at, hb.last_heartbeat
-                    FROM horsies_tasks t2
-                    LEFT JOIN LATERAL (
-                        SELECT sent_at AS last_heartbeat
-                        FROM horsies_heartbeats h
-                        WHERE h.task_id = t2.id AND h.role = 'runner'
-                        ORDER BY sent_at DESC
-                        LIMIT 1
-                    ) hb ON TRUE
-                    WHERE t2.status = 'RUNNING'
-                      AND t2.started_at IS NOT NULL
-                      AND COALESCE(hb.last_heartbeat, t2.started_at) < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL)
-                """),
+                SELECT_STALE_RUNNING_TASKS_SQL,
                 {'stale_threshold': stale_threshold_seconds},
             )
 
@@ -747,15 +806,7 @@ class PostgresBroker:
 
                 # Update task with proper result
                 await session.execute(
-                    text("""
-                        UPDATE horsies_tasks
-                        SET status = 'FAILED',
-                            failed_at = NOW(),
-                            failed_reason = :failed_reason,
-                            result = :result,
-                            updated_at = NOW()
-                        WHERE id = :task_id
-                    """),
+                    MARK_STALE_TASK_FAILED_SQL,
                     {
                         'task_id': task_id,
                         'failed_reason': f'Worker process crashed (no runner heartbeat for {stale_threshold_ms}ms = {stale_threshold_ms/1000:.1f}s)',
@@ -781,31 +832,7 @@ class PostgresBroker:
 
         async with self.session_factory() as session:
             result = await session.execute(
-                text("""
-                    UPDATE horsies_tasks AS t
-                    SET status = 'PENDING',
-                        claimed = FALSE,
-                        claimed_at = NULL,
-                        claimed_by_worker_id = NULL,
-                        updated_at = NOW()
-                    FROM (
-                        SELECT t2.id, hb.last_heartbeat, t2.claimed_at
-                        FROM horsies_tasks t2
-                        LEFT JOIN LATERAL (
-                            SELECT sent_at AS last_heartbeat
-                            FROM horsies_heartbeats h
-                            WHERE h.task_id = t2.id AND h.role = 'claimer'
-                            ORDER BY sent_at DESC
-                            LIMIT 1
-                        ) hb ON TRUE
-                        WHERE t2.status = 'CLAIMED'
-                    ) s
-                    WHERE t.id = s.id
-                      AND (
-                          (s.last_heartbeat IS NULL AND s.claimed_at IS NOT NULL AND s.claimed_at < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL))
-                          OR (s.last_heartbeat IS NOT NULL AND s.last_heartbeat < NOW() - CAST(:stale_threshold || ' seconds' AS INTERVAL))
-                      )
-                """),
+                REQUEUE_STALE_CLAIMED_SQL,
                 {'stale_threshold': stale_threshold_seconds},
             )
             await session.commit()