@cascade-flow/backend-postgres 0.2.17 → 0.2.20
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- package/dist/db.d.ts +30 -4
- package/dist/db.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +426 -267
- package/dist/index.js.map +5 -5
- package/dist/migrations.d.ts.map +1 -1
- package/package.json +2 -2
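A reading of the index.js changes below (not official release notes): DatabaseClient.claimScheduledStep now receives the event to persist and returns the claimed attempt number (or null), with the available_at_us check performed inside the claim transaction; the scheduled-step, stale-step, active-workflow, and worker-aggregation queries are rewritten as CTEs scoped to the latest workflow event per run; a getWorkerById lookup is added; and migrations 012-014 create the supporting indexes. A minimal sketch of how the reworked claim path might be driven, based only on the PostgresBackend.claimScheduledStep signature and metadata fields visible in this diff (the surrounding worker loop and concrete values are assumptions):

    // Hypothetical worker-side usage; only claimScheduledStep's signature and return shape
    // ({ attemptNumber } on success, null otherwise) are taken from this diff.
    async function tryClaim(backend, step, workerId) {
      const claim = await backend.claimScheduledStep(step.workflowSlug, step.runId, step.stepId, workerId, {
        dependencies: [],     // field visible in the diff; value here is illustrative
        attemptNumber: 1,     // now passed through instead of re-projected from loaded events
        slotIndex: 0,         // optional slot tracking (migration 010)
        workerConcurrency: 4
      });
      if (claim === null) {
        return null;          // step not schedulable, not yet available, or its row was locked
      }
      return claim.attemptNumber; // attempt recorded in the persisted StepStarted event
    }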
package/dist/index.js
CHANGED
@@ -5125,13 +5125,15 @@ class DatabaseClient {
       client.release();
     }
   }
-  async claimScheduledStep(workflowSlug, runId, stepId,
+  async claimScheduledStep(workflowSlug, runId, stepId, eventToWrite) {
     const client = await this.pool.connect();
     try {
       await client.query("BEGIN");
+      const currentTimeUs = Date.now() * 1000;
       const checkQuery = `
-        SELECT event_data FROM ${this.schema}.step_events
+        SELECT event_data, attempt_number, available_at_us FROM ${this.schema}.step_events
         WHERE workflow_slug = $1 AND run_id = $2 AND step_id = $3
+        AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
         ORDER BY timestamp_us DESC, event_id DESC
         LIMIT 1
         FOR UPDATE SKIP LOCKED
@@ -5139,14 +5141,22 @@ class DatabaseClient {
       const checkResult = await client.query(checkQuery, [workflowSlug, runId, stepId]);
       if (checkResult.rows.length === 0) {
         await client.query("ROLLBACK");
-        return
+        return null;
       }
-      const
+      const latestRow = checkResult.rows[0];
+      const latestEvent = latestRow.event_data;
       if (latestEvent.type !== "StepScheduled" && latestEvent.type !== "StepReclaimed" && latestEvent.type !== "StepRetrying") {
         await client.query("ROLLBACK");
-        return
+        return null;
+      }
+      const availableAtUs = latestRow.available_at_us ?? latestEvent.availableAtUs ?? null;
+      if (availableAtUs !== null && availableAtUs > currentTimeUs) {
+        await client.query("ROLLBACK");
+        return null;
       }
-
+      const resolvedAttemptNumber = latestRow.attempt_number ?? latestEvent.attemptNumber ?? (eventToWrite.type === "StepStarted" ? eventToWrite.attemptNumber : null) ?? 1;
+      const eventPayload = eventToWrite.type === "StepStarted" ? { ...eventToWrite, attemptNumber: resolvedAttemptNumber } : eventToWrite;
+      let eventWorkerId = null;
       let attemptNumber = null;
       let slotIndex = null;
       let workerConcurrency = null;
@@ -5155,18 +5165,18 @@ class DatabaseClient {
       let errorStackExactHash = "";
       let errorStackNormalizedHash = "";
       let errorStackPortableHash = "";
-      if (
-
-        attemptNumber =
-        slotIndex =
-        workerConcurrency =
-      }
-      if (
-        errorNameHash =
-        errorMessageHash =
-        errorStackExactHash =
-        errorStackNormalizedHash =
-        errorStackPortableHash =
+      if (eventPayload.type === "StepStarted") {
+        eventWorkerId = eventPayload.workerId;
+        attemptNumber = eventPayload.attemptNumber;
+        slotIndex = eventPayload.slotIndex ?? null;
+        workerConcurrency = eventPayload.workerConcurrency ?? null;
+      }
+      if (eventPayload.type === "StepFailed") {
+        errorNameHash = eventPayload.errorFingerprints.nameHash;
+        errorMessageHash = eventPayload.errorFingerprints.messageHash;
+        errorStackExactHash = eventPayload.errorFingerprints.stackExactHash;
+        errorStackNormalizedHash = eventPayload.errorFingerprints.stackNormalizedHash;
+        errorStackPortableHash = eventPayload.errorFingerprints.stackPortableHash;
       }
       const versionResult = await client.query(`SELECT version_id FROM ${this.schema}.workflow_events
         WHERE workflow_slug = $1 AND run_id = $2
@@ -5183,15 +5193,15 @@ class DatabaseClient {
         slot_index, worker_concurrency
       )
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)`, [
-
-
-
-
-
-
-
-        JSON.stringify(stripEventIdFromJson(
-
+        eventPayload.eventId,
+        eventPayload.workflowSlug,
+        eventPayload.runId,
+        eventPayload.stepId,
+        eventPayload.timestampUs,
+        eventPayload.category,
+        eventPayload.type,
+        JSON.stringify(stripEventIdFromJson(eventPayload)),
+        eventWorkerId,
         attemptNumber,
         null,
         null,
@@ -5205,7 +5215,7 @@ class DatabaseClient {
         workerConcurrency
       ]);
       await client.query("COMMIT");
-      return
+      return resolvedAttemptNumber;
     } catch (error) {
       await client.query("ROLLBACK");
       throw error;
@@ -5218,31 +5228,55 @@ class DatabaseClient {
     try {
       const currentTimeUs = Date.now() * 1000;
       const scheduledTypes = ["StepScheduled", "StepReclaimed", "StepRetrying"];
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      const result = await client.query(`WITH latest_workflow_events AS (
+        SELECT DISTINCT ON (workflow_slug, run_id)
+          workflow_slug,
+          run_id,
+          type
+        FROM ${this.schema}.workflow_events
+        WHERE ($1::text[] IS NULL OR workflow_slug = ANY($1))
+          AND type IN (
+            'RunSubmitted',
+            'WorkflowRetryStarted',
+            'WorkflowStarted',
+            'WorkflowResumed',
+            'WorkflowCompleted',
+            'WorkflowFailed',
+            'WorkflowCancelled'
+          )
+        ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
+      ),
+      active_runs AS (
+        SELECT workflow_slug, run_id
+        FROM latest_workflow_events
+        WHERE type IN ('RunSubmitted', 'WorkflowRetryStarted', 'WorkflowStarted', 'WorkflowResumed')
+      ),
+      latest_step_events AS (
+        SELECT DISTINCT ON (se.workflow_slug, se.run_id, se.step_id)
+          se.workflow_slug,
+          se.run_id,
+          se.step_id,
+          se.type,
+          se.available_at_us,
+          se.priority
+        FROM ${this.schema}.step_events se
+        INNER JOIN active_runs ar
+          ON ar.workflow_slug = se.workflow_slug
+          AND ar.run_id = se.run_id
+        WHERE se.type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+        ORDER BY se.workflow_slug, se.run_id, se.step_id, se.timestamp_us DESC, se.event_id DESC
+      )
+      SELECT workflow_slug, run_id, step_id
+      FROM latest_step_events
+      WHERE type = ANY($2)
+        AND (available_at_us IS NULL OR available_at_us <= $3)
+      ORDER BY priority DESC NULLS LAST, available_at_us ASC
+      LIMIT COALESCE($4::int, 2147483647)`, [
+        options?.workflowSlugs ?? null,
+        scheduledTypes,
+        currentTimeUs,
+        options?.limit ?? null
+      ]);
       return result.rows.map((row) => ({
         workflowSlug: row.workflow_slug,
         runId: row.run_id,
@@ -5256,24 +5290,56 @@ class DatabaseClient {
     const client = await this.pool.connect();
     try {
      const currentTimeUs = Date.now() * 1000;
-      const
-
-
-
-
+      const result = await client.query(`WITH latest_workflow_events AS (
+        SELECT DISTINCT ON (workflow_slug, run_id)
+          workflow_slug,
+          run_id,
+          type
+        FROM ${this.schema}.workflow_events
+        WHERE type IN (
+          'RunSubmitted',
+          'WorkflowRetryStarted',
+          'WorkflowStarted',
+          'WorkflowResumed',
+          'WorkflowCompleted',
+          'WorkflowFailed',
+          'WorkflowCancelled'
+        )
+        ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
+      ),
+      running_runs AS (
+        SELECT workflow_slug, run_id
+        FROM latest_workflow_events
+        WHERE type IN ('WorkflowStarted', 'WorkflowResumed')
+      ),
+      latest_step_events AS (
+        SELECT DISTINCT ON (se.workflow_slug, se.run_id, se.step_id)
+          se.workflow_slug,
+          se.run_id,
+          se.step_id,
+          se.type,
+          se.timestamp_us,
+          se.worker_id,
+          se.attempt_number
+        FROM ${this.schema}.step_events se
+        INNER JOIN running_runs rr
+          ON rr.workflow_slug = se.workflow_slug
+          AND rr.run_id = se.run_id
+        WHERE se.type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+        ORDER BY se.workflow_slug, se.run_id, se.step_id, se.timestamp_us DESC, se.event_id DESC
+      )
+      SELECT workflow_slug, run_id, step_id, worker_id, attempt_number, timestamp_us
+      FROM latest_step_events
       WHERE type IN ('StepStarted', 'StepHeartbeat')
-
-
-      SELECT workflow_slug, run_id, step_id, worker_id
-      FROM latest_step_events
-      WHERE timestamp_us < $1 AND worker_id IS NOT NULL
-      `;
-      const result = await client.query(query, [currentTimeUs - staleThresholdUs]);
+        AND timestamp_us < $1
+        AND worker_id IS NOT NULL`, [currentTimeUs - staleThresholdUs]);
       return result.rows.map((row) => ({
         workflowSlug: row.workflow_slug,
         runId: row.run_id,
         stepId: row.step_id,
-        workerId: row.worker_id
+        workerId: row.worker_id,
+        attemptNumber: row.attempt_number ?? 1,
+        lastHeartbeatUs: Number(row.timestamp_us)
       }));
     } finally {
       client.release();
@@ -5480,16 +5546,27 @@ class DatabaseClient {
   async listActiveWorkflows() {
     const client = await this.pool.connect();
     try {
-      const result = await client.query(`
-
-
-
-
-
-
-
-
-
+      const result = await client.query(`WITH latest_workflow_events AS (
+        SELECT DISTINCT ON (workflow_slug, run_id)
+          workflow_slug,
+          run_id,
+          type
+        FROM ${this.schema}.workflow_events
+        WHERE type IN (
+          'RunSubmitted',
+          'WorkflowRetryStarted',
+          'WorkflowStarted',
+          'WorkflowResumed',
+          'WorkflowCompleted',
+          'WorkflowFailed',
+          'WorkflowCancelled'
+        )
+        ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
+      )
+      SELECT DISTINCT workflow_slug
+      FROM latest_workflow_events
+      WHERE type IN ('RunSubmitted', 'WorkflowRetryStarted', 'WorkflowStarted', 'WorkflowResumed')
+      ORDER BY workflow_slug ASC`);
       return result.rows.map((row) => row.workflow_slug);
     } finally {
       client.release();
@@ -5630,6 +5707,15 @@ class DatabaseClient {
           type
         FROM ${this.schema}.workflow_events
         WHERE ($1::text IS NULL OR workflow_slug = $1)
+          AND type IN (
+            'RunSubmitted',
+            'WorkflowRetryStarted',
+            'WorkflowStarted',
+            'WorkflowResumed',
+            'WorkflowCompleted',
+            'WorkflowFailed',
+            'WorkflowCancelled'
+          )
         ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
       )
       SELECT
@@ -5767,90 +5853,33 @@ class DatabaseClient {
       client.release();
     }
   }
-  async getActiveWorkersAggregation(
+  async getActiveWorkersAggregation(_options) {
     const client = await this.pool.connect();
     try {
-      const
-
-
-
-
-
-
-
-          worker_id,
-          MAX(timestamp_us) AS last_seen_us
-        FROM ${this.schema}.step_events
-        WHERE worker_id IS NOT NULL
-          AND type IN ('StepStarted', 'StepHeartbeat')
-          ${options?.timeRange ? `AND timestamp_us >= $1 AND timestamp_us <= $2` : ""}
-        GROUP BY worker_id
-      ),
-      -- Steps started by each worker
-      steps_started AS (
-        SELECT DISTINCT
-          se.worker_id,
-          se.workflow_slug,
-          se.run_id,
-          se.step_id,
-          se.attempt_number
-        FROM ${this.schema}.step_events se
-        WHERE se.type = 'StepStarted'
-          AND se.worker_id IS NOT NULL
-      ),
-      -- Steps completed
-      completed_steps AS (
-        SELECT
-          ss.worker_id,
-          COUNT(*) AS completed_count
-        FROM steps_started ss
-        INNER JOIN ${this.schema}.step_events se
-          ON se.workflow_slug = ss.workflow_slug
-          AND se.run_id = ss.run_id
-          AND se.step_id = ss.step_id
-          AND se.attempt_number = ss.attempt_number
-          AND se.type = 'StepCompleted'
-        GROUP BY ss.worker_id
-      ),
-      -- Steps failed
-      failed_steps AS (
-        SELECT
-          ss.worker_id,
-          COUNT(*) AS failed_count
-        FROM steps_started ss
-        INNER JOIN ${this.schema}.step_events se
-          ON se.workflow_slug = ss.workflow_slug
-          AND se.run_id = ss.run_id
-          AND se.step_id = ss.step_id
-          AND se.attempt_number = ss.attempt_number
-          AND se.type = 'StepFailed'
-        GROUP BY ss.worker_id
-      ),
-      -- Reclamation counts (times this worker's steps were reclaimed)
-      reclaimed_counts AS (
-        SELECT
-          (event_data->>'originalWorkerId') AS worker_id,
-          COUNT(*) AS reclaimed_count
-        FROM ${this.schema}.step_events
-        WHERE type = 'StepReclaimed'
-        GROUP BY (event_data->>'originalWorkerId')
+      const runningRunsResult = await client.query(`
+        WITH latest_workflow_events AS (
+          SELECT DISTINCT ON (workflow_slug, run_id)
+            workflow_slug,
+            run_id,
+            type
+          FROM ${this.schema}.workflow_events
+          ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
        )
-        SELECT
-
-
-
-
-
-
-
-
-
-
-
-
+        SELECT workflow_slug, run_id
+        FROM latest_workflow_events
+        WHERE type IN ('WorkflowStarted', 'WorkflowResumed')
+        LIMIT 500
+      `);
+      if (runningRunsResult.rows.length === 0) {
+        return {
+          workers: [],
+          totalActiveWorkers: 0,
+          totalRunningSteps: 0
+        };
+      }
+      const runIds = runningRunsResult.rows.map((r) => r.run_id);
       const runningStepsResult = await client.query(`
         WITH latest_step_events AS (
-          -- Get the latest event per step (excluding LogEntry)
           SELECT DISTINCT ON (workflow_slug, run_id, step_id)
             workflow_slug,
             run_id,
@@ -5861,78 +5890,24 @@ class DatabaseClient {
             slot_index,
             worker_concurrency
           FROM ${this.schema}.step_events
-          WHERE
+          WHERE run_id = ANY($1)
+            AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
           ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
-        ),
-        -- Steps currently running (latest event is StepStarted or StepHeartbeat)
-        running_steps AS (
-          SELECT
-            workflow_slug,
-            run_id,
-            step_id,
-            worker_id,
-            slot_index,
-            worker_concurrency
-          FROM latest_step_events
-          WHERE type IN ('StepStarted', 'StepHeartbeat')
-            AND worker_id IS NOT NULL
-        ),
-        -- Get start time and last heartbeat for each running step
-        step_times AS (
-          SELECT
-            rs.workflow_slug,
-            rs.run_id,
-            rs.step_id,
-            rs.worker_id,
-            rs.slot_index,
-            rs.worker_concurrency,
-            MIN(se.timestamp_us) FILTER (WHERE se.type = 'StepStarted') AS started_at_us,
-            MAX(se.timestamp_us) FILTER (WHERE se.type IN ('StepStarted', 'StepHeartbeat')) AS last_heartbeat_us
-          FROM running_steps rs
-          INNER JOIN ${this.schema}.step_events se
-            ON se.workflow_slug = rs.workflow_slug
-            AND se.run_id = rs.run_id
-            AND se.step_id = rs.step_id
-            AND se.type IN ('StepStarted', 'StepHeartbeat')
-          GROUP BY rs.workflow_slug, rs.run_id, rs.step_id, rs.worker_id, rs.slot_index, rs.worker_concurrency
         )
         SELECT
           worker_id,
           workflow_slug,
           run_id,
           step_id,
-
-          last_heartbeat_us,
+          timestamp_us as last_heartbeat_us,
           slot_index,
           worker_concurrency
-        FROM
-
-
-
-
-            worker_id,
-            worker_concurrency
-          FROM ${this.schema}.step_events
-          WHERE worker_id IS NOT NULL
-            AND worker_concurrency IS NOT NULL
-            AND type IN ('StepStarted', 'StepHeartbeat')
-          ORDER BY worker_id, timestamp_us DESC
-      `);
-      const workerConcurrencyMap = new Map;
-      for (const row of workerConcurrencyResult.rows) {
-        workerConcurrencyMap.set(row.worker_id, row.worker_concurrency);
-      }
+        FROM latest_step_events
+        WHERE type IN ('StepStarted', 'StepHeartbeat')
+          AND worker_id IS NOT NULL
+        LIMIT 1000
+      `, [runIds]);
       const workerMap = new Map;
-      for (const row of workerStatsResult.rows) {
-        workerMap.set(row.worker_id, {
-          workerId: row.worker_id,
-          lastSeenUs: parseInt(row.last_seen_us, 10),
-          totalStepsProcessed: parseInt(row.total_steps_processed, 10),
-          failedSteps: parseInt(row.failed_steps, 10),
-          reclaimedFromCount: parseInt(row.reclaimed_from_count, 10),
-          activeSteps: []
-        });
-      }
       for (const row of runningStepsResult.rows) {
         let worker = workerMap.get(row.worker_id);
         if (!worker) {
@@ -5942,10 +5917,15 @@ class DatabaseClient {
            totalStepsProcessed: 0,
            failedSteps: 0,
            reclaimedFromCount: 0,
+            workerConcurrency: row.worker_concurrency ?? undefined,
            activeSteps: []
          };
          workerMap.set(row.worker_id, worker);
        }
+        const lastHeartbeatUs = parseInt(row.last_heartbeat_us, 10);
+        if (lastHeartbeatUs > worker.lastSeenUs) {
+          worker.lastSeenUs = lastHeartbeatUs;
+        }
        if (row.worker_concurrency != null && worker.workerConcurrency == null) {
          worker.workerConcurrency = row.worker_concurrency;
        }
@@ -5953,19 +5933,11 @@ class DatabaseClient {
          workflowSlug: row.workflow_slug,
          runId: row.run_id,
          stepId: row.step_id,
-          startedAtUs:
-          lastHeartbeatUs
+          startedAtUs: lastHeartbeatUs,
+          lastHeartbeatUs,
          slotIndex: row.slot_index ?? undefined
        });
      }
-      for (const worker of workerMap.values()) {
-        if (worker.workerConcurrency == null) {
-          const historicalConcurrency = workerConcurrencyMap.get(worker.workerId);
-          if (historicalConcurrency != null) {
-            worker.workerConcurrency = historicalConcurrency;
-          }
-        }
-      }
      const workers = Array.from(workerMap.values()).sort((a, b) => {
        if (b.activeSteps.length !== a.activeSteps.length) {
          return b.activeSteps.length - a.activeSteps.length;
@@ -5973,7 +5945,7 @@ class DatabaseClient {
        return b.lastSeenUs - a.lastSeenUs;
      });
      const totalRunningSteps = workers.reduce((sum, w) => sum + w.activeSteps.length, 0);
-      const totalActiveWorkers = workers.
+      const totalActiveWorkers = workers.length;
      return {
        workers,
        totalActiveWorkers,
@@ -5983,6 +5955,112 @@ class DatabaseClient {
      client.release();
    }
  }
+  async getWorkerById(workerId) {
+    const client = await this.pool.connect();
+    try {
+      const result = await client.query(`
+        WITH worker_step_events AS (
+          -- Get all step events for this worker
+          SELECT
+            workflow_slug,
+            run_id,
+            step_id,
+            type,
+            timestamp_us,
+            slot_index,
+            worker_concurrency
+          FROM ${this.schema}.step_events
+          WHERE worker_id = $1
+            AND type IN ('StepStarted', 'StepHeartbeat')
+        ),
+        latest_per_step AS (
+          -- For each step this worker touched, get the latest event
+          SELECT DISTINCT ON (workflow_slug, run_id, step_id)
+            workflow_slug,
+            run_id,
+            step_id,
+            timestamp_us,
+            slot_index,
+            worker_concurrency
+          FROM worker_step_events
+          ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC
+        ),
+        -- Check if these steps are still running (no completion/failure after our heartbeat)
+        still_running AS (
+          SELECT
+            lps.workflow_slug,
+            lps.run_id,
+            lps.step_id,
+            lps.timestamp_us as last_heartbeat_us,
+            lps.slot_index,
+            lps.worker_concurrency
+          FROM latest_per_step lps
+          WHERE NOT EXISTS (
+            SELECT 1 FROM ${this.schema}.step_events se
+            WHERE se.workflow_slug = lps.workflow_slug
+              AND se.run_id = lps.run_id
+              AND se.step_id = lps.step_id
+              AND se.timestamp_us > lps.timestamp_us
+              AND se.type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
+          )
+        )
+        SELECT * FROM still_running
+        ORDER BY last_heartbeat_us DESC
+        LIMIT 100
+      `, [workerId]);
+      if (result.rows.length === 0) {
+        const lastSeenResult = await client.query(`
+          SELECT MAX(timestamp_us) as last_seen_us, MAX(worker_concurrency) as worker_concurrency
+          FROM ${this.schema}.step_events
+          WHERE worker_id = $1
+            AND type IN ('StepStarted', 'StepHeartbeat')
+        `, [workerId]);
+        if (!lastSeenResult.rows[0]?.last_seen_us) {
+          return null;
+        }
+        return {
+          workerId,
+          lastSeenUs: parseInt(lastSeenResult.rows[0].last_seen_us, 10),
+          totalStepsProcessed: 0,
+          failedSteps: 0,
+          reclaimedFromCount: 0,
+          workerConcurrency: lastSeenResult.rows[0].worker_concurrency ?? undefined,
+          activeSteps: []
+        };
+      }
+      let lastSeenUs = 0;
+      let workerConcurrency;
+      const activeSteps = [];
+      for (const row of result.rows) {
+        const heartbeatUs = parseInt(row.last_heartbeat_us, 10);
+        if (heartbeatUs > lastSeenUs) {
+          lastSeenUs = heartbeatUs;
+        }
+        if (row.worker_concurrency != null && workerConcurrency == null) {
+          workerConcurrency = row.worker_concurrency;
+        }
+        activeSteps.push({
+          workflowSlug: row.workflow_slug,
+          runId: row.run_id,
+          stepId: row.step_id,
+          startedAtUs: heartbeatUs,
+          lastHeartbeatUs: heartbeatUs,
+          slotIndex: row.slot_index ?? undefined
+        });
+      }
+      return {
+        workerId,
+        lastSeenUs,
+        totalStepsProcessed: 0,
+        failedSteps: 0,
+        reclaimedFromCount: 0,
+        workerConcurrency,
+        activeSteps
+      };
+    } finally {
+      client.release();
+    }
+  }
 }
 function createPool(connectionString) {
   return new Pool2({ connectionString });
@@ -6384,6 +6462,87 @@ async function migration011_addWorkerConcurrencyIndex(pool, schema) {
    client.release();
  }
 }
+async function migration012_addWorkerAnalyticsIndexes(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_worker_activity
+      ON ${schema}.step_events (timestamp_us, type, worker_id)
+      WHERE worker_id IS NOT NULL
+        AND type IN ('StepStarted', 'StepHeartbeat')
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_recent_by_step
+      ON ${schema}.step_events (timestamp_us DESC, workflow_slug, run_id, step_id, type, event_id DESC)
+      WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+    `);
+    console.log("[Migration 012] Worker analytics indexes added successfully");
+  } catch (error) {
+    console.error("[Migration 012] Error adding worker analytics indexes:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration013_addWorkersObservabilityIndexes(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_workflow_events_run_status
+      ON ${schema}.workflow_events (run_id, timestamp_us DESC, event_id DESC)
+      INCLUDE (workflow_slug, type)
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_by_run
+      ON ${schema}.step_events (run_id, workflow_slug, step_id, timestamp_us DESC, event_id DESC)
+      INCLUDE (type, worker_id, slot_index, worker_concurrency)
+      WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+    `);
+    console.log("[Migration 013] Workers observability indexes added successfully");
+  } catch (error) {
+    console.error("[Migration 013] Error adding workers observability indexes:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration014_addWorkerHotPathIndexes(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_latest_cover
+      ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
+      INCLUDE (type, available_at_us, worker_id, attempt_number, priority)
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_terminal_latest
+      ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
+      WHERE type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_workflow_events_active_latest
+      ON ${schema}.workflow_events (type, workflow_slug, run_id, timestamp_us DESC, event_id DESC)
+      WHERE type IN ('RunSubmitted', 'WorkflowRetryStarted', 'WorkflowStarted', 'WorkflowResumed')
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_workflow_events_run_chrono
+      ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us ASC, event_id ASC)
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_workflow_events_version_lookup
+      ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us DESC, event_id DESC)
+      INCLUDE (version_id)
+      WHERE type IN ('WorkflowStarted', 'RunSubmitted')
+        AND version_id IS NOT NULL
+    `);
+    console.log("[Migration 014] Worker hot-path indexes added successfully");
+  } catch (error) {
+    console.error("[Migration 014] Error adding worker hot-path indexes:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
 async function runMigrations(pool, schema = "cascadeflow") {
   console.log(`[Migrations] Starting database migrations in schema '${schema}'...`);
   try {
@@ -6399,6 +6558,9 @@ async function runMigrations(pool, schema = "cascadeflow") {
    await migration009_addStepPriority(pool, schema);
    await migration010_addSlotTracking(pool, schema);
    await migration011_addWorkerConcurrencyIndex(pool, schema);
+    await migration012_addWorkerAnalyticsIndexes(pool, schema);
+    await migration013_addWorkersObservabilityIndexes(pool, schema);
+    await migration014_addWorkerHotPathIndexes(pool, schema);
    console.log("[Migrations] All migrations completed successfully");
  } catch (error) {
    console.error("[Migrations] Migration failed:", error);
@@ -7264,16 +7426,6 @@ class PostgresBackend extends Backend {
    return !!(latestEvent && (latestEvent.type === "StepScheduled" || latestEvent.type === "StepReclaimed" || latestEvent.type === "StepRetrying"));
  }
  async claimScheduledStep(workflowSlug, runId, stepId, workerId, metadata) {
-    const initialEvents = await this.loadEvents(workflowSlug, runId, { category: "step", stepId });
-    if (initialEvents.length === 0) {
-      return null;
-    }
-    const now = getMicrosecondTimestamp();
-    const initialState = projectStepState(initialEvents, workflowSlug);
-    if (initialState.status !== "scheduled" || initialState.availableAt === undefined || initialState.availableAt > now) {
-      return null;
-    }
-    const attemptNumber = initialState.attemptNumber;
    const timestamp = getMicrosecondTimestamp();
    const event = {
      category: "step",
@@ -7285,43 +7437,34 @@ class PostgresBackend extends Backend {
      stepId,
      workerId,
      dependencies: metadata.dependencies,
-      attemptNumber,
+      attemptNumber: metadata.attemptNumber,
      slotIndex: metadata.slotIndex,
      workerConcurrency: metadata.workerConcurrency
    };
-    const
-    return
+    const claimedAttemptNumber = await this.db.claimScheduledStep(workflowSlug, runId, stepId, event);
+    return claimedAttemptNumber !== null ? { attemptNumber: claimedAttemptNumber } : null;
  }
  async reclaimStaleSteps(staleThreshold, reclaimedBy) {
    const reclaimed = [];
    const now = getMicrosecondTimestamp();
    const staleSteps = await this.db.findStaleSteps(staleThreshold);
    for (const step of staleSteps) {
-      const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      });
-      await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
-        availableAt: now,
-        reason: "retry",
-        attemptNumber: state.attemptNumber + 1,
-        retryDelayMs: 0
-      });
-      reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
-    }
+      const staleDuration = now - step.lastHeartbeatUs;
+      await this.saveStepReclaimed(step.workflowSlug, step.runId, step.stepId, {
+        originalWorkerId: step.workerId,
+        reclaimedBy,
+        lastHeartbeat: step.lastHeartbeatUs,
+        staleThreshold,
+        staleDuration,
+        attemptNumber: step.attemptNumber
+      });
+      await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
+        availableAt: now,
+        reason: "retry",
+        attemptNumber: step.attemptNumber + 1,
+        retryDelayMs: 0
+      });
+      reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
    }
    return reclaimed;
  }
@@ -7754,9 +7897,25 @@ class PostgresBackend extends Backend {
      staleThresholdUs
    };
  }
+  async getWorkerById(workerId) {
+    const result = await this.db.getWorkerById(workerId);
+    if (!result) {
+      return null;
+    }
+    return {
+      workerId: result.workerId,
+      lastSeenUs: result.lastSeenUs,
+      currentlyRunningSteps: result.activeSteps.length,
+      totalStepsProcessed: result.totalStepsProcessed,
+      failedSteps: result.failedSteps,
+      reclaimedFromCount: result.reclaimedFromCount,
+      workerConcurrency: result.workerConcurrency,
+      activeSteps: result.activeSteps
+    };
+  }
 }
 export {
  PostgresBackend
 };

-//# debugId=
+//# debugId=70613711DE018DE364756E2164756E21