@cascade-flow/backend-postgres 0.2.16 → 0.2.19
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
- package/dist/db.d.ts +59 -3
- package/dist/db.d.ts.map +1 -1
- package/dist/index.d.ts +16 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +512 -84
- package/dist/index.js.map +5 -5
- package/dist/migrations.d.ts.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
@@ -4849,6 +4849,9 @@ class DatabaseClient {
   let attemptNumber = null;
   let availableAtUs = null;
   let exportOutput = null;
+  let priority = null;
+  let slotIndex = null;
+  let workerConcurrency = null;
   let errorNameHash = "";
   let errorMessageHash = "";
   let errorStackExactHash = "";
@@ -4856,12 +4859,15 @@
   let errorStackPortableHash = "";
   if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
    workerId = se.workerId;
+   slotIndex = se.slotIndex ?? null;
+   workerConcurrency = se.workerConcurrency ?? null;
   }
   if ("attemptNumber" in se) {
    attemptNumber = se.attemptNumber;
   }
   if (se.type === "StepScheduled") {
    availableAtUs = se.availableAtUs;
+   priority = se.priority ?? null;
   }
   if (se.type === "StepCompleted") {
    exportOutput = se.exportOutput;
@@ -4884,9 +4890,10 @@
    event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
    worker_id, attempt_number, available_at_us, export_output,
    error_name_hash, error_message_hash, error_stack_exact_hash,
-   error_stack_normalized_hash, error_stack_portable_hash, version_id
+   error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+   slot_index, worker_concurrency
   )
-  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
   se.eventId,
   se.workflowSlug,
   se.runId,
@@ -4904,7 +4911,10 @@
   errorStackExactHash,
   errorStackNormalizedHash,
   errorStackPortableHash,
-  versionId
+  versionId,
+  priority,
+  slotIndex,
+  workerConcurrency
   ]);
  }
 } finally {
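Note: both appendEvent code paths in DatabaseClient now persist three extra columns — priority, slot_index, worker_concurrency — so the INSERT grows from 18 to 21 placeholders. A minimal sketch of the event fields that feed them (field names are from this diff; the values are hypothetical):

    // StepScheduled events may carry a priority; StepStarted / StepHeartbeat
    // events may carry slotIndex and workerConcurrency. Absent fields are
    // stored as NULL via the `?? null` fallbacks above.
    const scheduled = { type: "StepScheduled", availableAtUs: 1718000000000000, priority: 5 };
    const started = { type: "StepStarted", workerId: "w-1", slotIndex: 2, workerConcurrency: 8 };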
@@ -4966,6 +4976,9 @@
   let attemptNumber = null;
   let availableAtUs = null;
   let exportOutput = null;
+  let priority = null;
+  let slotIndex = null;
+  let workerConcurrency = null;
   let errorNameHash = "";
   let errorMessageHash = "";
   let errorStackExactHash = "";
@@ -4973,12 +4986,15 @@
   let errorStackPortableHash = "";
   if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
    workerId = se.workerId;
+   slotIndex = se.slotIndex ?? null;
+   workerConcurrency = se.workerConcurrency ?? null;
   }
   if ("attemptNumber" in se) {
    attemptNumber = se.attemptNumber;
   }
   if (se.type === "StepScheduled") {
    availableAtUs = se.availableAtUs;
+   priority = se.priority ?? null;
   }
   if (se.type === "StepCompleted") {
    exportOutput = se.exportOutput;
@@ -5001,9 +5017,10 @@
    event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
    worker_id, attempt_number, available_at_us, export_output,
    error_name_hash, error_message_hash, error_stack_exact_hash,
-   error_stack_normalized_hash, error_stack_portable_hash, version_id
+   error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+   slot_index, worker_concurrency
   )
-  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
   se.eventId,
   se.workflowSlug,
   se.runId,
@@ -5021,7 +5038,10 @@
   errorStackExactHash,
   errorStackNormalizedHash,
   errorStackPortableHash,
-  versionId
+  versionId,
+  priority,
+  slotIndex,
+  workerConcurrency
   ]);
  }
 }
@@ -5105,13 +5125,15 @@
    client.release();
   }
  }
- async claimScheduledStep(workflowSlug, runId, stepId,
+ async claimScheduledStep(workflowSlug, runId, stepId, eventToWrite) {
  const client = await this.pool.connect();
  try {
   await client.query("BEGIN");
+  const currentTimeUs = Date.now() * 1000;
   const checkQuery = `
-   SELECT event_data FROM ${this.schema}.step_events
+   SELECT event_data, attempt_number, available_at_us FROM ${this.schema}.step_events
    WHERE workflow_slug = $1 AND run_id = $2 AND step_id = $3
+    AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
    ORDER BY timestamp_us DESC, event_id DESC
    LIMIT 1
    FOR UPDATE SKIP LOCKED
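Note: the claim check now runs inside the claiming transaction and locks the latest non-log event row with FOR UPDATE SKIP LOCKED, so two workers racing for the same step never block each other — the loser simply sees zero rows and backs off. A simplified sketch of the pattern (table name and error handling reduced for clarity):

    await client.query("BEGIN");
    const { rows } = await client.query(
      `SELECT event_data FROM step_events
        WHERE workflow_slug = $1 AND run_id = $2 AND step_id = $3
        ORDER BY timestamp_us DESC, event_id DESC
        LIMIT 1
        FOR UPDATE SKIP LOCKED`,
      [workflowSlug, runId, stepId]
    );
    if (rows.length === 0) {
      await client.query("ROLLBACK"); // another claimer holds the row lock
    }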
@@ -5119,30 +5141,42 @@
   const checkResult = await client.query(checkQuery, [workflowSlug, runId, stepId]);
   if (checkResult.rows.length === 0) {
    await client.query("ROLLBACK");
-   return
+   return null;
   }
-  const
+  const latestRow = checkResult.rows[0];
+  const latestEvent = latestRow.event_data;
   if (latestEvent.type !== "StepScheduled" && latestEvent.type !== "StepReclaimed" && latestEvent.type !== "StepRetrying") {
    await client.query("ROLLBACK");
-   return
+   return null;
   }
-
+  const availableAtUs = latestRow.available_at_us ?? latestEvent.availableAtUs ?? null;
+  if (availableAtUs !== null && availableAtUs > currentTimeUs) {
+   await client.query("ROLLBACK");
+   return null;
+  }
+  const resolvedAttemptNumber = latestRow.attempt_number ?? latestEvent.attemptNumber ?? (eventToWrite.type === "StepStarted" ? eventToWrite.attemptNumber : null) ?? 1;
+  const eventPayload = eventToWrite.type === "StepStarted" ? { ...eventToWrite, attemptNumber: resolvedAttemptNumber } : eventToWrite;
+  let eventWorkerId = null;
   let attemptNumber = null;
+  let slotIndex = null;
+  let workerConcurrency = null;
   let errorNameHash = "";
   let errorMessageHash = "";
   let errorStackExactHash = "";
   let errorStackNormalizedHash = "";
   let errorStackPortableHash = "";
-  if (
-
-  attemptNumber =
-
-
-
-
-
-
-
+  if (eventPayload.type === "StepStarted") {
+   eventWorkerId = eventPayload.workerId;
+   attemptNumber = eventPayload.attemptNumber;
+   slotIndex = eventPayload.slotIndex ?? null;
+   workerConcurrency = eventPayload.workerConcurrency ?? null;
+  }
+  if (eventPayload.type === "StepFailed") {
+   errorNameHash = eventPayload.errorFingerprints.nameHash;
+   errorMessageHash = eventPayload.errorFingerprints.messageHash;
+   errorStackExactHash = eventPayload.errorFingerprints.stackExactHash;
+   errorStackNormalizedHash = eventPayload.errorFingerprints.stackNormalizedHash;
+   errorStackPortableHash = eventPayload.errorFingerprints.stackPortableHash;
   }
   const versionResult = await client.query(`SELECT version_id FROM ${this.schema}.workflow_events
    WHERE workflow_slug = $1 AND run_id = $2
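Note: the attempt number is resolved through a nullish-coalescing chain — the denormalized attempt_number column, then the stored JSON payload, then the incoming StepStarted event, then 1 — so rows whose attempt_number column is NULL still resolve correctly:

    // e.g. column is NULL but the stored payload carries attemptNumber: 3
    const resolved = null ?? 3 ?? undefined ?? 1; // => 3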
@@ -5155,18 +5189,19 @@
    event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
    worker_id, attempt_number, available_at_us, export_output,
    error_name_hash, error_message_hash, error_stack_exact_hash,
-   error_stack_normalized_hash, error_stack_portable_hash, version_id
+   error_stack_normalized_hash, error_stack_portable_hash, version_id,
+   slot_index, worker_concurrency
   )
-  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
-
-
-
-
-
-
-
-  JSON.stringify(stripEventIdFromJson(
-
+  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)`, [
+  eventPayload.eventId,
+  eventPayload.workflowSlug,
+  eventPayload.runId,
+  eventPayload.stepId,
+  eventPayload.timestampUs,
+  eventPayload.category,
+  eventPayload.type,
+  JSON.stringify(stripEventIdFromJson(eventPayload)),
+  eventWorkerId,
   attemptNumber,
   null,
   null,
@@ -5175,10 +5210,12 @@
   errorStackExactHash,
   errorStackNormalizedHash,
   errorStackPortableHash,
-  versionId
+  versionId,
+  slotIndex,
+  workerConcurrency
   ]);
   await client.query("COMMIT");
-  return
+  return resolvedAttemptNumber;
  } catch (error) {
   await client.query("ROLLBACK");
   throw error;
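Note: DatabaseClient.claimScheduledStep now returns the resolved attempt number on success and null when the step is missing, not in a claimable state, not yet available, or locked by a competing claimer. A hedged usage sketch (variable names hypothetical):

    const attempt = await db.claimScheduledStep(slug, runId, stepId, startedEvent);
    if (attempt === null) {
      // lost the race or step not claimable; move on to the next scheduled step
    } else {
      // execute the step as attempt number `attempt`
    }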
@@ -5194,7 +5231,7 @@
  let query = `
   WITH latest_step_events AS (
    SELECT DISTINCT ON (workflow_slug, run_id, step_id)
-    workflow_slug, run_id, step_id, type, available_at_us
+    workflow_slug, run_id, step_id, type, available_at_us, priority
    FROM ${this.schema}.step_events
    ${options?.workflowSlugs ? "WHERE workflow_slug = ANY($1)" : ""}
    ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
@@ -5203,6 +5240,7 @@
   FROM latest_step_events
   WHERE type = ANY($${options?.workflowSlugs ? "2" : "1"})
    AND (available_at_us IS NULL OR available_at_us <= $${options?.workflowSlugs ? "3" : "2"})
+  ORDER BY priority DESC NULLS LAST, available_at_us ASC
   ${options?.limit ? `LIMIT $${options?.workflowSlugs ? "4" : "3"}` : ""}
  `;
  const params = [];
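Note: scheduled steps are now dequeued highest-priority first. ORDER BY priority DESC NULLS LAST sorts larger priority values first and pushes steps without a priority to the back, while available_at_us ASC breaks ties so older work drains first. The same ordering expressed as a JavaScript comparator, for illustration only:

    steps.sort((a, b) => {
      const pa = a.priority ?? -Infinity; // NULLS LAST under DESC
      const pb = b.priority ?? -Infinity;
      if (pb !== pa) return pb - pa;            // priority DESC
      return a.availableAtUs - b.availableAtUs; // available_at_us ASC
    });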
@@ -5231,21 +5269,25 @@
  const query = `
   WITH latest_step_events AS (
    SELECT DISTINCT ON (workflow_slug, run_id, step_id)
-    workflow_slug, run_id, step_id, type, timestamp_us, worker_id
+    workflow_slug, run_id, step_id, type, timestamp_us, worker_id, attempt_number
    FROM ${this.schema}.step_events
-   WHERE type IN ('
+   WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
    ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
   )
-  SELECT workflow_slug, run_id, step_id, worker_id
+  SELECT workflow_slug, run_id, step_id, worker_id, attempt_number, timestamp_us
   FROM latest_step_events
-  WHERE
+  WHERE type IN ('StepStarted', 'StepHeartbeat')
+   AND timestamp_us < $1
+   AND worker_id IS NOT NULL
  `;
  const result = await client.query(query, [currentTimeUs - staleThresholdUs]);
  return result.rows.map((row) => ({
   workflowSlug: row.workflow_slug,
   runId: row.run_id,
   stepId: row.step_id,
-  workerId: row.worker_id
+  workerId: row.worker_id,
+  attemptNumber: row.attempt_number ?? 1,
+  lastHeartbeatUs: Number(row.timestamp_us)
  }));
 } finally {
  client.release();
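Note: findStaleSteps now projects the latest non-log event per step, keeps only StepStarted/StepHeartbeat rows that still carry a worker_id, and applies the staleness cutoff in SQL; it also returns attemptNumber and lastHeartbeatUs so the reclaim path (see the rewritten reclaimStaleSteps below) no longer has to replay the event stream. The cutoff is plain microsecond arithmetic:

    // a step is stale when its newest heartbeat predates the cutoff
    const cutoffUs = currentTimeUs - staleThresholdUs; // both in microseconds
    // rows with timestamp_us < cutoffUs are reclaim candidates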
@@ -5602,6 +5644,15 @@
    type
   FROM ${this.schema}.workflow_events
   WHERE ($1::text IS NULL OR workflow_slug = $1)
+   AND type IN (
+    'RunSubmitted',
+    'WorkflowRetryStarted',
+    'WorkflowStarted',
+    'WorkflowResumed',
+    'WorkflowCompleted',
+    'WorkflowFailed',
+    'WorkflowCancelled'
+   )
   ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
  )
  SELECT
@@ -5739,6 +5790,214 @@
    client.release();
   }
  }
+ async getActiveWorkersAggregation(_options) {
+  const client = await this.pool.connect();
+  try {
+   const runningRunsResult = await client.query(`
+    WITH latest_workflow_events AS (
+     SELECT DISTINCT ON (workflow_slug, run_id)
+      workflow_slug,
+      run_id,
+      type
+     FROM ${this.schema}.workflow_events
+     ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
+    )
+    SELECT workflow_slug, run_id
+    FROM latest_workflow_events
+    WHERE type IN ('WorkflowStarted', 'WorkflowResumed')
+    LIMIT 500
+   `);
+   if (runningRunsResult.rows.length === 0) {
+    return {
+     workers: [],
+     totalActiveWorkers: 0,
+     totalRunningSteps: 0
+    };
+   }
+   const runIds = runningRunsResult.rows.map((r) => r.run_id);
+   const runningStepsResult = await client.query(`
+    WITH latest_step_events AS (
+     SELECT DISTINCT ON (workflow_slug, run_id, step_id)
+      workflow_slug,
+      run_id,
+      step_id,
+      type,
+      worker_id,
+      timestamp_us,
+      slot_index,
+      worker_concurrency
+     FROM ${this.schema}.step_events
+     WHERE run_id = ANY($1)
+      AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+     ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
+    )
+    SELECT
+     worker_id,
+     workflow_slug,
+     run_id,
+     step_id,
+     timestamp_us as last_heartbeat_us,
+     slot_index,
+     worker_concurrency
+    FROM latest_step_events
+    WHERE type IN ('StepStarted', 'StepHeartbeat')
+     AND worker_id IS NOT NULL
+    LIMIT 1000
+   `, [runIds]);
+   const workerMap = new Map;
+   for (const row of runningStepsResult.rows) {
+    let worker = workerMap.get(row.worker_id);
+    if (!worker) {
+     worker = {
+      workerId: row.worker_id,
+      lastSeenUs: parseInt(row.last_heartbeat_us, 10),
+      totalStepsProcessed: 0,
+      failedSteps: 0,
+      reclaimedFromCount: 0,
+      workerConcurrency: row.worker_concurrency ?? undefined,
+      activeSteps: []
+     };
+     workerMap.set(row.worker_id, worker);
+    }
+    const lastHeartbeatUs = parseInt(row.last_heartbeat_us, 10);
+    if (lastHeartbeatUs > worker.lastSeenUs) {
+     worker.lastSeenUs = lastHeartbeatUs;
+    }
+    if (row.worker_concurrency != null && worker.workerConcurrency == null) {
+     worker.workerConcurrency = row.worker_concurrency;
+    }
+    worker.activeSteps.push({
+     workflowSlug: row.workflow_slug,
+     runId: row.run_id,
+     stepId: row.step_id,
+     startedAtUs: lastHeartbeatUs,
+     lastHeartbeatUs,
+     slotIndex: row.slot_index ?? undefined
+    });
+   }
+   const workers = Array.from(workerMap.values()).sort((a, b) => {
+    if (b.activeSteps.length !== a.activeSteps.length) {
+     return b.activeSteps.length - a.activeSteps.length;
+    }
+    return b.lastSeenUs - a.lastSeenUs;
+   });
+   const totalRunningSteps = workers.reduce((sum, w) => sum + w.activeSteps.length, 0);
+   const totalActiveWorkers = workers.length;
+   return {
+    workers,
+    totalActiveWorkers,
+    totalRunningSteps
+   };
+  } finally {
+   client.release();
+  }
+ }
+ async getWorkerById(workerId) {
+  const client = await this.pool.connect();
+  try {
+   const result = await client.query(`
+    WITH worker_step_events AS (
+     -- Get all step events for this worker
+     SELECT
+      workflow_slug,
+      run_id,
+      step_id,
+      type,
+      timestamp_us,
+      slot_index,
+      worker_concurrency
+     FROM ${this.schema}.step_events
+     WHERE worker_id = $1
+      AND type IN ('StepStarted', 'StepHeartbeat')
+    ),
+    latest_per_step AS (
+     -- For each step this worker touched, get the latest event
+     SELECT DISTINCT ON (workflow_slug, run_id, step_id)
+      workflow_slug,
+      run_id,
+      step_id,
+      timestamp_us,
+      slot_index,
+      worker_concurrency
+     FROM worker_step_events
+     ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC
+    ),
+    -- Check if these steps are still running (no completion/failure after our heartbeat)
+    still_running AS (
+     SELECT
+      lps.workflow_slug,
+      lps.run_id,
+      lps.step_id,
+      lps.timestamp_us as last_heartbeat_us,
+      lps.slot_index,
+      lps.worker_concurrency
+     FROM latest_per_step lps
+     WHERE NOT EXISTS (
+      SELECT 1 FROM ${this.schema}.step_events se
+      WHERE se.workflow_slug = lps.workflow_slug
+       AND se.run_id = lps.run_id
+       AND se.step_id = lps.step_id
+       AND se.timestamp_us > lps.timestamp_us
+       AND se.type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
+     )
+    )
+    SELECT * FROM still_running
+    ORDER BY last_heartbeat_us DESC
+    LIMIT 100
+   `, [workerId]);
+   if (result.rows.length === 0) {
+    const lastSeenResult = await client.query(`
+     SELECT MAX(timestamp_us) as last_seen_us, MAX(worker_concurrency) as worker_concurrency
+     FROM ${this.schema}.step_events
+     WHERE worker_id = $1
+      AND type IN ('StepStarted', 'StepHeartbeat')
+    `, [workerId]);
+    if (!lastSeenResult.rows[0]?.last_seen_us) {
+     return null;
+    }
+    return {
+     workerId,
+     lastSeenUs: parseInt(lastSeenResult.rows[0].last_seen_us, 10),
+     totalStepsProcessed: 0,
+     failedSteps: 0,
+     reclaimedFromCount: 0,
+     workerConcurrency: lastSeenResult.rows[0].worker_concurrency ?? undefined,
+     activeSteps: []
+    };
+   }
+   let lastSeenUs = 0;
+   let workerConcurrency;
+   const activeSteps = [];
+   for (const row of result.rows) {
+    const heartbeatUs = parseInt(row.last_heartbeat_us, 10);
+    if (heartbeatUs > lastSeenUs) {
+     lastSeenUs = heartbeatUs;
+    }
+    if (row.worker_concurrency != null && workerConcurrency == null) {
+     workerConcurrency = row.worker_concurrency;
+    }
+    activeSteps.push({
+     workflowSlug: row.workflow_slug,
+     runId: row.run_id,
+     stepId: row.step_id,
+     startedAtUs: heartbeatUs,
+     lastHeartbeatUs: heartbeatUs,
+     slotIndex: row.slot_index ?? undefined
+    });
+   }
+   return {
+    workerId,
+    lastSeenUs,
+    totalStepsProcessed: 0,
+    failedSteps: 0,
+    reclaimedFromCount: 0,
+    workerConcurrency,
+    activeSteps
+   };
+  } finally {
+   client.release();
+  }
+ }
 }
 function createPool(connectionString) {
  return new Pool2({ connectionString });
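Note: getActiveWorkersAggregation bounds its work by first selecting at most 500 currently running runs, then fetching the latest event per step for only those runs (capped at 1000 rows) and grouping StepStarted/StepHeartbeat rows by worker_id in a Map. startedAtUs is set from the latest heartbeat, so it approximates, rather than records, the true start time. A hedged consumption sketch:

    const agg = await db.getActiveWorkersAggregation({});
    for (const w of agg.workers) {
      console.log(w.workerId, `${w.activeSteps.length}/${w.workerConcurrency ?? "?"} slots busy`);
    }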
@@ -6086,6 +6345,141 @@ async function migration008_addRunIdIndex(pool, schema) {
   client.release();
  }
 }
+async function migration009_addStepPriority(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   ALTER TABLE ${schema}.step_events
+   ADD COLUMN IF NOT EXISTS priority INTEGER
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_priority_queue
+   ON ${schema}.step_events (priority DESC NULLS LAST, available_at_us ASC, timestamp_us ASC)
+   WHERE type IN ('StepScheduled', 'StepReclaimed', 'StepRetrying')
+  `);
+  console.log("[Migration 009] Step priority column and index added successfully");
+ } catch (error) {
+  console.error("[Migration 009] Error adding step priority:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
+async function migration010_addSlotTracking(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   ALTER TABLE ${schema}.step_events
+   ADD COLUMN IF NOT EXISTS slot_index INTEGER,
+   ADD COLUMN IF NOT EXISTS worker_concurrency INTEGER
+  `);
+  console.log("[Migration 010] Slot tracking columns added successfully");
+ } catch (error) {
+  console.error("[Migration 010] Error adding slot tracking columns:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
+async function migration011_addWorkerConcurrencyIndex(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_worker_concurrency
+   ON ${schema}.step_events (worker_id, timestamp_us DESC)
+   WHERE worker_id IS NOT NULL
+    AND worker_concurrency IS NOT NULL
+    AND type IN ('StepStarted', 'StepHeartbeat')
+  `);
+  console.log("[Migration 011] Worker concurrency index added successfully");
+ } catch (error) {
+  console.error("[Migration 011] Error adding worker concurrency index:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
+async function migration012_addWorkerAnalyticsIndexes(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_worker_activity
+   ON ${schema}.step_events (timestamp_us, type, worker_id)
+   WHERE worker_id IS NOT NULL
+    AND type IN ('StepStarted', 'StepHeartbeat')
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_recent_by_step
+   ON ${schema}.step_events (timestamp_us DESC, workflow_slug, run_id, step_id, type, event_id DESC)
+   WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+  `);
+  console.log("[Migration 012] Worker analytics indexes added successfully");
+ } catch (error) {
+  console.error("[Migration 012] Error adding worker analytics indexes:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
+async function migration013_addWorkersObservabilityIndexes(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_workflow_events_run_status
+   ON ${schema}.workflow_events (run_id, timestamp_us DESC, event_id DESC)
+   INCLUDE (workflow_slug, type)
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_by_run
+   ON ${schema}.step_events (run_id, workflow_slug, step_id, timestamp_us DESC, event_id DESC)
+   INCLUDE (type, worker_id, slot_index, worker_concurrency)
+   WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+  `);
+  console.log("[Migration 013] Workers observability indexes added successfully");
+ } catch (error) {
+  console.error("[Migration 013] Error adding workers observability indexes:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
+async function migration014_addWorkerHotPathIndexes(pool, schema) {
+ const client = await pool.connect();
+ try {
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_latest_cover
+   ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
+   INCLUDE (type, available_at_us, worker_id, attempt_number, priority)
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_step_events_terminal_latest
+   ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
+   WHERE type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_workflow_events_active_latest
+   ON ${schema}.workflow_events (type, workflow_slug, run_id, timestamp_us DESC, event_id DESC)
+   WHERE type IN ('RunSubmitted', 'WorkflowRetryStarted', 'WorkflowStarted', 'WorkflowResumed')
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_workflow_events_run_chrono
+   ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us ASC, event_id ASC)
+  `);
+  await client.query(`
+   CREATE INDEX IF NOT EXISTS idx_workflow_events_version_lookup
+   ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us DESC, event_id DESC)
+   INCLUDE (version_id)
+   WHERE type IN ('WorkflowStarted', 'RunSubmitted')
+    AND version_id IS NOT NULL
+  `);
+  console.log("[Migration 014] Worker hot-path indexes added successfully");
+ } catch (error) {
+  console.error("[Migration 014] Error adding worker hot-path indexes:", error);
+  throw error;
+ } finally {
+  client.release();
+ }
+}
 async function runMigrations(pool, schema = "cascadeflow") {
  console.log(`[Migrations] Starting database migrations in schema '${schema}'...`);
  try {
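Note: migrations 009-014 are additive and idempotent (ADD COLUMN IF NOT EXISTS / CREATE INDEX IF NOT EXISTS), so re-running them against an already-migrated schema is safe. They are wired into runMigrations in the next hunk and executed in order:

    // default schema is "cascadeflow"
    await runMigrations(pool);
    // or target a custom schema
    await runMigrations(pool, "my_schema");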
@@ -6098,6 +6492,12 @@ async function runMigrations(pool, schema = "cascadeflow") {
  await migration006_addDescIndexes(pool, schema);
  await migration007_addWorkerIndexes(pool, schema);
  await migration008_addRunIdIndex(pool, schema);
+ await migration009_addStepPriority(pool, schema);
+ await migration010_addSlotTracking(pool, schema);
+ await migration011_addWorkerConcurrencyIndex(pool, schema);
+ await migration012_addWorkerAnalyticsIndexes(pool, schema);
+ await migration013_addWorkersObservabilityIndexes(pool, schema);
+ await migration014_addWorkerHotPathIndexes(pool, schema);
  console.log("[Migrations] All migrations completed successfully");
 } catch (error) {
  console.error("[Migrations] Migration failed:", error);
@@ -6408,7 +6808,8 @@ class PostgresBackend extends Backend {
   availableAtUs: metadata.availableAt,
   reason: metadata.reason,
   attemptNumber: metadata.attemptNumber,
-  retryDelayMs: metadata.retryDelayMs
+  retryDelayMs: metadata.retryDelayMs,
+  priority: metadata.priority
  };
  await this.db.appendEvent("step_events", event);
 }
@@ -6426,7 +6827,9 @@
   stepId,
   attemptNumber,
   workerId,
-  dependencies: metadata.dependencies
+  dependencies: metadata.dependencies,
+  slotIndex: metadata.slotIndex,
+  workerConcurrency: metadata.workerConcurrency
  };
  await this.db.appendEvent("step_events", event);
 }
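Note: the step-start path (the method name falls outside this hunk) now forwards slot placement from its metadata argument into the event. A sketch of the expanded metadata shape, with hypothetical values:

    const metadata = {
      dependencies: [],
      slotIndex: 2,         // which of the worker's concurrency slots runs this step
      workerConcurrency: 8  // the worker's configured parallelism
    };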
@@ -6545,6 +6948,7 @@
   reason: "retry",
   attemptNumber: scheduleMetadata.nextAttemptNumber,
   retryDelayMs: scheduleMetadata.retryDelayMs,
+  priority: scheduleMetadata.priority,
   policyIndex: scheduleMetadata.policyIndex,
   attemptInPolicy: scheduleMetadata.attemptInPolicy
  };
@@ -6614,7 +7018,7 @@
  };
  await this.db.appendEvent("step_events", event);
 }
-async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber) {
+async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber, slotInfo) {
  const now = getMicrosecondTimestamp();
  const event = {
   category: "step",
@@ -6625,7 +7029,9 @@
   runId,
   stepId,
   workerId,
-  attemptNumber
+  attemptNumber,
+  slotIndex: slotInfo?.slotIndex,
+  workerConcurrency: slotInfo?.workerConcurrency
  };
  await this.db.appendEvent("step_events", event);
 }
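Note: saveStepHeartbeat gains an optional trailing slotInfo argument; when it is omitted, slotIndex and workerConcurrency are undefined and the new columns stay NULL. A hedged call sketch (values hypothetical):

    await backend.saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber, {
      slotIndex: 0,
      workerConcurrency: 4
    });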
@@ -6957,16 +7363,6 @@
  return !!(latestEvent && (latestEvent.type === "StepScheduled" || latestEvent.type === "StepReclaimed" || latestEvent.type === "StepRetrying"));
 }
 async claimScheduledStep(workflowSlug, runId, stepId, workerId, metadata) {
- const initialEvents = await this.loadEvents(workflowSlug, runId, { category: "step", stepId });
- if (initialEvents.length === 0) {
-  return null;
- }
- const now = getMicrosecondTimestamp();
- const initialState = projectStepState(initialEvents, workflowSlug);
- if (initialState.status !== "scheduled" || initialState.availableAt === undefined || initialState.availableAt > now) {
-  return null;
- }
- const attemptNumber = initialState.attemptNumber;
 const timestamp = getMicrosecondTimestamp();
 const event = {
  category: "step",
@@ -6978,41 +7374,34 @@
  stepId,
  workerId,
  dependencies: metadata.dependencies,
- attemptNumber
+ attemptNumber: metadata.attemptNumber,
+ slotIndex: metadata.slotIndex,
+ workerConcurrency: metadata.workerConcurrency
 };
- const
- return
+ const claimedAttemptNumber = await this.db.claimScheduledStep(workflowSlug, runId, stepId, event);
+ return claimedAttemptNumber !== null ? { attemptNumber: claimedAttemptNumber } : null;
}
async reclaimStaleSteps(staleThreshold, reclaimedBy) {
 const reclaimed = [];
 const now = getMicrosecondTimestamp();
 const staleSteps = await this.db.findStaleSteps(staleThreshold);
 for (const step of staleSteps) {
- const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- });
- await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
-  availableAt: now,
-  reason: "retry",
-  attemptNumber: state.attemptNumber + 1,
-  retryDelayMs: 0
- });
- reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
- }
+ const staleDuration = now - step.lastHeartbeatUs;
+ await this.saveStepReclaimed(step.workflowSlug, step.runId, step.stepId, {
+  originalWorkerId: step.workerId,
+  reclaimedBy,
+  lastHeartbeat: step.lastHeartbeatUs,
+  staleThreshold,
+  staleDuration,
+  attemptNumber: step.attemptNumber
+ });
+ await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
+  availableAt: now,
+  reason: "retry",
+  attemptNumber: step.attemptNumber + 1,
+  retryDelayMs: 0
+ });
+ reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
 }
 return reclaimed;
}
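Note: reclaimStaleSteps no longer replays each step's event stream; it trusts the attemptNumber and lastHeartbeatUs now returned by findStaleSteps, records a StepReclaimed event, and immediately reschedules the step (availableAt: now) as attempt attemptNumber + 1. The staleness math, for reference:

    // microsecond timeline:
    //   lastHeartbeatUs ............................. now
    //                  |<------ staleDuration ------>|
    // the step qualified because staleDuration exceeded staleThreshold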
@@ -7422,9 +7811,48 @@
   successRate
  };
 }
+ async getActiveWorkers(options) {
+  const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
+  const result = await this.db.getActiveWorkersAggregation({
+   staleThresholdUs,
+   includeInactive: options?.includeInactive,
+   timeRange: options?.timeRange
+  });
+  return {
+   workers: result.workers.map((w) => ({
+    workerId: w.workerId,
+    lastSeenUs: w.lastSeenUs,
+    currentlyRunningSteps: w.activeSteps.length,
+    totalStepsProcessed: w.totalStepsProcessed,
+    failedSteps: w.failedSteps,
+    reclaimedFromCount: w.reclaimedFromCount,
+    workerConcurrency: w.workerConcurrency,
+    activeSteps: w.activeSteps
+   })),
+   totalActiveWorkers: result.totalActiveWorkers,
+   totalRunningSteps: result.totalRunningSteps,
+   staleThresholdUs
+  };
+ }
+ async getWorkerById(workerId) {
+  const result = await this.db.getWorkerById(workerId);
+  if (!result) {
+   return null;
+  }
+  return {
+   workerId: result.workerId,
+   lastSeenUs: result.lastSeenUs,
+   currentlyRunningSteps: result.activeSteps.length,
+   totalStepsProcessed: result.totalStepsProcessed,
+   failedSteps: result.failedSteps,
+   reclaimedFromCount: result.reclaimedFromCount,
+   workerConcurrency: result.workerConcurrency,
+   activeSteps: result.activeSteps
+  };
+ }
 }
 export {
  PostgresBackend
 };

-//# debugId=
+//# debugId=43E020BB2B7D86DF64756E2164756E21