@checkstack/automation-backend 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +544 -0
- package/drizzle/0003_sparkling_xorn.sql +17 -0
- package/drizzle/0004_cultured_spyke.sql +2 -0
- package/drizzle/0005_classy_the_hand.sql +19 -0
- package/drizzle/0006_burly_wallop.sql +10 -0
- package/drizzle/0007_nappy_jackal.sql +1 -0
- package/drizzle/0008_remove_seeded_auto_incident_automations.sql +13 -0
- package/drizzle/0009_steady_liz_osborn.sql +12 -0
- package/drizzle/0010_chunky_changeling.sql +2 -0
- package/drizzle/meta/0003_snapshot.json +1007 -0
- package/drizzle/meta/0004_snapshot.json +1028 -0
- package/drizzle/meta/0005_snapshot.json +1164 -0
- package/drizzle/meta/0006_snapshot.json +1261 -0
- package/drizzle/meta/0007_snapshot.json +1215 -0
- package/drizzle/meta/0008_snapshot.json +1215 -0
- package/drizzle/meta/0009_snapshot.json +1328 -0
- package/drizzle/meta/0010_snapshot.json +1349 -0
- package/drizzle/meta/_journal.json +56 -0
- package/package.json +23 -12
- package/src/action-types.ts +23 -0
- package/src/artifact-store.ts +16 -1
- package/src/automation-store.test.ts +143 -0
- package/src/automation-store.ts +30 -8
- package/src/builtin-triggers.test.ts +77 -74
- package/src/builtin-triggers.ts +105 -108
- package/src/dispatch/action-kind.ts +2 -0
- package/src/dispatch/assemble-get-service.ts +31 -0
- package/src/dispatch/cancel-resurrect.test.ts +147 -0
- package/src/dispatch/concurrency-race.test.ts +255 -0
- package/src/dispatch/concurrency-scope.test.ts +166 -0
- package/src/dispatch/condition.ts +24 -5
- package/src/dispatch/dwell-queue.ts +65 -0
- package/src/dispatch/dwell-store.ts +154 -0
- package/src/dispatch/dwell.it.test.ts +142 -0
- package/src/dispatch/dwell.test.ts +799 -0
- package/src/dispatch/dwell.ts +257 -0
- package/src/dispatch/engine.test.ts +189 -2
- package/src/dispatch/engine.ts +555 -9
- package/src/dispatch/entity-scope.test.ts +176 -0
- package/src/dispatch/get-service-wiring.test.ts +318 -0
- package/src/dispatch/numeric.test.ts +71 -0
- package/src/dispatch/numeric.ts +96 -0
- package/src/dispatch/render.test.ts +34 -0
- package/src/dispatch/render.ts +31 -11
- package/src/dispatch/reseed-run-secrets.ts +230 -0
- package/src/dispatch/run-secret-registry.test.ts +189 -0
- package/src/dispatch/run-secret-registry.ts +247 -0
- package/src/dispatch/run-state-masking.test.ts +376 -0
- package/src/dispatch/run-state-store.ts +95 -38
- package/src/dispatch/run-state.ts +226 -59
- package/src/dispatch/scope-artifact-masking.test.ts +138 -0
- package/src/dispatch/secret-ref-ids.test.ts +19 -0
- package/src/dispatch/secret-ref-ids.ts +17 -0
- package/src/dispatch/snapshots.test.ts +86 -0
- package/src/dispatch/snapshots.ts +79 -0
- package/src/dispatch/stage1-router.test.ts +324 -0
- package/src/dispatch/stage1-router.ts +152 -0
- package/src/dispatch/stage1.it.test.ts +84 -0
- package/src/dispatch/stage2-dispatch.test.ts +285 -0
- package/src/dispatch/stage2-dispatch.ts +207 -0
- package/src/dispatch/stage2-stalled.it.test.ts +132 -0
- package/src/dispatch/stalled-sweeper.test.ts +197 -0
- package/src/dispatch/stalled-sweeper.ts +112 -5
- package/src/dispatch/state-scope.test.ts +234 -0
- package/src/dispatch/state-scope.ts +322 -0
- package/src/dispatch/structured-conditions.test.ts +246 -0
- package/src/dispatch/structured-conditions.ts +146 -0
- package/src/dispatch/test-fixtures.ts +306 -38
- package/src/dispatch/trigger-fanin.test.ts +111 -0
- package/src/dispatch/trigger-subscriber.ts +316 -14
- package/src/dispatch/types.ts +263 -8
- package/src/dispatch/wait-timeout-queue.ts +89 -0
- package/src/dispatch/wait-until-entity-wake.test.ts +544 -0
- package/src/dispatch/wait-until.test.ts +540 -0
- package/src/dispatch/wake-refs.test.ts +158 -0
- package/src/dispatch/wake-refs.ts +348 -0
- package/src/dispatch/window-gate.test.ts +513 -0
- package/src/dispatch/window-store.test.ts +162 -0
- package/src/dispatch/window-store.ts +102 -0
- package/src/entity/change-derivers.test.ts +148 -0
- package/src/entity/change-derivers.ts +143 -0
- package/src/entity/change-emitter.test.ts +66 -0
- package/src/entity/change-emitter.ts +76 -0
- package/src/entity/create-handle.ts +344 -0
- package/src/entity/cross-pod-read-consistency.it.test.ts +281 -0
- package/src/entity/define-entity.ts +157 -0
- package/src/entity/diff.test.ts +57 -0
- package/src/entity/diff.ts +54 -0
- package/src/entity/entity-store.test.ts +30 -0
- package/src/entity/entity-store.ts +171 -0
- package/src/entity/extension-point.ts +56 -0
- package/src/entity/fake-entity-store.ts +130 -0
- package/src/entity/hook.ts +19 -0
- package/src/entity/index.ts +50 -0
- package/src/entity/mutate-handle.test.ts +517 -0
- package/src/entity/on-entity-changed.test.ts +189 -0
- package/src/entity/on-entity-changed.ts +214 -0
- package/src/entity/registry.test.ts +181 -0
- package/src/entity/registry.ts +200 -0
- package/src/entity/stable-stringify.test.ts +55 -0
- package/src/entity/stable-stringify.ts +49 -0
- package/src/entity/wake-index.it.test.ts +251 -0
- package/src/entity/with-entity-write.test.ts +100 -0
- package/src/entity/with-entity-write.ts +69 -0
- package/src/entity-driven-trigger.ts +46 -0
- package/src/extension-points.ts +35 -0
- package/src/gitops-docs.test.ts +215 -0
- package/src/gitops-docs.ts +151 -0
- package/src/gitops-kinds.test.ts +174 -0
- package/src/gitops-kinds.ts +137 -0
- package/src/index.ts +355 -11
- package/src/migration/flapping-to-window.test.ts +123 -0
- package/src/migration/flapping-to-window.ts +205 -0
- package/src/router.test.ts +182 -1
- package/src/router.ts +73 -2
- package/src/schema.ts +236 -3
- package/src/script-test-replay.test.ts +88 -0
- package/src/script-test-replay.ts +100 -0
- package/src/script-test-shell-env.test.ts +41 -0
- package/src/script-test-shell-env.ts +89 -0
- package/src/script-test.test.ts +386 -0
- package/src/script-test.ts +258 -0
- package/src/trigger-registry.ts +2 -0
- package/src/validate-definition.test.ts +1 -0
- package/tsconfig.json +24 -0
|
@@ -11,10 +11,15 @@
|
|
|
11
11
|
* at a time. The lock auto-releases when the holding connection dies —
|
|
12
12
|
* exactly what we want during crash recovery.
|
|
13
13
|
*/
|
|
14
|
-
import {
|
|
15
|
-
import type {
|
|
14
|
+
import { and, eq, lt } from "drizzle-orm";
|
|
15
|
+
import type {
|
|
16
|
+
AdvisoryLockHandle,
|
|
17
|
+
AdvisoryLockService,
|
|
18
|
+
SafeDatabase,
|
|
19
|
+
} from "@checkstack/backend-api";
|
|
16
20
|
|
|
17
|
-
import { automationRunState } from "../schema";
|
|
21
|
+
import { automationRunState, automationRuns } from "../schema";
|
|
22
|
+
import type { RunSecretRegistry } from "./run-secret-registry";
|
|
18
23
|
|
|
19
24
|
export interface RunStateSnapshot {
|
|
20
25
|
scopeSnapshot: Record<string, unknown>;
|
|
@@ -27,11 +32,18 @@ export interface RunStateStore {
|
|
|
27
32
|
* Write or update the per-run durable state. `lastActionPath` is the
|
|
28
33
|
* path of the most recently completed action — resume walks the tree
|
|
29
34
|
* looking for this path and treats the action at it as already done.
|
|
35
|
+
*
|
|
36
|
+
* Omitting `lastActionPath` (vs. passing `null`) on an UPDATE preserves
|
|
37
|
+
* the existing checkpoint. This matters at suspend-finalisation: the
|
|
38
|
+
* checkpoint written by the suspending action (its real path) must
|
|
39
|
+
* survive so a crash-recovery resumes from it rather than re-walking
|
|
40
|
+
* from `actions[0]`. Passing `null` explicitly still clobbers it (used
|
|
41
|
+
* only for the initial pre-first-step snapshot).
|
|
30
42
|
*/
|
|
31
43
|
upsert(input: {
|
|
32
44
|
runId: string;
|
|
33
45
|
scopeSnapshot: Record<string, unknown>;
|
|
34
|
-
lastActionPath
|
|
46
|
+
lastActionPath?: string | null;
|
|
35
47
|
}): Promise<void>;
|
|
36
48
|
|
|
37
49
|
load(runId: string): Promise<RunStateSnapshot | undefined>;
|
|
@@ -43,46 +55,89 @@ export interface RunStateStore {
|
|
|
43
55
|
heartbeat(runId: string): Promise<void>;
|
|
44
56
|
|
|
45
57
|
/**
|
|
46
|
-
* Run ids whose heartbeat is older than
|
|
47
|
-
* heartbeat-ascending order so the sweeper
|
|
48
|
-
* stale first.
|
|
58
|
+
* Run ids of `status = 'running'` runs whose heartbeat is older than
|
|
59
|
+
* `threshold`. Returned in heartbeat-ascending order so the sweeper
|
|
60
|
+
* processes the most stale first.
|
|
61
|
+
*
|
|
62
|
+
* The status filter is load-bearing: `waiting` runs (suspended on a
|
|
63
|
+
* `delay` / `wait_for_trigger` / `wait_until`) keep their state row but
|
|
64
|
+
* are NOT stalled - they are owned by the wait-lock / queue resume
|
|
65
|
+
* paths. Returning them here would let the sweeper re-walk an
|
|
66
|
+
* intentional wait every cycle, re-firing pre-wait side effects and
|
|
67
|
+
* leaking wait locks. Only a `running` run whose heartbeat went cold is
|
|
68
|
+
* a genuine crash.
|
|
49
69
|
*/
|
|
50
70
|
findStalledRunIds(threshold: Date): Promise<string[]>;
|
|
51
71
|
|
|
52
72
|
/**
|
|
53
|
-
* Try to acquire a Postgres session-level advisory lock for the run
|
|
54
|
-
* Returns
|
|
55
|
-
*
|
|
73
|
+
* Try to acquire a Postgres session-level advisory lock for the run on a
|
|
74
|
+
* dedicated pooled client. Returns a handle on acquisition (release it in
|
|
75
|
+
* a `finally`), or `null` if another instance already holds it.
|
|
76
|
+
*
|
|
77
|
+
* A dedicated client is required because the lock is held across the whole
|
|
78
|
+
* resume (which executes the run's actions — potentially long and
|
|
79
|
+
* involving external calls), so a transaction-scoped lock would mean a
|
|
80
|
+
* minutes-long open transaction. The session lock auto-releases when the
|
|
81
|
+
* holding connection dies (e.g. on process crash), so dead instances don't
|
|
56
82
|
* leak locks.
|
|
57
83
|
*/
|
|
58
|
-
tryAdvisoryLock(runId: string): Promise<
|
|
59
|
-
|
|
60
|
-
/** Release a previously-acquired advisory lock. */
|
|
61
|
-
releaseAdvisoryLock(runId: string): Promise<void>;
|
|
84
|
+
tryAdvisoryLock(runId: string): Promise<AdvisoryLockHandle | null>;
|
|
62
85
|
}
|
|
63
86
|
|
|
64
|
-
type Schema = {
|
|
87
|
+
type Schema = {
|
|
88
|
+
automationRunState: typeof automationRunState;
|
|
89
|
+
automationRuns: typeof automationRuns;
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
/** Namespace run locks in the global advisory-lock space. */
|
|
93
|
+
function runLockKey(runId: string): string {
|
|
94
|
+
return `automation.run:${runId}`;
|
|
95
|
+
}
|
|
65
96
|
|
|
66
97
|
export function createRunStateStore(
|
|
67
98
|
db: SafeDatabase<Schema>,
|
|
99
|
+
advisoryLock: AdvisoryLockService,
|
|
100
|
+
/**
|
|
101
|
+
* Run-scoped secret values accumulated during dispatch. When provided,
|
|
102
|
+
* the persisted `scopeSnapshot` is masked (Jenkins-style, by-value)
|
|
103
|
+
* BEFORE write — so a resolved connection credential threaded into
|
|
104
|
+
* `scope.variables` / `scope.artifacts` can't reach a replay reader
|
|
105
|
+
* (`getRunScopeForReplay`) unmasked. The registry is in-memory and gone
|
|
106
|
+
* by replay time, so persist-time is the only place masking can happen.
|
|
107
|
+
* Optional so tests / older boots degrade to no masking.
|
|
108
|
+
*/
|
|
109
|
+
secretRegistry?: RunSecretRegistry,
|
|
68
110
|
): RunStateStore {
|
|
69
111
|
return {
|
|
70
112
|
async upsert(input) {
|
|
113
|
+
// Mask the scope snapshot at the persistence choke point — same
|
|
114
|
+
// pattern the run store uses for step / run output.
|
|
115
|
+
const maskedScope = (secretRegistry?.maskDeep(
|
|
116
|
+
input.runId,
|
|
117
|
+
input.scopeSnapshot,
|
|
118
|
+
) ?? input.scopeSnapshot) as Record<string, unknown>;
|
|
119
|
+
// Omitting `lastActionPath` preserves the existing checkpoint on an
|
|
120
|
+
// UPDATE (so a suspend-finalisation doesn't clobber the suspending
|
|
121
|
+
// action's path to null). The INSERT still needs a value, so a fresh
|
|
122
|
+
// row defaults to null.
|
|
123
|
+
const updateSet: Record<string, unknown> = {
|
|
124
|
+
scopeSnapshot: maskedScope,
|
|
125
|
+
lastHeartbeatAt: new Date(),
|
|
126
|
+
updatedAt: new Date(),
|
|
127
|
+
};
|
|
128
|
+
if (input.lastActionPath !== undefined) {
|
|
129
|
+
updateSet.lastActionPath = input.lastActionPath;
|
|
130
|
+
}
|
|
71
131
|
await db
|
|
72
132
|
.insert(automationRunState)
|
|
73
133
|
.values({
|
|
74
134
|
runId: input.runId,
|
|
75
|
-
scopeSnapshot:
|
|
76
|
-
lastActionPath: input.lastActionPath,
|
|
135
|
+
scopeSnapshot: maskedScope,
|
|
136
|
+
lastActionPath: input.lastActionPath ?? null,
|
|
77
137
|
})
|
|
78
138
|
.onConflictDoUpdate({
|
|
79
139
|
target: automationRunState.runId,
|
|
80
|
-
set:
|
|
81
|
-
scopeSnapshot: input.scopeSnapshot,
|
|
82
|
-
lastActionPath: input.lastActionPath,
|
|
83
|
-
lastHeartbeatAt: new Date(),
|
|
84
|
-
updatedAt: new Date(),
|
|
85
|
-
},
|
|
140
|
+
set: updateSet,
|
|
86
141
|
});
|
|
87
142
|
},
|
|
88
143
|
|
|
@@ -115,29 +170,31 @@ export function createRunStateStore(
|
|
|
115
170
|
},
|
|
116
171
|
|
|
117
172
|
async findStalledRunIds(threshold) {
|
|
173
|
+
// Join the run row so we only return runs that are actually
|
|
174
|
+
// `running`. A `waiting` run keeps its state snapshot but must NOT
|
|
175
|
+
// be re-walked by the sweeper - it is owned by the wait-lock /
|
|
176
|
+
// queue resume paths.
|
|
118
177
|
const rows = await db
|
|
119
178
|
.select({ runId: automationRunState.runId })
|
|
120
179
|
.from(automationRunState)
|
|
121
|
-
.
|
|
180
|
+
.innerJoin(
|
|
181
|
+
automationRuns,
|
|
182
|
+
eq(automationRuns.id, automationRunState.runId),
|
|
183
|
+
)
|
|
184
|
+
.where(
|
|
185
|
+
and(
|
|
186
|
+
lt(automationRunState.lastHeartbeatAt, threshold),
|
|
187
|
+
eq(automationRuns.status, "running"),
|
|
188
|
+
),
|
|
189
|
+
)
|
|
122
190
|
.orderBy(automationRunState.lastHeartbeatAt);
|
|
123
191
|
return rows.map((r) => r.runId);
|
|
124
192
|
},
|
|
125
193
|
|
|
126
194
|
async tryAdvisoryLock(runId) {
|
|
127
|
-
//
|
|
128
|
-
//
|
|
129
|
-
|
|
130
|
-
const result = await db.execute<{ ok: boolean }>(sql`
|
|
131
|
-
SELECT pg_try_advisory_lock(hashtextextended(${runId}, 0)) AS ok
|
|
132
|
-
`);
|
|
133
|
-
const rows = result as unknown as { rows: Array<{ ok: boolean }> };
|
|
134
|
-
return Boolean(rows.rows?.[0]?.ok);
|
|
135
|
-
},
|
|
136
|
-
|
|
137
|
-
async releaseAdvisoryLock(runId) {
|
|
138
|
-
await db.execute(sql`
|
|
139
|
-
SELECT pg_advisory_unlock(hashtextextended(${runId}, 0))
|
|
140
|
-
`);
|
|
195
|
+
// Acquire on a dedicated client (see interface doc) — the lock is held
|
|
196
|
+
// for the whole resume, so it must not ride a long-open transaction.
|
|
197
|
+
return advisoryLock.tryAcquire(runLockKey(runId));
|
|
141
198
|
},
|
|
142
199
|
};
|
|
143
200
|
}
|
|
@@ -8,32 +8,83 @@
|
|
|
8
8
|
* trigger subscriber).
|
|
9
9
|
*/
|
|
10
10
|
import { and, desc, eq, inArray, isNotNull, isNull, lte, sql } from "drizzle-orm";
|
|
11
|
-
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
11
|
+
import type { Logger, SafeDatabase } from "@checkstack/backend-api";
|
|
12
12
|
|
|
13
13
|
import {
|
|
14
|
+
automationRunState,
|
|
14
15
|
automationRunSteps,
|
|
15
16
|
automationRuns,
|
|
16
17
|
automationWaitLocks,
|
|
18
|
+
automationWakeIndex,
|
|
17
19
|
} from "../schema";
|
|
18
20
|
import type {
|
|
19
21
|
CreateRunInput,
|
|
20
22
|
CreateStepInput,
|
|
21
23
|
CreateWaitLockInput,
|
|
24
|
+
CreateWaitLockWithRefsInput,
|
|
22
25
|
LoadedRun,
|
|
23
26
|
LoadedStep,
|
|
24
27
|
LoadedWaitLock,
|
|
25
28
|
RunStore,
|
|
29
|
+
WaitLockKind,
|
|
26
30
|
} from "./types";
|
|
31
|
+
import { parseWaitConfig } from "./snapshots";
|
|
32
|
+
import type { RunSecretRegistry } from "./run-secret-registry";
|
|
27
33
|
|
|
28
34
|
type Schema = {
|
|
29
35
|
automationRuns: typeof automationRuns;
|
|
30
36
|
automationRunSteps: typeof automationRunSteps;
|
|
31
37
|
automationWaitLocks: typeof automationWaitLocks;
|
|
38
|
+
automationRunState: typeof automationRunState;
|
|
39
|
+
automationWakeIndex: typeof automationWakeIndex;
|
|
32
40
|
};
|
|
33
41
|
|
|
42
|
+
/** The kind-level wildcard ref for a `${kind}:${id}` ref. */
|
|
43
|
+
function wildcardRefFor(ref: string): string {
|
|
44
|
+
const colon = ref.indexOf(":");
|
|
45
|
+
const kind = colon === -1 ? ref : ref.slice(0, colon);
|
|
46
|
+
return `${kind}:*`;
|
|
47
|
+
}
|
|
48
|
+
|
|
34
49
|
const ACTIVE_STATUSES = ["pending", "running", "waiting"] as const;
|
|
35
50
|
|
|
36
|
-
|
|
51
|
+
/**
|
|
52
|
+
* Predicate for "active runs of this automation". When `contextKey` is
|
|
53
|
+
* `undefined` the filter is per-automation (the default concurrency
|
|
54
|
+
* scope); when provided (string or `null`) it additionally narrows to
|
|
55
|
+
* that context key (the per-context-key scope) - `null` matches runs
|
|
56
|
+
* with no context key.
|
|
57
|
+
*/
|
|
58
|
+
function activeRunsPredicate(
|
|
59
|
+
automationId: string,
|
|
60
|
+
contextKey: string | null | undefined,
|
|
61
|
+
) {
|
|
62
|
+
const conditions = [
|
|
63
|
+
eq(automationRuns.automationId, automationId),
|
|
64
|
+
inArray(automationRuns.status, [...ACTIVE_STATUSES]),
|
|
65
|
+
];
|
|
66
|
+
if (contextKey !== undefined) {
|
|
67
|
+
conditions.push(
|
|
68
|
+
contextKey === null
|
|
69
|
+
? isNull(automationRuns.contextKey)
|
|
70
|
+
: eq(automationRuns.contextKey, contextKey),
|
|
71
|
+
);
|
|
72
|
+
}
|
|
73
|
+
return and(...conditions);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
export function createRunStore(
|
|
77
|
+
db: SafeDatabase<Schema>,
|
|
78
|
+
logger?: Logger,
|
|
79
|
+
/**
|
|
80
|
+
* Run-scoped secret values accumulated during dispatch. When provided,
|
|
81
|
+
* step `resultPayload` / `errorMessage` and run-level `errorMessage` are
|
|
82
|
+
* masked (Jenkins-style, by-value) BEFORE persistence, so no resolved
|
|
83
|
+
* secret can reach a DTO / run-detail page. Optional so tests / older
|
|
84
|
+
* boots degrade to no masking.
|
|
85
|
+
*/
|
|
86
|
+
secretRegistry?: RunSecretRegistry,
|
|
87
|
+
): RunStore {
|
|
37
88
|
return {
|
|
38
89
|
async createRun(input: CreateRunInput): Promise<string> {
|
|
39
90
|
const [row] = await db
|
|
@@ -57,14 +108,22 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
57
108
|
status === "failed" ||
|
|
58
109
|
status === "cancelled" ||
|
|
59
110
|
status === "skipped";
|
|
111
|
+
// Mask the run-level error before persisting (a provider HTTP error
|
|
112
|
+
// could embed a resolved credential).
|
|
113
|
+
const maskedError =
|
|
114
|
+
errorMessage === undefined
|
|
115
|
+
? null
|
|
116
|
+
: (secretRegistry?.maskText(runId, errorMessage) ?? errorMessage);
|
|
60
117
|
await db
|
|
61
118
|
.update(automationRuns)
|
|
62
119
|
.set({
|
|
63
120
|
status,
|
|
64
|
-
errorMessage:
|
|
121
|
+
errorMessage: maskedError,
|
|
65
122
|
finishedAt: isTerminal ? new Date() : null,
|
|
66
123
|
})
|
|
67
124
|
.where(eq(automationRuns.id, runId));
|
|
125
|
+
// Drop the run's accumulated mask set once it is terminal (memory-only).
|
|
126
|
+
if (isTerminal) secretRegistry?.drop(runId);
|
|
68
127
|
},
|
|
69
128
|
|
|
70
129
|
async loadRun(runId: string): Promise<LoadedRun | undefined> {
|
|
@@ -89,29 +148,25 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
89
148
|
};
|
|
90
149
|
},
|
|
91
150
|
|
|
92
|
-
async countActiveRuns(
|
|
151
|
+
async countActiveRuns(
|
|
152
|
+
automationId: string,
|
|
153
|
+
contextKey?: string | null,
|
|
154
|
+
): Promise<number> {
|
|
93
155
|
const rows = await db
|
|
94
156
|
.select({ count: sql<number>`count(*)::int` })
|
|
95
157
|
.from(automationRuns)
|
|
96
|
-
.where(
|
|
97
|
-
and(
|
|
98
|
-
eq(automationRuns.automationId, automationId),
|
|
99
|
-
inArray(automationRuns.status, [...ACTIVE_STATUSES]),
|
|
100
|
-
),
|
|
101
|
-
);
|
|
158
|
+
.where(activeRunsPredicate(automationId, contextKey));
|
|
102
159
|
return rows[0]?.count ?? 0;
|
|
103
160
|
},
|
|
104
161
|
|
|
105
|
-
async hasActiveRun(
|
|
162
|
+
async hasActiveRun(
|
|
163
|
+
automationId: string,
|
|
164
|
+
contextKey?: string | null,
|
|
165
|
+
): Promise<boolean> {
|
|
106
166
|
const rows = await db
|
|
107
167
|
.select({ id: automationRuns.id })
|
|
108
168
|
.from(automationRuns)
|
|
109
|
-
.where(
|
|
110
|
-
and(
|
|
111
|
-
eq(automationRuns.automationId, automationId),
|
|
112
|
-
inArray(automationRuns.status, [...ACTIVE_STATUSES]),
|
|
113
|
-
),
|
|
114
|
-
)
|
|
169
|
+
.where(activeRunsPredicate(automationId, contextKey))
|
|
115
170
|
.limit(1);
|
|
116
171
|
return rows.length > 0;
|
|
117
172
|
},
|
|
@@ -119,6 +174,7 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
119
174
|
async cancelActiveRuns(
|
|
120
175
|
automationId: string,
|
|
121
176
|
reason: string,
|
|
177
|
+
contextKey?: string | null,
|
|
122
178
|
): Promise<string[]> {
|
|
123
179
|
const rows = await db
|
|
124
180
|
.update(automationRuns)
|
|
@@ -127,14 +183,26 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
127
183
|
errorMessage: reason,
|
|
128
184
|
finishedAt: new Date(),
|
|
129
185
|
})
|
|
130
|
-
.where(
|
|
131
|
-
and(
|
|
132
|
-
eq(automationRuns.automationId, automationId),
|
|
133
|
-
inArray(automationRuns.status, [...ACTIVE_STATUSES]),
|
|
134
|
-
),
|
|
135
|
-
)
|
|
186
|
+
.where(activeRunsPredicate(automationId, contextKey))
|
|
136
187
|
.returning({ id: automationRuns.id });
|
|
137
|
-
|
|
188
|
+
const ids = rows.map((r) => r.id);
|
|
189
|
+
// Tear down the cancelled runs' suspension state in the SAME
|
|
190
|
+
// operation: delete their wait locks and durable run-state so a
|
|
191
|
+
// later wake (wakeWaitingRuns / delay-expiry / a racing queue job)
|
|
192
|
+
// can't resurrect a cancelled run. Mirrors the operator cancelRun
|
|
193
|
+
// path. (resumeRun also guards on status, but cleaning up here stops
|
|
194
|
+
// the sweeper from even re-ticking an orphaned lock.)
|
|
195
|
+
if (ids.length > 0) {
|
|
196
|
+
await db
|
|
197
|
+
.delete(automationWaitLocks)
|
|
198
|
+
.where(inArray(automationWaitLocks.runId, ids));
|
|
199
|
+
await db
|
|
200
|
+
.delete(automationRunState)
|
|
201
|
+
.where(inArray(automationRunState.runId, ids));
|
|
202
|
+
// Drop each run's in-memory mask set (terminal).
|
|
203
|
+
for (const id of ids) secretRegistry?.drop(id);
|
|
204
|
+
}
|
|
205
|
+
return ids;
|
|
138
206
|
},
|
|
139
207
|
|
|
140
208
|
async createStep(input: CreateStepInput): Promise<string> {
|
|
@@ -151,6 +219,9 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
151
219
|
})
|
|
152
220
|
.returning({ id: automationRunSteps.id });
|
|
153
221
|
if (!row) throw new Error("createStep: insert returned no rows");
|
|
222
|
+
// Link the step to its run so updateStep (which carries only stepId)
|
|
223
|
+
// can find the run's mask set.
|
|
224
|
+
secretRegistry?.linkStep(row.id, input.runId);
|
|
154
225
|
return row.id;
|
|
155
226
|
},
|
|
156
227
|
|
|
@@ -159,10 +230,23 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
159
230
|
patch.status === "success" ||
|
|
160
231
|
patch.status === "failed" ||
|
|
161
232
|
patch.status === "skipped";
|
|
233
|
+
// Mask resolved secret values out of the step output BEFORE persist —
|
|
234
|
+
// this is the run-wide choke point covering ALL actions (provider,
|
|
235
|
+
// log, etc.), not just the script/collector source-side masking.
|
|
236
|
+
const maskedError =
|
|
237
|
+
patch.errorMessage === undefined
|
|
238
|
+
? null
|
|
239
|
+
: (secretRegistry?.maskTextForStep(stepId, patch.errorMessage) ??
|
|
240
|
+
patch.errorMessage);
|
|
241
|
+
const maskedPayload =
|
|
242
|
+
patch.resultPayload === undefined
|
|
243
|
+
? null
|
|
244
|
+
: (secretRegistry?.maskDeepForStep(stepId, patch.resultPayload) ??
|
|
245
|
+
patch.resultPayload);
|
|
162
246
|
const set: Record<string, unknown> = {
|
|
163
247
|
status: patch.status,
|
|
164
|
-
errorMessage:
|
|
165
|
-
resultPayload:
|
|
248
|
+
errorMessage: maskedError,
|
|
249
|
+
resultPayload: maskedPayload,
|
|
166
250
|
};
|
|
167
251
|
if (isTerminal) set.finishedAt = new Date();
|
|
168
252
|
if (patch.incrementAttempts) {
|
|
@@ -214,12 +298,58 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
214
298
|
contextKey: input.contextKey,
|
|
215
299
|
filterTemplate: input.filterTemplate,
|
|
216
300
|
timeoutAt: input.timeoutAt,
|
|
301
|
+
// Serialisation boundary: UntilWaitConfig is a plain JSON object
|
|
302
|
+
// but its `condition` union isn't structurally a Record, so cast.
|
|
303
|
+
waitConfig: input.waitConfig
|
|
304
|
+
? (input.waitConfig as unknown as Record<string, unknown>)
|
|
305
|
+
: undefined,
|
|
217
306
|
})
|
|
218
307
|
.returning({ id: automationWaitLocks.id });
|
|
219
308
|
if (!row) throw new Error("createWaitLock: insert returned no rows");
|
|
220
309
|
return row.id;
|
|
221
310
|
},
|
|
222
311
|
|
|
312
|
+
async createWaitLockWithWakeRefs(
|
|
313
|
+
input: CreateWaitLockWithRefsInput,
|
|
314
|
+
): Promise<string> {
|
|
315
|
+
return db.transaction(async (tx) => {
|
|
316
|
+
const [row] = await tx
|
|
317
|
+
.insert(automationWaitLocks)
|
|
318
|
+
.values({
|
|
319
|
+
runId: input.runId,
|
|
320
|
+
actionPath: input.actionPath,
|
|
321
|
+
kind: "until",
|
|
322
|
+
eventId: input.eventId,
|
|
323
|
+
contextKey: input.contextKey,
|
|
324
|
+
filterTemplate: null,
|
|
325
|
+
timeoutAt: input.timeoutAt,
|
|
326
|
+
// Serialisation boundary — see createWaitLock.
|
|
327
|
+
waitConfig: input.waitConfig as unknown as Record<string, unknown>,
|
|
328
|
+
})
|
|
329
|
+
.returning({ id: automationWaitLocks.id });
|
|
330
|
+
if (!row) {
|
|
331
|
+
throw new Error("createWaitLockWithWakeRefs: insert returned no rows");
|
|
332
|
+
}
|
|
333
|
+
// De-dupe refs in-process before the insert (the unique index is the
|
|
334
|
+
// cross-process arm-race guard; this keeps the VALUES list tight).
|
|
335
|
+
const uniqueRefs = [...new Set(input.wakeRefs)];
|
|
336
|
+
if (uniqueRefs.length > 0) {
|
|
337
|
+
await tx
|
|
338
|
+
.insert(automationWakeIndex)
|
|
339
|
+
.values(
|
|
340
|
+
uniqueRefs.map((ref) => ({ waitLockId: row.id, ref })),
|
|
341
|
+
)
|
|
342
|
+
.onConflictDoNothing({
|
|
343
|
+
target: [
|
|
344
|
+
automationWakeIndex.waitLockId,
|
|
345
|
+
automationWakeIndex.ref,
|
|
346
|
+
],
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
return row.id;
|
|
350
|
+
});
|
|
351
|
+
},
|
|
352
|
+
|
|
223
353
|
async loadWaitLock(id) {
|
|
224
354
|
const rows = await db
|
|
225
355
|
.select()
|
|
@@ -228,17 +358,7 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
228
358
|
.limit(1);
|
|
229
359
|
const row = rows[0];
|
|
230
360
|
if (!row) return;
|
|
231
|
-
return
|
|
232
|
-
id: row.id,
|
|
233
|
-
runId: row.runId,
|
|
234
|
-
actionPath: row.actionPath,
|
|
235
|
-
kind: row.kind as "trigger" | "delay",
|
|
236
|
-
eventId: row.eventId,
|
|
237
|
-
contextKey: row.contextKey,
|
|
238
|
-
filterTemplate: row.filterTemplate,
|
|
239
|
-
timeoutAt: row.timeoutAt,
|
|
240
|
-
createdAt: row.createdAt,
|
|
241
|
-
};
|
|
361
|
+
return mapWaitLock(row, logger);
|
|
242
362
|
},
|
|
243
363
|
|
|
244
364
|
async findWaitLocksFor(
|
|
@@ -255,17 +375,48 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
255
375
|
.select()
|
|
256
376
|
.from(automationWaitLocks)
|
|
257
377
|
.where(and(...filters));
|
|
258
|
-
return rows.map((r) => (
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
378
|
+
return rows.map((r) => mapWaitLock(r, logger));
|
|
379
|
+
},
|
|
380
|
+
|
|
381
|
+
async findWaitLocksByWakeRef(ref: string): Promise<LoadedWaitLock[]> {
|
|
382
|
+
// The generalized form of findWaitLocksFor: join the wake-index onto
|
|
383
|
+
// the wait locks and match the exact ref OR the kind-level wildcard.
|
|
384
|
+
const wildcard = wildcardRefFor(ref);
|
|
385
|
+
const rows = await db
|
|
386
|
+
.select({ lock: automationWaitLocks })
|
|
387
|
+
.from(automationWaitLocks)
|
|
388
|
+
.innerJoin(
|
|
389
|
+
automationWakeIndex,
|
|
390
|
+
eq(automationWakeIndex.waitLockId, automationWaitLocks.id),
|
|
391
|
+
)
|
|
392
|
+
.where(
|
|
393
|
+
and(
|
|
394
|
+
eq(automationWaitLocks.kind, "until"),
|
|
395
|
+
inArray(automationWakeIndex.ref, [ref, wildcard]),
|
|
396
|
+
),
|
|
397
|
+
);
|
|
398
|
+
// A wait may match on both the exact ref and the wildcard; de-dupe by id.
|
|
399
|
+
const byId = new Map<string, LoadedWaitLock>();
|
|
400
|
+
for (const r of rows) {
|
|
401
|
+
if (!byId.has(r.lock.id)) byId.set(r.lock.id, mapWaitLock(r.lock, logger));
|
|
402
|
+
}
|
|
403
|
+
return [...byId.values()];
|
|
404
|
+
},
|
|
405
|
+
|
|
406
|
+
async findWaitLocksByKind(kind): Promise<LoadedWaitLock[]> {
|
|
407
|
+
const rows = await db
|
|
408
|
+
.select()
|
|
409
|
+
.from(automationWaitLocks)
|
|
410
|
+
.where(eq(automationWaitLocks.kind, kind));
|
|
411
|
+
return rows.map((r) => mapWaitLock(r, logger));
|
|
412
|
+
},
|
|
413
|
+
|
|
414
|
+
async findWaitLocksByRun(runId): Promise<LoadedWaitLock[]> {
|
|
415
|
+
const rows = await db
|
|
416
|
+
.select()
|
|
417
|
+
.from(automationWaitLocks)
|
|
418
|
+
.where(eq(automationWaitLocks.runId, runId));
|
|
419
|
+
return rows.map((r) => mapWaitLock(r, logger));
|
|
269
420
|
},
|
|
270
421
|
|
|
271
422
|
async deleteWaitLock(id: string): Promise<void> {
|
|
@@ -282,17 +433,33 @@ export function createRunStore(db: SafeDatabase<Schema>): RunStore {
|
|
|
282
433
|
lte(automationWaitLocks.timeoutAt, now),
|
|
283
434
|
),
|
|
284
435
|
);
|
|
285
|
-
return rows.map((r) => (
|
|
286
|
-
id: r.id,
|
|
287
|
-
runId: r.runId,
|
|
288
|
-
actionPath: r.actionPath,
|
|
289
|
-
kind: r.kind as "trigger" | "delay",
|
|
290
|
-
eventId: r.eventId,
|
|
291
|
-
contextKey: r.contextKey,
|
|
292
|
-
filterTemplate: r.filterTemplate,
|
|
293
|
-
timeoutAt: r.timeoutAt,
|
|
294
|
-
createdAt: r.createdAt,
|
|
295
|
-
}));
|
|
436
|
+
return rows.map((r) => mapWaitLock(r, logger));
|
|
296
437
|
},
|
|
297
438
|
};
|
|
298
439
|
}
|
|
440
|
+
|
|
441
|
+
/** Map a wait-lock row to the engine's {@link LoadedWaitLock}. */
|
|
442
|
+
function mapWaitLock(
|
|
443
|
+
row: typeof automationWaitLocks.$inferSelect,
|
|
444
|
+
logger?: Logger,
|
|
445
|
+
): LoadedWaitLock {
|
|
446
|
+
return {
|
|
447
|
+
id: row.id,
|
|
448
|
+
runId: row.runId,
|
|
449
|
+
actionPath: row.actionPath,
|
|
450
|
+
kind: row.kind as WaitLockKind,
|
|
451
|
+
eventId: row.eventId,
|
|
452
|
+
contextKey: row.contextKey,
|
|
453
|
+
filterTemplate: row.filterTemplate,
|
|
454
|
+
timeoutAt: row.timeoutAt,
|
|
455
|
+
// Parse the stored config on load — a drifted/hand-edited row degrades
|
|
456
|
+
// to null (engine treats the `until` lock as gone) instead of being
|
|
457
|
+
// trusted as a wrongly-typed UntilWaitConfig.
|
|
458
|
+
waitConfig: parseWaitConfig({
|
|
459
|
+
value: row.waitConfig,
|
|
460
|
+
logger,
|
|
461
|
+
context: `Wait lock ${row.id}`,
|
|
462
|
+
}),
|
|
463
|
+
createdAt: row.createdAt,
|
|
464
|
+
};
|
|
465
|
+
}
|