@smithers-orchestrator/scheduler 0.24.2 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/RenderContext.ts +16 -0
- package/src/RunResult.ts +18 -1
- package/src/TaskState.ts +1 -0
- package/src/WaitReason.ts +6 -1
- package/src/WorkflowSessionLive.js +12 -1
- package/src/index.d.ts +85 -30
- package/src/makeWorkflowSession.js +191 -72
- package/src/scheduleTasks.js +1 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/scheduler",
|
|
3
|
-
"version": "0.24.2",
|
|
3
|
+
"version": "0.25.1",
|
|
4
4
|
"description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -176,8 +176,8 @@
|
|
|
176
176
|
],
|
|
177
177
|
"dependencies": {
|
|
178
178
|
"effect": "^3.21.1",
|
|
179
|
-
"@smithers-orchestrator/errors": "0.24.2",
|
|
180
|
-
"@smithers-orchestrator/graph": "0.24.2"
|
|
179
|
+
"@smithers-orchestrator/errors": "0.25.1",
|
|
180
|
+
"@smithers-orchestrator/graph": "0.25.1"
|
|
181
181
|
},
|
|
182
182
|
"devDependencies": {
|
|
183
183
|
"@types/bun": "latest",
|
package/src/RenderContext.ts
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
import type { WorkflowGraph } from "@smithers-orchestrator/graph";
|
|
2
2
|
import type { TaskOutput } from "./TaskOutput.ts";
|
|
3
3
|
|
|
4
|
+
export type RenderTriggerReason =
|
|
5
|
+
| "task-finished"
|
|
6
|
+
| "timer-fired"
|
|
7
|
+
| "cache-resolved"
|
|
8
|
+
| "loop-advanced"
|
|
9
|
+
| "deadlock-check"
|
|
10
|
+
| "stability-check"
|
|
11
|
+
| (string & {});
|
|
12
|
+
|
|
13
|
+
export type RenderTrigger = {
|
|
14
|
+
readonly reason: RenderTriggerReason;
|
|
15
|
+
readonly nodeId?: string;
|
|
16
|
+
readonly iteration?: number;
|
|
17
|
+
};
|
|
18
|
+
|
|
4
19
|
export type RenderContext = {
|
|
5
20
|
readonly runId: string;
|
|
6
21
|
readonly graph?: WorkflowGraph | null;
|
|
@@ -11,4 +26,5 @@ export type RenderContext = {
|
|
|
11
26
|
readonly auth?: unknown;
|
|
12
27
|
readonly taskStates?: unknown;
|
|
13
28
|
readonly ralphIterations?: ReadonlyMap<string, number>;
|
|
29
|
+
readonly trigger?: RenderTrigger;
|
|
14
30
|
};
|
package/src/RunResult.ts
CHANGED
|
@@ -8,8 +8,25 @@ export type RunResult = {
|
|
|
8
8
|
| "continued"
|
|
9
9
|
| "waiting-approval"
|
|
10
10
|
| "waiting-event"
|
|
11
|
-
| "waiting-timer"
|
|
11
|
+
| "waiting-timer"
|
|
12
|
+
| "waiting-quota";
|
|
12
13
|
readonly output?: unknown;
|
|
13
14
|
readonly error?: unknown;
|
|
14
15
|
readonly nextRunId?: string;
|
|
16
|
+
/**
|
|
17
|
+
* Number of tasks that ended in a `failed` state yet did not fail the run —
|
|
18
|
+
* "masked" child failures the run-level status cannot express. Present (and
|
|
19
|
+
* `> 0`) only on a `finished` result that tolerated at least one failure
|
|
20
|
+
* (a {@link https://smithers.sh/components/task `continueOnFail`} task, or an
|
|
21
|
+
* agent task that failed transiently: rate limit, timeout, abort). A binary
|
|
22
|
+
* `finished` status would otherwise read as a clean success. See
|
|
23
|
+
* `docs/runtime/run-state.mdx`.
|
|
24
|
+
*/
|
|
25
|
+
readonly failedChildren?: number;
|
|
26
|
+
/**
|
|
27
|
+
* Task state keys (`nodeId::iteration`) of the tasks counted by
|
|
28
|
+
* {@link failedChildren}. The iteration disambiguates the same `nodeId` failing
|
|
29
|
+
* across loop/Ralph iterations.
|
|
30
|
+
*/
|
|
31
|
+
readonly failedChildKeys?: readonly string[];
|
|
15
32
|
};
|
package/src/TaskState.ts
CHANGED
package/src/WaitReason.ts
CHANGED
|
@@ -5,4 +5,9 @@ export type WaitReason =
|
|
|
5
5
|
| { readonly _tag: "RetryBackoff"; readonly waitMs: number }
|
|
6
6
|
| { readonly _tag: "HotReload" }
|
|
7
7
|
| { readonly _tag: "OrphanRecovery"; readonly count: number }
|
|
8
|
-
| { readonly _tag: "ExternalTrigger" }
|
|
8
|
+
| { readonly _tag: "ExternalTrigger" }
|
|
9
|
+
| {
|
|
10
|
+
readonly _tag: "Quota";
|
|
11
|
+
readonly quotaBlockedCount: number;
|
|
12
|
+
readonly resetAtMs?: number;
|
|
13
|
+
};
|
package/src/WorkflowSessionLive.js
CHANGED
|
@@ -2,5 +2,16 @@ import { Layer } from "effect";
|
|
|
2
2
|
import { WorkflowSession } from "./WorkflowSession.js";
|
|
3
3
|
import { makeWorkflowSession } from "./makeWorkflowSession.js";
|
|
4
4
|
|
|
5
|
-
/**
|
|
5
|
+
/**
|
|
6
|
+
* WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
|
|
7
|
+
* `makeWorkflowSession()` instance for the whole layer scope, but a workflow
|
|
8
|
+
* session carries per-run state, so sharing it across runs is a correctness bug.
|
|
9
|
+
* The engine intentionally bypasses this Tag and constructs a fresh session per
|
|
10
|
+
* run via `makeWorkflowSession()` directly — which is why nothing yields
|
|
11
|
+
* `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
|
|
12
|
+
* per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
|
|
13
|
+
* gets its own session.
|
|
14
|
+
*
|
|
15
|
+
* @type {Layer.Layer<WorkflowSession, never, never>}
|
|
16
|
+
*/
|
|
6
17
|
export const WorkflowSessionLive = Layer.sync(WorkflowSession, makeWorkflowSession);
|
package/src/index.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import * as effect from 'effect';
|
|
2
|
-
import { Context, Layer, Effect, Schedule } from 'effect';
|
|
3
|
-
import * as
|
|
2
|
+
import { Context, Layer, Effect, Schedule as Schedule$1 } from 'effect';
|
|
3
|
+
import * as _smithers_orchestrator_graph from '@smithers-orchestrator/graph';
|
|
4
4
|
import { TaskDescriptor as TaskDescriptor$3, WorkflowGraph } from '@smithers-orchestrator/graph';
|
|
5
|
+
import { TaskDescriptor as TaskDescriptor$4 } from '@smithers-orchestrator/graph/TaskDescriptor';
|
|
5
6
|
|
|
6
|
-
type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
|
|
7
|
+
type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
|
|
7
8
|
|
|
8
9
|
type TaskStateMap$4 = Map<string, TaskState$2>;
|
|
9
10
|
|
|
@@ -112,6 +113,12 @@ type TaskOutput$1 = {
|
|
|
112
113
|
readonly usage?: TokenUsage$1 | null;
|
|
113
114
|
};
|
|
114
115
|
|
|
116
|
+
type RenderTriggerReason = "task-finished" | "timer-fired" | "cache-resolved" | "loop-advanced" | "deadlock-check" | "stability-check" | (string & {});
|
|
117
|
+
type RenderTrigger = {
|
|
118
|
+
readonly reason: RenderTriggerReason;
|
|
119
|
+
readonly nodeId?: string;
|
|
120
|
+
readonly iteration?: number;
|
|
121
|
+
};
|
|
115
122
|
type RenderContext$1 = {
|
|
116
123
|
readonly runId: string;
|
|
117
124
|
readonly graph?: WorkflowGraph | null;
|
|
@@ -122,14 +129,31 @@ type RenderContext$1 = {
|
|
|
122
129
|
readonly auth?: unknown;
|
|
123
130
|
readonly taskStates?: unknown;
|
|
124
131
|
readonly ralphIterations?: ReadonlyMap<string, number>;
|
|
132
|
+
readonly trigger?: RenderTrigger;
|
|
125
133
|
};
|
|
126
134
|
|
|
127
135
|
type RunResult$1 = {
|
|
128
136
|
readonly runId: string;
|
|
129
|
-
readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer";
|
|
137
|
+
readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota";
|
|
130
138
|
readonly output?: unknown;
|
|
131
139
|
readonly error?: unknown;
|
|
132
140
|
readonly nextRunId?: string;
|
|
141
|
+
/**
|
|
142
|
+
* Number of tasks that ended in a `failed` state yet did not fail the run —
|
|
143
|
+
* "masked" child failures the run-level status cannot express. Present (and
|
|
144
|
+
* `> 0`) only on a `finished` result that tolerated at least one failure
|
|
145
|
+
* (a {@link https://smithers.sh/components/task `continueOnFail`} task, or an
|
|
146
|
+
* agent task that failed transiently: rate limit, timeout, abort). A binary
|
|
147
|
+
* `finished` status would otherwise read as a clean success. See
|
|
148
|
+
* `docs/runtime/run-state.mdx`.
|
|
149
|
+
*/
|
|
150
|
+
readonly failedChildren?: number;
|
|
151
|
+
/**
|
|
152
|
+
* Task state keys (`nodeId::iteration`) of the tasks counted by
|
|
153
|
+
* {@link failedChildren}. The iteration disambiguates the same `nodeId` failing
|
|
154
|
+
* across loop/Ralph iterations.
|
|
155
|
+
*/
|
|
156
|
+
readonly failedChildKeys?: readonly string[];
|
|
133
157
|
};
|
|
134
158
|
|
|
135
159
|
type WaitReason$1 = {
|
|
@@ -151,6 +175,10 @@ type WaitReason$1 = {
|
|
|
151
175
|
readonly count: number;
|
|
152
176
|
} | {
|
|
153
177
|
readonly _tag: "ExternalTrigger";
|
|
178
|
+
} | {
|
|
179
|
+
readonly _tag: "Quota";
|
|
180
|
+
readonly quotaBlockedCount: number;
|
|
181
|
+
readonly resetAtMs?: number;
|
|
154
182
|
};
|
|
155
183
|
|
|
156
184
|
type EngineDecision$1 = {
|
|
@@ -199,7 +227,8 @@ type WorkflowSessionService$2 = {
|
|
|
199
227
|
readonly getCurrentGraph: () => Effect.Effect<WorkflowGraph | null>;
|
|
200
228
|
};
|
|
201
229
|
|
|
202
|
-
|
|
230
|
+
/** A breached Aspects budget for a task that is about to be dispatched. */
|
|
231
|
+
type AspectBudgetBreach = {
|
|
203
232
|
readonly kind: "tokens" | "latency";
|
|
204
233
|
readonly limit: number;
|
|
205
234
|
readonly current: number;
|
|
@@ -214,9 +243,16 @@ type WorkflowSessionOptions$2 = {
|
|
|
214
243
|
readonly iteration: number;
|
|
215
244
|
readonly done: boolean;
|
|
216
245
|
}>;
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
246
|
+
/**
|
|
247
|
+
* Evaluate a runnable task's Aspects budgets against the run's accumulated
|
|
248
|
+
* usage. Return the first breach, or `null`/`undefined` when within budget.
|
|
249
|
+
* Only invoked for tasks that would otherwise execute.
|
|
250
|
+
*/
|
|
251
|
+
readonly evaluateAspectBudget?: (descriptor: TaskDescriptor$4) => AspectBudgetBreach | null | undefined;
|
|
252
|
+
/** Called when a task is skipped because its budget was exceeded (`skip-remaining`). */
|
|
253
|
+
readonly onAspectBudgetSkip?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
|
|
254
|
+
/** Called when a task continues despite an exceeded budget (`warn`). */
|
|
255
|
+
readonly onAspectBudgetWarn?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
|
|
220
256
|
};
|
|
221
257
|
|
|
222
258
|
type TaskRecord$1 = {
|
|
@@ -227,10 +263,10 @@ type TaskRecord$1 = {
|
|
|
227
263
|
readonly updatedAtMs: number;
|
|
228
264
|
};
|
|
229
265
|
|
|
230
|
-
type SmithersAlertSeverity = "info" | "warning" | "critical";
|
|
231
|
-
type SmithersAlertLabels = Record<string, string>;
|
|
232
|
-
type SmithersAlertReactionKind = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
|
|
233
|
-
type SmithersAlertReaction = {
|
|
266
|
+
type SmithersAlertSeverity$1 = "info" | "warning" | "critical";
|
|
267
|
+
type SmithersAlertLabels$1 = Record<string, string>;
|
|
268
|
+
type SmithersAlertReactionKind$1 = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
|
|
269
|
+
type SmithersAlertReaction$1 = {
|
|
234
270
|
kind: "emit-only";
|
|
235
271
|
} | {
|
|
236
272
|
kind: "pause";
|
|
@@ -242,24 +278,24 @@ type SmithersAlertReaction = {
|
|
|
242
278
|
kind: "deliver";
|
|
243
279
|
destination: string;
|
|
244
280
|
};
|
|
245
|
-
type SmithersAlertReactionRef = string | SmithersAlertReaction;
|
|
246
|
-
type SmithersAlertPolicyDefaults = {
|
|
281
|
+
type SmithersAlertReactionRef$1 = string | SmithersAlertReaction$1;
|
|
282
|
+
type SmithersAlertPolicyDefaults$1 = {
|
|
247
283
|
owner?: string;
|
|
248
|
-
severity?: SmithersAlertSeverity;
|
|
284
|
+
severity?: SmithersAlertSeverity$1;
|
|
249
285
|
runbook?: string;
|
|
250
|
-
labels?: SmithersAlertLabels;
|
|
286
|
+
labels?: SmithersAlertLabels$1;
|
|
251
287
|
};
|
|
252
|
-
type SmithersAlertPolicyRule = SmithersAlertPolicyDefaults & {
|
|
288
|
+
type SmithersAlertPolicyRule$1 = SmithersAlertPolicyDefaults$1 & {
|
|
253
289
|
afterMs?: number;
|
|
254
|
-
reaction?: SmithersAlertReactionRef;
|
|
290
|
+
reaction?: SmithersAlertReactionRef$1;
|
|
255
291
|
};
|
|
256
|
-
type SmithersAlertPolicy = {
|
|
257
|
-
defaults?: SmithersAlertPolicyDefaults;
|
|
258
|
-
rules?: Record<string, SmithersAlertPolicyRule>;
|
|
259
|
-
reactions?: Record<string, SmithersAlertReaction>;
|
|
292
|
+
type SmithersAlertPolicy$1 = {
|
|
293
|
+
defaults?: SmithersAlertPolicyDefaults$1;
|
|
294
|
+
rules?: Record<string, SmithersAlertPolicyRule$1>;
|
|
295
|
+
reactions?: Record<string, SmithersAlertReaction$1>;
|
|
260
296
|
};
|
|
261
297
|
type SmithersWorkflowOptions$1 = {
|
|
262
|
-
alertPolicy?: SmithersAlertPolicy;
|
|
298
|
+
alertPolicy?: SmithersAlertPolicy$1;
|
|
263
299
|
cache?: boolean;
|
|
264
300
|
workflowHash?: string;
|
|
265
301
|
};
|
|
@@ -281,7 +317,7 @@ type RalphState$1 = {
|
|
|
281
317
|
|
|
282
318
|
type RalphStateMap$4 = Map<string, RalphState$1>;
|
|
283
319
|
|
|
284
|
-
type CachePolicy$1<Ctx =
|
|
320
|
+
type CachePolicy$1<Ctx = unknown> = {
|
|
285
321
|
by?: (ctx: Ctx) => unknown;
|
|
286
322
|
version?: string;
|
|
287
323
|
key?: string;
|
|
@@ -324,12 +360,12 @@ type TaskStateMap$3 = TaskStateMap$4;
|
|
|
324
360
|
* @returns {boolean}
|
|
325
361
|
*/
|
|
326
362
|
declare function isTerminalState(state: TaskState$1, descriptor?: Pick<TaskDescriptor$2, "continueOnFail">): boolean;
|
|
327
|
-
type TaskDescriptor$2 =
|
|
363
|
+
type TaskDescriptor$2 = _smithers_orchestrator_graph.TaskDescriptor;
|
|
328
364
|
type TaskState$1 = TaskState$2;
|
|
329
365
|
|
|
330
366
|
declare class Scheduler extends Context.TagClassShape<"Scheduler", SchedulerService> {
|
|
331
367
|
}
|
|
332
|
-
type TaskDescriptor$1 =
|
|
368
|
+
type TaskDescriptor$1 = _smithers_orchestrator_graph.TaskDescriptor;
|
|
333
369
|
type TaskStateMap$2 = TaskStateMap$4;
|
|
334
370
|
type PlanNode$3 = PlanNode$4;
|
|
335
371
|
type RalphStateMap$3 = RalphStateMap$4;
|
|
@@ -354,7 +390,7 @@ declare function buildPlanTree(xml: XmlNode | null, ralphState?: RalphStateMap$2
|
|
|
354
390
|
type PlanNode$2 = PlanNode$4;
|
|
355
391
|
type RalphMeta$1 = RalphMeta$2;
|
|
356
392
|
type RalphStateMap$2 = RalphStateMap$4;
|
|
357
|
-
type XmlNode =
|
|
393
|
+
type XmlNode = _smithers_orchestrator_graph.XmlNode;
|
|
358
394
|
|
|
359
395
|
/**
|
|
360
396
|
* @param {PlanNode | null} plan
|
|
@@ -370,7 +406,7 @@ type PlanNode$1 = PlanNode$4;
|
|
|
370
406
|
type RalphStateMap$1 = RalphStateMap$4;
|
|
371
407
|
type RetryWaitMap$1 = RetryWaitMap$3;
|
|
372
408
|
type ScheduleResult$1 = ScheduleResult$3;
|
|
373
|
-
type TaskDescriptor =
|
|
409
|
+
type TaskDescriptor = _smithers_orchestrator_graph.TaskDescriptor;
|
|
374
410
|
type TaskStateMap$1 = TaskStateMap$4;
|
|
375
411
|
|
|
376
412
|
declare class WorkflowSession extends Context.TagClassShape<"WorkflowSession", WorkflowSessionService$2> {
|
|
@@ -384,7 +420,18 @@ declare function makeWorkflowSession(options?: WorkflowSessionOptions$1): Workfl
|
|
|
384
420
|
type WorkflowSessionOptions$1 = WorkflowSessionOptions$2;
|
|
385
421
|
type WorkflowSessionService$1 = WorkflowSessionService$2;
|
|
386
422
|
|
|
387
|
-
/**
|
|
423
|
+
/**
|
|
424
|
+
* WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
|
|
425
|
+
* `makeWorkflowSession()` instance for the whole layer scope, but a workflow
|
|
426
|
+
* session carries per-run state, so sharing it across runs is a correctness bug.
|
|
427
|
+
* The engine intentionally bypasses this Tag and constructs a fresh session per
|
|
428
|
+
* run via `makeWorkflowSession()` directly — which is why nothing yields
|
|
429
|
+
* `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
|
|
430
|
+
* per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
|
|
431
|
+
* gets its own session.
|
|
432
|
+
*
|
|
433
|
+
* @type {Layer.Layer<WorkflowSession, never, never>}
|
|
434
|
+
*/
|
|
388
435
|
declare const WorkflowSessionLive: Layer.Layer<WorkflowSession, never, never>;
|
|
389
436
|
|
|
390
437
|
/**
|
|
@@ -398,7 +445,7 @@ declare function nowMs(): number;
|
|
|
398
445
|
* @param {RetryPolicy} policy
|
|
399
446
|
* @returns {Schedule.Schedule<unknown>}
|
|
400
447
|
*/
|
|
401
|
-
declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule.Schedule<unknown>;
|
|
448
|
+
declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule$1.Schedule<unknown>;
|
|
402
449
|
type RetryPolicy$2 = RetryPolicy$3;
|
|
403
450
|
|
|
404
451
|
/**
|
|
@@ -434,6 +481,14 @@ type RetryWaitMap = RetryWaitMap$3;
|
|
|
434
481
|
type RunResult = RunResult$1;
|
|
435
482
|
type ScheduleResult = ScheduleResult$3;
|
|
436
483
|
type ScheduleSnapshot = ScheduleSnapshot$1;
|
|
484
|
+
type SmithersAlertLabels = SmithersAlertLabels$1;
|
|
485
|
+
type SmithersAlertPolicy = SmithersAlertPolicy$1;
|
|
486
|
+
type SmithersAlertPolicyDefaults = SmithersAlertPolicyDefaults$1;
|
|
487
|
+
type SmithersAlertPolicyRule = SmithersAlertPolicyRule$1;
|
|
488
|
+
type SmithersAlertReaction = SmithersAlertReaction$1;
|
|
489
|
+
type SmithersAlertReactionKind = SmithersAlertReactionKind$1;
|
|
490
|
+
type SmithersAlertReactionRef = SmithersAlertReactionRef$1;
|
|
491
|
+
type SmithersAlertSeverity = SmithersAlertSeverity$1;
|
|
437
492
|
type SmithersWorkflowOptions = SmithersWorkflowOptions$1;
|
|
438
493
|
type TaskFailure = TaskFailure$1;
|
|
439
494
|
type TaskOutput = TaskOutput$1;
|
package/src/makeWorkflowSession.js
CHANGED
|
@@ -65,9 +65,10 @@ function mountedSignature(graph) {
|
|
|
65
65
|
/**
|
|
66
66
|
* @param {SessionState} state
|
|
67
67
|
* @param {number} [iterationOverride]
|
|
68
|
+
* @param {RenderContext["trigger"]} [trigger]
|
|
68
69
|
* @returns {RenderContext}
|
|
69
70
|
*/
|
|
70
|
-
function renderContext(state, iterationOverride) {
|
|
71
|
+
function renderContext(state, iterationOverride, trigger) {
|
|
71
72
|
const ralphIterations = [...state.ralphState.values()].map((value) => value.iteration);
|
|
72
73
|
return {
|
|
73
74
|
runId: state.runId,
|
|
@@ -77,6 +78,7 @@ function renderContext(state, iterationOverride) {
|
|
|
77
78
|
taskStates: cloneTaskStateMap(state.states),
|
|
78
79
|
outputs: new Map(state.outputs),
|
|
79
80
|
ralphIterations: new Map([...state.ralphState.entries()].map(([id, value]) => [id, value.iteration])),
|
|
81
|
+
...(trigger ? { trigger } : {}),
|
|
80
82
|
};
|
|
81
83
|
}
|
|
82
84
|
/**
|
|
@@ -85,23 +87,48 @@ function renderContext(state, iterationOverride) {
|
|
|
85
87
|
* @returns {WaitReason | undefined}
|
|
86
88
|
*/
|
|
87
89
|
function findWaitingReason(state, currentTimeMs) {
|
|
90
|
+
// Do a full pass to accumulate quota count and find the highest-priority
|
|
91
|
+
// non-quota wait reason. This prevents an early-return from shadowing
|
|
92
|
+
// quota-blocked tasks when mixed wait types coexist in the same run.
|
|
93
|
+
let primaryReason;
|
|
94
|
+
let quotaBlockedCount = 0;
|
|
95
|
+
let earliestQuotaResetAtMs;
|
|
88
96
|
for (const descriptor of state.descriptors.values()) {
|
|
89
97
|
const taskState = state.states.get(stateKeyFor(descriptor));
|
|
90
|
-
if (taskState === "waiting-approval") {
|
|
91
|
-
|
|
98
|
+
if (taskState === "waiting-approval" && !primaryReason) {
|
|
99
|
+
primaryReason = { _tag: "Approval", nodeId: descriptor.nodeId };
|
|
92
100
|
}
|
|
93
|
-
if (taskState === "waiting-event") {
|
|
101
|
+
else if (taskState === "waiting-event" && !primaryReason) {
|
|
94
102
|
const eventName = typeof descriptor.meta?.__eventName === "string"
|
|
95
103
|
? descriptor.meta.__eventName
|
|
96
104
|
: "";
|
|
97
|
-
|
|
105
|
+
primaryReason = { _tag: "Event", eventName };
|
|
98
106
|
}
|
|
99
|
-
if (taskState === "waiting-timer") {
|
|
100
|
-
|
|
107
|
+
else if (taskState === "waiting-timer" && !primaryReason) {
|
|
108
|
+
primaryReason = {
|
|
101
109
|
_tag: "Timer",
|
|
102
110
|
resumeAtMs: timerResumeAtMs(descriptor, currentTimeMs),
|
|
103
111
|
};
|
|
104
112
|
}
|
|
113
|
+
else if (taskState === "waiting-quota") {
|
|
114
|
+
quotaBlockedCount += 1;
|
|
115
|
+
const resetAtMs = state.quotaResetTimes.get(stateKeyFor(descriptor));
|
|
116
|
+
if (resetAtMs != null) {
|
|
117
|
+
earliestQuotaResetAtMs = earliestQuotaResetAtMs == null
|
|
118
|
+
? resetAtMs
|
|
119
|
+
: Math.min(earliestQuotaResetAtMs, resetAtMs);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
if (primaryReason) {
|
|
124
|
+
return primaryReason;
|
|
125
|
+
}
|
|
126
|
+
if (quotaBlockedCount > 0) {
|
|
127
|
+
return {
|
|
128
|
+
_tag: "Quota",
|
|
129
|
+
quotaBlockedCount,
|
|
130
|
+
...(earliestQuotaResetAtMs != null ? { resetAtMs: earliestQuotaResetAtMs } : {}),
|
|
131
|
+
};
|
|
105
132
|
}
|
|
106
133
|
return undefined;
|
|
107
134
|
}
|
|
@@ -179,6 +206,39 @@ function isRetryableFailure(descriptor, error) {
|
|
|
179
206
|
}
|
|
180
207
|
return true;
|
|
181
208
|
}
|
|
209
|
+
/**
|
|
210
|
+
* @param {unknown} error
|
|
211
|
+
* @returns {boolean}
|
|
212
|
+
*/
|
|
213
|
+
function isQuotaFailure(error) {
|
|
214
|
+
const payloadCode = error && typeof error === "object" && typeof error.code === "string"
|
|
215
|
+
? error.code
|
|
216
|
+
: undefined;
|
|
217
|
+
const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
|
|
218
|
+
? error.details
|
|
219
|
+
: undefined;
|
|
220
|
+
const normalized = toSmithersError(error);
|
|
221
|
+
const code = payloadCode ?? normalized.code;
|
|
222
|
+
if (code === "AGENT_QUOTA_EXCEEDED")
|
|
223
|
+
return true;
|
|
224
|
+
const details = payloadDetails ?? normalized.details;
|
|
225
|
+
return Boolean(details && typeof details === "object" && details.failureQuota === true);
|
|
226
|
+
}
|
|
227
|
+
/**
|
|
228
|
+
* @param {unknown} error
|
|
229
|
+
* @returns {number | undefined}
|
|
230
|
+
*/
|
|
231
|
+
function getQuotaResetAtMs(error) {
|
|
232
|
+
const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
|
|
233
|
+
? error.details
|
|
234
|
+
: undefined;
|
|
235
|
+
const normalized = toSmithersError(error);
|
|
236
|
+
const details = payloadDetails ?? normalized.details;
|
|
237
|
+
if (!details || typeof details !== "object")
|
|
238
|
+
return undefined;
|
|
239
|
+
const resetAtMs = details.quotaResetAtMs;
|
|
240
|
+
return typeof resetAtMs === "number" && Number.isFinite(resetAtMs) ? resetAtMs : undefined;
|
|
241
|
+
}
|
|
182
242
|
/**
|
|
183
243
|
* @param {unknown} error
|
|
184
244
|
* @returns {boolean}
|
|
@@ -220,7 +280,11 @@ function describeDeadlock(state) {
|
|
|
220
280
|
}
|
|
221
281
|
else {
|
|
222
282
|
const depState = state.states.get(stateKeyFor(dep)) ?? "pending";
|
|
223
|
-
|
|
283
|
+
if (depState !== "finished" &&
|
|
284
|
+
depState !== "skipped" &&
|
|
285
|
+
!(depState === "failed" && dep.continueOnFail)) {
|
|
286
|
+
unmet.push(`'${depId}' (${depState})`);
|
|
287
|
+
}
|
|
224
288
|
}
|
|
225
289
|
}
|
|
226
290
|
if (unmet.length > 0) {
|
|
@@ -270,6 +334,8 @@ export function makeWorkflowSession(options = {}) {
|
|
|
270
334
|
retryWait: new Map(),
|
|
271
335
|
approvals: new Set(),
|
|
272
336
|
ralphState: new Map(options.initialRalphState ?? []),
|
|
337
|
+
/** @type {Map<string, number>} Maps state key → quota reset timestamp (ms) */
|
|
338
|
+
quotaResetTimes: new Map(),
|
|
273
339
|
schedule: null,
|
|
274
340
|
cancelled: false,
|
|
275
341
|
lastMountedSignature: null,
|
|
@@ -287,14 +353,37 @@ export function makeWorkflowSession(options = {}) {
|
|
|
287
353
|
* @returns {EngineDecision}
|
|
288
354
|
*/
|
|
289
355
|
function finishedResult(status = "finished") {
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
output: [...state.outputs.values()].at(-1)?.output,
|
|
296
|
-
},
|
|
356
|
+
/** @type {RunResult} */
|
|
357
|
+
const result = {
|
|
358
|
+
runId: state.runId,
|
|
359
|
+
status,
|
|
360
|
+
output: [...state.outputs.values()].at(-1)?.output,
|
|
297
361
|
};
|
|
362
|
+
if (status === "finished") {
|
|
363
|
+
// At a `finished` terminal, any task still in `failed` state is a
|
|
364
|
+
// *tolerated* failure — an unhandled one would have produced a `Failed`
|
|
365
|
+
// decision via unhandledFailureDecision() and never reached here. Those
|
|
366
|
+
// are exactly the masked children (continueOnFail tasks, transient agent
|
|
367
|
+
// failures) the binary run status cannot express. Surface them so callers
|
|
368
|
+
// can detect a run that "succeeded" while children failed. See issue #295
|
|
369
|
+
// and docs/runtime/run-state.mdx.
|
|
370
|
+
//
|
|
371
|
+
// Keys are the canonical task state keys (`nodeId::iteration`), not bare
|
|
372
|
+
// node ids: a looped/Ralph workflow can fail the same nodeId across
|
|
373
|
+
// iterations, and the iteration is what disambiguates which child to
|
|
374
|
+
// inspect.
|
|
375
|
+
const failedChildKeys = [];
|
|
376
|
+
for (const [key, taskState] of state.states) {
|
|
377
|
+
if (taskState === "failed") {
|
|
378
|
+
failedChildKeys.push(key);
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
if (failedChildKeys.length > 0) {
|
|
382
|
+
result.failedChildren = failedChildKeys.length;
|
|
383
|
+
result.failedChildKeys = failedChildKeys;
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
return { _tag: "Finished", result };
|
|
298
387
|
}
|
|
299
388
|
/**
|
|
300
389
|
* @returns {ScheduleResult}
|
|
@@ -332,6 +421,7 @@ export function makeWorkflowSession(options = {}) {
|
|
|
332
421
|
state.approvals.delete(key);
|
|
333
422
|
state.retryCounts.delete(key);
|
|
334
423
|
state.failureDescriptors.delete(key);
|
|
424
|
+
state.quotaResetTimes.delete(key);
|
|
335
425
|
}
|
|
336
426
|
}
|
|
337
427
|
for (const ralph of ralphs) {
|
|
@@ -362,14 +452,16 @@ export function makeWorkflowSession(options = {}) {
|
|
|
362
452
|
state.outputs.set(key, output);
|
|
363
453
|
state.retryWait.delete(key);
|
|
364
454
|
state.failureDescriptors.delete(key);
|
|
455
|
+
state.quotaResetTimes.delete(key);
|
|
365
456
|
}
|
|
366
457
|
/**
|
|
367
458
|
* @param {number} [iteration]
|
|
459
|
+
* @param {RenderContext["trigger"]} [trigger]
|
|
368
460
|
* @returns {EngineDecision}
|
|
369
461
|
*/
|
|
370
|
-
function decideAfterOutputChange(iteration) {
|
|
462
|
+
function decideAfterOutputChange(iteration, trigger) {
|
|
371
463
|
if (options.requireRerenderOnOutputChange) {
|
|
372
|
-
return { _tag: "ReRender", context: renderContext(state, iteration) };
|
|
464
|
+
return { _tag: "ReRender", context: renderContext(state, iteration, trigger) };
|
|
373
465
|
}
|
|
374
466
|
return decide();
|
|
375
467
|
}
|
|
@@ -400,12 +492,54 @@ export function makeWorkflowSession(options = {}) {
|
|
|
400
492
|
}
|
|
401
493
|
}
|
|
402
494
|
/**
|
|
495
|
+
* @param {string} eventName
|
|
496
|
+
* @param {unknown} payload
|
|
497
|
+
* @param {string | null} correlationId
|
|
498
|
+
*/
|
|
499
|
+
function applyEventReceived(eventName, payload, correlationId) {
|
|
500
|
+
for (const descriptor of state.descriptors.values()) {
|
|
501
|
+
const key = stateKeyFor(descriptor);
|
|
502
|
+
const taskState = state.states.get(key);
|
|
503
|
+
const expected = typeof descriptor.meta?.__eventName === "string"
|
|
504
|
+
? descriptor.meta.__eventName
|
|
505
|
+
: undefined;
|
|
506
|
+
const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
|
|
507
|
+
? descriptor.meta.__correlationId
|
|
508
|
+
: undefined;
|
|
509
|
+
if (taskState === "waiting-event" &&
|
|
510
|
+
(!expected || expected === eventName) &&
|
|
511
|
+
(expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
|
|
512
|
+
state.states.set(key, "finished");
|
|
513
|
+
state.outputs.set(key, {
|
|
514
|
+
nodeId: descriptor.nodeId,
|
|
515
|
+
iteration: descriptor.iteration,
|
|
516
|
+
output: payload,
|
|
517
|
+
});
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
/**
|
|
403
522
|
* @param {TaskDescriptor} descriptor
|
|
404
523
|
* @param {unknown} error
|
|
405
524
|
* @returns {EngineDecision}
|
|
406
525
|
*/
|
|
407
526
|
function applyFailure(descriptor, error) {
|
|
408
527
|
const key = stateKeyFor(descriptor);
|
|
528
|
+
// Quota/usage-limit errors do not consume the task's retry budget.
|
|
529
|
+
// Instead, put the task into "waiting-quota" so the run can pause
|
|
530
|
+
// durably and resume cleanly after the provider resets.
|
|
531
|
+
if (isQuotaFailure(error)) {
|
|
532
|
+
state.states.set(key, "waiting-quota");
|
|
533
|
+
state.failures.set(key, error);
|
|
534
|
+
const resetAtMs = getQuotaResetAtMs(error);
|
|
535
|
+
if (resetAtMs != null) {
|
|
536
|
+
state.quotaResetTimes.set(key, resetAtMs);
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
state.quotaResetTimes.delete(key);
|
|
540
|
+
}
|
|
541
|
+
return decide();
|
|
542
|
+
}
|
|
409
543
|
const failureCount = (state.retryCounts.get(key) ?? 0) + 1;
|
|
410
544
|
state.retryCounts.set(key, failureCount);
|
|
411
545
|
const retryable = isRetryableFailure(descriptor, error);
|
|
@@ -425,7 +559,11 @@ export function makeWorkflowSession(options = {}) {
|
|
|
425
559
|
state.states.set(key, "failed");
|
|
426
560
|
state.failures.set(key, error);
|
|
427
561
|
state.failureDescriptors.set(key, descriptor);
|
|
428
|
-
return
|
|
562
|
+
return decideAfterOutputChange(descriptor.iteration, {
|
|
563
|
+
reason: "task-finished",
|
|
564
|
+
nodeId: descriptor.nodeId,
|
|
565
|
+
iteration: descriptor.iteration,
|
|
566
|
+
});
|
|
429
567
|
}
|
|
430
568
|
/**
|
|
431
569
|
* @returns {EngineDecision | null}
|
|
@@ -459,11 +597,15 @@ export function makeWorkflowSession(options = {}) {
|
|
|
459
597
|
};
|
|
460
598
|
}
|
|
461
599
|
/**
|
|
600
|
+
* @param {number} [depth] recursion depth; guarded at 10 to catch decision cycles
|
|
462
601
|
* @returns {EngineDecision}
|
|
463
602
|
*/
|
|
464
603
|
function decide(depth = 0) {
|
|
465
604
|
if (depth > 10) {
|
|
466
|
-
return {
|
|
605
|
+
return {
|
|
606
|
+
_tag: "Failed",
|
|
607
|
+
error: new SmithersError("SCHEDULER_ERROR", "Exceeded scheduler decide() depth guard.", { depth }),
|
|
608
|
+
};
|
|
467
609
|
}
|
|
468
610
|
if (state.cancelled) {
|
|
469
611
|
return finishedResult("cancelled");
|
|
@@ -623,7 +765,7 @@ export function makeWorkflowSession(options = {}) {
|
|
|
623
765
|
advanced = true;
|
|
624
766
|
}
|
|
625
767
|
if (advanced) {
|
|
626
|
-
return { _tag: "ReRender", context: renderContext(state) };
|
|
768
|
+
return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "loop-advanced" }) };
|
|
627
769
|
}
|
|
628
770
|
}
|
|
629
771
|
if (schedule.pendingExists) {
|
|
@@ -649,7 +791,7 @@ export function makeWorkflowSession(options = {}) {
|
|
|
649
791
|
const signature = mountedSignature(state.graph);
|
|
650
792
|
if (state.lastDeadlockSignature !== signature) {
|
|
651
793
|
state.lastDeadlockSignature = signature;
|
|
652
|
-
return { _tag: "ReRender", context: renderContext(state) };
|
|
794
|
+
return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "deadlock-check" }) };
|
|
653
795
|
}
|
|
654
796
|
}
|
|
655
797
|
return {
|
|
@@ -670,7 +812,7 @@ export function makeWorkflowSession(options = {}) {
|
|
|
670
812
|
const signature = mountedSignature(state.graph);
|
|
671
813
|
if (state.lastMountedSignature !== signature) {
|
|
672
814
|
state.lastMountedSignature = signature;
|
|
673
|
-
return { _tag: "ReRender", context: renderContext(state) };
|
|
815
|
+
return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "stability-check" }) };
|
|
674
816
|
}
|
|
675
817
|
}
|
|
676
818
|
return finishedResult();
|
|
@@ -686,17 +828,26 @@ export function makeWorkflowSession(options = {}) {
|
|
|
686
828
|
}
|
|
687
829
|
}),
|
|
688
830
|
taskCompleted: (output) => Effect.sync(() => {
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
831
|
+
// A completion can legitimately arrive for a task that is no longer in the
|
|
832
|
+
// current graph: a conditionally-rendered task (e.g. `{done ? <Task pr/> : null}`)
|
|
833
|
+
// whose parent re-rendered it out while it was still running in the background.
|
|
834
|
+
// That result is stale, not fatal — record it (so it is available if the task
|
|
835
|
+
// re-mounts) and let the current graph drive the next decision. Failing here
|
|
836
|
+
// would discard every other in-flight task in the run.
|
|
693
837
|
markTaskFinished(output);
|
|
694
|
-
return decideAfterOutputChange(output.iteration
|
|
838
|
+
return decideAfterOutputChange(output.iteration, {
|
|
839
|
+
reason: "task-finished",
|
|
840
|
+
nodeId: output.nodeId,
|
|
841
|
+
iteration: output.iteration,
|
|
842
|
+
});
|
|
695
843
|
}),
|
|
696
844
|
taskFailed: (failure) => Effect.sync(() => {
|
|
697
845
|
const descriptor = findDescriptor(state, failure.nodeId, failure.iteration);
|
|
698
846
|
if (!descriptor) {
|
|
699
|
-
|
|
847
|
+
// Stale failure for a task that already left the graph (see taskCompleted) —
|
|
848
|
+
// the task is gone, so its failure is moot. Re-decide on the current graph
|
|
849
|
+
// rather than failing the whole run.
|
|
850
|
+
return decide();
|
|
700
851
|
}
|
|
701
852
|
return applyFailure(descriptor, failure.error);
|
|
702
853
|
}),
|
|
@@ -727,51 +878,11 @@ export function makeWorkflowSession(options = {}) {
|
|
|
727
878
|
return decide();
|
|
728
879
|
}),
|
|
729
880
|
eventReceived: (eventName, payload, correlationId = null) => Effect.sync(() => {
|
|
730
|
-
|
|
731
|
-
const key = stateKeyFor(descriptor);
|
|
732
|
-
const taskState = state.states.get(key);
|
|
733
|
-
const expected = typeof descriptor.meta?.__eventName === "string"
|
|
734
|
-
? descriptor.meta.__eventName
|
|
735
|
-
: undefined;
|
|
736
|
-
const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
|
|
737
|
-
? descriptor.meta.__correlationId
|
|
738
|
-
: undefined;
|
|
739
|
-
if (taskState === "waiting-event" &&
|
|
740
|
-
(!expected || expected === eventName) &&
|
|
741
|
-
(expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
|
|
742
|
-
state.states.set(key, "finished");
|
|
743
|
-
state.outputs.set(key, {
|
|
744
|
-
nodeId: descriptor.nodeId,
|
|
745
|
-
iteration: descriptor.iteration,
|
|
746
|
-
output: payload,
|
|
747
|
-
});
|
|
748
|
-
}
|
|
749
|
-
}
|
|
881
|
+
applyEventReceived(eventName, payload, correlationId);
|
|
750
882
|
return decide();
|
|
751
883
|
}),
|
|
752
884
|
signalReceived: (signalName, payload, correlationId = null) => Effect.sync(() => {
|
|
753
|
-
|
|
754
|
-
const key = stateKeyFor(descriptor);
|
|
755
|
-
const taskState = state.states.get(key);
|
|
756
|
-
const expected = typeof descriptor.meta?.__signalName === "string"
|
|
757
|
-
? descriptor.meta.__signalName
|
|
758
|
-
: typeof descriptor.meta?.__eventName === "string"
|
|
759
|
-
? descriptor.meta.__eventName
|
|
760
|
-
: undefined;
|
|
761
|
-
const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
|
|
762
|
-
? descriptor.meta.__correlationId
|
|
763
|
-
: undefined;
|
|
764
|
-
if (taskState === "waiting-event" &&
|
|
765
|
-
(!expected || expected === signalName) &&
|
|
766
|
-
(expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
|
|
767
|
-
state.states.set(key, "finished");
|
|
768
|
-
state.outputs.set(key, {
|
|
769
|
-
nodeId: descriptor.nodeId,
|
|
770
|
-
iteration: descriptor.iteration,
|
|
771
|
-
output: payload,
|
|
772
|
-
});
|
|
773
|
-
}
|
|
774
|
-
}
|
|
885
|
+
applyEventReceived(signalName, payload, correlationId);
|
|
775
886
|
return decide();
|
|
776
887
|
}),
|
|
777
888
|
timerFired: (nodeId, firedAtMs = nowMs()) => Effect.sync(() => {
|
|
@@ -788,7 +899,11 @@ export function makeWorkflowSession(options = {}) {
|
|
|
788
899
|
iteration: descriptor.iteration,
|
|
789
900
|
output: { firedAtMs },
|
|
790
901
|
});
|
|
791
|
-
return decideAfterOutputChange(descriptor.iteration
|
|
902
|
+
return decideAfterOutputChange(descriptor.iteration, {
|
|
903
|
+
reason: "timer-fired",
|
|
904
|
+
nodeId: descriptor.nodeId,
|
|
905
|
+
iteration: descriptor.iteration,
|
|
906
|
+
});
|
|
792
907
|
}),
|
|
793
908
|
hotReloaded: (graph) => Effect.sync(() => {
|
|
794
909
|
try {
|
|
@@ -822,7 +937,11 @@ export function makeWorkflowSession(options = {}) {
|
|
|
822
937
|
usage: output.usage ?? null,
|
|
823
938
|
output: output.output,
|
|
824
939
|
});
|
|
825
|
-
return decideAfterOutputChange(output.iteration
|
|
940
|
+
return decideAfterOutputChange(output.iteration, {
|
|
941
|
+
reason: "cache-resolved",
|
|
942
|
+
nodeId: output.nodeId,
|
|
943
|
+
iteration: output.iteration,
|
|
944
|
+
});
|
|
826
945
|
}),
|
|
827
946
|
cacheMissed: (nodeId, iteration) => Effect.sync(() => {
|
|
828
947
|
const descriptor = findDescriptor(state, nodeId, iteration);
|
package/src/scheduleTasks.js
CHANGED
|
@@ -123,6 +123,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
123
123
|
}
|
|
124
124
|
/**
|
|
125
125
|
* @param {PlanNode} node
|
|
126
|
+
* @param {{ includeContinuedFailures?: boolean }} [options]
|
|
126
127
|
* @returns {{ readonly terminal: boolean; readonly failed: boolean }}
|
|
127
128
|
*/
|
|
128
129
|
function inspect(node, options = {}) {
|