@smithers-orchestrator/scheduler 0.24.2 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/scheduler",
3
- "version": "0.24.2",
3
+ "version": "0.25.0",
4
4
  "description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -176,8 +176,8 @@
176
176
  ],
177
177
  "dependencies": {
178
178
  "effect": "^3.21.1",
179
- "@smithers-orchestrator/errors": "0.24.2",
180
- "@smithers-orchestrator/graph": "0.24.2"
179
+ "@smithers-orchestrator/errors": "0.25.0",
180
+ "@smithers-orchestrator/graph": "0.25.0"
181
181
  },
182
182
  "devDependencies": {
183
183
  "@types/bun": "latest",
@@ -1,6 +1,21 @@
1
1
  import type { WorkflowGraph } from "@smithers-orchestrator/graph";
2
2
  import type { TaskOutput } from "./TaskOutput.ts";
3
3
 
4
+ export type RenderTriggerReason =
5
+ | "task-finished"
6
+ | "timer-fired"
7
+ | "cache-resolved"
8
+ | "loop-advanced"
9
+ | "deadlock-check"
10
+ | "stability-check"
11
+ | (string & {});
12
+
13
+ export type RenderTrigger = {
14
+ readonly reason: RenderTriggerReason;
15
+ readonly nodeId?: string;
16
+ readonly iteration?: number;
17
+ };
18
+
4
19
  export type RenderContext = {
5
20
  readonly runId: string;
6
21
  readonly graph?: WorkflowGraph | null;
@@ -11,4 +26,5 @@ export type RenderContext = {
11
26
  readonly auth?: unknown;
12
27
  readonly taskStates?: unknown;
13
28
  readonly ralphIterations?: ReadonlyMap<string, number>;
29
+ readonly trigger?: RenderTrigger;
14
30
  };
package/src/RunResult.ts CHANGED
@@ -8,7 +8,8 @@ export type RunResult = {
8
8
  | "continued"
9
9
  | "waiting-approval"
10
10
  | "waiting-event"
11
- | "waiting-timer";
11
+ | "waiting-timer"
12
+ | "waiting-quota";
12
13
  readonly output?: unknown;
13
14
  readonly error?: unknown;
14
15
  readonly nextRunId?: string;
package/src/TaskState.ts CHANGED
@@ -3,6 +3,7 @@ export type TaskState =
3
3
  | "waiting-approval"
4
4
  | "waiting-event"
5
5
  | "waiting-timer"
6
+ | "waiting-quota"
6
7
  | "in-progress"
7
8
  | "finished"
8
9
  | "failed"
package/src/WaitReason.ts CHANGED
@@ -5,4 +5,9 @@ export type WaitReason =
5
5
  | { readonly _tag: "RetryBackoff"; readonly waitMs: number }
6
6
  | { readonly _tag: "HotReload" }
7
7
  | { readonly _tag: "OrphanRecovery"; readonly count: number }
8
- | { readonly _tag: "ExternalTrigger" };
8
+ | { readonly _tag: "ExternalTrigger" }
9
+ | {
10
+ readonly _tag: "Quota";
11
+ readonly quotaBlockedCount: number;
12
+ readonly resetAtMs?: number;
13
+ };
@@ -2,5 +2,16 @@ import { Layer } from "effect";
2
2
  import { WorkflowSession } from "./WorkflowSession.js";
3
3
  import { makeWorkflowSession } from "./makeWorkflowSession.js";
4
4
 
5
- /** @type {Layer.Layer<WorkflowSession, never, never>} */
5
+ /**
6
+ * WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
7
+ * `makeWorkflowSession()` instance for the whole layer scope, but a workflow
8
+ * session carries per-run state, so sharing it across runs is a correctness bug.
9
+ * The engine intentionally bypasses this Tag and constructs a fresh session per
10
+ * run via `makeWorkflowSession()` directly — which is why nothing yields
11
+ * `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
12
+ * per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
13
+ * gets its own session.
14
+ *
15
+ * @type {Layer.Layer<WorkflowSession, never, never>}
16
+ */
6
17
  export const WorkflowSessionLive = Layer.sync(WorkflowSession, makeWorkflowSession);
package/src/index.d.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  import * as effect from 'effect';
2
- import { Context, Layer, Effect, Schedule } from 'effect';
3
- import * as _smithers_graph from '@smithers-orchestrator/graph';
2
+ import { Context, Layer, Effect, Schedule as Schedule$1 } from 'effect';
3
+ import * as _smithers_orchestrator_graph from '@smithers-orchestrator/graph';
4
4
  import { TaskDescriptor as TaskDescriptor$3, WorkflowGraph } from '@smithers-orchestrator/graph';
5
+ import { TaskDescriptor as TaskDescriptor$4 } from '@smithers-orchestrator/graph/TaskDescriptor';
5
6
 
6
- type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
7
+ type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
7
8
 
8
9
  type TaskStateMap$4 = Map<string, TaskState$2>;
9
10
 
@@ -112,6 +113,12 @@ type TaskOutput$1 = {
112
113
  readonly usage?: TokenUsage$1 | null;
113
114
  };
114
115
 
116
+ type RenderTriggerReason = "task-finished" | "timer-fired" | "cache-resolved" | "loop-advanced" | "deadlock-check" | "stability-check" | (string & {});
117
+ type RenderTrigger = {
118
+ readonly reason: RenderTriggerReason;
119
+ readonly nodeId?: string;
120
+ readonly iteration?: number;
121
+ };
115
122
  type RenderContext$1 = {
116
123
  readonly runId: string;
117
124
  readonly graph?: WorkflowGraph | null;
@@ -122,11 +129,12 @@ type RenderContext$1 = {
122
129
  readonly auth?: unknown;
123
130
  readonly taskStates?: unknown;
124
131
  readonly ralphIterations?: ReadonlyMap<string, number>;
132
+ readonly trigger?: RenderTrigger;
125
133
  };
126
134
 
127
135
  type RunResult$1 = {
128
136
  readonly runId: string;
129
- readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer";
137
+ readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota";
130
138
  readonly output?: unknown;
131
139
  readonly error?: unknown;
132
140
  readonly nextRunId?: string;
@@ -151,6 +159,10 @@ type WaitReason$1 = {
151
159
  readonly count: number;
152
160
  } | {
153
161
  readonly _tag: "ExternalTrigger";
162
+ } | {
163
+ readonly _tag: "Quota";
164
+ readonly quotaBlockedCount: number;
165
+ readonly resetAtMs?: number;
154
166
  };
155
167
 
156
168
  type EngineDecision$1 = {
@@ -199,7 +211,8 @@ type WorkflowSessionService$2 = {
199
211
  readonly getCurrentGraph: () => Effect.Effect<WorkflowGraph | null>;
200
212
  };
201
213
 
202
- type AspectBudgetBreach$1 = {
214
+ /** A breached Aspects budget for a task that is about to be dispatched. */
215
+ type AspectBudgetBreach = {
203
216
  readonly kind: "tokens" | "latency";
204
217
  readonly limit: number;
205
218
  readonly current: number;
@@ -214,9 +227,16 @@ type WorkflowSessionOptions$2 = {
214
227
  readonly iteration: number;
215
228
  readonly done: boolean;
216
229
  }>;
217
- readonly evaluateAspectBudget?: (descriptor: TaskDescriptor$3) => AspectBudgetBreach$1 | null | undefined;
218
- readonly onAspectBudgetSkip?: (descriptor: TaskDescriptor$3, breach: AspectBudgetBreach$1) => void;
219
- readonly onAspectBudgetWarn?: (descriptor: TaskDescriptor$3, breach: AspectBudgetBreach$1) => void;
230
+ /**
231
+ * Evaluate a runnable task's Aspects budgets against the run's accumulated
232
+ * usage. Return the first breach, or `null`/`undefined` when within budget.
233
+ * Only invoked for tasks that would otherwise execute.
234
+ */
235
+ readonly evaluateAspectBudget?: (descriptor: TaskDescriptor$4) => AspectBudgetBreach | null | undefined;
236
+ /** Called when a task is skipped because its budget was exceeded (`skip-remaining`). */
237
+ readonly onAspectBudgetSkip?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
238
+ /** Called when a task continues despite an exceeded budget (`warn`). */
239
+ readonly onAspectBudgetWarn?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
220
240
  };
221
241
 
222
242
  type TaskRecord$1 = {
@@ -227,10 +247,10 @@ type TaskRecord$1 = {
227
247
  readonly updatedAtMs: number;
228
248
  };
229
249
 
230
- type SmithersAlertSeverity = "info" | "warning" | "critical";
231
- type SmithersAlertLabels = Record<string, string>;
232
- type SmithersAlertReactionKind = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
233
- type SmithersAlertReaction = {
250
+ type SmithersAlertSeverity$1 = "info" | "warning" | "critical";
251
+ type SmithersAlertLabels$1 = Record<string, string>;
252
+ type SmithersAlertReactionKind$1 = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
253
+ type SmithersAlertReaction$1 = {
234
254
  kind: "emit-only";
235
255
  } | {
236
256
  kind: "pause";
@@ -242,24 +262,24 @@ type SmithersAlertReaction = {
242
262
  kind: "deliver";
243
263
  destination: string;
244
264
  };
245
- type SmithersAlertReactionRef = string | SmithersAlertReaction;
246
- type SmithersAlertPolicyDefaults = {
265
+ type SmithersAlertReactionRef$1 = string | SmithersAlertReaction$1;
266
+ type SmithersAlertPolicyDefaults$1 = {
247
267
  owner?: string;
248
- severity?: SmithersAlertSeverity;
268
+ severity?: SmithersAlertSeverity$1;
249
269
  runbook?: string;
250
- labels?: SmithersAlertLabels;
270
+ labels?: SmithersAlertLabels$1;
251
271
  };
252
- type SmithersAlertPolicyRule = SmithersAlertPolicyDefaults & {
272
+ type SmithersAlertPolicyRule$1 = SmithersAlertPolicyDefaults$1 & {
253
273
  afterMs?: number;
254
- reaction?: SmithersAlertReactionRef;
274
+ reaction?: SmithersAlertReactionRef$1;
255
275
  };
256
- type SmithersAlertPolicy = {
257
- defaults?: SmithersAlertPolicyDefaults;
258
- rules?: Record<string, SmithersAlertPolicyRule>;
259
- reactions?: Record<string, SmithersAlertReaction>;
276
+ type SmithersAlertPolicy$1 = {
277
+ defaults?: SmithersAlertPolicyDefaults$1;
278
+ rules?: Record<string, SmithersAlertPolicyRule$1>;
279
+ reactions?: Record<string, SmithersAlertReaction$1>;
260
280
  };
261
281
  type SmithersWorkflowOptions$1 = {
262
- alertPolicy?: SmithersAlertPolicy;
282
+ alertPolicy?: SmithersAlertPolicy$1;
263
283
  cache?: boolean;
264
284
  workflowHash?: string;
265
285
  };
@@ -281,7 +301,7 @@ type RalphState$1 = {
281
301
 
282
302
  type RalphStateMap$4 = Map<string, RalphState$1>;
283
303
 
284
- type CachePolicy$1<Ctx = any> = {
304
+ type CachePolicy$1<Ctx = unknown> = {
285
305
  by?: (ctx: Ctx) => unknown;
286
306
  version?: string;
287
307
  key?: string;
@@ -324,12 +344,12 @@ type TaskStateMap$3 = TaskStateMap$4;
324
344
  * @returns {boolean}
325
345
  */
326
346
  declare function isTerminalState(state: TaskState$1, descriptor?: Pick<TaskDescriptor$2, "continueOnFail">): boolean;
327
- type TaskDescriptor$2 = _smithers_graph.TaskDescriptor;
347
+ type TaskDescriptor$2 = _smithers_orchestrator_graph.TaskDescriptor;
328
348
  type TaskState$1 = TaskState$2;
329
349
 
330
350
  declare class Scheduler extends Context.TagClassShape<"Scheduler", SchedulerService> {
331
351
  }
332
- type TaskDescriptor$1 = _smithers_graph.TaskDescriptor;
352
+ type TaskDescriptor$1 = _smithers_orchestrator_graph.TaskDescriptor;
333
353
  type TaskStateMap$2 = TaskStateMap$4;
334
354
  type PlanNode$3 = PlanNode$4;
335
355
  type RalphStateMap$3 = RalphStateMap$4;
@@ -354,7 +374,7 @@ declare function buildPlanTree(xml: XmlNode | null, ralphState?: RalphStateMap$2
354
374
  type PlanNode$2 = PlanNode$4;
355
375
  type RalphMeta$1 = RalphMeta$2;
356
376
  type RalphStateMap$2 = RalphStateMap$4;
357
- type XmlNode = _smithers_graph.XmlNode;
377
+ type XmlNode = _smithers_orchestrator_graph.XmlNode;
358
378
 
359
379
  /**
360
380
  * @param {PlanNode | null} plan
@@ -370,7 +390,7 @@ type PlanNode$1 = PlanNode$4;
370
390
  type RalphStateMap$1 = RalphStateMap$4;
371
391
  type RetryWaitMap$1 = RetryWaitMap$3;
372
392
  type ScheduleResult$1 = ScheduleResult$3;
373
- type TaskDescriptor = _smithers_graph.TaskDescriptor;
393
+ type TaskDescriptor = _smithers_orchestrator_graph.TaskDescriptor;
374
394
  type TaskStateMap$1 = TaskStateMap$4;
375
395
 
376
396
  declare class WorkflowSession extends Context.TagClassShape<"WorkflowSession", WorkflowSessionService$2> {
@@ -384,7 +404,18 @@ declare function makeWorkflowSession(options?: WorkflowSessionOptions$1): Workfl
384
404
  type WorkflowSessionOptions$1 = WorkflowSessionOptions$2;
385
405
  type WorkflowSessionService$1 = WorkflowSessionService$2;
386
406
 
387
- /** @type {Layer.Layer<WorkflowSession, never, never>} */
407
+ /**
408
+ * WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
409
+ * `makeWorkflowSession()` instance for the whole layer scope, but a workflow
410
+ * session carries per-run state, so sharing it across runs is a correctness bug.
411
+ * The engine intentionally bypasses this Tag and constructs a fresh session per
412
+ * run via `makeWorkflowSession()` directly — which is why nothing yields
413
+ * `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
414
+ * per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
415
+ * gets its own session.
416
+ *
417
+ * @type {Layer.Layer<WorkflowSession, never, never>}
418
+ */
388
419
  declare const WorkflowSessionLive: Layer.Layer<WorkflowSession, never, never>;
389
420
 
390
421
  /**
@@ -398,7 +429,7 @@ declare function nowMs(): number;
398
429
  * @param {RetryPolicy} policy
399
430
  * @returns {Schedule.Schedule<unknown>}
400
431
  */
401
- declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule.Schedule<unknown>;
432
+ declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule$1.Schedule<unknown>;
402
433
  type RetryPolicy$2 = RetryPolicy$3;
403
434
 
404
435
  /**
@@ -434,6 +465,14 @@ type RetryWaitMap = RetryWaitMap$3;
434
465
  type RunResult = RunResult$1;
435
466
  type ScheduleResult = ScheduleResult$3;
436
467
  type ScheduleSnapshot = ScheduleSnapshot$1;
468
+ type SmithersAlertLabels = SmithersAlertLabels$1;
469
+ type SmithersAlertPolicy = SmithersAlertPolicy$1;
470
+ type SmithersAlertPolicyDefaults = SmithersAlertPolicyDefaults$1;
471
+ type SmithersAlertPolicyRule = SmithersAlertPolicyRule$1;
472
+ type SmithersAlertReaction = SmithersAlertReaction$1;
473
+ type SmithersAlertReactionKind = SmithersAlertReactionKind$1;
474
+ type SmithersAlertReactionRef = SmithersAlertReactionRef$1;
475
+ type SmithersAlertSeverity = SmithersAlertSeverity$1;
437
476
  type SmithersWorkflowOptions = SmithersWorkflowOptions$1;
438
477
  type TaskFailure = TaskFailure$1;
439
478
  type TaskOutput = TaskOutput$1;
@@ -65,9 +65,10 @@ function mountedSignature(graph) {
65
65
  /**
66
66
  * @param {SessionState} state
67
67
  * @param {number} [iterationOverride]
68
+ * @param {RenderContext["trigger"]} [trigger]
68
69
  * @returns {RenderContext}
69
70
  */
70
- function renderContext(state, iterationOverride) {
71
+ function renderContext(state, iterationOverride, trigger) {
71
72
  const ralphIterations = [...state.ralphState.values()].map((value) => value.iteration);
72
73
  return {
73
74
  runId: state.runId,
@@ -77,6 +78,7 @@ function renderContext(state, iterationOverride) {
77
78
  taskStates: cloneTaskStateMap(state.states),
78
79
  outputs: new Map(state.outputs),
79
80
  ralphIterations: new Map([...state.ralphState.entries()].map(([id, value]) => [id, value.iteration])),
81
+ ...(trigger ? { trigger } : {}),
80
82
  };
81
83
  }
82
84
  /**
@@ -85,23 +87,48 @@ function renderContext(state, iterationOverride) {
85
87
  * @returns {WaitReason | undefined}
86
88
  */
87
89
  function findWaitingReason(state, currentTimeMs) {
90
+ // Do a full pass to accumulate the quota-blocked count and remember the first
91
+ // non-quota wait reason encountered. This prevents an early return from shadowing
92
+ // quota-blocked tasks when mixed wait types coexist in the same run.
93
+ let primaryReason;
94
+ let quotaBlockedCount = 0;
95
+ let earliestQuotaResetAtMs;
88
96
  for (const descriptor of state.descriptors.values()) {
89
97
  const taskState = state.states.get(stateKeyFor(descriptor));
90
- if (taskState === "waiting-approval") {
91
- return { _tag: "Approval", nodeId: descriptor.nodeId };
98
+ if (taskState === "waiting-approval" && !primaryReason) {
99
+ primaryReason = { _tag: "Approval", nodeId: descriptor.nodeId };
92
100
  }
93
- if (taskState === "waiting-event") {
101
+ else if (taskState === "waiting-event" && !primaryReason) {
94
102
  const eventName = typeof descriptor.meta?.__eventName === "string"
95
103
  ? descriptor.meta.__eventName
96
104
  : "";
97
- return { _tag: "Event", eventName };
105
+ primaryReason = { _tag: "Event", eventName };
98
106
  }
99
- if (taskState === "waiting-timer") {
100
- return {
107
+ else if (taskState === "waiting-timer" && !primaryReason) {
108
+ primaryReason = {
101
109
  _tag: "Timer",
102
110
  resumeAtMs: timerResumeAtMs(descriptor, currentTimeMs),
103
111
  };
104
112
  }
113
+ else if (taskState === "waiting-quota") {
114
+ quotaBlockedCount += 1;
115
+ const resetAtMs = state.quotaResetTimes.get(stateKeyFor(descriptor));
116
+ if (resetAtMs != null) {
117
+ earliestQuotaResetAtMs = earliestQuotaResetAtMs == null
118
+ ? resetAtMs
119
+ : Math.min(earliestQuotaResetAtMs, resetAtMs);
120
+ }
121
+ }
122
+ }
123
+ if (primaryReason) {
124
+ return primaryReason;
125
+ }
126
+ if (quotaBlockedCount > 0) {
127
+ return {
128
+ _tag: "Quota",
129
+ quotaBlockedCount,
130
+ ...(earliestQuotaResetAtMs != null ? { resetAtMs: earliestQuotaResetAtMs } : {}),
131
+ };
105
132
  }
106
133
  return undefined;
107
134
  }
@@ -179,6 +206,39 @@ function isRetryableFailure(descriptor, error) {
179
206
  }
180
207
  return true;
181
208
  }
209
+ /**
210
+ * @param {unknown} error
211
+ * @returns {boolean}
212
+ */
213
+ function isQuotaFailure(error) {
214
+ const payloadCode = error && typeof error === "object" && typeof error.code === "string"
215
+ ? error.code
216
+ : undefined;
217
+ const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
218
+ ? error.details
219
+ : undefined;
220
+ const normalized = toSmithersError(error);
221
+ const code = payloadCode ?? normalized.code;
222
+ if (code === "AGENT_QUOTA_EXCEEDED")
223
+ return true;
224
+ const details = payloadDetails ?? normalized.details;
225
+ return Boolean(details && typeof details === "object" && details.failureQuota === true);
226
+ }
227
+ /**
228
+ * @param {unknown} error
229
+ * @returns {number | undefined}
230
+ */
231
+ function getQuotaResetAtMs(error) {
232
+ const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
233
+ ? error.details
234
+ : undefined;
235
+ const normalized = toSmithersError(error);
236
+ const details = payloadDetails ?? normalized.details;
237
+ if (!details || typeof details !== "object")
238
+ return undefined;
239
+ const resetAtMs = details.quotaResetAtMs;
240
+ return typeof resetAtMs === "number" && Number.isFinite(resetAtMs) ? resetAtMs : undefined;
241
+ }
182
242
  /**
183
243
  * @param {unknown} error
184
244
  * @returns {boolean}
@@ -220,7 +280,11 @@ function describeDeadlock(state) {
220
280
  }
221
281
  else {
222
282
  const depState = state.states.get(stateKeyFor(dep)) ?? "pending";
223
- unmet.push(`'${depId}' (${depState})`);
283
+ if (depState !== "finished" &&
284
+ depState !== "skipped" &&
285
+ !(depState === "failed" && dep.continueOnFail)) {
286
+ unmet.push(`'${depId}' (${depState})`);
287
+ }
224
288
  }
225
289
  }
226
290
  if (unmet.length > 0) {
@@ -270,6 +334,8 @@ export function makeWorkflowSession(options = {}) {
270
334
  retryWait: new Map(),
271
335
  approvals: new Set(),
272
336
  ralphState: new Map(options.initialRalphState ?? []),
337
+ /** @type {Map<string, number>} Maps state key → quota reset timestamp (ms) */
338
+ quotaResetTimes: new Map(),
273
339
  schedule: null,
274
340
  cancelled: false,
275
341
  lastMountedSignature: null,
@@ -332,6 +398,7 @@ export function makeWorkflowSession(options = {}) {
332
398
  state.approvals.delete(key);
333
399
  state.retryCounts.delete(key);
334
400
  state.failureDescriptors.delete(key);
401
+ state.quotaResetTimes.delete(key);
335
402
  }
336
403
  }
337
404
  for (const ralph of ralphs) {
@@ -362,14 +429,16 @@ export function makeWorkflowSession(options = {}) {
362
429
  state.outputs.set(key, output);
363
430
  state.retryWait.delete(key);
364
431
  state.failureDescriptors.delete(key);
432
+ state.quotaResetTimes.delete(key);
365
433
  }
366
434
  /**
367
435
  * @param {number} [iteration]
436
+ * @param {RenderContext["trigger"]} [trigger]
368
437
  * @returns {EngineDecision}
369
438
  */
370
- function decideAfterOutputChange(iteration) {
439
+ function decideAfterOutputChange(iteration, trigger) {
371
440
  if (options.requireRerenderOnOutputChange) {
372
- return { _tag: "ReRender", context: renderContext(state, iteration) };
441
+ return { _tag: "ReRender", context: renderContext(state, iteration, trigger) };
373
442
  }
374
443
  return decide();
375
444
  }
@@ -400,12 +469,54 @@ export function makeWorkflowSession(options = {}) {
400
469
  }
401
470
  }
402
471
  /**
472
+ * @param {string} eventName
473
+ * @param {unknown} payload
474
+ * @param {string | null} correlationId
475
+ */
476
+ function applyEventReceived(eventName, payload, correlationId) {
477
+ for (const descriptor of state.descriptors.values()) {
478
+ const key = stateKeyFor(descriptor);
479
+ const taskState = state.states.get(key);
480
+ const expected = typeof descriptor.meta?.__eventName === "string"
481
+ ? descriptor.meta.__eventName
482
+ : undefined;
483
+ const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
484
+ ? descriptor.meta.__correlationId
485
+ : undefined;
486
+ if (taskState === "waiting-event" &&
487
+ (!expected || expected === eventName) &&
488
+ (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
489
+ state.states.set(key, "finished");
490
+ state.outputs.set(key, {
491
+ nodeId: descriptor.nodeId,
492
+ iteration: descriptor.iteration,
493
+ output: payload,
494
+ });
495
+ }
496
+ }
497
+ }
498
+ /**
403
499
  * @param {TaskDescriptor} descriptor
404
500
  * @param {unknown} error
405
501
  * @returns {EngineDecision}
406
502
  */
407
503
  function applyFailure(descriptor, error) {
408
504
  const key = stateKeyFor(descriptor);
505
+ // Quota/usage-limit errors do not consume the task's retry budget.
506
+ // Instead, put the task into "waiting-quota" so the run can pause
507
+ // durably and resume cleanly after the provider resets.
508
+ if (isQuotaFailure(error)) {
509
+ state.states.set(key, "waiting-quota");
510
+ state.failures.set(key, error);
511
+ const resetAtMs = getQuotaResetAtMs(error);
512
+ if (resetAtMs != null) {
513
+ state.quotaResetTimes.set(key, resetAtMs);
514
+ }
515
+ else {
516
+ state.quotaResetTimes.delete(key);
517
+ }
518
+ return decide();
519
+ }
409
520
  const failureCount = (state.retryCounts.get(key) ?? 0) + 1;
410
521
  state.retryCounts.set(key, failureCount);
411
522
  const retryable = isRetryableFailure(descriptor, error);
@@ -425,7 +536,11 @@ export function makeWorkflowSession(options = {}) {
425
536
  state.states.set(key, "failed");
426
537
  state.failures.set(key, error);
427
538
  state.failureDescriptors.set(key, descriptor);
428
- return decide();
539
+ return decideAfterOutputChange(descriptor.iteration, {
540
+ reason: "task-finished",
541
+ nodeId: descriptor.nodeId,
542
+ iteration: descriptor.iteration,
543
+ });
429
544
  }
430
545
  /**
431
546
  * @returns {EngineDecision | null}
@@ -459,11 +574,15 @@ export function makeWorkflowSession(options = {}) {
459
574
  };
460
575
  }
461
576
  /**
577
+ * @param {number} [depth] recursion depth; guarded at 10 to catch decision cycles
462
578
  * @returns {EngineDecision}
463
579
  */
464
580
  function decide(depth = 0) {
465
581
  if (depth > 10) {
466
- return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
582
+ return {
583
+ _tag: "Failed",
584
+ error: new SmithersError("SCHEDULER_ERROR", "Exceeded scheduler decide() depth guard.", { depth }),
585
+ };
467
586
  }
468
587
  if (state.cancelled) {
469
588
  return finishedResult("cancelled");
@@ -623,7 +742,7 @@ export function makeWorkflowSession(options = {}) {
623
742
  advanced = true;
624
743
  }
625
744
  if (advanced) {
626
- return { _tag: "ReRender", context: renderContext(state) };
745
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "loop-advanced" }) };
627
746
  }
628
747
  }
629
748
  if (schedule.pendingExists) {
@@ -649,7 +768,7 @@ export function makeWorkflowSession(options = {}) {
649
768
  const signature = mountedSignature(state.graph);
650
769
  if (state.lastDeadlockSignature !== signature) {
651
770
  state.lastDeadlockSignature = signature;
652
- return { _tag: "ReRender", context: renderContext(state) };
771
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "deadlock-check" }) };
653
772
  }
654
773
  }
655
774
  return {
@@ -670,7 +789,7 @@ export function makeWorkflowSession(options = {}) {
670
789
  const signature = mountedSignature(state.graph);
671
790
  if (state.lastMountedSignature !== signature) {
672
791
  state.lastMountedSignature = signature;
673
- return { _tag: "ReRender", context: renderContext(state) };
792
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "stability-check" }) };
674
793
  }
675
794
  }
676
795
  return finishedResult();
@@ -686,17 +805,26 @@ export function makeWorkflowSession(options = {}) {
686
805
  }
687
806
  }),
688
807
  taskCompleted: (output) => Effect.sync(() => {
689
- const descriptor = findDescriptor(state, output.nodeId, output.iteration);
690
- if (!descriptor) {
691
- return failedDecision(new SmithersError("NODE_NOT_FOUND", `Unknown task ${output.nodeId}`), "taskCompleted");
692
- }
808
+ // A completion can legitimately arrive for a task that is no longer in the
809
+ // current graph: a conditionally-rendered task (e.g. `{done ? <Task pr/> : null}`)
810
+ // whose parent re-rendered it out while it was still running in the background.
811
+ // That result is stale, not fatal — record it (so it is available if the task
812
+ // re-mounts) and let the current graph drive the next decision. Failing here
813
+ // would discard every other in-flight task in the run.
693
814
  markTaskFinished(output);
694
- return decideAfterOutputChange(output.iteration);
815
+ return decideAfterOutputChange(output.iteration, {
816
+ reason: "task-finished",
817
+ nodeId: output.nodeId,
818
+ iteration: output.iteration,
819
+ });
695
820
  }),
696
821
  taskFailed: (failure) => Effect.sync(() => {
697
822
  const descriptor = findDescriptor(state, failure.nodeId, failure.iteration);
698
823
  if (!descriptor) {
699
- return failedDecision(new SmithersError("NODE_NOT_FOUND", `Unknown task ${failure.nodeId}`), "taskFailed");
824
+ // Stale failure for a task that already left the graph (see taskCompleted):
825
+ // the task is gone, so its failure is moot. Re-decide on the current graph
826
+ // rather than failing the whole run.
827
+ return decide();
700
828
  }
701
829
  return applyFailure(descriptor, failure.error);
702
830
  }),
@@ -727,51 +855,11 @@ export function makeWorkflowSession(options = {}) {
727
855
  return decide();
728
856
  }),
729
857
  eventReceived: (eventName, payload, correlationId = null) => Effect.sync(() => {
730
- for (const descriptor of state.descriptors.values()) {
731
- const key = stateKeyFor(descriptor);
732
- const taskState = state.states.get(key);
733
- const expected = typeof descriptor.meta?.__eventName === "string"
734
- ? descriptor.meta.__eventName
735
- : undefined;
736
- const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
737
- ? descriptor.meta.__correlationId
738
- : undefined;
739
- if (taskState === "waiting-event" &&
740
- (!expected || expected === eventName) &&
741
- (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
742
- state.states.set(key, "finished");
743
- state.outputs.set(key, {
744
- nodeId: descriptor.nodeId,
745
- iteration: descriptor.iteration,
746
- output: payload,
747
- });
748
- }
749
- }
858
+ applyEventReceived(eventName, payload, correlationId);
750
859
  return decide();
751
860
  }),
752
861
  signalReceived: (signalName, payload, correlationId = null) => Effect.sync(() => {
753
- for (const descriptor of state.descriptors.values()) {
754
- const key = stateKeyFor(descriptor);
755
- const taskState = state.states.get(key);
756
- const expected = typeof descriptor.meta?.__signalName === "string"
757
- ? descriptor.meta.__signalName
758
- : typeof descriptor.meta?.__eventName === "string"
759
- ? descriptor.meta.__eventName
760
- : undefined;
761
- const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
762
- ? descriptor.meta.__correlationId
763
- : undefined;
764
- if (taskState === "waiting-event" &&
765
- (!expected || expected === signalName) &&
766
- (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
767
- state.states.set(key, "finished");
768
- state.outputs.set(key, {
769
- nodeId: descriptor.nodeId,
770
- iteration: descriptor.iteration,
771
- output: payload,
772
- });
773
- }
774
- }
862
+ applyEventReceived(signalName, payload, correlationId);
775
863
  return decide();
776
864
  }),
777
865
  timerFired: (nodeId, firedAtMs = nowMs()) => Effect.sync(() => {
@@ -788,7 +876,11 @@ export function makeWorkflowSession(options = {}) {
788
876
  iteration: descriptor.iteration,
789
877
  output: { firedAtMs },
790
878
  });
791
- return decideAfterOutputChange(descriptor.iteration);
879
+ return decideAfterOutputChange(descriptor.iteration, {
880
+ reason: "timer-fired",
881
+ nodeId: descriptor.nodeId,
882
+ iteration: descriptor.iteration,
883
+ });
792
884
  }),
793
885
  hotReloaded: (graph) => Effect.sync(() => {
794
886
  try {
@@ -822,7 +914,11 @@ export function makeWorkflowSession(options = {}) {
822
914
  usage: output.usage ?? null,
823
915
  output: output.output,
824
916
  });
825
- return decideAfterOutputChange(output.iteration);
917
+ return decideAfterOutputChange(output.iteration, {
918
+ reason: "cache-resolved",
919
+ nodeId: output.nodeId,
920
+ iteration: output.iteration,
921
+ });
826
922
  }),
827
923
  cacheMissed: (nodeId, iteration) => Effect.sync(() => {
828
924
  const descriptor = findDescriptor(state, nodeId, iteration);
@@ -123,6 +123,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
123
123
  }
124
124
  /**
125
125
  * @param {PlanNode} node
126
+ * @param {{ includeContinuedFailures?: boolean }} [options]
126
127
  * @returns {{ readonly terminal: boolean; readonly failed: boolean }}
127
128
  */
128
129
  function inspect(node, options = {}) {