@smithers-orchestrator/scheduler 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/scheduler",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -176,8 +176,8 @@
176
176
  ],
177
177
  "dependencies": {
178
178
  "effect": "^3.21.1",
179
- "@smithers-orchestrator/errors": "0.24.0",
180
- "@smithers-orchestrator/graph": "0.24.0"
179
+ "@smithers-orchestrator/errors": "0.25.0",
180
+ "@smithers-orchestrator/graph": "0.25.0"
181
181
  },
182
182
  "devDependencies": {
183
183
  "@types/bun": "latest",
@@ -1,6 +1,21 @@
1
1
  import type { WorkflowGraph } from "@smithers-orchestrator/graph";
2
2
  import type { TaskOutput } from "./TaskOutput.ts";
3
3
 
4
+ export type RenderTriggerReason =
5
+ | "task-finished"
6
+ | "timer-fired"
7
+ | "cache-resolved"
8
+ | "loop-advanced"
9
+ | "deadlock-check"
10
+ | "stability-check"
11
+ | (string & {});
12
+
13
+ export type RenderTrigger = {
14
+ readonly reason: RenderTriggerReason;
15
+ readonly nodeId?: string;
16
+ readonly iteration?: number;
17
+ };
18
+
4
19
  export type RenderContext = {
5
20
  readonly runId: string;
6
21
  readonly graph?: WorkflowGraph | null;
@@ -11,4 +26,5 @@ export type RenderContext = {
11
26
  readonly auth?: unknown;
12
27
  readonly taskStates?: unknown;
13
28
  readonly ralphIterations?: ReadonlyMap<string, number>;
29
+ readonly trigger?: RenderTrigger;
14
30
  };
package/src/RunResult.ts CHANGED
@@ -8,7 +8,8 @@ export type RunResult = {
8
8
  | "continued"
9
9
  | "waiting-approval"
10
10
  | "waiting-event"
11
- | "waiting-timer";
11
+ | "waiting-timer"
12
+ | "waiting-quota";
12
13
  readonly output?: unknown;
13
14
  readonly error?: unknown;
14
15
  readonly nextRunId?: string;
package/src/TaskState.ts CHANGED
@@ -3,6 +3,7 @@ export type TaskState =
3
3
  | "waiting-approval"
4
4
  | "waiting-event"
5
5
  | "waiting-timer"
6
+ | "waiting-quota"
6
7
  | "in-progress"
7
8
  | "finished"
8
9
  | "failed"
package/src/WaitReason.ts CHANGED
@@ -5,4 +5,9 @@ export type WaitReason =
5
5
  | { readonly _tag: "RetryBackoff"; readonly waitMs: number }
6
6
  | { readonly _tag: "HotReload" }
7
7
  | { readonly _tag: "OrphanRecovery"; readonly count: number }
8
- | { readonly _tag: "ExternalTrigger" };
8
+ | { readonly _tag: "ExternalTrigger" }
9
+ | {
10
+ readonly _tag: "Quota";
11
+ readonly quotaBlockedCount: number;
12
+ readonly resetAtMs?: number;
13
+ };
@@ -2,5 +2,16 @@ import { Layer } from "effect";
2
2
  import { WorkflowSession } from "./WorkflowSession.js";
3
3
  import { makeWorkflowSession } from "./makeWorkflowSession.js";
4
4
 
5
- /** @type {Layer.Layer<WorkflowSession, never, never>} */
5
+ /**
6
+ * WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
7
+ * `makeWorkflowSession()` instance for the whole layer scope, but a workflow
8
+ * session carries per-run state, so sharing it across runs is a correctness bug.
9
+ * The engine intentionally bypasses this Tag and constructs a fresh session per
10
+ * run via `makeWorkflowSession()` directly — which is why nothing yields
11
+ * `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
12
+ * per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
13
+ * gets its own session.
14
+ *
15
+ * @type {Layer.Layer<WorkflowSession, never, never>}
16
+ */
6
17
  export const WorkflowSessionLive = Layer.sync(WorkflowSession, makeWorkflowSession);
@@ -1,3 +1,13 @@
1
+ import type { TaskDescriptor } from "@smithers-orchestrator/graph/TaskDescriptor";
2
+
3
+ /** A breached Aspects budget for a task that is about to be dispatched. */
4
+ export type AspectBudgetBreach = {
5
+ readonly kind: "tokens" | "latency";
6
+ readonly limit: number;
7
+ readonly current: number;
8
+ readonly onExceeded: "fail" | "warn" | "skip-remaining";
9
+ };
10
+
1
11
  export type WorkflowSessionOptions = {
2
12
  readonly runId?: string;
3
13
  readonly nowMs?: () => number;
@@ -7,4 +17,22 @@ export type WorkflowSessionOptions = {
7
17
  readonly iteration: number;
8
18
  readonly done: boolean;
9
19
  }>;
20
+ /**
21
+ * Evaluate a runnable task's Aspects budgets against the run's accumulated
22
+ * usage. Return the first breach, or `null`/`undefined` when within budget.
23
+ * Only invoked for tasks that would otherwise execute.
24
+ */
25
+ readonly evaluateAspectBudget?: (
26
+ descriptor: TaskDescriptor,
27
+ ) => AspectBudgetBreach | null | undefined;
28
+ /** Called when a task is skipped because its budget was exceeded (`skip-remaining`). */
29
+ readonly onAspectBudgetSkip?: (
30
+ descriptor: TaskDescriptor,
31
+ breach: AspectBudgetBreach,
32
+ ) => void;
33
+ /** Called when a task continues despite an exceeded budget (`warn`). */
34
+ readonly onAspectBudgetWarn?: (
35
+ descriptor: TaskDescriptor,
36
+ breach: AspectBudgetBreach,
37
+ ) => void;
10
38
  };
package/src/index.d.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  import * as effect from 'effect';
2
- import { Context, Layer, Effect, Schedule } from 'effect';
3
- import * as _smithers_graph from '@smithers-orchestrator/graph';
2
+ import { Context, Layer, Effect, Schedule as Schedule$1 } from 'effect';
3
+ import * as _smithers_orchestrator_graph from '@smithers-orchestrator/graph';
4
4
  import { TaskDescriptor as TaskDescriptor$3, WorkflowGraph } from '@smithers-orchestrator/graph';
5
+ import { TaskDescriptor as TaskDescriptor$4 } from '@smithers-orchestrator/graph/TaskDescriptor';
5
6
 
6
- type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
7
+ type TaskState$2 = "pending" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota" | "in-progress" | "finished" | "failed" | "cancelled" | "skipped";
7
8
 
8
9
  type TaskStateMap$4 = Map<string, TaskState$2>;
9
10
 
@@ -112,6 +113,12 @@ type TaskOutput$1 = {
112
113
  readonly usage?: TokenUsage$1 | null;
113
114
  };
114
115
 
116
+ type RenderTriggerReason = "task-finished" | "timer-fired" | "cache-resolved" | "loop-advanced" | "deadlock-check" | "stability-check" | (string & {});
117
+ type RenderTrigger = {
118
+ readonly reason: RenderTriggerReason;
119
+ readonly nodeId?: string;
120
+ readonly iteration?: number;
121
+ };
115
122
  type RenderContext$1 = {
116
123
  readonly runId: string;
117
124
  readonly graph?: WorkflowGraph | null;
@@ -122,11 +129,12 @@ type RenderContext$1 = {
122
129
  readonly auth?: unknown;
123
130
  readonly taskStates?: unknown;
124
131
  readonly ralphIterations?: ReadonlyMap<string, number>;
132
+ readonly trigger?: RenderTrigger;
125
133
  };
126
134
 
127
135
  type RunResult$1 = {
128
136
  readonly runId: string;
129
- readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer";
137
+ readonly status: "running" | "finished" | "failed" | "cancelled" | "continued" | "waiting-approval" | "waiting-event" | "waiting-timer" | "waiting-quota";
130
138
  readonly output?: unknown;
131
139
  readonly error?: unknown;
132
140
  readonly nextRunId?: string;
@@ -151,6 +159,10 @@ type WaitReason$1 = {
151
159
  readonly count: number;
152
160
  } | {
153
161
  readonly _tag: "ExternalTrigger";
162
+ } | {
163
+ readonly _tag: "Quota";
164
+ readonly quotaBlockedCount: number;
165
+ readonly resetAtMs?: number;
154
166
  };
155
167
 
156
168
  type EngineDecision$1 = {
@@ -199,6 +211,13 @@ type WorkflowSessionService$2 = {
199
211
  readonly getCurrentGraph: () => Effect.Effect<WorkflowGraph | null>;
200
212
  };
201
213
 
214
+ /** A breached Aspects budget for a task that is about to be dispatched. */
215
+ type AspectBudgetBreach = {
216
+ readonly kind: "tokens" | "latency";
217
+ readonly limit: number;
218
+ readonly current: number;
219
+ readonly onExceeded: "fail" | "warn" | "skip-remaining";
220
+ };
202
221
  type WorkflowSessionOptions$2 = {
203
222
  readonly runId?: string;
204
223
  readonly nowMs?: () => number;
@@ -208,6 +227,16 @@ type WorkflowSessionOptions$2 = {
208
227
  readonly iteration: number;
209
228
  readonly done: boolean;
210
229
  }>;
230
+ /**
231
+ * Evaluate a runnable task's Aspects budgets against the run's accumulated
232
+ * usage. Return the first breach, or `null`/`undefined` when within budget.
233
+ * Only invoked for tasks that would otherwise execute.
234
+ */
235
+ readonly evaluateAspectBudget?: (descriptor: TaskDescriptor$4) => AspectBudgetBreach | null | undefined;
236
+ /** Called when a task is skipped because its budget was exceeded (`skip-remaining`). */
237
+ readonly onAspectBudgetSkip?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
238
+ /** Called when a task continues despite an exceeded budget (`warn`). */
239
+ readonly onAspectBudgetWarn?: (descriptor: TaskDescriptor$4, breach: AspectBudgetBreach) => void;
211
240
  };
212
241
 
213
242
  type TaskRecord$1 = {
@@ -218,10 +247,10 @@ type TaskRecord$1 = {
218
247
  readonly updatedAtMs: number;
219
248
  };
220
249
 
221
- type SmithersAlertSeverity = "info" | "warning" | "critical";
222
- type SmithersAlertLabels = Record<string, string>;
223
- type SmithersAlertReactionKind = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
224
- type SmithersAlertReaction = {
250
+ type SmithersAlertSeverity$1 = "info" | "warning" | "critical";
251
+ type SmithersAlertLabels$1 = Record<string, string>;
252
+ type SmithersAlertReactionKind$1 = "emit-only" | "pause" | "cancel" | "open-approval" | "deliver";
253
+ type SmithersAlertReaction$1 = {
225
254
  kind: "emit-only";
226
255
  } | {
227
256
  kind: "pause";
@@ -233,24 +262,24 @@ type SmithersAlertReaction = {
233
262
  kind: "deliver";
234
263
  destination: string;
235
264
  };
236
- type SmithersAlertReactionRef = string | SmithersAlertReaction;
237
- type SmithersAlertPolicyDefaults = {
265
+ type SmithersAlertReactionRef$1 = string | SmithersAlertReaction$1;
266
+ type SmithersAlertPolicyDefaults$1 = {
238
267
  owner?: string;
239
- severity?: SmithersAlertSeverity;
268
+ severity?: SmithersAlertSeverity$1;
240
269
  runbook?: string;
241
- labels?: SmithersAlertLabels;
270
+ labels?: SmithersAlertLabels$1;
242
271
  };
243
- type SmithersAlertPolicyRule = SmithersAlertPolicyDefaults & {
272
+ type SmithersAlertPolicyRule$1 = SmithersAlertPolicyDefaults$1 & {
244
273
  afterMs?: number;
245
- reaction?: SmithersAlertReactionRef;
274
+ reaction?: SmithersAlertReactionRef$1;
246
275
  };
247
- type SmithersAlertPolicy = {
248
- defaults?: SmithersAlertPolicyDefaults;
249
- rules?: Record<string, SmithersAlertPolicyRule>;
250
- reactions?: Record<string, SmithersAlertReaction>;
276
+ type SmithersAlertPolicy$1 = {
277
+ defaults?: SmithersAlertPolicyDefaults$1;
278
+ rules?: Record<string, SmithersAlertPolicyRule$1>;
279
+ reactions?: Record<string, SmithersAlertReaction$1>;
251
280
  };
252
281
  type SmithersWorkflowOptions$1 = {
253
- alertPolicy?: SmithersAlertPolicy;
282
+ alertPolicy?: SmithersAlertPolicy$1;
254
283
  cache?: boolean;
255
284
  workflowHash?: string;
256
285
  };
@@ -272,7 +301,7 @@ type RalphState$1 = {
272
301
 
273
302
  type RalphStateMap$4 = Map<string, RalphState$1>;
274
303
 
275
- type CachePolicy$1<Ctx = any> = {
304
+ type CachePolicy$1<Ctx = unknown> = {
276
305
  by?: (ctx: Ctx) => unknown;
277
306
  version?: string;
278
307
  key?: string;
@@ -315,12 +344,12 @@ type TaskStateMap$3 = TaskStateMap$4;
315
344
  * @returns {boolean}
316
345
  */
317
346
  declare function isTerminalState(state: TaskState$1, descriptor?: Pick<TaskDescriptor$2, "continueOnFail">): boolean;
318
- type TaskDescriptor$2 = _smithers_graph.TaskDescriptor;
347
+ type TaskDescriptor$2 = _smithers_orchestrator_graph.TaskDescriptor;
319
348
  type TaskState$1 = TaskState$2;
320
349
 
321
350
  declare class Scheduler extends Context.TagClassShape<"Scheduler", SchedulerService> {
322
351
  }
323
- type TaskDescriptor$1 = _smithers_graph.TaskDescriptor;
352
+ type TaskDescriptor$1 = _smithers_orchestrator_graph.TaskDescriptor;
324
353
  type TaskStateMap$2 = TaskStateMap$4;
325
354
  type PlanNode$3 = PlanNode$4;
326
355
  type RalphStateMap$3 = RalphStateMap$4;
@@ -345,7 +374,7 @@ declare function buildPlanTree(xml: XmlNode | null, ralphState?: RalphStateMap$2
345
374
  type PlanNode$2 = PlanNode$4;
346
375
  type RalphMeta$1 = RalphMeta$2;
347
376
  type RalphStateMap$2 = RalphStateMap$4;
348
- type XmlNode = _smithers_graph.XmlNode;
377
+ type XmlNode = _smithers_orchestrator_graph.XmlNode;
349
378
 
350
379
  /**
351
380
  * @param {PlanNode | null} plan
@@ -361,7 +390,7 @@ type PlanNode$1 = PlanNode$4;
361
390
  type RalphStateMap$1 = RalphStateMap$4;
362
391
  type RetryWaitMap$1 = RetryWaitMap$3;
363
392
  type ScheduleResult$1 = ScheduleResult$3;
364
- type TaskDescriptor = _smithers_graph.TaskDescriptor;
393
+ type TaskDescriptor = _smithers_orchestrator_graph.TaskDescriptor;
365
394
  type TaskStateMap$1 = TaskStateMap$4;
366
395
 
367
396
  declare class WorkflowSession extends Context.TagClassShape<"WorkflowSession", WorkflowSessionService$2> {
@@ -375,7 +404,18 @@ declare function makeWorkflowSession(options?: WorkflowSessionOptions$1): Workfl
375
404
  type WorkflowSessionOptions$1 = WorkflowSessionOptions$2;
376
405
  type WorkflowSessionService$1 = WorkflowSessionService$2;
377
406
 
378
- /** @type {Layer.Layer<WorkflowSession, never, never>} */
407
+ /**
408
+ * WARNING — do not consume this layer as-is. `Layer.sync` builds **one** shared
409
+ * `makeWorkflowSession()` instance for the whole layer scope, but a workflow
410
+ * session carries per-run state, so sharing it across runs is a correctness bug.
411
+ * The engine intentionally bypasses this Tag and constructs a fresh session per
412
+ * run via `makeWorkflowSession()` directly — which is why nothing yields
413
+ * `WorkflowSession` today. Before any consumer reads the Tag, rework this into a
414
+ * per-run/scoped provider (e.g. `Layer.scoped` or a factory service) so each run
415
+ * gets its own session.
416
+ *
417
+ * @type {Layer.Layer<WorkflowSession, never, never>}
418
+ */
379
419
  declare const WorkflowSessionLive: Layer.Layer<WorkflowSession, never, never>;
380
420
 
381
421
  /**
@@ -389,7 +429,7 @@ declare function nowMs(): number;
389
429
  * @param {RetryPolicy} policy
390
430
  * @returns {Schedule.Schedule<unknown>}
391
431
  */
392
- declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule.Schedule<unknown>;
432
+ declare function retryPolicyToSchedule(policy: RetryPolicy$2): Schedule$1.Schedule<unknown>;
393
433
  type RetryPolicy$2 = RetryPolicy$3;
394
434
 
395
435
  /**
@@ -425,6 +465,14 @@ type RetryWaitMap = RetryWaitMap$3;
425
465
  type RunResult = RunResult$1;
426
466
  type ScheduleResult = ScheduleResult$3;
427
467
  type ScheduleSnapshot = ScheduleSnapshot$1;
468
+ type SmithersAlertLabels = SmithersAlertLabels$1;
469
+ type SmithersAlertPolicy = SmithersAlertPolicy$1;
470
+ type SmithersAlertPolicyDefaults = SmithersAlertPolicyDefaults$1;
471
+ type SmithersAlertPolicyRule = SmithersAlertPolicyRule$1;
472
+ type SmithersAlertReaction = SmithersAlertReaction$1;
473
+ type SmithersAlertReactionKind = SmithersAlertReactionKind$1;
474
+ type SmithersAlertReactionRef = SmithersAlertReactionRef$1;
475
+ type SmithersAlertSeverity = SmithersAlertSeverity$1;
428
476
  type SmithersWorkflowOptions = SmithersWorkflowOptions$1;
429
477
  type TaskFailure = TaskFailure$1;
430
478
  type TaskOutput = TaskOutput$1;
@@ -65,9 +65,10 @@ function mountedSignature(graph) {
65
65
  /**
66
66
  * @param {SessionState} state
67
67
  * @param {number} [iterationOverride]
68
+ * @param {RenderContext["trigger"]} [trigger]
68
69
  * @returns {RenderContext}
69
70
  */
70
- function renderContext(state, iterationOverride) {
71
+ function renderContext(state, iterationOverride, trigger) {
71
72
  const ralphIterations = [...state.ralphState.values()].map((value) => value.iteration);
72
73
  return {
73
74
  runId: state.runId,
@@ -77,6 +78,7 @@ function renderContext(state, iterationOverride) {
77
78
  taskStates: cloneTaskStateMap(state.states),
78
79
  outputs: new Map(state.outputs),
79
80
  ralphIterations: new Map([...state.ralphState.entries()].map(([id, value]) => [id, value.iteration])),
81
+ ...(trigger ? { trigger } : {}),
80
82
  };
81
83
  }
82
84
  /**
@@ -85,23 +87,48 @@ function renderContext(state, iterationOverride) {
85
87
  * @returns {WaitReason | undefined}
86
88
  */
87
89
  function findWaitingReason(state, currentTimeMs) {
90
+ // Do a full pass to accumulate the quota count and capture the first
91
+ // non-quota wait reason encountered (approval, event, or timer), which is
92
+ // reported ahead of quota regardless of where quota-blocked tasks sit in iteration order.
93
+ let primaryReason;
94
+ let quotaBlockedCount = 0;
95
+ let earliestQuotaResetAtMs;
88
96
  for (const descriptor of state.descriptors.values()) {
89
97
  const taskState = state.states.get(stateKeyFor(descriptor));
90
- if (taskState === "waiting-approval") {
91
- return { _tag: "Approval", nodeId: descriptor.nodeId };
98
+ if (taskState === "waiting-approval" && !primaryReason) {
99
+ primaryReason = { _tag: "Approval", nodeId: descriptor.nodeId };
92
100
  }
93
- if (taskState === "waiting-event") {
101
+ else if (taskState === "waiting-event" && !primaryReason) {
94
102
  const eventName = typeof descriptor.meta?.__eventName === "string"
95
103
  ? descriptor.meta.__eventName
96
104
  : "";
97
- return { _tag: "Event", eventName };
105
+ primaryReason = { _tag: "Event", eventName };
98
106
  }
99
- if (taskState === "waiting-timer") {
100
- return {
107
+ else if (taskState === "waiting-timer" && !primaryReason) {
108
+ primaryReason = {
101
109
  _tag: "Timer",
102
110
  resumeAtMs: timerResumeAtMs(descriptor, currentTimeMs),
103
111
  };
104
112
  }
113
+ else if (taskState === "waiting-quota") {
114
+ quotaBlockedCount += 1;
115
+ const resetAtMs = state.quotaResetTimes.get(stateKeyFor(descriptor));
116
+ if (resetAtMs != null) {
117
+ earliestQuotaResetAtMs = earliestQuotaResetAtMs == null
118
+ ? resetAtMs
119
+ : Math.min(earliestQuotaResetAtMs, resetAtMs);
120
+ }
121
+ }
122
+ }
123
+ if (primaryReason) {
124
+ return primaryReason;
125
+ }
126
+ if (quotaBlockedCount > 0) {
127
+ return {
128
+ _tag: "Quota",
129
+ quotaBlockedCount,
130
+ ...(earliestQuotaResetAtMs != null ? { resetAtMs: earliestQuotaResetAtMs } : {}),
131
+ };
105
132
  }
106
133
  return undefined;
107
134
  }
@@ -179,6 +206,39 @@ function isRetryableFailure(descriptor, error) {
179
206
  }
180
207
  return true;
181
208
  }
209
+ /**
210
+ * @param {unknown} error
211
+ * @returns {boolean}
212
+ */
213
+ function isQuotaFailure(error) {
214
+ const payloadCode = error && typeof error === "object" && typeof error.code === "string"
215
+ ? error.code
216
+ : undefined;
217
+ const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
218
+ ? error.details
219
+ : undefined;
220
+ const normalized = toSmithersError(error);
221
+ const code = payloadCode ?? normalized.code;
222
+ if (code === "AGENT_QUOTA_EXCEEDED")
223
+ return true;
224
+ const details = payloadDetails ?? normalized.details;
225
+ return Boolean(details && typeof details === "object" && details.failureQuota === true);
226
+ }
227
+ /**
228
+ * @param {unknown} error
229
+ * @returns {number | undefined}
230
+ */
231
+ function getQuotaResetAtMs(error) {
232
+ const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
233
+ ? error.details
234
+ : undefined;
235
+ const normalized = toSmithersError(error);
236
+ const details = payloadDetails ?? normalized.details;
237
+ if (!details || typeof details !== "object")
238
+ return undefined;
239
+ const resetAtMs = details.quotaResetAtMs;
240
+ return typeof resetAtMs === "number" && Number.isFinite(resetAtMs) ? resetAtMs : undefined;
241
+ }
182
242
  /**
183
243
  * @param {unknown} error
184
244
  * @returns {boolean}
@@ -220,7 +280,11 @@ function describeDeadlock(state) {
220
280
  }
221
281
  else {
222
282
  const depState = state.states.get(stateKeyFor(dep)) ?? "pending";
223
- unmet.push(`'${depId}' (${depState})`);
283
+ if (depState !== "finished" &&
284
+ depState !== "skipped" &&
285
+ !(depState === "failed" && dep.continueOnFail)) {
286
+ unmet.push(`'${depId}' (${depState})`);
287
+ }
224
288
  }
225
289
  }
226
290
  if (unmet.length > 0) {
@@ -270,6 +334,8 @@ export function makeWorkflowSession(options = {}) {
270
334
  retryWait: new Map(),
271
335
  approvals: new Set(),
272
336
  ralphState: new Map(options.initialRalphState ?? []),
337
+ /** @type {Map<string, number>} Maps state key → quota reset timestamp (ms) */
338
+ quotaResetTimes: new Map(),
273
339
  schedule: null,
274
340
  cancelled: false,
275
341
  lastMountedSignature: null,
@@ -332,6 +398,7 @@ export function makeWorkflowSession(options = {}) {
332
398
  state.approvals.delete(key);
333
399
  state.retryCounts.delete(key);
334
400
  state.failureDescriptors.delete(key);
401
+ state.quotaResetTimes.delete(key);
335
402
  }
336
403
  }
337
404
  for (const ralph of ralphs) {
@@ -362,14 +429,16 @@ export function makeWorkflowSession(options = {}) {
362
429
  state.outputs.set(key, output);
363
430
  state.retryWait.delete(key);
364
431
  state.failureDescriptors.delete(key);
432
+ state.quotaResetTimes.delete(key);
365
433
  }
366
434
  /**
367
435
  * @param {number} [iteration]
436
+ * @param {RenderContext["trigger"]} [trigger]
368
437
  * @returns {EngineDecision}
369
438
  */
370
- function decideAfterOutputChange(iteration) {
439
+ function decideAfterOutputChange(iteration, trigger) {
371
440
  if (options.requireRerenderOnOutputChange) {
372
- return { _tag: "ReRender", context: renderContext(state, iteration) };
441
+ return { _tag: "ReRender", context: renderContext(state, iteration, trigger) };
373
442
  }
374
443
  return decide();
375
444
  }
@@ -400,12 +469,54 @@ export function makeWorkflowSession(options = {}) {
400
469
  }
401
470
  }
402
471
  /**
472
+ * @param {string} eventName
473
+ * @param {unknown} payload
474
+ * @param {string | null} correlationId
475
+ */
476
+ function applyEventReceived(eventName, payload, correlationId) {
477
+ for (const descriptor of state.descriptors.values()) {
478
+ const key = stateKeyFor(descriptor);
479
+ const taskState = state.states.get(key);
480
+ const expected = typeof descriptor.meta?.__eventName === "string"
481
+ ? descriptor.meta.__eventName
482
+ : undefined;
483
+ const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
484
+ ? descriptor.meta.__correlationId
485
+ : undefined;
486
+ if (taskState === "waiting-event" &&
487
+ (!expected || expected === eventName) &&
488
+ (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
489
+ state.states.set(key, "finished");
490
+ state.outputs.set(key, {
491
+ nodeId: descriptor.nodeId,
492
+ iteration: descriptor.iteration,
493
+ output: payload,
494
+ });
495
+ }
496
+ }
497
+ }
498
+ /**
403
499
  * @param {TaskDescriptor} descriptor
404
500
  * @param {unknown} error
405
501
  * @returns {EngineDecision}
406
502
  */
407
503
  function applyFailure(descriptor, error) {
408
504
  const key = stateKeyFor(descriptor);
505
+ // Quota/usage-limit errors do not consume the task's retry budget.
506
+ // Instead, put the task into "waiting-quota" so the run can pause
507
+ // durably and resume cleanly after the provider resets.
508
+ if (isQuotaFailure(error)) {
509
+ state.states.set(key, "waiting-quota");
510
+ state.failures.set(key, error);
511
+ const resetAtMs = getQuotaResetAtMs(error);
512
+ if (resetAtMs != null) {
513
+ state.quotaResetTimes.set(key, resetAtMs);
514
+ }
515
+ else {
516
+ state.quotaResetTimes.delete(key);
517
+ }
518
+ return decide();
519
+ }
409
520
  const failureCount = (state.retryCounts.get(key) ?? 0) + 1;
410
521
  state.retryCounts.set(key, failureCount);
411
522
  const retryable = isRetryableFailure(descriptor, error);
@@ -425,7 +536,11 @@ export function makeWorkflowSession(options = {}) {
425
536
  state.states.set(key, "failed");
426
537
  state.failures.set(key, error);
427
538
  state.failureDescriptors.set(key, descriptor);
428
- return decide();
539
+ return decideAfterOutputChange(descriptor.iteration, {
540
+ reason: "task-finished",
541
+ nodeId: descriptor.nodeId,
542
+ iteration: descriptor.iteration,
543
+ });
429
544
  }
430
545
  /**
431
546
  * @returns {EngineDecision | null}
@@ -459,11 +574,15 @@ export function makeWorkflowSession(options = {}) {
459
574
  };
460
575
  }
461
576
  /**
577
+ * @param {number} [depth] recursion depth; guarded at 10 to catch decision cycles
462
578
  * @returns {EngineDecision}
463
579
  */
464
580
  function decide(depth = 0) {
465
581
  if (depth > 10) {
466
- return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
582
+ return {
583
+ _tag: "Failed",
584
+ error: new SmithersError("SCHEDULER_ERROR", "Exceeded scheduler decide() depth guard.", { depth }),
585
+ };
467
586
  }
468
587
  if (state.cancelled) {
469
588
  return finishedResult("cancelled");
@@ -530,6 +649,28 @@ export function makeWorkflowSession(options = {}) {
530
649
  changed = true;
531
650
  continue;
532
651
  }
652
+ const budgetBreach = options.evaluateAspectBudget?.(task);
653
+ if (budgetBreach) {
654
+ if (budgetBreach.onExceeded === "skip-remaining") {
655
+ options.onAspectBudgetSkip?.(task, budgetBreach);
656
+ state.states.set(key, "skipped");
657
+ changed = true;
658
+ continue;
659
+ }
660
+ if (budgetBreach.onExceeded === "warn") {
661
+ options.onAspectBudgetWarn?.(task, budgetBreach);
662
+ }
663
+ else {
664
+ return {
665
+ _tag: "Failed",
666
+ error: new SmithersError("ASPECT_BUDGET_EXCEEDED", `Aspects ${budgetBreach.kind} budget exceeded for task "${task.nodeId}": ${budgetBreach.current} >= ${budgetBreach.limit}`, {
667
+ kind: budgetBreach.kind,
668
+ limit: budgetBreach.limit,
669
+ current: budgetBreach.current,
670
+ }),
671
+ };
672
+ }
673
+ }
533
674
  state.states.set(key, "in-progress");
534
675
  executable.push(task);
535
676
  changed = true;
@@ -601,7 +742,7 @@ export function makeWorkflowSession(options = {}) {
601
742
  advanced = true;
602
743
  }
603
744
  if (advanced) {
604
- return { _tag: "ReRender", context: renderContext(state) };
745
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "loop-advanced" }) };
605
746
  }
606
747
  }
607
748
  if (schedule.pendingExists) {
@@ -627,7 +768,7 @@ export function makeWorkflowSession(options = {}) {
627
768
  const signature = mountedSignature(state.graph);
628
769
  if (state.lastDeadlockSignature !== signature) {
629
770
  state.lastDeadlockSignature = signature;
630
- return { _tag: "ReRender", context: renderContext(state) };
771
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "deadlock-check" }) };
631
772
  }
632
773
  }
633
774
  return {
@@ -648,7 +789,7 @@ export function makeWorkflowSession(options = {}) {
648
789
  const signature = mountedSignature(state.graph);
649
790
  if (state.lastMountedSignature !== signature) {
650
791
  state.lastMountedSignature = signature;
651
- return { _tag: "ReRender", context: renderContext(state) };
792
+ return { _tag: "ReRender", context: renderContext(state, undefined, { reason: "stability-check" }) };
652
793
  }
653
794
  }
654
795
  return finishedResult();
@@ -664,17 +805,26 @@ export function makeWorkflowSession(options = {}) {
664
805
  }
665
806
  }),
666
807
  taskCompleted: (output) => Effect.sync(() => {
667
- const descriptor = findDescriptor(state, output.nodeId, output.iteration);
668
- if (!descriptor) {
669
- return failedDecision(new SmithersError("NODE_NOT_FOUND", `Unknown task ${output.nodeId}`), "taskCompleted");
670
- }
808
+ // A completion can legitimately arrive for a task that is no longer in the
809
+ // current graph: a conditionally-rendered task (e.g. `{done ? <Task pr/> : null}`)
810
+ // whose parent re-rendered it out while it was still running in the background.
811
+ // That result is stale, not fatal — record it (so it is available if the task
812
+ // re-mounts) and let the current graph drive the next decision. Failing here
813
+ // would discard every other in-flight task in the run.
671
814
  markTaskFinished(output);
672
- return decideAfterOutputChange(output.iteration);
815
+ return decideAfterOutputChange(output.iteration, {
816
+ reason: "task-finished",
817
+ nodeId: output.nodeId,
818
+ iteration: output.iteration,
819
+ });
673
820
  }),
674
821
  taskFailed: (failure) => Effect.sync(() => {
675
822
  const descriptor = findDescriptor(state, failure.nodeId, failure.iteration);
676
823
  if (!descriptor) {
677
- return failedDecision(new SmithersError("NODE_NOT_FOUND", `Unknown task ${failure.nodeId}`), "taskFailed");
824
+ // Stale failure for a task that already left the graph (see taskCompleted) —
825
+ // the task is gone, so its failure is moot. Re-decide on the current graph
826
+ // rather than failing the whole run.
827
+ return decide();
678
828
  }
679
829
  return applyFailure(descriptor, failure.error);
680
830
  }),
@@ -705,51 +855,11 @@ export function makeWorkflowSession(options = {}) {
705
855
  return decide();
706
856
  }),
707
857
  eventReceived: (eventName, payload, correlationId = null) => Effect.sync(() => {
708
- for (const descriptor of state.descriptors.values()) {
709
- const key = stateKeyFor(descriptor);
710
- const taskState = state.states.get(key);
711
- const expected = typeof descriptor.meta?.__eventName === "string"
712
- ? descriptor.meta.__eventName
713
- : undefined;
714
- const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
715
- ? descriptor.meta.__correlationId
716
- : undefined;
717
- if (taskState === "waiting-event" &&
718
- (!expected || expected === eventName) &&
719
- (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
720
- state.states.set(key, "finished");
721
- state.outputs.set(key, {
722
- nodeId: descriptor.nodeId,
723
- iteration: descriptor.iteration,
724
- output: payload,
725
- });
726
- }
727
- }
858
+ applyEventReceived(eventName, payload, correlationId);
728
859
  return decide();
729
860
  }),
730
861
  signalReceived: (signalName, payload, correlationId = null) => Effect.sync(() => {
731
- for (const descriptor of state.descriptors.values()) {
732
- const key = stateKeyFor(descriptor);
733
- const taskState = state.states.get(key);
734
- const expected = typeof descriptor.meta?.__signalName === "string"
735
- ? descriptor.meta.__signalName
736
- : typeof descriptor.meta?.__eventName === "string"
737
- ? descriptor.meta.__eventName
738
- : undefined;
739
- const expectedCorrelation = typeof descriptor.meta?.__correlationId === "string"
740
- ? descriptor.meta.__correlationId
741
- : undefined;
742
- if (taskState === "waiting-event" &&
743
- (!expected || expected === signalName) &&
744
- (expectedCorrelation === undefined || expectedCorrelation === correlationId)) {
745
- state.states.set(key, "finished");
746
- state.outputs.set(key, {
747
- nodeId: descriptor.nodeId,
748
- iteration: descriptor.iteration,
749
- output: payload,
750
- });
751
- }
752
- }
862
+ applyEventReceived(signalName, payload, correlationId);
753
863
  return decide();
754
864
  }),
755
865
  timerFired: (nodeId, firedAtMs = nowMs()) => Effect.sync(() => {
@@ -766,7 +876,11 @@ export function makeWorkflowSession(options = {}) {
766
876
  iteration: descriptor.iteration,
767
877
  output: { firedAtMs },
768
878
  });
769
- return decideAfterOutputChange(descriptor.iteration);
879
+ return decideAfterOutputChange(descriptor.iteration, {
880
+ reason: "timer-fired",
881
+ nodeId: descriptor.nodeId,
882
+ iteration: descriptor.iteration,
883
+ });
770
884
  }),
771
885
  hotReloaded: (graph) => Effect.sync(() => {
772
886
  try {
@@ -800,7 +914,11 @@ export function makeWorkflowSession(options = {}) {
800
914
  usage: output.usage ?? null,
801
915
  output: output.output,
802
916
  });
803
- return decideAfterOutputChange(output.iteration);
917
+ return decideAfterOutputChange(output.iteration, {
918
+ reason: "cache-resolved",
919
+ nodeId: output.nodeId,
920
+ iteration: output.iteration,
921
+ });
804
922
  }),
805
923
  cacheMissed: (nodeId, iteration) => Effect.sync(() => {
806
924
  const descriptor = findDescriptor(state, nodeId, iteration);
@@ -123,6 +123,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
123
123
  }
124
124
  /**
125
125
  * @param {PlanNode} node
126
+ * @param {{ includeContinuedFailures?: boolean }} [options]
126
127
  * @returns {{ readonly terminal: boolean; readonly failed: boolean }}
127
128
  */
128
129
  function inspect(node, options = {}) {