@purista/harness 1.2.6 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +126 -44
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +149 -3
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +38 -18
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/runtime/steps.d.ts +22 -1
  39. package/dist/runtime/steps.js +53 -2
  40. package/dist/sessions/index.d.ts +17 -6
  41. package/dist/sessions/index.js +345 -84
  42. package/dist/skills/index.d.ts +0 -2
  43. package/dist/skills/index.js +0 -8
  44. package/dist/state/in-memory.js +6 -6
  45. package/dist/telemetry/shim.js +2 -6
  46. package/dist/telemetry/span-attrs.d.ts +9 -0
  47. package/dist/telemetry/span-attrs.js +27 -0
  48. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  49. package/dist/testing/fakeLogger.d.ts +29 -0
  50. package/dist/testing/fakeLogger.js +47 -0
  51. package/dist/testing/fakeSandbox.d.ts +27 -0
  52. package/dist/testing/fakeSandbox.js +153 -0
  53. package/dist/testing/fakeStateStore.d.ts +36 -0
  54. package/dist/testing/fakeStateStore.js +66 -0
  55. package/dist/testing/index.d.ts +10 -4
  56. package/dist/testing/index.js +14 -4
  57. package/dist/testing/loggerContract.d.ts +9 -0
  58. package/dist/testing/loggerContract.js +62 -0
  59. package/dist/testing/modelProviderContract.d.ts +12 -0
  60. package/dist/testing/modelProviderContract.js +222 -0
  61. package/dist/testing/recordEvents.d.ts +3 -0
  62. package/dist/testing/recordEvents.js +8 -0
  63. package/dist/testing/stateStoreContract.js +27 -0
  64. package/dist/tools/index.js +26 -1
  65. package/dist/tools/mcp/http.d.ts +2 -0
  66. package/dist/tools/mcp/http.js +34 -21
  67. package/dist/tools/mcp/runner.d.ts +4 -0
  68. package/dist/tools/mcp/runner.js +75 -21
  69. package/dist/tools/mcp/stdio.d.ts +7 -1
  70. package/dist/tools/mcp/stdio.js +102 -23
  71. package/dist/version.d.ts +1 -1
  72. package/dist/version.js +1 -1
  73. package/dist/workspace/in-memory.d.ts +1 -0
  74. package/dist/workspace/in-memory.js +47 -12
  75. package/package.json +5 -4
@@ -0,0 +1,33 @@
1
+ import { HarnessConfigError } from '../errors/catalog.js';
2
+ const adapterIdPattern = /^[a-z][a-z0-9_.-]{1,63}$/;
3
+ /** Validates the context checkpoint adapter descriptor at harness build time. */
4
+ export function validateContextCheckpointStore(adapter) {
5
+ if (!adapterIdPattern.test(adapter.info.id)) {
6
+ throw new HarnessConfigError('Context checkpoint store id is invalid.', {
7
+ reason: 'invalid_context_checkpoint_store',
8
+ path: 'checkpoints.info.id',
9
+ id: adapter.info.id
10
+ });
11
+ }
12
+ if (!adapter.info.packageName.trim()) {
13
+ throw new HarnessConfigError('Context checkpoint store packageName is required.', {
14
+ reason: 'invalid_context_checkpoint_store',
15
+ path: 'checkpoints.info.packageName',
16
+ id: adapter.info.id
17
+ });
18
+ }
19
+ if (!adapter.info.capabilities.includes('context_checkpoint.write')) {
20
+ throw new HarnessConfigError('Context checkpoint store must support context_checkpoint.write.', {
21
+ reason: 'invalid_context_checkpoint_store',
22
+ path: 'checkpoints.info.capabilities',
23
+ id: adapter.info.id
24
+ });
25
+ }
26
+ if (adapter.info.capabilities.some((capability) => !adapter.capabilities.includes(capability))) {
27
+ throw new HarnessConfigError('Context checkpoint store capabilities must include info.capabilities.', {
28
+ reason: 'invalid_context_checkpoint_store',
29
+ path: 'checkpoints.capabilities',
30
+ id: adapter.info.id
31
+ });
32
+ }
33
+ }
@@ -6,3 +6,4 @@ export * from './capabilities.js';
6
6
  export * from './feedback.js';
7
7
  export * from './memory.js';
8
8
  export * from './workspace.js';
9
+ export * from './context-checkpoints.js';
@@ -6,3 +6,4 @@ export * from './capabilities.js';
6
6
  export * from './feedback.js';
7
7
  export * from './memory.js';
8
8
  export * from './workspace.js';
9
+ export * from './context-checkpoints.js';
@@ -23,6 +23,80 @@ export type ModelCapability =
23
23
  | 'embeddings'
24
24
  /** Document reranking. */
25
25
  | 'rerank';
26
+ /** Provider-neutral retry setting used by model aliases and per-call overrides. */
27
+ export type ModelRetrySetting = boolean | ModelRetryPolicy;
28
+ /** Transient failure classes that can be retried by the harness. */
29
+ export interface ModelRetryOnPolicy {
30
+ /** Retry transport-level/network failures. Default: `true`. */
31
+ network?: boolean;
32
+ /** Retry model call timeouts. Default: `true`. */
33
+ timeout?: boolean;
34
+ /** Retry HTTP 429/rate-limit failures. Default: `true`. */
35
+ rateLimit?: boolean;
36
+ /** Retry HTTP 5xx/provider-unavailable failures. Default: `true`. */
37
+ serverError?: boolean;
38
+ }
39
+ /**
40
+ * Provider-neutral retry policy.
41
+ *
42
+ * The harness actively retries only inside `maxActiveElapsedMs` and
43
+ * `maxActiveDelayMs`. Longer provider retry instructions fail fast with
44
+ * `retryKind: 'none'` by default; `longRetry: 'defer'` classifies them as
45
+ * deferred retry errors carrying the provider-supplied `retryAfterMs`.
46
+ */
47
+ export interface ModelRetryPolicy {
48
+ /** Total active attempts including the first call. Default: `3`. */
49
+ maxAttempts?: number;
50
+ /** Maximum wall-clock time spent in active retries. Default: `60_000`. */
51
+ maxActiveElapsedMs?: number;
52
+ /** Maximum single active sleep. Default: `20_000`. */
53
+ maxActiveDelayMs?: number;
54
+ /** Optional cap for deferred retry classification with `longRetry: 'defer'`. Default: unlimited. */
55
+ maxDeferredDelayMs?: number;
56
+ /** Honor provider Retry-After/reset headers when present. Default: `true`. */
57
+ respectRetryAfter?: boolean;
58
+ /** Base delay for exponential jitter when no provider delay exists. Default: `500`. */
59
+ minDelayMs?: number;
60
+ /** Maximum computed backoff delay. Default: `8_000`. */
61
+ maxDelayMs?: number;
62
+ /** Retryable failure classes. Omitted fields default to `true`. */
63
+ retryOn?: ModelRetryOnPolicy;
64
+ /**
65
+ * Handling for provider-instructed delays beyond `maxActiveDelayMs`:
66
+ * `'error'` fails immediately with `retryKind: 'none'`; `'defer'` fails with
67
+ * `retryKind: 'deferred'` plus the provider-supplied `retryAfterMs` so a
68
+ * queue/scheduler can retry later. Default: `'error'`.
69
+ */
70
+ longRetry?: 'error' | 'defer';
71
+ }
72
+ /** Normalized retry classification for provider failures. */
73
+ export type ModelRetryKind = 'none' | 'active' | 'deferred';
74
+ /** Structured provider outcome metadata preserved across adapters. */
75
+ export interface ModelOutcome {
76
+ /** Normalized finish reason. */
77
+ finishReason: FinishReason;
78
+ /** Raw provider finish/stop/status reason when available. */
79
+ providerFinishReason?: string;
80
+ /** Raw provider status when it carries outcome semantics. */
81
+ providerStatus?: string;
82
+ /** Whether the outcome is eligible for retry under policy. */
83
+ retryable?: boolean;
84
+ /** Active/deferred retry classification when relevant. */
85
+ retryKind?: ModelRetryKind;
86
+ /** Provider-suggested or computed retry delay. */
87
+ retryAfterMs?: number;
88
+ /** Parsed provider rate-limit metadata. */
89
+ rateLimit?: ModelRateLimitInfo;
90
+ /** Extra provider-specific structured outcome details. */
91
+ details?: Record<string, JsonValue>;
92
+ }
93
+ /** Parsed, provider-neutral rate-limit metadata. */
94
+ export interface ModelRateLimitInfo {
95
+ scope?: 'requests' | 'input_tokens' | 'output_tokens' | 'tokens' | 'unknown';
96
+ limit?: number;
97
+ remaining?: number;
98
+ resetAt?: string;
99
+ }
26
100
  /** Default generation parameters applied per alias. */
27
101
  export interface ModelDefaults {
28
102
  temperature?: number;
@@ -31,6 +105,8 @@ export interface ModelDefaults {
31
105
  stopSequences?: string[];
32
106
  /** Whether providers should allow the model to emit multiple independent tool calls in one turn. */
33
107
  parallelToolCalls?: boolean;
108
+ /** Alias-level retry behavior inherited by model calls. Default: `true`. */
109
+ retry?: ModelRetrySetting;
34
110
  providerOptions?: Record<string, unknown>;
35
111
  }
36
112
  /** Per-call generation overrides. */
@@ -41,6 +117,8 @@ export interface ModelCallOptions {
41
117
  stopSequences?: string[];
42
118
  /** Overrides whether providers should allow multiple tool calls in one model turn. */
43
119
  parallelToolCalls?: boolean;
120
+ /** Per-call retry override. Default: alias retry setting, then `true`. */
121
+ retry?: ModelRetrySetting;
44
122
  providerOptions?: Record<string, unknown>;
45
123
  }
46
124
  /** Tool call envelope emitted by model adapters. */
@@ -157,10 +235,20 @@ export type FinishReason =
157
235
  'stop'
158
236
  /** Token budget reached. */
159
237
  | 'length'
238
+ /** Context window reached before a valid answer could be produced. */
239
+ | 'context_limit'
160
240
  /** Model requested tool calls. */
161
241
  | 'tool_calls'
162
242
  /** Provider content filter interrupted generation. */
163
243
  | 'content_filter'
244
+ /** Provider/model refused the requested output. */
245
+ | 'refusal'
246
+ /** Provider asked the caller to resume/continue later. */
247
+ | 'pause'
248
+ /** Provider produced malformed output or malformed tool use. */
249
+ | 'malformed'
250
+ /** Cooperative cancellation interrupted generation. */
251
+ | 'cancelled'
164
252
  /** Provider or adapter error fallback. */
165
253
  | 'error';
166
254
  /** Tool declaration exposed to model adapters. */
@@ -180,6 +268,7 @@ export interface TextResponse {
180
268
  providerItems?: ProviderItems;
181
269
  usage: TokenUsage;
182
270
  finishReason: FinishReason;
271
+ outcome?: ModelOutcome;
183
272
  raw?: unknown;
184
273
  }
185
274
  /** Stream chunk from text-stream generation. */
@@ -193,6 +282,7 @@ export type TextStreamChunk = {
193
282
  kind: 'finish';
194
283
  usage: TokenUsage;
195
284
  finishReason: FinishReason;
285
+ outcome?: ModelOutcome;
196
286
  providerItems?: ProviderItems;
197
287
  };
198
288
  /** Request for object/object-stream model methods. */
@@ -208,6 +298,7 @@ export interface ObjectResponse<T extends JsonValue = JsonValue> {
208
298
  providerItems?: ProviderItems;
209
299
  usage: TokenUsage;
210
300
  finishReason: FinishReason;
301
+ outcome?: ModelOutcome;
211
302
  raw?: unknown;
212
303
  }
213
304
  /** Stream chunk from structured object streaming. */
@@ -226,6 +317,7 @@ export type ObjectStreamChunk<T extends JsonValue = JsonValue> = {
226
317
  object: T;
227
318
  usage: TokenUsage;
228
319
  finishReason: FinishReason;
320
+ outcome?: ModelOutcome;
229
321
  providerItems?: ProviderItems;
230
322
  };
231
323
  /** Request for embedding generation. */
@@ -296,5 +388,7 @@ export interface ModelAlias {
296
388
  model: string;
297
389
  capabilities: readonly ModelCapability[];
298
390
  defaults?: ModelDefaults;
391
+ /** Alias-level retry behavior. Default: `true`. */
392
+ retry?: ModelRetrySetting;
299
393
  providerOptions?: Record<string, unknown>;
300
394
  }
@@ -121,6 +121,17 @@ export declare class DurableRunLeaseError extends Error {
121
121
  }
122
122
  /** Returns true when a durable run status is terminal. */
123
123
  export declare function isTerminalRunStatus(status: DurableRunStatus): status is DurableTerminalRunStatus;
124
+ /**
125
+ * Returns true when a durable run status blocks resume. A `failed` run is
126
+ * terminal for reporting but stays resumable by a retry with the same run id
127
+ * (spec 22 §3); only `succeeded` and `cancelled` reject `startRun`.
128
+ */
129
+ export declare function isResumeBlockingRunStatus(status: DurableRunStatus): boolean;
130
+ /** Internal in-process FIFO mutex shared by durable runtime implementations. */
131
+ export declare class AsyncMutex {
132
+ private current;
133
+ lock<T>(fn: () => Promise<T>): Promise<T>;
134
+ }
124
135
  /**
125
136
  * Creates a self-contained in-memory durable runtime for tests and prototypes.
126
137
  *
@@ -16,7 +16,16 @@ export class DurableRunLeaseError extends Error {
16
16
  export function isTerminalRunStatus(status) {
17
17
  return status === 'succeeded' || status === 'failed' || status === 'cancelled';
18
18
  }
19
- class AsyncMutex {
19
/**
 * Tells whether a durable run in the given status refuses to be resumed.
 *
 * `succeeded` and `cancelled` block resume. Every other status — including
 * `failed`, which is terminal for reporting purposes but may be retried under
 * the same run id (spec 22 §3) — does not.
 *
 * @param status Durable run status to classify.
 * @returns `true` for `succeeded` or `cancelled`, otherwise `false`.
 */
export function isResumeBlockingRunStatus(status) {
    switch (status) {
        case 'succeeded':
        case 'cancelled':
            return true;
        default:
            return false;
    }
}
27
+ /** Internal in-process FIFO mutex shared by durable runtime implementations. */
28
+ export class AsyncMutex {
20
29
  current = Promise.resolve();
21
30
  async lock(fn) {
22
31
  const prev = this.current;
@@ -54,7 +63,9 @@ class InMemoryDurableRuntime {
54
63
  async startRun(record) {
55
64
  return this.withSessionLock(record.sessionId, async () => {
56
65
  const current = this.runs.get(record.runId);
57
- if (current && isTerminalRunStatus(current.status)) {
66
+ // Only succeeded/cancelled block resume; a failed run is recorded
67
+ // terminal but stays resumable for a retry with the same run id.
68
+ if (current && isResumeBlockingRunStatus(current.status)) {
58
69
  throw new DurableTerminalRunError(record.runId, current.status);
59
70
  }
60
71
  this.assertNoConflictingLease(record);
@@ -66,6 +77,8 @@ class InMemoryDurableRuntime {
66
77
  };
67
78
  if (current) {
68
79
  state.attempt += 1;
80
+ state.status = 'running';
81
+ delete state.finished;
69
82
  }
70
83
  this.runs.set(record.runId, state);
71
84
  const lease = {
@@ -30,28 +30,47 @@ export async function beginDurableWorkflow(args) {
30
30
  });
31
31
  let handle;
32
32
  if (workspaceStore) {
33
- const priorReplay = lease.checkpoint?.replay;
34
- if (lease.resumed && priorReplay?.workspaceRef) {
35
- handle = await workspaceStore.resumeWorkspace({
36
- workspaceRef: priorReplay.workspaceRef,
37
- ...(priorReplay.checkpointRef ? { checkpointRef: priorReplay.checkpointRef } : {}),
38
- runId: lease.runId,
39
- sessionId,
40
- attempt: lease.attempt,
41
- idempotencyKey: `${lease.runId}:${lease.attempt}:resume`,
42
- signal
43
- });
33
+ try {
34
+ const priorReplay = lease.checkpoint?.replay;
35
+ if (lease.resumed && priorReplay?.workspaceRef) {
36
+ handle = await workspaceStore.resumeWorkspace({
37
+ workspaceRef: priorReplay.workspaceRef,
38
+ ...(priorReplay.checkpointRef ? { checkpointRef: priorReplay.checkpointRef } : {}),
39
+ runId: lease.runId,
40
+ sessionId,
41
+ attempt: lease.attempt,
42
+ idempotencyKey: `${lease.runId}:${lease.attempt}:resume`,
43
+ signal
44
+ });
45
+ }
46
+ else {
47
+ handle = await workspaceStore.startWorkspace({
48
+ runId: lease.runId,
49
+ sessionId,
50
+ workflowId,
51
+ workerId,
52
+ attempt: lease.attempt,
53
+ idempotencyKey: `${lease.runId}:start`,
54
+ signal
55
+ });
56
+ }
44
57
  }
45
- else {
46
- handle = await workspaceStore.startWorkspace({
47
- runId: lease.runId,
48
- sessionId,
49
- workflowId,
50
- workerId,
51
- attempt: lease.attempt,
52
- idempotencyKey: `${lease.runId}:start`,
53
- signal
54
- });
58
+ catch (workspaceError) {
59
+ // The caller never receives the binding when the workspace phase fails,
60
+ // so release the acquired lease here or it stays locked for the TTL.
61
+ try {
62
+ await lease.release();
63
+ }
64
+ catch (releaseError) {
65
+ logger.warn('Failed to release durable lease after workspace failure.', {
66
+ harness: harnessName,
67
+ session_id: sessionId,
68
+ run_id: lease.runId,
69
+ workflow_id: workflowId,
70
+ error: serializeError(releaseError)
71
+ });
72
+ }
73
+ throw workspaceError;
55
74
  }
56
75
  }
57
76
  const activeHandle = handle;
@@ -86,6 +105,12 @@ export async function beginDurableWorkflow(args) {
86
105
  const ctx = createDurableWorkflowContext(runtime, lease, onStepCommit ? { onStepCommit } : {});
87
106
  const autoCleanup = workspaceStore?.info.policy.retention?.cleanupMode === 'adapter_automatic';
88
107
  let settled = false;
108
+ // Stores that bind run sandboxes to active workspaces (localDirectoryWorkspaceStore)
109
+ // expose an unbind hook so the binding never outlives the durable run.
110
+ const releaseRunBinding = () => {
111
+ const candidate = workspaceStore;
112
+ candidate?.releaseRunBinding?.(lease.runId, sessionId);
113
+ };
89
114
  return {
90
115
  runId: lease.runId,
91
116
  attempt: lease.attempt,
@@ -116,6 +141,7 @@ export async function beginDurableWorkflow(args) {
116
141
  }
117
142
  },
118
143
  async dispose() {
144
+ releaseRunBinding();
119
145
  if (settled)
120
146
  return;
121
147
  try {
@@ -18,6 +18,26 @@ export interface DurableWorkflowContextOptions {
18
18
  */
19
19
  readonly onStepCommit?: (commit: DurableStepCommit) => Promise<DurableReplayCheckpoint | undefined>;
20
20
  }
21
+ /** Retry policy for a single explicit workflow step. */
22
+ export type DurableStepRetrySetting = boolean | DurableStepRetryPolicy;
23
+ /** Provider-neutral retry policy for `ctx.step(...)` boundaries. */
24
+ export interface DurableStepRetryPolicy {
25
+ /** Total attempts including the first call. Default: `3`. */
26
+ readonly maxAttempts?: number;
27
+ /** Base delay before retrying in milliseconds. Default: `100`. */
28
+ readonly minDelayMs?: number;
29
+ /** Maximum delay before retrying in milliseconds. Default: `1_000`. */
30
+ readonly maxDelayMs?: number;
31
+ /** Delay strategy. Default: `exponential`. */
32
+ readonly backoff?: 'fixed' | 'exponential';
33
+ /** Optional predicate to suppress retries for non-transient failures. */
34
+ readonly shouldRetry?: (error: unknown, attempt: number) => boolean | Promise<boolean>;
35
+ }
36
+ /** Per-call options for an explicit workflow step. */
37
+ export interface DurableStepOptions {
38
+ /** Retry failed step functions before a checkpoint is committed. Default: no retry. */
39
+ readonly retry?: DurableStepRetrySetting;
40
+ }
21
41
  /** Durable workflow context that exposes explicit checkpoint boundaries. */
22
42
  export interface DurableWorkflowContext {
23
43
  /** Current durable run lease. */
@@ -30,7 +50,7 @@ export interface DurableWorkflowContext {
30
50
  * const prepared = await ctx.step('prepare-inputs', async () => ({ ok: true }))
31
51
  * ```
32
52
  */
33
- step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
53
+ step<T extends JsonValue>(stepId: string, fn: () => Promise<T>, options?: DurableStepOptions): Promise<T>;
34
54
  }
35
55
  /** Error thrown when a durable step id is invalid or duplicated. */
36
56
  export declare class DurableStepError extends Error {
@@ -38,3 +58,4 @@ export declare class DurableStepError extends Error {
38
58
  }
39
59
  /** Creates a durable workflow context bound to an acquired runtime lease. */
40
60
  export declare function createDurableWorkflowContext(runtime: DurableRuntime, lease: DurableRunLease, options?: DurableWorkflowContextOptions): DurableWorkflowContext;
61
+ export declare function runStepWithRetry<T>(fn: () => Promise<T>, retry: DurableStepRetrySetting | undefined): Promise<T>;
@@ -18,7 +18,7 @@ export function createDurableWorkflowContext(runtime, lease, options = {}) {
18
18
  let sequence = (lease.checkpoints ?? []).reduce((max, checkpoint) => Math.max(max, checkpoint.sequence), 0);
19
19
  return {
20
20
  lease,
21
- async step(stepId, fn) {
21
+ async step(stepId, fn, stepOptions = {}) {
22
22
  validateStepId(stepId);
23
23
  if (completed.has(stepId)) {
24
24
  throw new DurableStepError(`Duplicate durable step id "${stepId}".`);
@@ -29,7 +29,7 @@ export function createDurableWorkflowContext(runtime, lease, options = {}) {
29
29
  if (replay.has(stepId)) {
30
30
  return replay.get(stepId);
31
31
  }
32
- const output = await fn();
32
+ const output = await runStepWithRetry(fn, stepOptions.retry);
33
33
  assertJsonSerializable(output, stepId);
34
34
  sequence += 1;
35
35
  // Workspace state is written before the runtime checkpoint (spec 21 §10),
@@ -54,6 +54,57 @@ export function createDurableWorkflowContext(runtime, lease, options = {}) {
54
54
  }
55
55
  };
56
56
  }
57
+ export async function runStepWithRetry(fn, retry) {
58
+ const policy = normalizeRetryPolicy(retry);
59
+ let attempt = 0;
60
+ let lastError;
61
+ while (attempt < policy.maxAttempts) {
62
+ attempt += 1;
63
+ try {
64
+ return await fn();
65
+ }
66
+ catch (error) {
67
+ lastError = error;
68
+ if (attempt >= policy.maxAttempts)
69
+ break;
70
+ if (policy.shouldRetry && !await policy.shouldRetry(error, attempt))
71
+ break;
72
+ await sleep(retryDelayMs(policy, attempt));
73
+ }
74
+ }
75
+ throw lastError;
76
+ }
77
+ function normalizeRetryPolicy(retry) {
78
+ if (!retry) {
79
+ return { maxAttempts: 1, minDelayMs: 0, maxDelayMs: 0, backoff: 'fixed' };
80
+ }
81
+ if (retry === true) {
82
+ return { maxAttempts: 3, minDelayMs: 100, maxDelayMs: 1_000, backoff: 'exponential' };
83
+ }
84
+ return {
85
+ maxAttempts: clampPositiveInteger(retry.maxAttempts ?? 3),
86
+ minDelayMs: Math.max(0, retry.minDelayMs ?? 100),
87
+ maxDelayMs: Math.max(0, retry.maxDelayMs ?? 1_000),
88
+ backoff: retry.backoff ?? 'exponential',
89
+ ...(retry.shouldRetry ? { shouldRetry: retry.shouldRetry } : {})
90
+ };
91
+ }
92
+ function clampPositiveInteger(value) {
93
+ return Number.isFinite(value) && value > 0 ? Math.floor(value) : 1;
94
+ }
95
+ function retryDelayMs(policy, attempt) {
96
+ if (policy.maxDelayMs === 0)
97
+ return 0;
98
+ const base = policy.backoff === 'fixed'
99
+ ? policy.minDelayMs
100
+ : policy.minDelayMs * 2 ** Math.max(0, attempt - 1);
101
+ return Math.min(policy.maxDelayMs, base);
102
+ }
103
+ function sleep(ms) {
104
+ if (ms <= 0)
105
+ return Promise.resolve();
106
+ return new Promise((resolve) => setTimeout(resolve, ms));
107
+ }
57
108
  function validateStepId(stepId) {
58
109
  if (!STEP_ID_PATTERN.test(stepId)) {
59
110
  throw new DurableStepError(`Invalid durable step id "${stepId}".`);
@@ -3,6 +3,7 @@ import type { RunEvent, Harness, HarnessDefaults, BuilderState, TelemetryOptions
3
3
  import type { MemoryAdapter } from '../ports/memory.js';
4
4
  import type { DurableRuntimeAdapter, HarnessInspection } from '../ports/capabilities.js';
5
5
  import type { DurableWorkspaceStore } from '../ports/workspace.js';
6
+ import type { ContextCheckpointStore } from '../ports/context-checkpoints.js';
6
7
  import type { Sandbox } from '../sandbox/index.js';
7
8
  import type { StateStore } from '../ports/state.js';
8
9
  import { type TelemetryShim } from '../telemetry/index.js';
@@ -16,6 +17,7 @@ type HarnessDefinition<S extends BuilderState> = {
16
17
  memory: MemoryAdapter;
17
18
  runtime?: DurableRuntimeAdapter;
18
19
  workspaceStore?: DurableWorkspaceStore;
20
+ checkpoints?: ContextCheckpointStore;
19
21
  defaults: HarnessDefaults;
20
22
  models: NonNullable<S['models']>;
21
23
  tools: NonNullable<S['tools']>;
@@ -27,12 +29,21 @@ type HarnessDefinition<S extends BuilderState> = {
27
29
  /**
28
30
  * Relay run events from an in-process run to a stream consumer.
29
31
  *
30
- * The unread events live in a bounded queue: consumed events are removed (no
31
- * growing cursor over a shared array), and on overflow the oldest non-terminal
32
- * unread event is dropped and counted, so a slow consumer never silently skips
33
- * an unread event. Delivery is promise-notified rather than time-polled, so
34
- * there is no fixed per-event latency or periodic timer.
32
+ * The unread events live in a bounded queue (cap: STREAM_MAX_BUFFERED_EVENTS):
33
+ * consumed events are removed (no growing cursor over a shared array), and on
34
+ * overflow the oldest droppable unread event is dropped and counted, so a slow
35
+ * consumer never silently skips an event without an accompanying
36
+ * `stream.overflow` notice. Only `run.finished` is undroppable; all other
37
+ * event types — including `agent.finished` — may be evicted under pressure.
38
+ * If no droppable event exists when the queue is full, the incoming event is
39
+ * discarded (counted) rather than growing the queue past the cap. Delivery is
40
+ * promise-notified rather than time-polled, so there is no fixed per-event
41
+ * latency or periodic timer.
42
+ *
43
+ * Abandoning the stream (`break` / `iterator.return()`) only detaches that
44
+ * consumer. It does not abort `relaySignal`; callers must pass `opts.signal`
45
+ * when they intend to cancel the underlying run.
35
46
  */
36
- export declare function relayRunEvents(run: (onEvent: (event: RunEvent) => Promise<void>) => Promise<unknown>): AsyncIterable<RunEvent>;
47
+ export declare function relayRunEvents(run: (onEvent: (event: RunEvent) => Promise<void>, relaySignal: AbortSignal) => Promise<unknown>): AsyncIterable<RunEvent>;
37
48
  export declare function createSessionHarness<S extends BuilderState>(definition: HarnessDefinition<S>): Harness<S>;
38
49
  export {};