npm - @purista/harness - Versions diffs - 1.2.1 → 1.2.2 - Mend

@purista/harness 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/dist/agents/index.d.ts +1 -0
package/dist/agents/index.js +276 -141
package/dist/errors/catalog.d.ts +4 -3
package/dist/harness/defineHarness.d.ts +26 -2
package/dist/harness/defineHarness.js +51 -2
package/dist/index.d.ts +1 -1
package/dist/memory/sandbox/index.js +7 -1
package/dist/models/registry.js +45 -3
package/dist/ports/base-model-provider.js +2 -0
package/dist/ports/capabilities.d.ts +2 -0
package/dist/ports/harness-context.d.ts +1 -0
package/dist/ports/model-provider.d.ts +4 -0
package/dist/ports/state.d.ts +6 -0
package/dist/runtime/abort.d.ts +5 -0
package/dist/runtime/abort.js +33 -0
package/dist/runtime/durable.d.ts +2 -0
package/dist/runtime/durable.js +6 -2
package/dist/runtime/sessionDurable.d.ts +49 -0
package/dist/runtime/sessionDurable.js +135 -0
package/dist/runtime/steps.d.ts +19 -1
package/dist/runtime/steps.js +21 -3
package/dist/sandbox/index.d.ts +34 -0
package/dist/sandbox/index.js +40 -3
package/dist/sessions/index.d.ts +15 -2
package/dist/sessions/index.js +212 -99
package/dist/skills/index.js +19 -6
package/dist/state/in-memory.d.ts +1 -0
package/dist/state/in-memory.js +15 -0
package/dist/telemetry/shim.js +9 -4
package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
package/dist/testing/durableWorkspaceStoreContract.js +64 -28
package/dist/tools/index.d.ts +2 -0
package/dist/tools/index.js +15 -1
package/dist/tools/mcp/runner.js +11 -6
package/dist/tools/mcp/stdio.js +170 -1
package/dist/ulid/index.d.ts +6 -1
package/dist/ulid/index.js +31 -13
package/dist/version.d.ts +2 -0
package/dist/version.js +2 -0
package/dist/workflows/index.js +7 -1
package/dist/workspace/in-memory.d.ts +9 -10
package/dist/workspace/in-memory.js +191 -48
package/package.json +1 -1
package/dist/harness/errors.d.ts +0 -62
package/dist/harness/errors.js +0 -67

package/dist/harness/defineHarness.js CHANGED Viewed

@@ -4,12 +4,13 @@ import { sandboxMemory } from '../memory/sandbox/index.js';
 import { validateMemoryAdapter } from '../ports/memory.js';
 import { validateDurableWorkspaceStore } from '../ports/workspace.js';
 import { InMemoryStateStore } from '../state/in-memory.js';
-import { HarnessConfigError } from '../errors/catalog.js';
+import { HarnessConfigError, SkillManifestError } from '../errors/catalog.js';
+import { BUILTIN_TOOL_NAMES } from '../tools/index.js';
 import { autoDetectSandbox } from '../sandbox/index.js';
 import { createSessionHarness } from '../sessions/index.js';
 import { hasAdapterCapabilities, missingCapabilities, uniqueCapabilities } from '../ports/capabilities.js';
 /** Stable harness version string for diagnostics and generated documentation. */
-export const HARNESS_VERSION = '0.0.0';
+export { HARNESS_VERSION } from '../version.js';
 class Builder {
     options;
     configured;
@@ -53,6 +54,9 @@ class Builder {
         if (defaults.historyWindow !== undefined && defaults.historyWindow < 0) {
             throw new HarnessConfigError('historyWindow must be >= 0', { reason: 'invalid_defaults', path: 'defaults.historyWindow' });
         }
+        if (defaults.maxParallelToolCalls !== undefined && (!Number.isInteger(defaults.maxParallelToolCalls) || defaults.maxParallelToolCalls < 1)) {
+            throw new HarnessConfigError('maxParallelToolCalls must be a positive integer', { reason: 'invalid_defaults', path: 'defaults.maxParallelToolCalls' });
+        }
         return this.clone({ defaults });
     }
     models(models) {
@@ -62,6 +66,11 @@ class Builder {
         return this.clone({ models });
     }
     tools(tools) {
+        for (const id of Object.keys(tools)) {
+            if (!/^[a-z][a-z0-9_]*$/.test(id) || id.length > 64) {
+                throw new HarnessConfigError('Invalid tool id. Tool ids must match /^[a-z][a-z0-9_]*$/ and be at most 64 characters.', { reason: 'invalid_tool_id', path: `tools.${id}`, id });
+            }
+        }
         return this.clone({ tools });
     }
     skills(skills) {
@@ -85,6 +94,7 @@ class Builder {
         if (!models || Object.keys(models).length === 0) {
             throw new HarnessConfigError('At least one model alias is required.', { reason: 'missing_models', path: 'models' });
         }
+        this.validateToolSkillNamespace();
         const sandbox = this.configured.sandbox ?? autoDetectSandbox();
         const memory = this.configured.memory ?? sandboxMemory();
         validateMemoryAdapter(memory);
@@ -106,12 +116,15 @@ class Builder {
             state: this.configured.state ?? new InMemoryStateStore(),
             sandbox,
             memory,
+            ...(this.configured.runtime ? { runtime: this.configured.runtime } : {}),
+            ...(this.configured.workspaceStore ? { workspaceStore: this.configured.workspaceStore } : {}),
             defaults: {
                 agentMaxIterations: this.configured.defaults?.agentMaxIterations ?? 16,
                 runTimeoutMs: this.configured.defaults?.runTimeoutMs ?? 600_000,
                 toolTimeoutMs: this.configured.defaults?.toolTimeoutMs ?? 120_000,
                 skillTimeoutMs: this.configured.defaults?.skillTimeoutMs ?? 60_000,
                 modelTimeoutMs: this.configured.defaults?.modelTimeoutMs ?? 300_000,
+                maxParallelToolCalls: this.configured.defaults?.maxParallelToolCalls ?? 8,
                 ...(this.configured.defaults?.historyWindow !== undefined ? { historyWindow: this.configured.defaults.historyWindow } : {})
             },
             models,
@@ -126,6 +139,42 @@ class Builder {
     clone(patch) {
         return new Builder(this.options, { ...this.configured, ...patch });
     }
+    /**
+     * Tool ids, skill ids, and built-in tool names share one model-facing
+     * namespace (spec 08 §6). A custom tool id must not collide with a built-in
+     * tool name or a skill id, and a skill id must not collide with a built-in
+     * tool name.
+     */
+    validateToolSkillNamespace() {
+        const toolIds = Object.keys(this.configured.tools ?? {});
+        const skillIds = new Set(Object.keys(this.configured.skills ?? {}));
+        const builtinNames = new Set(BUILTIN_TOOL_NAMES);
+        for (const id of toolIds) {
+            if (builtinNames.has(id)) {
+                throw new SkillManifestError(`Custom tool id "${id}" collides with a built-in tool name.`, {
+                    reason: 'reserved_name',
+                    skill_id: id,
+                    source: 'tool'
+                });
+            }
+            if (skillIds.has(id)) {
+                throw new SkillManifestError(`Custom tool id "${id}" collides with a skill id.`, {
+                    reason: 'reserved_name',
+                    skill_id: id,
+                    source: 'tool'
+                });
+            }
+        }
+        for (const id of skillIds) {
+            if (builtinNames.has(id)) {
+                throw new SkillManifestError(`Skill id "${id}" collides with a built-in tool name.`, {
+                    reason: 'reserved_name',
+                    skill_id: id,
+                    source: 'skill'
+                });
+            }
+        }
+    }
     validateAgentSkillReferences(agents) {
         const configuredSkills = new Set(Object.keys(this.configured.skills ?? {}));
         for (const [agentId, agent] of Object.entries(agents)) {

package/dist/index.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@ export * from './telemetry/index.js';
 export * from './ulid/index.js';
 export * from './ports/index.js';
 export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, DurableTerminalRunError, inMemoryDurableRuntime, isTerminalRunStatus } from './runtime/index.js';
-export type { DurableActiveRunStatus, DurableWorkflowContext, DurableRunLease, DurableRunStart, DurableRunStatus, DurableRuntime, DurableTerminalRunStatus, FinishRunPatch, InMemoryDurableRuntimeOptions, RunCheckpoint } from './runtime/index.js';
+export type { DurableActiveRunStatus, DurableWorkflowContext, DurableWorkflowContextOptions, DurableStepCommit, DurableRunLease, DurableRunStart, DurableRunStatus, DurableRuntime, DurableTerminalRunStatus, FinishRunPatch, InMemoryDurableRuntimeOptions, RunCheckpoint } from './runtime/index.js';
 export * from './state/in-memory.js';
 export * from './models/json.js';
 export type { SessionRecord, Message, RunRecord, PersistedRunEvent, RunStatus } from './models/state.js';

package/dist/memory/sandbox/index.js CHANGED Viewed

@@ -45,7 +45,13 @@ class SandboxMemoryAdapter {
                 const path = `${root}/${key}.json`;
                 if (!(await sandbox.exists(path)))
                     return undefined;
-                return JSON.parse(await sandbox.readText(path));
+                const raw = await sandbox.readText(path);
+                try {
+                    return JSON.parse(raw);
+                }
+                catch (error) {
+                    throw new StateError('Stored memory value is not valid JSON.', { op: 'memory.get', reason: 'corrupt_value' }, error);
+                }
             },
             set: async (key, value, op) => {
                 op.signal.throwIfAborted();

package/dist/models/registry.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ModelCapabilityError } from '../errors/index.js';
+import { ModelCapabilityError, ModelError } from '../errors/index.js';
 import { ATTR_GEN_AI_REQUEST_MODEL, ATTR_GEN_AI_RESPONSE_FINISH_REASONS, ATTR_GEN_AI_SYSTEM, ATTR_GEN_AI_TOKEN_TYPE, ATTR_GEN_AI_USAGE_INPUT_TOKENS, ATTR_GEN_AI_USAGE_OUTPUT_TOKENS, GEN_AI_TOKEN_TYPE_VALUE_INPUT, GEN_AI_TOKEN_TYPE_VALUE_OUTPUT } from '@opentelemetry/semantic-conventions/incubating';
 /**
  * Creates per-alias model handles that enforce capability gates before provider invocation.
@@ -92,7 +92,7 @@ function createHandle(aliasKey, alias, options) {
                 signal,
                 traceparent: req.traceparent ?? options.telemetry?.currentTraceparent()
             };
-            return withModelSpan(options, aliasKey, alias, 'embeddings', ctx, () => alias.provider.embed(fullReq));
+            return withModelSpan(options, aliasKey, alias, 'embeddings', ctx, () => alias.provider.embed(fullReq)).then((response) => validateEmbeddingResponse(aliasKey, alias, fullReq, response));
         },
         rerank(req, signal, ctx) {
             ensureCapabilities(aliasKey, alias, 'rerank', req);
@@ -107,10 +107,51 @@ function createHandle(aliasKey, alias, options) {
                 signal,
                 traceparent: req.traceparent ?? options.telemetry?.currentTraceparent()
             };
-            return withModelSpan(options, aliasKey, alias, 'rerank', ctx, () => alias.provider.rerank(fullReq));
+            return withModelSpan(options, aliasKey, alias, 'rerank', ctx, () => alias.provider.rerank(fullReq)).then((response) => validateRerankResponse(aliasKey, alias, fullReq, response));
         }
     };
 }
+/**
+ * Provider-neutral guard: the number of embeddings must match the number of
+ * inputs, and indices must cover every input exactly once. Protects callers
+ * that associate vectors with inputs by position.
+ */
+function validateEmbeddingResponse(aliasKey, alias, req, response) {
+    const expected = Array.isArray(req.input) ? req.input.length : 1;
+    const indices = new Set(response.embeddings.map((item) => item.index));
+    const validIndices = response.embeddings.every((item) => Number.isInteger(item.index) && item.index >= 0 && item.index < expected);
+    if (response.embeddings.length !== expected || indices.size !== expected || !validIndices) {
+        throw new ModelError('Embedding response does not match the request input count.', {
+            provider: alias.provider.id,
+            model: alias.model,
+            method: 'embed',
+            reason: 'embedding_count_mismatch',
+            providerBody: { expected, received: response.embeddings.length, alias: aliasKey }
+        });
+    }
+    return response;
+}
+/**
+ * Provider-neutral guard: every rerank result must reference a distinct, valid
+ * document index, and the count must not exceed the requested document count
+ * (or `topN` when supplied).
+ */
+function validateRerankResponse(aliasKey, alias, req, response) {
+    const documentCount = req.documents.length;
+    const limit = req.topN !== undefined ? Math.min(req.topN, documentCount) : documentCount;
+    const indices = new Set(response.results.map((item) => item.index));
+    const validIndices = response.results.every((item) => Number.isInteger(item.index) && item.index >= 0 && item.index < documentCount);
+    if (response.results.length > limit || indices.size !== response.results.length || !validIndices) {
+        throw new ModelError('Rerank response does not map back to the request documents.', {
+            provider: alias.provider.id,
+            model: alias.model,
+            method: 'rerank',
+            reason: 'rerank_result_mismatch',
+            providerBody: { documentCount, limit, received: response.results.length, alias: aliasKey }
+        });
+    }
+    return response;
+}
 function withModelStreamSpan(options, aliasKey, alias, method, ctx, fn) {
     if (!options.telemetry)
         return fn();
@@ -306,6 +347,7 @@ function mergeDefaults(alias, call) {
         || merged.maxTokens !== undefined
         || merged.topP !== undefined
         || merged.stopSequences !== undefined
+        || merged.parallelToolCalls !== undefined
         || Object.keys(merged.providerOptions ?? {}).length > 0;
     return hasTopLevel ? merged : undefined;
 }

package/dist/ports/base-model-provider.js CHANGED Viewed

@@ -161,6 +161,8 @@ export class BaseModelProvider {
         const controller = new AbortController();
         const relay = () => controller.abort(req.signal.reason);
         req.signal.addEventListener('abort', relay, { once: true });
+        if (req.signal.aborted)
+            relay();
         let rejectTimeout;
         const timeoutPromise = new Promise((_, reject) => { rejectTimeout = reject; });
         const timeout = setTimeout(() => {

package/dist/ports/capabilities.d.ts CHANGED Viewed

@@ -17,6 +17,8 @@ export type AdapterCapability =
  | 'sandbox.resume'
 /** Sandbox can snapshot and release active compute. */
  | 'sandbox.hibernate'
+/** Sandbox can host a long-lived process with streaming stdin/stdout. */
+ | 'sandbox.spawn'
 /** Runtime can commit stable checkpoints. */
  | 'runtime.checkpoint'
 /** Runtime can retry durable boundaries. */

package/dist/ports/harness-context.d.ts CHANGED Viewed

@@ -14,6 +14,7 @@ export interface HarnessAdapterContext {
         toolTimeoutMs: number;
         skillTimeoutMs: number;
         modelTimeoutMs: number;
+        maxParallelToolCalls: number;
         historyWindow?: number;
     };
 }

package/dist/ports/model-provider.d.ts CHANGED Viewed

@@ -29,6 +29,8 @@ export interface ModelDefaults {
     maxTokens?: number;
     topP?: number;
     stopSequences?: string[];
+    /** Whether providers should allow the model to emit multiple independent tool calls in one turn. */
+    parallelToolCalls?: boolean;
     providerOptions?: Record<string, unknown>;
 }
 /** Per-call generation overrides. */
@@ -37,6 +39,8 @@ export interface ModelCallOptions {
     maxTokens?: number;
     topP?: number;
     stopSequences?: string[];
+    /** Overrides whether providers should allow multiple tool calls in one model turn. */
+    parallelToolCalls?: boolean;
     providerOptions?: Record<string, unknown>;
 }
 /** Tool call envelope emitted by model adapters. */

package/dist/ports/state.d.ts CHANGED Viewed

@@ -20,6 +20,12 @@ export interface StateStore {
         before?: string;
     }): Promise<Message[]>;
     clearMessages(sessionId: string): Promise<void>;
+    /**
+     * Atomically replace all messages for a session under one lock (clear +
+     * append). Adapters that implement this provide the spec-mandated atomic
+     * `replaceHistory`; the session layer falls back to clear+append when absent.
+     */
+    replaceMessages?(sessionId: string, messages: Message[]): Promise<void>;
     createRun(record: RunRecord): Promise<void>;
     finishRun(runId: string, patch: FinishRunPatch): Promise<void>;
     getRun(runId: string): Promise<RunRecord | undefined>;

package/dist/runtime/abort.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import { OperationCancelledError, OperationTimeoutError } from '../errors/index.js';
+type AbortScope = 'run' | 'model' | 'tool' | 'workflow' | 'agent' | 'sandbox' | 'memory' | 'workspace';
+export declare function abortError(signal: AbortSignal, scope: AbortScope, message: string): OperationCancelledError | OperationTimeoutError;
+export declare function withAbortSignal<T>(signal: AbortSignal, scope: AbortScope, message: string, fn: () => Promise<T>): Promise<T>;
+export {};

package/dist/runtime/abort.js ADDED Viewed

@@ -0,0 +1,33 @@
+import { OperationCancelledError, OperationTimeoutError } from '../errors/index.js';
+export function abortError(signal, scope, message) {
+    if (signal.reason instanceof OperationTimeoutError)
+        return signal.reason;
+    if (signal.reason instanceof OperationCancelledError)
+        return signal.reason;
+    return new OperationCancelledError(message, { scope }, signal.reason);
+}
+export async function withAbortSignal(signal, scope, message, fn) {
+    if (signal.aborted)
+        throw abortError(signal, scope, message);
+    let abortListener;
+    const abortPromise = new Promise((_, reject) => {
+        abortListener = () => reject(abortError(signal, scope, message));
+        signal.addEventListener('abort', abortListener, { once: true });
+        if (signal.aborted)
+            abortListener();
+    });
+    try {
+        return await Promise.race([fn(), abortPromise]);
+    }
+    catch (error) {
+        if (error instanceof OperationCancelledError || error instanceof OperationTimeoutError)
+            throw error;
+        if (signal.aborted)
+            throw abortError(signal, scope, message);
+        throw error;
+    }
+    finally {
+        if (abortListener)
+            signal.removeEventListener('abort', abortListener);
+    }
+}

package/dist/runtime/durable.d.ts CHANGED Viewed

@@ -45,6 +45,8 @@ export interface DurableRunLease {
     };
     /** Last committed checkpoint, if any. */
     readonly checkpoint?: RunCheckpoint;
+    /** All committed checkpoints for this run, in commit order, for step replay. */
+    readonly checkpoints?: readonly RunCheckpoint[];
     /** Releases this in-memory lease without making the run terminal. */
     release(): Promise<void>;
 }

package/dist/runtime/durable.js CHANGED Viewed

@@ -61,7 +61,8 @@ class InMemoryDurableRuntime {
             const state = current ?? {
                 start: record,
                 status: 'running',
-                attempt: Math.max(1, record.attempt ?? 1)
+                attempt: Math.max(1, record.attempt ?? 1),
+                checkpoints: new Map()
             };
             if (current) {
                 state.attempt += 1;
@@ -95,7 +96,9 @@ class InMemoryDurableRuntime {
                 throw new DurableTerminalRunError(checkpoint.runId, state.status);
             }
             const committedAt = checkpoint.committedAt ?? new Date().toISOString();
-            state.checkpoint = { ...checkpoint, committedAt };
+            const stored = { ...checkpoint, committedAt };
+            state.checkpoint = stored;
+            state.checkpoints.set(stored.stepId, stored);
             this.checkpointCommitCount += 1;
             if (this.options.failAfterCheckpoint === this.checkpointCommitCount) {
                 this.releaseLease(lease);
@@ -148,6 +151,7 @@ class InMemoryDurableRuntime {
                 attempt: state.attempt
             },
             ...(state.checkpoint ? { checkpoint: state.checkpoint } : {}),
+            checkpoints: [...state.checkpoints.values()].sort((a, b) => a.sequence - b.sequence),
             release: async () => {
                 await this.withSessionLock(lease.sessionId, async () => {
                     this.releaseLease(lease);

package/dist/runtime/sessionDurable.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import type { Logger } from '../logger/index.js';
+import type { JsonValue } from '../models/json.js';
+import type { DurableWorkspaceStore } from '../ports/workspace.js';
+import type { DurableRuntime } from './durable.js';
+import { type DurableWorkflowContext } from './steps.js';
+/** Run-id format accepted for durable invocations. */
+export declare const DURABLE_RUN_ID_PATTERN: RegExp;
+/** Caller-supplied durable invocation options (mirror of `InvokeOptions.durable`). */
+export interface DurableInvokeOptions {
+    runId: string;
+    workerId?: string;
+    stepId?: string;
+    attempt?: number;
+}
+/** Durable binding driving one workflow run's lease and workspace lifecycle. */
+export interface DurableWorkflowBinding {
+    readonly runId: string;
+    readonly attempt: number;
+    readonly resumed: boolean;
+    readonly step: DurableWorkflowContext['step'];
+    /** Marks the run successfully terminal and, when policy permits, cleans up the workspace. */
+    finishSuccess(output: JsonValue): Promise<void>;
+    /** Marks the run cancelled-terminal and aborts the workspace (blocks resume). */
+    finishCancelled(error: unknown): Promise<void>;
+    /**
+     * Releases the lease without making the run terminal when it was not settled,
+     * leaving a failed run resumable by a later retry with the same run id.
+     */
+    dispose(): Promise<void>;
+}
+/** Narrows a configured runtime adapter to an executable durable runtime. */
+export declare function isExecutableDurableRuntime(runtime: unknown): runtime is DurableRuntime;
+/**
+ * Acquires a durable runtime lease for a workflow run and, when a workspace
+ * store is configured, starts or resumes the durable workspace and links each
+ * new step checkpoint to a workspace checkpoint (spec 21 §16.1).
+ */
+export declare function beginDurableWorkflow(args: {
+    runtime: DurableRuntime;
+    workspaceStore?: DurableWorkspaceStore;
+    durable: DurableInvokeOptions;
+    defaultWorkerId: string;
+    sessionId: string;
+    workflowId: string;
+    input: JsonValue;
+    signal: AbortSignal;
+    logger: Logger;
+    harnessName: string;
+}): Promise<DurableWorkflowBinding>;

package/dist/runtime/sessionDurable.js ADDED Viewed

@@ -0,0 +1,135 @@
+import { serializeError } from '../errors/index.js';
+import { createDurableWorkflowContext } from './steps.js';
+/** Run-id format accepted for durable invocations. */
+export const DURABLE_RUN_ID_PATTERN = /^[A-Za-z0-9_.:-]{1,200}$/;
+/** Narrows a configured runtime adapter to an executable durable runtime. */
+export function isExecutableDurableRuntime(runtime) {
+    if (!runtime || typeof runtime !== 'object')
+        return false;
+    const candidate = runtime;
+    return typeof candidate.startRun === 'function'
+        && typeof candidate.commitCheckpoint === 'function'
+        && typeof candidate.finishRun === 'function'
+        && typeof candidate.withSessionLock === 'function';
+}
+/**
+ * Acquires a durable runtime lease for a workflow run and, when a workspace
+ * store is configured, starts or resumes the durable workspace and links each
+ * new step checkpoint to a workspace checkpoint (spec 21 §16.1).
+ */
+export async function beginDurableWorkflow(args) {
+    const { runtime, workspaceStore, durable, sessionId, workflowId, input, signal, logger, harnessName } = args;
+    const workerId = durable.workerId ?? args.defaultWorkerId;
+    const lease = await runtime.startRun({
+        runId: durable.runId,
+        sessionId,
+        workerId,
+        stepId: durable.stepId ?? workflowId,
+        input,
+        ...(durable.attempt !== undefined ? { attempt: durable.attempt } : {})
+    });
+    let handle;
+    if (workspaceStore) {
+        const priorReplay = lease.checkpoint?.replay;
+        if (lease.resumed && priorReplay?.workspaceRef) {
+            handle = await workspaceStore.resumeWorkspace({
+                workspaceRef: priorReplay.workspaceRef,
+                ...(priorReplay.checkpointRef ? { checkpointRef: priorReplay.checkpointRef } : {}),
+                runId: lease.runId,
+                sessionId,
+                attempt: lease.attempt,
+                idempotencyKey: `${lease.runId}:${lease.attempt}:resume`,
+                signal
+            });
+        }
+        else {
+            handle = await workspaceStore.startWorkspace({
+                runId: lease.runId,
+                sessionId,
+                workflowId,
+                workerId,
+                attempt: lease.attempt,
+                idempotencyKey: `${lease.runId}:start`,
+                signal
+            });
+        }
+    }
+    const activeHandle = handle;
+    const onStepCommit = workspaceStore && activeHandle
+        ? async (commit) => {
+            const checkpoint = await workspaceStore.pauseWorkspace({
+                handle: activeHandle,
+                stepId: commit.stepId,
+                sequence: commit.sequence,
+                attempt: commit.attempt,
+                reason: 'step_completed',
+                idempotencyKey: `${lease.runId}:${commit.attempt}:pause:${commit.stepId}`,
+                signal
+            });
+            return {
+                runId: lease.runId,
+                sessionId,
+                workerId,
+                leaseId: lease.leaseId,
+                stepId: commit.stepId,
+                sequence: commit.sequence,
+                attempt: commit.attempt,
+                checkpointRef: checkpoint.checkpointRef,
+                workspaceRef: checkpoint.workspaceRef,
+                ...(checkpoint.snapshotRef ? { snapshotRef: checkpoint.snapshotRef } : {}),
+                schemaVersion: 1,
+                committedAt: checkpoint.committedAt,
+                ...(checkpoint.expiresAt ? { expiresAt: checkpoint.expiresAt } : {})
+            };
+        }
+        : undefined;
+    const ctx = createDurableWorkflowContext(runtime, lease, onStepCommit ? { onStepCommit } : {});
+    const autoCleanup = workspaceStore?.info.policy.retention?.cleanupMode === 'adapter_automatic';
+    let settled = false;
+    return {
+        runId: lease.runId,
+        attempt: lease.attempt,
+        resumed: lease.resumed,
+        step: ctx.step,
+        async finishSuccess(output) {
+            await runtime.finishRun(lease.runId, { status: 'succeeded', output });
+            settled = true;
+            if (workspaceStore && activeHandle && autoCleanup) {
+                await workspaceStore.cleanupWorkspace({
+                    workspaceRef: activeHandle.workspaceRef,
+                    reason: 'terminal_success',
+                    idempotencyKey: `${lease.runId}:cleanup`
+                });
+            }
+        },
+        async finishCancelled(error) {
+            await runtime.finishRun(lease.runId, { status: 'cancelled', error: serializeError(error) });
+            settled = true;
+            if (workspaceStore && activeHandle) {
+                await workspaceStore.abortWorkspace({
+                    workspaceRef: activeHandle.workspaceRef,
+                    runId: lease.runId,
+                    sessionId,
+                    reason: 'cancelled',
+                    idempotencyKey: `${lease.runId}:abort`
+                });
+            }
+        },
+        async dispose() {
+            if (settled)
+                return;
+            try {
+                await lease.release();
+            }
+            catch (error) {
+                logger.warn('Failed to release durable lease for retry.', {
+                    harness: harnessName,
+                    session_id: sessionId,
+                    run_id: lease.runId,
+                    workflow_id: workflowId,
+                    error: serializeError(error)
+                });
+            }
+        }
+    };
+}

package/dist/runtime/steps.d.ts CHANGED Viewed

@@ -1,5 +1,23 @@
 import type { JsonValue } from '../models/json.js';
+import type { DurableReplayCheckpoint } from '../ports/workspace.js';
 import type { DurableRunLease, DurableRuntime } from './durable.js';
+/** Metadata describing a new step checkpoint about to be committed. */
+export interface DurableStepCommit {
+    readonly stepId: string;
+    readonly sequence: number;
+    readonly attempt: number;
+    readonly output: JsonValue;
+}
+/** Optional hooks for binding durable steps to a durable workspace store. */
+export interface DurableWorkflowContextOptions {
+    /**
+     * Invoked before each NEW step checkpoint is committed (never on replay). The
+     * returned record is stored on the runtime checkpoint's `replay` field so a
+     * later resume can locate the durable workspace checkpoint. This enforces the
+     * "workspace state first, runtime checkpoint second" ordering (spec 21 §10).
+     */
+    readonly onStepCommit?: (commit: DurableStepCommit) => Promise<DurableReplayCheckpoint | undefined>;
+}
 /** Durable workflow context that exposes explicit checkpoint boundaries. */
 export interface DurableWorkflowContext {
     /** Current durable run lease. */
@@ -19,4 +37,4 @@ export declare class DurableStepError extends Error {
     constructor(message: string);
 }
 /** Creates a durable workflow context bound to an acquired runtime lease. */
-export declare function createDurableWorkflowContext(runtime: DurableRuntime, lease: DurableRunLease): DurableWorkflowContext;
+export declare function createDurableWorkflowContext(runtime: DurableRuntime, lease: DurableRunLease, options?: DurableWorkflowContextOptions): DurableWorkflowContext;

package/dist/runtime/steps.js CHANGED Viewed

@@ -7,8 +7,15 @@ export class DurableStepError extends Error {
     }
 }
 /** Creates a durable workflow context bound to an acquired runtime lease. */
-export function createDurableWorkflowContext(runtime, lease) {
+export function createDurableWorkflowContext(runtime, lease, options = {}) {
     const completed = new Set();
+    // Committed step outputs from prior attempts, keyed by stepId. On resume,
+    // these steps replay their stored output instead of re-running side effects.
+    const replay = new Map();
+    for (const checkpoint of lease.checkpoints ?? []) {
+        replay.set(checkpoint.stepId, checkpoint.output);
+    }
+    let sequence = (lease.checkpoints ?? []).reduce((max, checkpoint) => Math.max(max, checkpoint.sequence), 0);
     return {
         lease,
         async step(stepId, fn) {
@@ -17,9 +24,19 @@ export function createDurableWorkflowContext(runtime, lease) {
                 throw new DurableStepError(`Duplicate durable step id "${stepId}".`);
             }
             completed.add(stepId);
+            // Durable replay: a step committed on a prior attempt returns its stored
+            // output without re-executing `fn()` or re-committing a checkpoint.
+            if (replay.has(stepId)) {
+                return replay.get(stepId);
+            }
             const output = await fn();
             assertJsonSerializable(output, stepId);
-            const sequence = (lease.checkpoint?.sequence ?? 0) + completed.size;
+            sequence += 1;
+            // Workspace state is written before the runtime checkpoint (spec 21 §10),
+            // and the returned reference is linked on the runtime checkpoint.
+            const replayCheckpoint = options.onStepCommit
+                ? await options.onStepCommit({ stepId, sequence, attempt: lease.attempt, output })
+                : undefined;
             const checkpoint = {
                 runId: lease.runId,
                 sessionId: lease.sessionId,
@@ -29,7 +46,8 @@ export function createDurableWorkflowContext(runtime, lease) {
                 input: lease.start.input,
                 attempt: lease.attempt,
                 sequence,
-                output
+                output,
+                ...(replayCheckpoint ? { replay: replayCheckpoint } : {})
             };
             await runtime.commitCheckpoint(checkpoint);
             return output;

package/dist/sandbox/index.d.ts CHANGED Viewed

@@ -22,6 +22,40 @@ export interface ExecCapableSandboxSession extends SandboxSessionBase {
     readonly executor: 'available';
     exec(command: string, opts?: ExecOptions): Promise<ExecResult>;
 }
+/** Options for spawning a long-lived process inside the sandbox. */
+export interface SpawnOptions {
+    /** Command arguments. */
+    args?: readonly string[];
+    /** Working directory inside the sandbox. */
+    cwd?: string;
+    /** Extra environment variables. */
+    env?: Record<string, string>;
+    /** Cancellation signal; aborting terminates the process. */
+    signal?: AbortSignal;
+}
+/** A long-lived process owned by a sandbox session with streaming stdio. */
+export interface SandboxProcess {
+    /** Writes a chunk to the process stdin. */
+    writeStdin(chunk: string): Promise<void>;
+    /** Decoded stdout chunks. Completes when the process exits. */
+    readonly stdout: AsyncIterable<string>;
+    /** Decoded stderr chunks. Completes when the process exits. */
+    readonly stderr: AsyncIterable<string>;
+    /** Resolves with the exit code when the process terminates. Never rejects. */
+    readonly exit: Promise<{
+        exitCode: number;
+        signal?: string;
+    }>;
+    /** Terminates the process. Idempotent. */
+    kill(signal?: 'SIGTERM' | 'SIGKILL'): Promise<void>;
+}
+/** Sandbox session that can host long-lived processes (`sandbox.spawn`). */
+export interface SpawnCapableSandboxSession extends SandboxSessionBase {
+    readonly executor: 'available';
+    spawn(command: string, opts?: SpawnOptions): Promise<SandboxProcess>;
+}
+/** Returns true when a sandbox session can spawn long-lived processes. */
+export declare function isSpawnCapableSession(session: SandboxSessionBase): session is SpawnCapableSandboxSession;
 export type SandboxSession = SandboxSessionBase & {
     exec(command: string, opts?: ExecOptions): Promise<ExecResult>;
 };