@purista/harness 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/agents/index.d.ts +1 -0
  2. package/dist/agents/index.js +278 -142
  3. package/dist/errors/catalog.d.ts +4 -3
  4. package/dist/harness/defineHarness.d.ts +26 -2
  5. package/dist/harness/defineHarness.js +51 -2
  6. package/dist/index.d.ts +1 -1
  7. package/dist/memory/sandbox/index.js +7 -1
  8. package/dist/models/registry.js +45 -3
  9. package/dist/ports/base-model-provider.js +2 -0
  10. package/dist/ports/capabilities.d.ts +2 -0
  11. package/dist/ports/harness-context.d.ts +1 -0
  12. package/dist/ports/model-provider.d.ts +4 -0
  13. package/dist/ports/state.d.ts +6 -0
  14. package/dist/runtime/abort.d.ts +5 -0
  15. package/dist/runtime/abort.js +33 -0
  16. package/dist/runtime/durable.d.ts +2 -0
  17. package/dist/runtime/durable.js +6 -2
  18. package/dist/runtime/sessionDurable.d.ts +49 -0
  19. package/dist/runtime/sessionDurable.js +135 -0
  20. package/dist/runtime/steps.d.ts +19 -1
  21. package/dist/runtime/steps.js +21 -3
  22. package/dist/sandbox/index.d.ts +34 -0
  23. package/dist/sandbox/index.js +40 -3
  24. package/dist/sessions/index.d.ts +15 -2
  25. package/dist/sessions/index.js +212 -99
  26. package/dist/skills/index.js +19 -6
  27. package/dist/state/in-memory.d.ts +1 -0
  28. package/dist/state/in-memory.js +15 -0
  29. package/dist/telemetry/shim.js +9 -4
  30. package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
  31. package/dist/testing/durableWorkspaceStoreContract.js +64 -28
  32. package/dist/tools/index.d.ts +2 -0
  33. package/dist/tools/index.js +17 -2
  34. package/dist/tools/mcp/runner.js +11 -6
  35. package/dist/tools/mcp/stdio.js +170 -1
  36. package/dist/ulid/index.d.ts +6 -1
  37. package/dist/ulid/index.js +31 -13
  38. package/dist/version.d.ts +2 -0
  39. package/dist/version.js +2 -0
  40. package/dist/workflows/index.js +7 -1
  41. package/dist/workspace/in-memory.d.ts +9 -10
  42. package/dist/workspace/in-memory.js +191 -48
  43. package/package.json +1 -1
  44. package/dist/harness/errors.d.ts +0 -62
  45. package/dist/harness/errors.js +0 -67
@@ -2,6 +2,10 @@ import { createRequire } from 'node:module';
2
2
  import path from 'node:path';
3
3
  import { OperationCancelledError, OperationTimeoutError, HarnessConfigError, SandboxError, SandboxNoExecutorError } from '../errors/index.js';
4
4
  const require = createRequire(import.meta.url);
5
/**
 * Returns true when a sandbox session can spawn long-lived processes.
 *
 * The check is purely structural: a session qualifies when it exposes a
 * callable `spawn` member. A missing session (`null`/`undefined`) is reported
 * as not spawn-capable instead of raising a `TypeError`, so the predicate is
 * safe to call on optional values.
 *
 * @param session - Sandbox session candidate to probe.
 * @returns `true` when `session.spawn` is a function, otherwise `false`.
 */
export function isSpawnCapableSession(session) {
  return typeof session?.spawn === 'function';
}
5
9
  function now() { return new Date().toISOString(); }
6
10
  function normalizePath(input) {
7
11
  if (!input.startsWith('/'))
@@ -63,7 +67,7 @@ class MemorySandboxSession {
63
67
  const relative = root === '/' ? k.slice(1) : k.slice(root.length + 1);
64
68
  if (!opts?.recursive && relative.includes('/'))
65
69
  continue;
66
- if (opts?.glob && !new RegExp(opts.glob.replaceAll('.', '\\.').replaceAll('*', '.*')).test(k))
70
+ if (opts?.glob && !globToRegExp(opts.glob).test(k))
67
71
  continue;
68
72
  out.push({ name: k.split('/').at(-1) ?? '', path: k, kind: v.kind, ...(v.kind === 'file' ? { size: v.data.byteLength } : {}) });
69
73
  }
@@ -155,11 +159,44 @@ export function bashSandbox(opts) {
155
159
  }
156
160
  };
157
161
  }
162
/**
 * Translate a glob to a fully-anchored RegExp matched against the absolute
 * path.
 *
 * - Any run of `*` (`*`, `**`, `***`, ...) becomes a single `.*`. Collapsing
 *   the whole run keeps the pattern free of adjacent `.*.*` groups, which
 *   would otherwise backtrack catastrophically on a non-matching path.
 * - `?` matches exactly one character.
 * - Every other regex metacharacter is escaped to a literal, so a glob can
 *   never produce a `SyntaxError`.
 *
 * The expression is anchored at both ends (`*.ts` does not match `a.tsx`) and
 * compiled with the `s` (dotAll) flag so wildcards really match any character,
 * including line terminators that may appear in a path.
 *
 * @param glob - Glob pattern to translate.
 * @returns Anchored regular expression equivalent to the glob.
 */
function globToRegExp(glob) {
  // Built once per call instead of once per character.
  const regexMetacharacter = /[.+^${}()|[\]\\]/;
  let out = '^';
  for (let i = 0; i < glob.length; i += 1) {
    const char = glob[i];
    if (char === '*') {
      // Swallow the rest of the run so it contributes exactly one wildcard.
      while (glob[i + 1] === '*') {
        i += 1;
      }
      out += '.*';
    }
    else if (char === '?') {
      out += '.';
    }
    else if (regexMetacharacter.test(char)) {
      out += `\\${char}`;
    }
    else {
      out += char;
    }
  }
  return new RegExp(`${out}$`, 's');
}
158
190
  export function autoDetectSandbox() {
159
191
  try {
160
192
  return bashSandbox();
161
193
  }
162
- catch {
163
- return inMemorySandbox();
194
+ catch (error) {
195
+ // Only fall back to the no-executor sandbox when just-bash is absent.
196
+ // A real configuration/init error must surface, not silently downgrade.
197
+ if (error instanceof HarnessConfigError && error.meta?.reason === 'just_bash_not_installed') {
198
+ return inMemorySandbox();
199
+ }
200
+ throw error;
164
201
  }
165
202
  }
@@ -1,7 +1,8 @@
1
1
  import type { Logger } from '../logger/index.js';
2
- import type { Harness, HarnessDefaults, BuilderState, TelemetryOptions } from '../harness/defineHarness.js';
2
+ import type { RunEvent, Harness, HarnessDefaults, BuilderState, TelemetryOptions } from '../harness/defineHarness.js';
3
3
  import type { MemoryAdapter } from '../ports/memory.js';
4
- import type { HarnessInspection } from '../ports/capabilities.js';
4
+ import type { DurableRuntimeAdapter, HarnessInspection } from '../ports/capabilities.js';
5
+ import type { DurableWorkspaceStore } from '../ports/workspace.js';
5
6
  import type { Sandbox } from '../sandbox/index.js';
6
7
  import type { StateStore } from '../ports/state.js';
7
8
  import { type TelemetryShim } from '../telemetry/index.js';
@@ -13,6 +14,8 @@ type HarnessDefinition<S extends BuilderState> = {
13
14
  state: StateStore;
14
15
  sandbox: Sandbox;
15
16
  memory: MemoryAdapter;
17
+ runtime?: DurableRuntimeAdapter;
18
+ workspaceStore?: DurableWorkspaceStore;
16
19
  defaults: HarnessDefaults;
17
20
  models: NonNullable<S['models']>;
18
21
  tools: NonNullable<S['tools']>;
@@ -21,5 +24,15 @@ type HarnessDefinition<S extends BuilderState> = {
21
24
  workflows: NonNullable<S['workflows']>;
22
25
  inspection: HarnessInspection;
23
26
  };
27
/**
 * Relay run events from an in-process run to a stream consumer.
 *
 * The unread events live in a bounded queue: consumed events are removed (no
 * growing cursor over a shared array), and on overflow the oldest non-terminal
 * unread event is dropped and counted. The count is reported to the consumer
 * as a `stream.overflow` event, so a slow consumer never skips an unread event
 * silently. Delivery is promise-notified rather than time-polled, so there is
 * no fixed per-event latency or periodic timer.
 *
 * @param run - Starts the run. It receives the `onEvent` sink to publish
 *   events through and returns a promise that settles when the run ends.
 * @returns An async iterable yielding the relayed events. Iteration rejects
 *   with the run's error once the buffered events have been delivered.
 */
export declare function relayRunEvents(run: (onEvent: (event: RunEvent) => Promise<void>) => Promise<unknown>): AsyncIterable<RunEvent>;
24
37
  export declare function createSessionHarness<S extends BuilderState>(definition: HarnessDefinition<S>): Harness<S>;
25
38
  export {};
@@ -3,6 +3,8 @@ import { ulid } from '../ulid/index.js';
3
3
  import { runDefaultAgent } from '../agents/index.js';
4
4
  import { runWorkflow } from '../workflows/index.js';
5
5
  import { createMemoryFacade, createSessionMemory } from '../ports/memory.js';
6
+ import { beginDurableWorkflow, DURABLE_RUN_ID_PATTERN, isExecutableDurableRuntime } from '../runtime/sessionDurable.js';
7
+ import { HarnessConfigError } from '../errors/catalog.js';
6
8
  import { loadSkillsSync } from '../skills/index.js';
7
9
  import { createModelRegistry } from '../models/registry.js';
8
10
  import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
@@ -11,6 +13,82 @@ const NEVER_ABORT_SIGNAL = new AbortController().signal;
11
13
  function now() {
12
14
  return new Date().toISOString();
13
15
  }
16
/** Upper bound on unread events kept for a stream consumer. */
const STREAM_MAX_BUFFERED_EVENTS = 1024;
/** Event types that are never evicted when the unread queue overflows. */
const STREAM_TERMINAL_EVENT_TYPES = new Set(['run.finished', 'agent.finished']);
/**
 * Relay run events from an in-process run to a stream consumer.
 *
 * The unread events live in a bounded queue: consumed events are removed (no
 * growing cursor over a shared array), and on overflow the oldest non-terminal
 * unread event is dropped and counted. The count is surfaced to the consumer
 * as a `stream.overflow` event, so a slow consumer never skips an unread event
 * silently. Delivery is promise-notified rather than time-polled, so there is
 * no fixed per-event latency or periodic timer.
 *
 * A failing run is reported after every buffered event has been delivered.
 * Failure is tracked with an explicit flag, so a run that rejects with a falsy
 * reason (`undefined`, `0`, `''`, ...) is still rethrown to the consumer.
 *
 * If the consumer stops iterating early, the generator still waits for `run`
 * to settle before it completes.
 *
 * @param run - Starts the run; receives the `onEvent` sink and returns a
 *   promise that settles when the run ends.
 * @returns Async generator yielding the relayed events in order.
 * @throws The rejection reason of `run`, once the queue has been drained.
 */
export async function* relayRunEvents(run) {
  const queue = [];
  let dropped = 0;
  let liveRunId = 'unknown';
  let done = false;
  let failed = false;
  let failure;
  let wake;
  // Resolve the consumer's pending wait (if any) exactly once.
  const notify = () => {
    const resolve = wake;
    wake = undefined;
    resolve?.();
  };
  const result = run((event) => {
    if ('runId' in event)
      liveRunId = event.runId;
    if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
      // Evict the oldest event that is not terminal; terminal events are kept
      // even if that lets the queue exceed the bound.
      const dropIndex = queue.findIndex((candidate) => !STREAM_TERMINAL_EVENT_TYPES.has(candidate.type));
      if (dropIndex >= 0) {
        queue.splice(dropIndex, 1);
        dropped += 1;
      }
    }
    queue.push(event);
    notify();
    return Promise.resolve();
  })
    .catch((error) => {
      // Record the failure separately from its value so falsy reasons count.
      failed = true;
      failure = error;
      return undefined;
    })
    .finally(() => {
      done = true;
      notify();
    });
  try {
    while (true) {
      if (dropped > 0) {
        const droppedCount = dropped;
        dropped = 0;
        yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped: droppedCount };
      }
      while (queue.length > 0) {
        yield queue.shift();
        // Surface a fresh overflow notice promptly between events.
        if (dropped > 0)
          break;
      }
      if (queue.length === 0 && dropped === 0) {
        if (done) {
          break;
        }
        // No await between the empty check and installing `wake`, so a producer
        // push cannot be lost between them.
        await new Promise((resolve) => {
          wake = resolve;
        });
      }
    }
  }
  finally {
    await result.catch(() => undefined);
  }
  if (failed)
    throw failure;
}
14
92
  function validateInvokeOptions(opts) {
15
93
  if (opts?.historyWindow !== undefined && opts.historyWindow < 0) {
16
94
  throw new ValidationError('Invoke options are invalid.', { where: 'invoke_options', issues: { historyWindow: opts.historyWindow } });
@@ -30,6 +108,12 @@ function normalizeMessage(message, sessionId) {
30
108
  export function createSessionHarness(definition) {
31
109
  const resolvedSkills = loadSkillsSync(definition.skills);
32
110
  const sessionStates = new Map();
111
+ // In-flight session-state creations, memoized so concurrent first-time callers
112
+ // share one sandbox open (no orphaned sessions) and one SessionState object
113
+ // (so the synchronous busy check/set below serializes runs correctly).
114
+ const sessionStateOpenings = new Map();
115
+ // Stable per-harness-instance worker id used as the default durable lease owner.
116
+ const durableWorkerId = `worker_${ulid()}`;
33
117
  const contentCaptureMode = resolveContentCaptureMode(definition.telemetry);
34
118
  const telemetry = withTelemetryFlavor(definition.telemetryShim ?? createTelemetryShim(), definition.telemetry);
35
119
  const adapterMetrics = createMetrics(telemetry, { 'harness.name': definition.name });
@@ -45,6 +129,7 @@ export function createSessionHarness(definition) {
45
129
  toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
46
130
  skillTimeoutMs: definition.defaults.skillTimeoutMs ?? 60_000,
47
131
  modelTimeoutMs: definition.defaults.modelTimeoutMs ?? 300_000,
132
+ maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
48
133
  ...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
49
134
  }
50
135
  };
@@ -56,24 +141,36 @@ export function createSessionHarness(definition) {
56
141
  if (existing) {
57
142
  return existing;
58
143
  }
144
+ const createdAt = now();
59
145
  const created = {
60
146
  id: sessionId,
61
- createdAt: now(),
62
- updatedAt: now(),
147
+ createdAt,
148
+ updatedAt: createdAt,
63
149
  runCount: 0
64
150
  };
65
151
  await definition.state.upsertSession(created);
66
152
  return created;
67
153
  }
68
/**
 * Resolve the in-process state of a session, opening its sandbox session on
 * first use.
 *
 * Concurrent first-time callers share one in-flight opening, so only one
 * sandbox session is opened and everyone receives the same state object. A
 * failed opening is forgotten so a later call can retry.
 *
 * @param sessionId - Session whose state is requested.
 * @returns Promise of the cached or newly created session state.
 */
function getSessionState(sessionId) {
  const ready = sessionStates.get(sessionId);
  if (ready) {
    return Promise.resolve(ready);
  }
  const inFlight = sessionStateOpenings.get(sessionId);
  if (inFlight) {
    return inFlight;
  }
  const openSessionState = async () => {
    const sandboxSession = await definition.sandbox.open({ sessionId, runId: `init_${ulid()}` });
    const fresh = { busy: false, sandboxSession, mountedSkills: new Set() };
    sessionStates.set(sessionId, fresh);
    sessionStateOpenings.delete(sessionId);
    return fresh;
  };
  const opening = openSessionState();
  // Let a failed open be retried instead of caching the rejection forever.
  // The handler runs asynchronously, i.e. after the registration below.
  opening.catch(() => sessionStateOpenings.delete(sessionId));
  sessionStateOpenings.set(sessionId, opening);
  return opening;
}
78
175
  async function appendEvents(runId, events) {
79
176
  try {
@@ -141,6 +238,21 @@ export function createSessionHarness(definition) {
141
238
  function memoryFacade(opts) {
142
239
  return createMemoryFacade(memoryOptions(opts.sessionId, opts.sandboxSession, opts.signal, opts));
143
240
  }
241
/**
 * Validate `opts.durable` and pick the runtime for a durable run.
 *
 * @param opts - Invoke options; a run is durable only when `opts.durable` is set.
 * @returns The harness' executable durable runtime, or `undefined` for an
 *   ephemeral run.
 * @throws ValidationError when `durable.runId` does not match
 *   `DURABLE_RUN_ID_PATTERN`.
 * @throws HarnessConfigError when the harness has no executable durable
 *   runtime configured.
 */
function resolveDurableRuntime(opts) {
  const durable = opts?.durable;
  if (!durable) {
    return undefined;
  }
  const runIdIsValid = DURABLE_RUN_ID_PATTERN.test(durable.runId);
  if (!runIdIsValid) {
    throw new ValidationError('Durable run id is invalid.', { where: 'invoke_options', issues: { 'durable.runId': durable.runId } });
  }
  const runtime = definition.runtime;
  if (isExecutableDurableRuntime(runtime)) {
    return runtime;
  }
  throw new HarnessConfigError('Durable execution requires an executable .runtime(...) adapter.', { reason: 'durable_runtime_required', path: 'runtime' });
}
144
256
  return {
145
257
  inspect() {
146
258
  return definition.inspection;
@@ -202,14 +314,21 @@ export function createSessionHarness(definition) {
202
314
  throw new ValidationError('Session history replacement failed validation.', { where: 'session_history', issues: { message } }, error);
203
315
  }
204
316
  });
205
- await definition.state.clearMessages(sessionId);
206
- if (parsed.length > 0) {
207
- await definition.state.appendMessages(sessionId, parsed);
317
+ if (definition.state.replaceMessages) {
318
+ await definition.state.replaceMessages(sessionId, parsed);
319
+ }
320
+ else {
321
+ // Non-atomic fallback for adapters without atomic replace.
322
+ await definition.state.clearMessages(sessionId);
323
+ if (parsed.length > 0) {
324
+ await definition.state.appendMessages(sessionId, parsed);
325
+ }
208
326
  }
209
327
  },
210
328
  async close() {
211
329
  await definition.state.closeSession(sessionId);
212
330
  sessionStates.delete(sessionId);
331
+ sessionStateOpenings.delete(sessionId);
213
332
  await state.sandboxSession.close();
214
333
  }
215
334
  };
@@ -248,51 +367,13 @@ export function createSessionHarness(definition) {
248
367
  $infer: {}
249
368
  };
250
369
  async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
251
- const buffer = [];
252
- const maxBufferedEvents = 1024;
253
- let dropped = 0;
254
- let done = false;
255
- let failure;
256
- let liveRunId = 'unknown';
257
- const result = runAgentCall(sessionId, agentId, agent, input, opts, (event) => {
258
- if ('runId' in event)
259
- liveRunId = event.runId;
260
- if (buffer.length >= maxBufferedEvents) {
261
- const dropIndex = buffer.findIndex((candidate) => candidate.type !== 'run.finished');
262
- if (dropIndex >= 0) {
263
- buffer.splice(dropIndex, 1);
264
- dropped += 1;
265
- }
266
- }
267
- buffer.push(event);
268
- return Promise.resolve();
269
- }).catch((error) => {
270
- failure = error;
271
- return undefined;
272
- }).finally(() => {
273
- done = true;
274
- });
275
- let cursor = 0;
276
- while (true) {
277
- if (dropped > 0) {
278
- yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped };
279
- dropped = 0;
280
- }
281
- while (cursor < buffer.length) {
282
- yield buffer[cursor];
283
- cursor += 1;
284
- }
285
- if (done) {
286
- await result.catch(() => undefined);
287
- if (failure)
288
- throw failure;
289
- return;
290
- }
291
- await new Promise((resolve) => setTimeout(resolve, 5));
292
- }
370
+ yield* relayRunEvents((onEvent) => runAgentCall(sessionId, agentId, agent, input, opts, onEvent));
293
371
  }
294
372
  async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
295
373
  validateInvokeOptions(opts);
374
+ if (opts?.durable) {
375
+ throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
376
+ }
296
377
  if (opts?.signal?.aborted) {
297
378
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
298
379
  }
@@ -363,6 +444,7 @@ export function createSessionHarness(definition) {
363
444
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
364
445
  signal: runSignal.signal,
365
446
  toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
447
+ maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
366
448
  logger: definition.logger,
367
449
  telemetry,
368
450
  emitEvent: emit,
@@ -418,51 +500,11 @@ export function createSessionHarness(definition) {
418
500
  }
419
501
  }
420
502
  async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
421
- const buffer = [];
422
- const maxBufferedEvents = 1024;
423
- let dropped = 0;
424
- let done = false;
425
- let failure;
426
- let liveRunId = 'unknown';
427
- const result = runWorkflowCall(sessionId, workflowId, workflow, input, opts, (event) => {
428
- if ('runId' in event)
429
- liveRunId = event.runId;
430
- if (buffer.length >= maxBufferedEvents) {
431
- const dropIndex = buffer.findIndex((candidate) => candidate.type !== 'run.finished');
432
- if (dropIndex >= 0) {
433
- buffer.splice(dropIndex, 1);
434
- dropped += 1;
435
- }
436
- }
437
- buffer.push(event);
438
- return Promise.resolve();
439
- }).catch((error) => {
440
- failure = error;
441
- return undefined;
442
- }).finally(() => {
443
- done = true;
444
- });
445
- let cursor = 0;
446
- while (true) {
447
- if (dropped > 0) {
448
- yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped };
449
- dropped = 0;
450
- }
451
- while (cursor < buffer.length) {
452
- yield buffer[cursor];
453
- cursor += 1;
454
- }
455
- if (done) {
456
- await result.catch(() => undefined);
457
- if (failure)
458
- throw failure;
459
- return;
460
- }
461
- await new Promise((resolve) => setTimeout(resolve, 5));
462
- }
503
+ yield* relayRunEvents((onEvent) => runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent));
463
504
  }
464
505
  async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
465
506
  validateInvokeOptions(opts);
507
+ const durableRuntime = resolveDurableRuntime(opts);
466
508
  if (opts?.signal?.aborted) {
467
509
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
468
510
  }
@@ -473,7 +515,7 @@ export function createSessionHarness(definition) {
473
515
  }
474
516
  state.busy = true;
475
517
  const startedAt = now();
476
- const runId = ulid();
518
+ const runId = opts?.durable ? opts.durable.runId : ulid();
477
519
  const memory = memoryFacade({
478
520
  sessionId,
479
521
  runId,
@@ -503,7 +545,22 @@ export function createSessionHarness(definition) {
503
545
  state.busy = false;
504
546
  throw error;
505
547
  }
548
+ let durableBinding;
506
549
  try {
550
+ if (durableRuntime && opts?.durable) {
551
+ durableBinding = await beginDurableWorkflow({
552
+ runtime: durableRuntime,
553
+ ...(definition.workspaceStore ? { workspaceStore: definition.workspaceStore } : {}),
554
+ durable: opts.durable,
555
+ defaultWorkerId: durableWorkerId,
556
+ sessionId,
557
+ workflowId,
558
+ input: input,
559
+ signal: runSignal.signal,
560
+ logger: definition.logger,
561
+ harnessName: definition.name
562
+ });
563
+ }
507
564
  const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
508
565
  'harness.name': definition.name,
509
566
  'harness.session.id': sessionId,
@@ -532,6 +589,7 @@ export function createSessionHarness(definition) {
532
589
  metadata: opts?.metadata ?? {},
533
590
  metrics: workflowMetrics,
534
591
  memory,
592
+ step: durableBinding ? durableBinding.step : passthroughStep,
535
593
  agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
536
594
  agentId,
537
595
  async (agentInput, agentOpts) => {
@@ -568,6 +626,7 @@ export function createSessionHarness(definition) {
568
626
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
569
627
  signal: agentSignal.signal,
570
628
  toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
629
+ maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
571
630
  logger: definition.logger,
572
631
  telemetry,
573
632
  emitEvent: emit,
@@ -597,6 +656,9 @@ export function createSessionHarness(definition) {
597
656
  }));
598
657
  }));
599
658
  const finishedAt = now();
659
+ if (durableBinding) {
660
+ await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
661
+ }
600
662
  const runFinished = { type: 'run.finished', runId, at: finishedAt, output: result };
601
663
  await emit(runFinished);
602
664
  await definition.state.finishRun(runId, { status: 'succeeded', finishedAt, output: result });
@@ -608,6 +670,9 @@ export function createSessionHarness(definition) {
608
670
  const finalError = normalizeRunError(error, runSignal.signal);
609
671
  const finishedAt = now();
610
672
  const serialized = serializeError(finalError);
673
+ if (durableBinding && finalError instanceof OperationCancelledError) {
674
+ await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
675
+ }
611
676
  const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
612
677
  log('Harness workflow run failed.', {
613
678
  harness: definition.name,
@@ -637,10 +702,41 @@ export function createSessionHarness(definition) {
637
702
  throw finalError;
638
703
  }
639
704
  finally {
705
+ // Releases the lease for a non-cancel failure so a retry with the same run
706
+ // id can resume; a no-op once the run was settled (success/cancel).
707
+ if (durableBinding)
708
+ await durableBinding.dispose();
640
709
  runSignal.cleanup();
641
710
  state.busy = false;
642
711
  }
643
712
  }
713
/**
 * Step runner used when a workflow executes without durable execution: the
 * step body is invoked directly and its result is handed back unchanged.
 *
 * @param _stepId - Step identifier; unused by the pass-through runner.
 * @param fn - Step body to execute.
 * @returns Whatever `fn` returns.
 */
function passthroughStep(_stepId, fn) {
  const outcome = fn();
  return outcome;
}
717
/**
 * Run a durable finalization side effect (runtime finish / workspace
 * lifecycle) without ever masking the primary run outcome (spec 21 §16.1
 * step 7).
 *
 * A failure of `step` is counted and logged, then swallowed.
 *
 * @param args - Run context (`sessionId`, `runId`, `workflowId`, `operation`)
 *   attached to the counter and the log entry.
 * @param step - Finalization side effect to execute.
 */
async function guardDurableStep(args, step) {
  try {
    await step();
    return;
  }
  catch (error) {
    const { sessionId, runId, workflowId, operation } = args;
    telemetry.recordCounter('harness.runs.durable_errors', 1, {
      harness: definition.name,
      'harness.run.durable.operation': operation
    });
    definition.logger.error('Durable finalization step failed; preserving run outcome.', {
      harness: definition.name,
      session_id: sessionId,
      run_id: runId,
      workflow_id: workflowId,
      operation,
      error: serializeError(error)
    });
  }
}
644
740
  async function terminalizeFailedRun(args) {
645
741
  await runFailureTerminalizationStep(args, 'emit_run_finished', args.emitRunFinished);
646
742
  await runFailureTerminalizationStep(args, 'finish_run', args.finishRun);
@@ -869,6 +965,12 @@ function sanitizeEventForPersistence(event) {
869
965
  };
870
966
  case 'stream.overflow':
871
967
  return { dropped: event.dropped };
968
+ default: {
969
+ // Exhaustiveness guard: adding a RunEvent variant without updating this
970
+ // sanitizer becomes a compile error instead of silently persisting undefined.
971
+ event;
972
+ return {};
973
+ }
872
974
  }
873
975
  }
874
976
  function isJsonRecord(value) {
@@ -891,9 +993,11 @@ function normalizeSerializedRunError(error) {
891
993
  }
892
994
  function createRunSignal(parent, timeoutMs) {
893
995
  const controller = new AbortController();
894
- const relay = () => controller.abort(parent?.reason);
996
+ const relay = () => controller.abort(runAbortReason(parent?.reason));
895
997
  if (parent)
896
998
  parent.addEventListener('abort', relay, { once: true });
999
+ if (parent?.aborted)
1000
+ relay();
897
1001
  const timeout = timeoutMs && timeoutMs > 0
898
1002
  ? setTimeout(() => controller.abort(new OperationTimeoutError('Run timed out.', { scope: 'run', timeout_ms: timeoutMs })), timeoutMs)
899
1003
  : undefined;
@@ -911,10 +1015,14 @@ function combineSignals(primary, secondary) {
911
1015
  if (!secondary)
912
1016
  return { signal: primary, cleanup: () => undefined };
913
1017
  const controller = new AbortController();
914
- const relayPrimary = () => controller.abort(primary.reason);
915
- const relaySecondary = () => controller.abort(secondary.reason);
1018
+ const relayPrimary = () => controller.abort(runAbortReason(primary.reason));
1019
+ const relaySecondary = () => controller.abort(runAbortReason(secondary.reason));
916
1020
  primary.addEventListener('abort', relayPrimary, { once: true });
917
1021
  secondary.addEventListener('abort', relaySecondary, { once: true });
1022
+ if (primary.aborted)
1023
+ relayPrimary();
1024
+ else if (secondary.aborted)
1025
+ relaySecondary();
918
1026
  return {
919
1027
  signal: controller.signal,
920
1028
  cleanup: () => {
@@ -923,3 +1031,8 @@ function combineSignals(primary, secondary) {
923
1031
  }
924
1032
  };
925
1033
  }
1034
/**
 * Normalise an abort reason into a run-level error.
 *
 * @param reason - The `reason` of the aborted signal.
 * @returns `reason` itself when it already is an `OperationCancelledError` or
 *   `OperationTimeoutError`; otherwise a new run-scoped
 *   `OperationCancelledError` constructed with `reason` as its third argument.
 */
function runAbortReason(reason) {
  const isRunError = reason instanceof OperationCancelledError || reason instanceof OperationTimeoutError;
  return isRunError
    ? reason
    : new OperationCancelledError('Run was cancelled.', { scope: 'run' }, reason);
}
@@ -178,16 +178,29 @@ export function loadSkillsSync(skills) {
178
178
  export async function loadSkills(skills) {
179
179
  return loadSkillsSync(skills);
180
180
  }
181
- async function readDirRecursive(root) {
181
+ const SKILL_MOUNT_MAX_FILES = 5_000;
182
+ const SKILL_MOUNT_MAX_BYTES = 100_000_000;
183
+ async function readDirRecursive(root, skillId) {
182
184
  const files = new Map();
185
+ let totalBytes = 0;
183
186
  const walk = async (dir) => {
184
187
  const entries = await fsp.readdir(dir, { withFileTypes: true });
185
188
  for (const entry of entries) {
186
189
  const abs = path.join(dir, entry.name);
187
- if (entry.isDirectory())
190
+ if (entry.isDirectory()) {
188
191
  await walk(abs);
189
- else if (entry.isFile())
190
- files.set(path.posix.normalize(path.relative(root, abs).split(path.sep).join('/')), await fsp.readFile(abs));
192
+ }
193
+ else if (entry.isFile()) {
194
+ if (files.size >= SKILL_MOUNT_MAX_FILES) {
195
+ throw new SkillManifestError('Skill exceeds the mount file-count limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
196
+ }
197
+ const data = await fsp.readFile(abs);
198
+ totalBytes += data.byteLength;
199
+ if (totalBytes > SKILL_MOUNT_MAX_BYTES) {
200
+ throw new SkillManifestError('Skill exceeds the mount byte limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
201
+ }
202
+ files.set(path.posix.normalize(path.relative(root, abs).split(path.sep).join('/')), data);
203
+ }
191
204
  }
192
205
  };
193
206
  await walk(root);
@@ -195,7 +208,7 @@ async function readDirRecursive(root) {
195
208
  }
196
209
  export async function mountSkillsOnce(session, mounted, skills, skillIds) {
197
210
  if (skillIds.length > 0 && typeof session.mount !== 'function') {
198
- throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: 'invalid_frontmatter', directory: '' });
211
+ throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: 'skill_sandbox_unsupported' });
199
212
  }
200
213
  for (const skillId of skillIds) {
201
214
  if (mounted.has(skillId))
@@ -203,7 +216,7 @@ export async function mountSkillsOnce(session, mounted, skills, skillIds) {
203
216
  const skill = skills[skillId];
204
217
  if (!skill)
205
218
  throw new SkillNotFoundError('Skill not found.', { skill_id: skillId });
206
- const files = await readDirRecursive(skill.directory);
219
+ const files = await readDirRecursive(skill.directory, skillId);
207
220
  await session.mount(files, skill.mountPath);
208
221
  mounted.add(skillId);
209
222
  }
@@ -18,6 +18,7 @@ export declare class InMemoryStateStore implements StateStore {
18
18
  before?: string;
19
19
  }): Promise<Message[]>;
20
20
  clearMessages(sessionId: string): Promise<void>;
21
+ replaceMessages(sessionId: string, messages: Message[]): Promise<void>;
21
22
  createRun(record: RunRecord): Promise<void>;
22
23
  finishRun(runId: string, patch: FinishRunPatch): Promise<void>;
23
24
  getRun(runId: string): Promise<RunRecord | undefined>;
@@ -32,6 +32,7 @@ export class InMemoryStateStore {
32
32
  async closeSession(id) {
33
33
  this.sessions.delete(id);
34
34
  this.messages.delete(id);
35
+ this.messageLocks.delete(id);
35
36
  for (const [runId, run] of this.runs) {
36
37
  if (run.sessionId === id) {
37
38
  this.runs.delete(runId);
@@ -71,6 +72,20 @@ export class InMemoryStateStore {
71
72
  this.messages.delete(sessionId);
72
73
  });
73
74
  }
75
/**
 * Replace the whole message history of a session in one locked operation.
 *
 * The incoming messages are checked for duplicate ids before anything is
 * written, so a rejected replacement leaves the stored history untouched.
 *
 * @param sessionId - Session whose history is replaced.
 * @param messages - New history; a shallow copy of the array is stored.
 * @throws StateError when two messages share the same id.
 */
async replaceMessages(sessionId, messages) {
  return this.withMessageLock(sessionId, async () => {
    const seenIds = new Set();
    for (const { id } of messages) {
      if (seenIds.has(id)) {
        // NOTE(review): `op` reports 'appendMessages' although this is
        // replaceMessages — confirm whether that is intentional.
        throw new StateError('Duplicate message id.', { op: 'appendMessages', reason: 'duplicate_message_id' });
      }
      seenIds.add(id);
    }
    // Validation is complete: commit clear+append as one assignment under the
    // lock so history is never left partially replaced.
    this.messages.set(sessionId, [...messages]);
  });
}
74
89
  async createRun(record) {
75
90
  this.runs.set(record.id, record);
76
91
  }