@purista/harness 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/agents/index.d.ts +1 -0
  2. package/dist/agents/index.js +276 -141
  3. package/dist/errors/catalog.d.ts +4 -3
  4. package/dist/harness/defineHarness.d.ts +45 -4
  5. package/dist/harness/defineHarness.js +51 -2
  6. package/dist/index.d.ts +1 -1
  7. package/dist/memory/sandbox/index.js +7 -1
  8. package/dist/models/registry.d.ts +10 -3
  9. package/dist/models/registry.js +45 -3
  10. package/dist/ports/base-model-provider.js +2 -0
  11. package/dist/ports/capabilities.d.ts +2 -0
  12. package/dist/ports/harness-context.d.ts +1 -0
  13. package/dist/ports/model-provider.d.ts +4 -0
  14. package/dist/ports/state.d.ts +6 -0
  15. package/dist/runtime/abort.d.ts +5 -0
  16. package/dist/runtime/abort.js +33 -0
  17. package/dist/runtime/durable.d.ts +2 -0
  18. package/dist/runtime/durable.js +6 -2
  19. package/dist/runtime/sessionDurable.d.ts +49 -0
  20. package/dist/runtime/sessionDurable.js +135 -0
  21. package/dist/runtime/steps.d.ts +19 -1
  22. package/dist/runtime/steps.js +21 -3
  23. package/dist/sandbox/index.d.ts +34 -0
  24. package/dist/sandbox/index.js +40 -3
  25. package/dist/sessions/index.d.ts +15 -2
  26. package/dist/sessions/index.js +336 -105
  27. package/dist/skills/index.js +19 -6
  28. package/dist/state/in-memory.d.ts +1 -0
  29. package/dist/state/in-memory.js +15 -0
  30. package/dist/telemetry/shim.js +9 -4
  31. package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
  32. package/dist/testing/durableWorkspaceStoreContract.js +64 -28
  33. package/dist/tools/index.d.ts +2 -0
  34. package/dist/tools/index.js +15 -1
  35. package/dist/tools/mcp/runner.js +11 -6
  36. package/dist/tools/mcp/stdio.js +170 -1
  37. package/dist/ulid/index.d.ts +6 -1
  38. package/dist/ulid/index.js +31 -13
  39. package/dist/version.d.ts +2 -0
  40. package/dist/version.js +2 -0
  41. package/dist/workflows/index.js +7 -1
  42. package/dist/workspace/in-memory.d.ts +9 -10
  43. package/dist/workspace/in-memory.js +191 -48
  44. package/package.json +1 -1
  45. package/dist/harness/errors.d.ts +0 -62
  46. package/dist/harness/errors.js +0 -67
@@ -178,16 +178,29 @@ export function loadSkillsSync(skills) {
178
178
  export async function loadSkills(skills) {
179
179
  return loadSkillsSync(skills);
180
180
  }
181
- async function readDirRecursive(root) {
181
+ const SKILL_MOUNT_MAX_FILES = 5_000;
182
+ const SKILL_MOUNT_MAX_BYTES = 100_000_000;
183
+ async function readDirRecursive(root, skillId) {
182
184
  const files = new Map();
185
+ let totalBytes = 0;
183
186
  const walk = async (dir) => {
184
187
  const entries = await fsp.readdir(dir, { withFileTypes: true });
185
188
  for (const entry of entries) {
186
189
  const abs = path.join(dir, entry.name);
187
- if (entry.isDirectory())
190
+ if (entry.isDirectory()) {
188
191
  await walk(abs);
189
- else if (entry.isFile())
190
- files.set(path.posix.normalize(path.relative(root, abs).split(path.sep).join('/')), await fsp.readFile(abs));
192
+ }
193
+ else if (entry.isFile()) {
194
+ if (files.size >= SKILL_MOUNT_MAX_FILES) {
195
+ throw new SkillManifestError('Skill exceeds the mount file-count limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
196
+ }
197
+ const data = await fsp.readFile(abs);
198
+ totalBytes += data.byteLength;
199
+ if (totalBytes > SKILL_MOUNT_MAX_BYTES) {
200
+ throw new SkillManifestError('Skill exceeds the mount byte limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
201
+ }
202
+ files.set(path.posix.normalize(path.relative(root, abs).split(path.sep).join('/')), data);
203
+ }
191
204
  }
192
205
  };
193
206
  await walk(root);
@@ -195,7 +208,7 @@ async function readDirRecursive(root) {
195
208
  }
196
209
  export async function mountSkillsOnce(session, mounted, skills, skillIds) {
197
210
  if (skillIds.length > 0 && typeof session.mount !== 'function') {
198
- throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: 'invalid_frontmatter', directory: '' });
211
+ throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: 'skill_sandbox_unsupported' });
199
212
  }
200
213
  for (const skillId of skillIds) {
201
214
  if (mounted.has(skillId))
@@ -203,7 +216,7 @@ export async function mountSkillsOnce(session, mounted, skills, skillIds) {
203
216
  const skill = skills[skillId];
204
217
  if (!skill)
205
218
  throw new SkillNotFoundError('Skill not found.', { skill_id: skillId });
206
- const files = await readDirRecursive(skill.directory);
219
+ const files = await readDirRecursive(skill.directory, skillId);
207
220
  await session.mount(files, skill.mountPath);
208
221
  mounted.add(skillId);
209
222
  }
@@ -18,6 +18,7 @@ export declare class InMemoryStateStore implements StateStore {
18
18
  before?: string;
19
19
  }): Promise<Message[]>;
20
20
  clearMessages(sessionId: string): Promise<void>;
21
+ replaceMessages(sessionId: string, messages: Message[]): Promise<void>;
21
22
  createRun(record: RunRecord): Promise<void>;
22
23
  finishRun(runId: string, patch: FinishRunPatch): Promise<void>;
23
24
  getRun(runId: string): Promise<RunRecord | undefined>;
@@ -32,6 +32,7 @@ export class InMemoryStateStore {
32
32
  async closeSession(id) {
33
33
  this.sessions.delete(id);
34
34
  this.messages.delete(id);
35
+ this.messageLocks.delete(id);
35
36
  for (const [runId, run] of this.runs) {
36
37
  if (run.sessionId === id) {
37
38
  this.runs.delete(runId);
@@ -71,6 +72,20 @@ export class InMemoryStateStore {
71
72
  this.messages.delete(sessionId);
72
73
  });
73
74
  }
75
+ async replaceMessages(sessionId, messages) {
76
+ return this.withMessageLock(sessionId, async () => {
77
+ const ids = new Set();
78
+ for (const message of messages) {
79
+ if (ids.has(message.id)) {
80
+ throw new StateError('Duplicate message id.', { op: 'appendMessages', reason: 'duplicate_message_id' });
81
+ }
82
+ ids.add(message.id);
83
+ }
84
+ // Atomic clear+append under one lock: validate first, then commit so a
85
+ // failure never leaves history partially replaced.
86
+ this.messages.set(sessionId, [...messages]);
87
+ });
88
+ }
74
89
  async createRun(record) {
75
90
  this.runs.set(record.id, record);
76
91
  }
@@ -1,7 +1,8 @@
1
1
  import { SpanStatusCode, context, metrics, propagation, trace } from '@opentelemetry/api';
2
2
  import { ATTR_ERROR_TYPE } from '@opentelemetry/semantic-conventions';
3
3
  import { HarnessError } from '../errors/index.js';
4
- import { sanitizeForLog } from '../errors/redaction.js';
4
+ import { sanitizeForLog, sanitizeProviderBody } from '../errors/redaction.js';
5
+ import { HARNESS_VERSION } from '../version.js';
5
6
  function sanitizeAttrs(attrs) {
6
7
  const out = {};
7
8
  for (const [key, value] of Object.entries(attrs)) {
@@ -19,12 +20,16 @@ function sanitizeAttrs(attrs) {
19
20
  function errorAttributes(error) {
20
21
  if (error instanceof HarnessError) {
21
22
  const meta = asRecord(error.meta);
22
- const providerBody = meta ? jsonAttr(sanitizeForLog(meta['providerBody'])) : undefined;
23
+ // Content-aware redaction so prompt/message/output content in a provider
24
+ // body never reaches a span, independent of content-capture mode.
25
+ const providerBody = meta ? jsonAttr(sanitizeProviderBody(meta['providerBody'])) : undefined;
23
26
  return {
24
27
  [ATTR_ERROR_TYPE]: error.code,
25
28
  'harness.error.code': error.code,
26
29
  'harness.error.category': error.category,
27
30
  'harness.error.retriable': error.retriable,
31
+ 'harness.error.scope': stringAttr(meta?.['scope']),
32
+ 'harness.error.timeout_ms': numberAttr(meta?.['timeout_ms']),
28
33
  'harness.error.provider': stringAttr(meta?.['provider']),
29
34
  'harness.error.model': stringAttr(meta?.['model']),
30
35
  'harness.error.model_provider_status': numberAttr(meta?.['status']),
@@ -65,8 +70,8 @@ function jsonAttr(value) {
65
70
  }
66
71
  /** OpenTelemetry-backed implementation of {@link TelemetryShim}. */
67
72
  export class OtelTelemetryShim {
68
- tracer = trace.getTracer('@purista/harness');
69
- meter = metrics.getMeter('@purista/harness');
73
+ tracer = trace.getTracer('@purista/harness', HARNESS_VERSION);
74
+ meter = metrics.getMeter('@purista/harness', HARNESS_VERSION);
70
75
  histograms = new Map();
71
76
  counters = new Map();
72
77
  async span(name, attrs, fn) {
@@ -1,3 +1,3 @@
1
1
  import type { DurableWorkspaceStore } from '../ports/workspace.js';
2
- /** Shared Vitest contract for durable workspace store implementations. */
2
+ /** Shared Vitest contract for durable workspace store implementations (spec 21 §18). */
3
3
  export declare function durableWorkspaceStoreContract(make: () => DurableWorkspaceStore | Promise<DurableWorkspaceStore>): void;
@@ -1,41 +1,77 @@
1
1
  import { describe, expect, it } from 'vitest';
2
+ import { WorkspaceError, WorkspaceQuotaExceededError } from '../errors/index.js';
2
3
  import { validateDurableWorkspaceStore } from '../ports/workspace.js';
3
- /** Shared Vitest contract for durable workspace store implementations. */
4
+ /** Shared Vitest contract for durable workspace store implementations (spec 21 §18). */
4
5
  export function durableWorkspaceStoreContract(make) {
5
6
  describe('durableWorkspaceStoreContract', () => {
7
+ const signal = new AbortController().signal;
6
8
  it('validates metadata and round-trips checkpointed workspaces', async () => {
7
9
  const adapter = await make();
8
10
  validateDurableWorkspaceStore(adapter);
9
- const signal = new AbortController().signal;
10
- const handle = await adapter.startWorkspace({
11
- sessionId: 'session-1',
12
- runId: 'run-1',
13
- agentId: 'agent-1',
14
- attempt: 1,
15
- idempotencyKey: 'start-1',
16
- signal
17
- });
18
- const checkpoint = await adapter.pauseWorkspace({
19
- handle,
20
- stepId: 'step-1',
21
- sequence: 1,
22
- attempt: 1,
23
- reason: 'step_completed',
24
- idempotencyKey: 'pause-1',
25
- signal
26
- });
27
- const resumed = await adapter.resumeWorkspace({
28
- workspaceRef: handle.workspaceRef,
29
- checkpointRef: checkpoint.checkpointRef,
30
- sessionId: 'session-1',
31
- runId: 'run-2',
32
- attempt: 2,
33
- idempotencyKey: 'resume-1',
34
- signal
35
- });
11
+ const handle = await adapter.startWorkspace({ sessionId: 'session-1', runId: 'run-1', agentId: 'agent-1', attempt: 1, idempotencyKey: 'start-1', signal });
12
+ const checkpoint = await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'pause-1', signal });
13
+ const resumed = await adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, checkpointRef: checkpoint.checkpointRef, sessionId: 'session-1', runId: 'run-2', attempt: 2, idempotencyKey: 'resume-1', signal });
36
14
  const inspection = await adapter.inspectWorkspace?.({ workspaceRef: resumed.workspaceRef, signal });
37
15
  expect(resumed.workspaceRef).toBe(handle.workspaceRef);
38
16
  expect(inspection?.checkpoints.map((item) => item.checkpointRef)).toEqual([checkpoint.checkpointRef]);
39
17
  });
18
+ it('start is idempotent and conflicts on a reused key with a different identity', async () => {
19
+ const adapter = await make();
20
+ const first = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'k', signal });
21
+ const replay = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'k', signal });
22
+ expect(replay.workspaceRef).toBe(first.workspaceRef);
23
+ await expect(adapter.startWorkspace({ sessionId: 's2', runId: 'r2', attempt: 1, idempotencyKey: 'k', signal })).rejects.toMatchObject({
24
+ constructor: WorkspaceError,
25
+ meta: { reason: 'idempotency_conflict' }
26
+ });
27
+ });
28
+ it('blocks resume after abort and is idempotent on repeated cleanup', async () => {
29
+ const adapter = await make();
30
+ const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
31
+ await adapter.abortWorkspace?.({ workspaceRef: handle.workspaceRef, runId: 'r', sessionId: 's', reason: 'cancelled', idempotencyKey: 'abort', signal });
32
+ await expect(adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'resume', signal })).rejects.toMatchObject({
33
+ constructor: WorkspaceError,
34
+ meta: { reason: 'aborted' }
35
+ });
36
+ const cleaned = await adapter.cleanupWorkspace?.({ workspaceRef: handle.workspaceRef, reason: 'aborted', idempotencyKey: 'cleanup-1', signal });
37
+ expect(cleaned?.state).toBe('cleaned');
38
+ const cleanedAgain = await adapter.cleanupWorkspace?.({ workspaceRef: handle.workspaceRef, reason: 'aborted', idempotencyKey: 'cleanup-2', signal });
39
+ expect(cleanedAgain?.state).toBe('cleaned');
40
+ });
41
+ it('resume of a cleaned workspace reports not_found', async () => {
42
+ const adapter = await make();
43
+ const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
44
+ await adapter.cleanupWorkspace?.({ workspaceRef: handle.workspaceRef, reason: 'manual', idempotencyKey: 'cleanup', signal });
45
+ await expect(adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'resume', signal })).rejects.toMatchObject({
46
+ constructor: WorkspaceError,
47
+ meta: { reason: 'not_found' }
48
+ });
49
+ });
50
+ it('missing checkpoint on resume reports missing_checkpoint', async () => {
51
+ const adapter = await make();
52
+ const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
53
+ await expect(adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, checkpointRef: 'nope', sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'resume', signal })).rejects.toMatchObject({
54
+ constructor: WorkspaceError,
55
+ meta: { reason: 'missing_checkpoint' }
56
+ });
57
+ });
58
+ it('cancellation surfaces OperationCancelledError with workspace scope', async () => {
59
+ const adapter = await make();
60
+ const aborted = AbortSignal.abort();
61
+ await expect(adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal: aborted })).rejects.toMatchObject({
62
+ code: 'OPERATION_CANCELLED',
63
+ meta: { scope: 'workspace' }
64
+ });
65
+ });
66
+ it('enforces the active workspace quota when advertised', async () => {
67
+ const adapter = await make();
68
+ const quota = adapter.info?.policy?.quota?.maxActiveWorkspaces;
69
+ if (!quota || quota > 200)
70
+ return; // only exercise small, declared quotas
71
+ for (let i = 0; i < quota; i += 1) {
72
+ await adapter.startWorkspace({ sessionId: 's', runId: `r${i}`, attempt: 1, idempotencyKey: `start-${i}`, signal });
73
+ }
74
+ await expect(adapter.startWorkspace({ sessionId: 's', runId: 'overflow', attempt: 1, idempotencyKey: 'overflow', signal })).rejects.toBeInstanceOf(WorkspaceQuotaExceededError);
75
+ });
40
76
  });
41
77
  }
@@ -3,6 +3,8 @@ import type { Message } from '../models/state.js';
3
3
  import type { BuiltinToolName } from '../harness/defineHarness.js';
4
4
  import type { ModelToolSpec } from '../ports/model-provider.js';
5
5
  import type { SandboxSession } from '../sandbox/index.js';
6
+ /** Canonical built-in tool names. Custom tool ids and skill ids must not collide with these. */
7
+ export declare const BUILTIN_TOOL_NAMES: readonly BuiltinToolName[];
6
8
  export declare const BUILTIN_ALIAS_TO_CANONICAL: Record<string, BuiltinToolName>;
7
9
  export declare function getBuiltinToolSpecs(enabled: readonly BuiltinToolName[], session: SandboxSession): ModelToolSpec[];
8
10
  export declare function invokeBuiltinTool(nameOrAlias: string, input: unknown, session: SandboxSession, signal?: AbortSignal): Promise<JsonValue>;
@@ -1,6 +1,11 @@
1
1
  import { z } from 'zod';
2
2
  import { SandboxNoExecutorError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
3
3
  import { ulid } from '../ulid/index.js';
4
+ /** Canonical built-in tool names. Custom tool ids and skill ids must not collide with these. */
5
+ export const BUILTIN_TOOL_NAMES = ['bash', 'read', 'write', 'edit', 'glob', 'grep', 'list'];
6
+ /** Per-file and total byte caps for the built-in `grep` read-and-match fallback. */
7
+ const GREP_MAX_FILE_BYTES = 2_000_000;
8
+ const GREP_MAX_TOTAL_BYTES = 50_000_000;
4
9
  export const BUILTIN_ALIAS_TO_CANONICAL = {
5
10
  bash: 'bash', Bash: 'bash',
6
11
  read: 'read', Read: 'read',
@@ -81,10 +86,19 @@ export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
81
86
  }
82
87
  const entries = await session.list(parsed.path, { recursive: true });
83
88
  const matches = [];
89
+ let scannedBytes = 0;
84
90
  for (const entry of entries) {
85
91
  if (entry.kind !== 'file')
86
92
  continue;
87
- const lines = (await session.readText(entry.path)).split('\n');
93
+ // Bound memory and regex work: skip individual files over the cap and
94
+ // stop once the total scanned size cap is reached.
95
+ if (entry.size !== undefined && entry.size > GREP_MAX_FILE_BYTES)
96
+ continue;
97
+ if (scannedBytes >= GREP_MAX_TOTAL_BYTES)
98
+ break;
99
+ const content = await session.readText(entry.path);
100
+ scannedBytes += content.length;
101
+ const lines = content.split('\n');
88
102
  for (let i = 0; i < lines.length; i += 1) {
89
103
  const currentLine = lines[i];
90
104
  if (currentLine !== undefined && rx.test(currentLine))
@@ -3,15 +3,14 @@ import { assertMcpJsonSchema, validateMcpJsonSchema } from './schema.js';
3
3
  const discoveredCache = new WeakMap();
4
4
  export async function getMcpToolSpecs(tools, allowlist, ctx = {}) {
5
5
  const allowed = new Set(allowlist);
6
- const specs = [];
7
6
  const registry = ctx.registry ?? createMcpRunnerRegistry();
8
- for (const [toolId, tool] of Object.entries(tools)) {
7
+ const specs = await Promise.all(Object.entries(tools).map(async ([toolId, tool]) => {
9
8
  if (!allowed.has(toolId) || !isMcpToolDefinition(tool))
10
- continue;
9
+ return undefined;
11
10
  const config = resolveMcpTool(toolId, tool, ctx);
12
- specs.push(await getResolvedModelToolSpec(config, registry.getRunner(config), ctx.signal, ctx.warn));
13
- }
14
- return specs;
11
+ return getResolvedModelToolSpec(config, registry.getRunner(config), ctx.signal, ctx.warn);
12
+ }));
13
+ return specs.filter((spec) => spec !== undefined);
15
14
  }
16
15
  export async function invokeMcpTool(first, second, input, fourth) {
17
16
  if (typeof first === 'string') {
@@ -90,6 +89,10 @@ async function discoverConfiguredTool(config, runner, signal, warn) {
90
89
  let promise = discoveredCache.get(runner);
91
90
  if (!promise) {
92
91
  promise = runner.listTools({ ...(signal ? { signal } : {}), timeoutMs: config.timeoutMs });
92
+ void promise.catch(() => {
93
+ if (discoveredCache.get(runner) === promise)
94
+ discoveredCache.delete(runner);
95
+ });
93
96
  discoveredCache.set(runner, promise);
94
97
  }
95
98
  const tools = await promise;
@@ -196,6 +199,8 @@ export async function withMcpTimeout(opts, fn) {
196
199
  const controller = new AbortController();
197
200
  const relay = () => controller.abort(opts.signal?.reason);
198
201
  opts.signal?.addEventListener('abort', relay, { once: true });
202
+ if (opts.signal?.aborted)
203
+ relay();
199
204
  let timeoutId;
200
205
  const timeout = new Promise((_, reject) => {
201
206
  timeoutId = setTimeout(() => {
@@ -1,7 +1,18 @@
1
1
  import { McpProtocolError, OperationTimeoutError, SandboxNoExecutorError } from '../../errors/index.js';
2
+ import { isSpawnCapableSession } from '../../sandbox/index.js';
3
+ import { HARNESS_VERSION } from '../../version.js';
2
4
  import { withMcpTimeout } from './runner.js';
3
5
  const protocolVersion = '2025-06-18';
4
6
  export function createStdioMcpTransportRunner(config) {
7
+ // A spawn-capable sandbox hosts a single long-lived server multiplexed across
8
+ // calls (server-side state is preserved); otherwise each call is a one-shot
9
+ // exec exchange (leak-free but stateless). See spec 07.
10
+ if (isSpawnCapableSession(config.sandbox)) {
11
+ return createPersistentStdioRunner(config, config.sandbox);
12
+ }
13
+ return createOneShotStdioRunner(config);
14
+ }
15
+ function createOneShotStdioRunner(config) {
5
16
  let installPromise;
6
17
  async function ensureInstalled(signal) {
7
18
  if (!config.install)
@@ -33,6 +44,164 @@ export function createStdioMcpTransportRunner(config) {
33
44
  }
34
45
  };
35
46
  }
47
+ /**
48
+ * Persistent stdio transport: spawns the server once, performs the MCP
49
+ * `initialize` handshake a single time, and multiplexes every subsequent
50
+ * request over the same pipe correlating responses by JSON-RPC id.
51
+ */
52
+ function createPersistentStdioRunner(config, session) {
53
+ let installPromise;
54
+ let session_proc;
55
+ let readyPromise;
56
+ let nextId = 1;
57
+ const pending = new Map();
58
+ async function ensureInstalled(signal) {
59
+ if (!config.install)
60
+ return;
61
+ installPromise ??= runInstall(config, signal);
62
+ return installPromise;
63
+ }
64
+ function rejectAllPending(error) {
65
+ for (const request of pending.values())
66
+ request.reject(error);
67
+ pending.clear();
68
+ }
69
+ function teardown() {
70
+ session_proc = undefined;
71
+ readyPromise = undefined;
72
+ }
73
+ async function spawnAndInitialize(signal) {
74
+ const proc = await session.spawn(config.command, {
75
+ ...(config.args ? { args: config.args } : {}),
76
+ ...(config.env ? { env: config.env } : {}),
77
+ ...(signal ? { signal } : {})
78
+ });
79
+ session_proc = proc;
80
+ // Consume stdout line-by-line, dispatching responses to pending requests.
81
+ void (async () => {
82
+ let buffer = '';
83
+ try {
84
+ for await (const chunk of proc.stdout) {
85
+ buffer += chunk;
86
+ let newlineIndex = buffer.indexOf('\n');
87
+ while (newlineIndex >= 0) {
88
+ const line = buffer.slice(0, newlineIndex).trim();
89
+ buffer = buffer.slice(newlineIndex + 1);
90
+ if (line.startsWith('{'))
91
+ dispatchLine(line, pending);
92
+ newlineIndex = buffer.indexOf('\n');
93
+ }
94
+ }
95
+ }
96
+ catch {
97
+ // stdout ended or aborted; exit handler performs cleanup.
98
+ }
99
+ })();
100
+ // When the process exits, fail every in-flight request and force a respawn.
101
+ void proc.exit.then((result) => {
102
+ rejectAllPending(mapStdioError(config, 'call', new Error(`MCP server exited with code ${result.exitCode}.`)));
103
+ if (session_proc === proc)
104
+ teardown();
105
+ });
106
+ await writeMessage(proc, {
107
+ jsonrpc: '2.0',
108
+ id: 0,
109
+ method: 'initialize',
110
+ params: { protocolVersion, capabilities: {}, clientInfo: { name: '@purista/harness', version: HARNESS_VERSION } }
111
+ }, pending, 0, signal);
112
+ await proc.writeStdin(`${JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized', params: {} })}\n`);
113
+ }
114
+ async function ensureReady(signal) {
115
+ await ensureInstalled(signal);
116
+ if (!readyPromise) {
117
+ readyPromise = spawnAndInitialize(signal).catch((error) => {
118
+ teardown();
119
+ throw error;
120
+ });
121
+ }
122
+ await readyPromise;
123
+ if (!session_proc)
124
+ throw mapStdioError(config, 'connect', new Error('MCP server is not running.'));
125
+ return session_proc;
126
+ }
127
+ async function request(method, params, phase, map, options) {
128
+ return withMcpTimeout({ ...(options?.signal ? { signal: options.signal } : {}), timeoutMs: options?.timeoutMs ?? config.timeoutMs, scope: 'tool' }, async (signal) => {
129
+ const proc = await ensureReady(signal);
130
+ const id = ++nextId;
131
+ try {
132
+ const response = await writeMessage(proc, { jsonrpc: '2.0', id, method, params }, pending, id, signal);
133
+ if (response.error)
134
+ throw mapStdioError(config, phase, new Error(response.error.message ?? `MCP ${phase} failed.`));
135
+ return map(response.result);
136
+ }
137
+ catch (error) {
138
+ pending.delete(id);
139
+ if (error instanceof OperationTimeoutError)
140
+ throw error;
141
+ if (error instanceof McpProtocolError)
142
+ throw error;
143
+ throw mapStdioError(config, phase, error);
144
+ }
145
+ });
146
+ }
147
+ return {
148
+ async listTools(options) {
149
+ return request('tools/list', {}, 'list', (value) => {
150
+ if (!isRecord(value) || !Array.isArray(value['tools']))
151
+ return [];
152
+ return value['tools'];
153
+ }, options);
154
+ },
155
+ async callTool(name, input, options) {
156
+ return request('tools/call', { name, arguments: input }, 'call', (value) => value, options);
157
+ },
158
+ async close() {
159
+ const proc = session_proc;
160
+ teardown();
161
+ installPromise = undefined;
162
+ rejectAllPending(mapStdioError(config, 'call', new Error('MCP runner closed.')));
163
+ if (proc)
164
+ await proc.kill('SIGTERM').catch(() => undefined);
165
+ }
166
+ };
167
+ }
168
+ /** Sends one JSON-RPC message and (when `id` is set) awaits the correlated response. */
169
+ async function writeMessage(proc, message, pending, id, signal) {
170
+ const response = new Promise((resolve, reject) => {
171
+ pending.set(id, { resolve, reject });
172
+ if (signal) {
173
+ const onAbort = () => {
174
+ pending.delete(id);
175
+ reject(signal.reason ?? new Error('MCP request was aborted.'));
176
+ };
177
+ if (signal.aborted)
178
+ onAbort();
179
+ else
180
+ signal.addEventListener('abort', onAbort, { once: true });
181
+ }
182
+ });
183
+ await proc.writeStdin(`${JSON.stringify(message)}\n`);
184
+ return response;
185
+ }
186
+ function dispatchLine(line, pending) {
187
+ let parsed;
188
+ try {
189
+ parsed = JSON.parse(line);
190
+ }
191
+ catch {
192
+ return;
193
+ }
194
+ if (!isRecord(parsed) || !('id' in parsed))
195
+ return;
196
+ const id = parsed['id'];
197
+ if (typeof id !== 'number')
198
+ return;
199
+ const request = pending.get(id);
200
+ if (!request)
201
+ return;
202
+ pending.delete(id);
203
+ request.resolve(parsed);
204
+ }
36
205
  async function runInstall(config, signal) {
37
206
  if (config.sandbox.executor !== 'available') {
38
207
  throw new SandboxNoExecutorError('MCP stdio install requires a sandbox executor.', { session_id: 'unknown' });
@@ -62,7 +231,7 @@ async function exchange(config, calls, signal, timeoutMs) {
62
231
  params: {
63
232
  protocolVersion,
64
233
  capabilities: {},
65
- clientInfo: { name: '@purista/harness', version: '0.0.0' }
234
+ clientInfo: { name: '@purista/harness', version: HARNESS_VERSION }
66
235
  }
67
236
  }),
68
237
  JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized', params: {} }),
@@ -1,6 +1,11 @@
1
1
  /**
2
2
  * Generates a monotonic ULID-like identifier.
3
3
  *
4
- * Subsequent calls within the same millisecond increment the random suffix to preserve ordering.
4
+ * Ordering is guaranteed even across same-millisecond bursts and wall-clock
5
+ * regressions: the time component never moves backward (it is clamped to a
6
+ * monotonic high-water mark), and within a millisecond the 80-bit random
7
+ * component is incremented. Each new millisecond seeds the random component
8
+ * from a cryptographically-strong source, so intra-millisecond collisions are
9
+ * negligible across calls and processes.
5
10
  */
6
11
  export declare function ulid(): string;
@@ -1,4 +1,6 @@
1
+ import { webcrypto } from 'node:crypto';
1
2
  const ENCODING = '0123456789ABCDEFGHJKMNPQRSTVWXYZ';
3
+ const RANDOM_MAX = (1n << 80n) - 1n;
2
4
  let lastTime = -1;
3
5
  let lastRandom = 0n;
4
6
  function encode(value, length) {
@@ -12,24 +14,40 @@ function encode(value, length) {
12
14
  }
13
15
  return out;
14
16
  }
15
- function nextRandom() {
16
- const now = Date.now();
17
- if (now !== lastTime) {
18
- lastTime = now;
19
- const seed = BigInt(Math.floor(Math.random() * 2 ** 24));
20
- lastRandom = seed << 56n;
21
- return lastRandom;
17
+ /** Cryptographically-strong 80-bit random component. */
18
+ function randomEntropy() {
19
+ const bytes = new Uint8Array(10);
20
+ webcrypto.getRandomValues(bytes);
21
+ let value = 0n;
22
+ for (const byte of bytes) {
23
+ value = (value << 8n) | BigInt(byte);
22
24
  }
23
- lastRandom += 1n;
24
- return lastRandom;
25
+ return value;
25
26
  }
26
27
  /**
27
28
  * Generates a monotonic ULID-like identifier.
28
29
  *
29
- * Subsequent calls within the same millisecond increment the random suffix to preserve ordering.
30
+ * Ordering is guaranteed even across same-millisecond bursts and wall-clock
31
+ * regressions: the time component never moves backward (it is clamped to a
32
+ * monotonic high-water mark), and within a millisecond the 80-bit random
33
+ * component is incremented. Each new millisecond seeds the random component
34
+ * from a cryptographically-strong source, so intra-millisecond collisions are
35
+ * negligible across calls and processes.
30
36
  */
31
37
  export function ulid() {
32
- const timePart = encode(BigInt(Date.now()), 10);
33
- const randomPart = encode(nextRandom(), 16);
34
- return `${timePart}${randomPart}`;
38
+ const now = Date.now();
39
+ if (now > lastTime) {
40
+ lastTime = now;
41
+ lastRandom = randomEntropy();
42
+ }
43
+ else {
44
+ // Same millisecond or a backward clock step: keep ordering by never
45
+ // emitting a smaller time, and advance the random component instead.
46
+ lastRandom += 1n;
47
+ if (lastRandom > RANDOM_MAX) {
48
+ lastTime += 1;
49
+ lastRandom = randomEntropy();
50
+ }
51
+ }
52
+ return `${encode(BigInt(lastTime), 10)}${encode(lastRandom, 16)}`;
35
53
  }
@@ -0,0 +1,2 @@
1
+ /** Harness package version, used as the OpenTelemetry instrumentation scope version. */
2
+ export declare const HARNESS_VERSION = "0.0.0";
@@ -0,0 +1,2 @@
1
+ /** Harness package version, used as the OpenTelemetry instrumentation scope version. */
2
+ export const HARNESS_VERSION = '0.0.0';
@@ -1,5 +1,6 @@
1
1
  import { z } from 'zod';
2
2
  import { OperationCancelledError, ValidationError } from '../errors/index.js';
3
+ import { withAbortSignal } from '../runtime/abort.js';
3
4
  export async function runWorkflow(args) {
4
5
  if (args.ctx['signal'].aborted)
5
6
  throw new OperationCancelledError('Workflow execution was cancelled.', { scope: 'workflow' });
@@ -11,7 +12,12 @@ export async function runWorkflow(args) {
11
12
  catch (error) {
12
13
  throw new ValidationError('Workflow input validation failed.', { where: 'workflow_input', issues: validationIssues(error) }, error);
13
14
  }
14
- const output = await args.workflow.handler({ ...args.ctx, input: parsed });
15
+ // The handler error (including errors bubbling from agent/model/tool calls) is
16
+ // intentionally preserved by identity so failure terminalization never masks
17
+ // the original failure. See spec 10 "Errors".
18
+ const output = await withAbortSignal(args.ctx['signal'], 'workflow', 'Workflow execution was cancelled.', () => args.workflow.handler({ ...args.ctx, input: parsed }));
19
+ if (args.ctx['signal'].aborted)
20
+ throw new OperationCancelledError('Workflow execution was cancelled.', { scope: 'workflow' });
15
21
  if (!args.workflow.output)
16
22
  return output;
17
23
  try {