@purista/harness 1.2.6 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +56 -38
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +91 -1
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +37 -17
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/sessions/index.d.ts +17 -6
  39. package/dist/sessions/index.js +337 -81
  40. package/dist/skills/index.d.ts +0 -2
  41. package/dist/skills/index.js +0 -8
  42. package/dist/state/in-memory.js +6 -6
  43. package/dist/telemetry/shim.js +2 -6
  44. package/dist/telemetry/span-attrs.d.ts +9 -0
  45. package/dist/telemetry/span-attrs.js +27 -0
  46. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  47. package/dist/testing/fakeLogger.d.ts +29 -0
  48. package/dist/testing/fakeLogger.js +47 -0
  49. package/dist/testing/fakeSandbox.d.ts +27 -0
  50. package/dist/testing/fakeSandbox.js +153 -0
  51. package/dist/testing/fakeStateStore.d.ts +36 -0
  52. package/dist/testing/fakeStateStore.js +66 -0
  53. package/dist/testing/index.d.ts +10 -4
  54. package/dist/testing/index.js +14 -4
  55. package/dist/testing/loggerContract.d.ts +9 -0
  56. package/dist/testing/loggerContract.js +62 -0
  57. package/dist/testing/modelProviderContract.d.ts +12 -0
  58. package/dist/testing/modelProviderContract.js +222 -0
  59. package/dist/testing/recordEvents.d.ts +3 -0
  60. package/dist/testing/recordEvents.js +8 -0
  61. package/dist/testing/stateStoreContract.js +27 -0
  62. package/dist/tools/index.js +26 -1
  63. package/dist/tools/mcp/http.d.ts +2 -0
  64. package/dist/tools/mcp/http.js +34 -21
  65. package/dist/tools/mcp/runner.d.ts +4 -0
  66. package/dist/tools/mcp/runner.js +75 -21
  67. package/dist/tools/mcp/stdio.d.ts +7 -1
  68. package/dist/tools/mcp/stdio.js +102 -23
  69. package/dist/version.d.ts +1 -1
  70. package/dist/version.js +1 -1
  71. package/dist/workspace/in-memory.d.ts +1 -0
  72. package/dist/workspace/in-memory.js +47 -12
  73. package/package.json +2 -1
@@ -1,2 +1,8 @@
1
1
  import type { McpTransportRunner, ResolvedMcpStdioTool } from './runner.js';
2
- export declare function createStdioMcpTransportRunner(config: ResolvedMcpStdioTool): McpTransportRunner;
2
+ export interface StdioRunnerHooks {
3
+ /** Invoked whenever the persistent server process is discarded (exit, handshake failure, close). */
4
+ onReset?: () => void;
5
+ /** Grace period before SIGKILL escalation on close (test override). */
6
+ closeGraceMs?: number;
7
+ }
8
+ export declare function createStdioMcpTransportRunner(config: ResolvedMcpStdioTool, hooks?: StdioRunnerHooks): McpTransportRunner;
@@ -3,12 +3,16 @@ import { isSpawnCapableSession } from '../../sandbox/index.js';
3
3
  import { HARNESS_VERSION } from '../../version.js';
4
4
  import { withMcpTimeout } from './runner.js';
5
5
  const protocolVersion = '2025-06-18';
6
- export function createStdioMcpTransportRunner(config) {
6
+ /** Maximum number of recent stderr characters retained to enrich failure messages. */
7
+ const STDERR_TAIL_LIMIT = 8_192;
8
+ /** How long `close()` waits for a SIGTERM'd server before escalating to SIGKILL. */
9
+ const DEFAULT_CLOSE_GRACE_MS = 2_000;
10
+ export function createStdioMcpTransportRunner(config, hooks = {}) {
7
11
  // A spawn-capable sandbox hosts a single long-lived server multiplexed across
8
12
  // calls (server-side state is preserved); otherwise each call is a one-shot
9
13
  // exec exchange (leak-free but stateless). See spec 07.
10
14
  if (isSpawnCapableSession(config.sandbox)) {
11
- return createPersistentStdioRunner(config, config.sandbox);
15
+ return createPersistentStdioRunner(config, config.sandbox, hooks);
12
16
  }
13
17
  return createOneShotStdioRunner(config);
14
18
  }
@@ -17,7 +21,15 @@ function createOneShotStdioRunner(config) {
17
21
  async function ensureInstalled(signal) {
18
22
  if (!config.install)
19
23
  return;
20
- installPromise ??= runInstall(config, signal);
24
+ if (!installPromise) {
25
+ const promise = runInstall(config, signal);
26
+ // A transient/aborted install failure must not poison later calls.
27
+ void promise.catch(() => {
28
+ if (installPromise === promise)
29
+ installPromise = undefined;
30
+ });
31
+ installPromise = promise;
32
+ }
21
33
  return installPromise;
22
34
  }
23
35
  return {
@@ -49,16 +61,26 @@ function createOneShotStdioRunner(config) {
49
61
  * `initialize` handshake a single time, and multiplexes every subsequent
50
62
  * request over the same pipe correlating responses by JSON-RPC id.
51
63
  */
52
- function createPersistentStdioRunner(config, session) {
64
+ function createPersistentStdioRunner(config, session, hooks = {}) {
53
65
  let installPromise;
54
- let session_proc;
66
+ let serverProcess;
55
67
  let readyPromise;
68
+ let stderrTail = '';
56
69
  let nextId = 1;
57
70
  const pending = new Map();
71
+ const closeGraceMs = hooks.closeGraceMs ?? DEFAULT_CLOSE_GRACE_MS;
58
72
  async function ensureInstalled(signal) {
59
73
  if (!config.install)
60
74
  return;
61
- installPromise ??= runInstall(config, signal);
75
+ if (!installPromise) {
76
+ const promise = runInstall(config, signal);
77
+ // A transient/aborted install failure must not poison later calls.
78
+ void promise.catch(() => {
79
+ if (installPromise === promise)
80
+ installPromise = undefined;
81
+ });
82
+ installPromise = promise;
83
+ }
62
84
  return installPromise;
63
85
  }
64
86
  function rejectAllPending(error) {
@@ -67,8 +89,13 @@ function createPersistentStdioRunner(config, session) {
67
89
  pending.clear();
68
90
  }
69
91
  function teardown() {
70
- session_proc = undefined;
92
+ serverProcess = undefined;
71
93
  readyPromise = undefined;
94
+ hooks.onReset?.();
95
+ }
96
+ function stderrSuffix() {
97
+ const tail = stderrTail.trim();
98
+ return tail ? ` stderr: ${tail}` : '';
72
99
  }
73
100
  async function spawnAndInitialize(signal) {
74
101
  const proc = await session.spawn(config.command, {
@@ -76,7 +103,8 @@ function createPersistentStdioRunner(config, session) {
76
103
  ...(config.env ? { env: config.env } : {}),
77
104
  ...(signal ? { signal } : {})
78
105
  });
79
- session_proc = proc;
106
+ serverProcess = proc;
107
+ stderrTail = '';
80
108
  // Consume stdout line-by-line, dispatching responses to pending requests.
81
109
  void (async () => {
82
110
  let buffer = '';
@@ -97,19 +125,42 @@ function createPersistentStdioRunner(config, session) {
97
125
  // stdout ended or aborted; exit handler performs cleanup.
98
126
  }
99
127
  })();
128
+ // Drain stderr so the child never blocks on a full pipe; keep only a small
129
+ // tail to enrich failure messages.
130
+ void (async () => {
131
+ try {
132
+ for await (const chunk of proc.stderr) {
133
+ stderrTail = (stderrTail + chunk).slice(-STDERR_TAIL_LIMIT);
134
+ }
135
+ }
136
+ catch {
137
+ // stderr ended or aborted; exit handler performs cleanup.
138
+ }
139
+ })();
100
140
  // When the process exits, fail every in-flight request and force a respawn.
101
141
  void proc.exit.then((result) => {
102
- rejectAllPending(mapStdioError(config, 'call', new Error(`MCP server exited with code ${result.exitCode}.`)));
103
- if (session_proc === proc)
142
+ rejectAllPending(mapStdioError(config, 'call', new Error(`MCP server exited with code ${result.exitCode}.${stderrSuffix()}`)));
143
+ if (serverProcess === proc)
104
144
  teardown();
105
145
  });
106
- await writeMessage(proc, {
107
- jsonrpc: '2.0',
108
- id: 0,
109
- method: 'initialize',
110
- params: { protocolVersion, capabilities: {}, clientInfo: { name: '@purista/harness', version: HARNESS_VERSION } }
111
- }, pending, 0, signal);
112
- await proc.writeStdin(`${JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized', params: {} })}\n`);
146
+ try {
147
+ const initResponse = await writeMessage(proc, {
148
+ jsonrpc: '2.0',
149
+ id: 0,
150
+ method: 'initialize',
151
+ params: { protocolVersion, capabilities: {}, clientInfo: { name: '@purista/harness', version: HARNESS_VERSION } }
152
+ }, pending, 0, signal);
153
+ if (initResponse.error) {
154
+ throw mapStdioError(config, 'connect', new Error(initResponse.error.message ?? 'MCP initialize failed.'));
155
+ }
156
+ await proc.writeStdin(`${JSON.stringify({ jsonrpc: '2.0', method: 'notifications/initialized', params: {} })}\n`);
157
+ }
158
+ catch (error) {
159
+ // Never leave an orphaned server behind a failed handshake.
160
+ pending.delete(0);
161
+ await terminateProcess(proc, closeGraceMs);
162
+ throw error;
163
+ }
113
164
  }
114
165
  async function ensureReady(signal) {
115
166
  await ensureInstalled(signal);
@@ -120,9 +171,9 @@ function createPersistentStdioRunner(config, session) {
120
171
  });
121
172
  }
122
173
  await readyPromise;
123
- if (!session_proc)
174
+ if (!serverProcess)
124
175
  throw mapStdioError(config, 'connect', new Error('MCP server is not running.'));
125
- return session_proc;
176
+ return serverProcess;
126
177
  }
127
178
  async function request(method, params, phase, map, options) {
128
179
  return withMcpTimeout({ ...(options?.signal ? { signal: options.signal } : {}), timeoutMs: options?.timeoutMs ?? config.timeoutMs, scope: 'tool' }, async (signal) => {
@@ -156,15 +207,33 @@ function createPersistentStdioRunner(config, session) {
156
207
  return request('tools/call', { name, arguments: input }, 'call', (value) => value, options);
157
208
  },
158
209
  async close() {
159
- const proc = session_proc;
210
+ const proc = serverProcess;
160
211
  teardown();
161
212
  installPromise = undefined;
162
213
  rejectAllPending(mapStdioError(config, 'call', new Error('MCP runner closed.')));
163
214
  if (proc)
164
- await proc.kill('SIGTERM').catch(() => undefined);
215
+ await terminateProcess(proc, closeGraceMs);
165
216
  }
166
217
  };
167
218
  }
219
+ /** SIGTERMs a server process and escalates to SIGKILL when it ignores the grace window. */
220
+ async function terminateProcess(proc, graceMs) {
221
+ await proc.kill('SIGTERM').catch(() => undefined);
222
+ let timer;
223
+ const exited = await Promise.race([
224
+ proc.exit.then(() => true),
225
+ new Promise((resolve) => {
226
+ timer = setTimeout(() => resolve(false), graceMs);
227
+ timer.unref?.();
228
+ })
229
+ ]);
230
+ if (timer)
231
+ clearTimeout(timer);
232
+ if (!exited) {
233
+ await proc.kill('SIGKILL').catch(() => undefined);
234
+ await proc.exit;
235
+ }
236
+ }
168
237
  /** Sends one JSON-RPC message and (when `id` is set) awaits the correlated response. */
169
238
  async function writeMessage(proc, message, pending, id, signal) {
170
239
  const response = new Promise((resolve, reject) => {
@@ -180,7 +249,16 @@ async function writeMessage(proc, message, pending, id, signal) {
180
249
  signal.addEventListener('abort', onAbort, { once: true });
181
250
  }
182
251
  });
183
- await proc.writeStdin(`${JSON.stringify(message)}\n`);
252
+ try {
253
+ await proc.writeStdin(`${JSON.stringify(message)}\n`);
254
+ }
255
+ catch (error) {
256
+ // Drop the orphaned pending entry and mark its promise handled so a later
257
+ // rejectAllPending/abort cannot surface an unhandled rejection.
258
+ pending.delete(id);
259
+ void response.catch(() => undefined);
260
+ throw error;
261
+ }
184
262
  return response;
185
263
  }
186
264
  function dispatchLine(line, pending) {
@@ -250,8 +328,9 @@ async function exchange(config, calls, signal, timeoutMs) {
250
328
  return parseResponses(result.stdout);
251
329
  }
252
330
  catch (error) {
331
+ // Timeouts propagate unwrapped — consistent with the persistent runner.
253
332
  if (error instanceof OperationTimeoutError)
254
- throw mapStdioError(config, calls[0]?.method === 'tools/list' ? 'list' : 'call', error);
333
+ throw error;
255
334
  throw mapStdioError(config, calls[0]?.method === 'tools/list' ? 'list' : 'call', error);
256
335
  }
257
336
  }
package/dist/version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
1
  /** Harness package version, used as the OpenTelemetry instrumentation scope version. */
2
- export declare const HARNESS_VERSION = "0.0.0";
2
+ export declare const HARNESS_VERSION = "1.5.0";
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  /** Harness package version, used as the OpenTelemetry instrumentation scope version. */
2
- export const HARNESS_VERSION = '0.0.0';
2
+ export const HARNESS_VERSION = '1.5.0';
@@ -28,6 +28,7 @@ export declare class InMemoryDurableWorkspaceStore implements DurableWorkspaceSt
28
28
  private toHandle;
29
29
  private isExpired;
30
30
  private findWorkspaceByCheckpoint;
31
+ private evictWorkspaceOps;
31
32
  private requireLiveWorkspace;
32
33
  }
33
34
  /** Creates a fresh in-process durable workspace store. */
@@ -79,8 +79,10 @@ export class InMemoryDurableWorkspaceStore {
79
79
  async pauseWorkspace(opts) {
80
80
  throwIfAborted(opts.signal);
81
81
  const replay = this.opResults.get(opts.idempotencyKey);
82
- if (replay)
83
- return replay;
82
+ if (replay) {
83
+ assertReplayMatches(replay, 'pause', opts.handle.runId, opts.handle.sessionId);
84
+ return replay.result;
85
+ }
84
86
  const workspace = this.requireLiveWorkspace(opts.handle.workspaceRef);
85
87
  const payloadBytes = opts.checkpointPayload === undefined ? 0 : byteLength(opts.checkpointPayload);
86
88
  if (payloadBytes > QUOTA.maxCheckpointPayloadBytes) {
@@ -121,14 +123,16 @@ export class InMemoryDurableWorkspaceStore {
121
123
  }
122
124
  };
123
125
  workspace.checkpoints.push(checkpoint);
124
- this.opResults.set(opts.idempotencyKey, checkpoint);
126
+ this.opResults.set(opts.idempotencyKey, { kind: 'pause', runId: opts.handle.runId, sessionId: opts.handle.sessionId, workspaceRef: workspace.workspaceRef, result: checkpoint });
125
127
  return checkpoint;
126
128
  }
127
129
  async resumeWorkspace(opts) {
128
130
  throwIfAborted(opts.signal);
129
131
  const replay = this.opResults.get(opts.idempotencyKey);
130
- if (replay)
131
- return replay;
132
+ if (replay) {
133
+ assertReplayMatches(replay, 'resume', opts.runId, opts.sessionId);
134
+ return replay.result;
135
+ }
132
136
  const workspace = this.workspaces.get(opts.workspaceRef);
133
137
  if (!workspace || workspace.state === 'cleaned') {
134
138
  throw new WorkspaceError('Workspace not found.', { reason: 'not_found', workspace_ref: opts.workspaceRef });
@@ -148,14 +152,16 @@ export class InMemoryDurableWorkspaceStore {
148
152
  workspace.attempt = opts.attempt;
149
153
  workspace.updatedAt = new Date().toISOString();
150
154
  const handle = this.toHandle(workspace);
151
- this.opResults.set(opts.idempotencyKey, handle);
155
+ this.opResults.set(opts.idempotencyKey, { kind: 'resume', runId: opts.runId, sessionId: opts.sessionId, workspaceRef: workspace.workspaceRef, result: handle });
152
156
  return handle;
153
157
  }
154
158
  async abortWorkspace(opts) {
155
159
  throwIfAborted(opts.signal);
156
160
  const replay = this.opResults.get(opts.idempotencyKey);
157
- if (replay)
158
- return replay;
161
+ if (replay) {
162
+ assertReplayMatches(replay, 'abort', opts.runId, opts.sessionId);
163
+ return replay.result;
164
+ }
159
165
  const workspace = this.workspaces.get(opts.workspaceRef);
160
166
  if (!workspace || workspace.state === 'cleaned') {
161
167
  throw new WorkspaceError('Workspace not found.', { reason: 'not_found', workspace_ref: opts.workspaceRef });
@@ -165,31 +171,35 @@ export class InMemoryDurableWorkspaceStore {
165
171
  workspace.updatedAt = abortedAt;
166
172
  const cleanupEligibleAt = expiryFor('aborted', abortedAt);
167
173
  const result = { workspaceRef: opts.workspaceRef, state: 'aborted', abortedAt, ...(cleanupEligibleAt ? { cleanupEligibleAt } : {}) };
168
- this.opResults.set(opts.idempotencyKey, result);
174
+ this.opResults.set(opts.idempotencyKey, { kind: 'abort', runId: opts.runId, sessionId: opts.sessionId, workspaceRef: opts.workspaceRef, result });
169
175
  return result;
170
176
  }
171
177
  async cleanupWorkspace(opts) {
172
178
  throwIfAborted(opts.signal);
173
179
  const replay = this.opResults.get(opts.idempotencyKey);
174
180
  if (replay)
175
- return replay;
181
+ return replay.result;
176
182
  const workspace = this.workspaces.get(opts.workspaceRef);
177
183
  const completedAt = new Date().toISOString();
178
184
  // Cleanup is idempotent: an already-cleaned (or unknown) workspace returns a
179
185
  // terminal cleaned result rather than throwing.
180
186
  if (!workspace || workspace.state === 'cleaned') {
181
187
  const result = { workspaceRef: opts.workspaceRef, state: 'cleaned', completedAt, deletedBytes: 0, deletedFiles: 0 };
182
- this.opResults.set(opts.idempotencyKey, result);
188
+ this.opResults.set(opts.idempotencyKey, { kind: 'cleanup', runId: workspace?.runId ?? '', sessionId: workspace?.sessionId ?? '', workspaceRef: opts.workspaceRef, result });
183
189
  return result;
184
190
  }
185
191
  const deletedBytes = workspace.bytes;
186
192
  const deletedFiles = workspace.checkpoints.length;
193
+ const { runId, sessionId } = workspace;
187
194
  workspace.state = 'cleaned';
188
195
  workspace.updatedAt = completedAt;
189
196
  workspace.checkpoints = [];
190
197
  workspace.bytes = 0;
198
+ // A cleaned workspace keeps only its slim terminal record; idempotency
199
+ // entries referencing it are evicted so the store does not grow unbounded.
200
+ this.evictWorkspaceOps(opts.workspaceRef);
191
201
  const result = { workspaceRef: opts.workspaceRef, state: 'cleaned', completedAt, deletedBytes, deletedFiles };
192
- this.opResults.set(opts.idempotencyKey, result);
202
+ this.opResults.set(opts.idempotencyKey, { kind: 'cleanup', runId, sessionId, workspaceRef: opts.workspaceRef, result });
193
203
  return result;
194
204
  }
195
205
  async inspectWorkspace(opts) {
@@ -239,6 +249,16 @@ export class InMemoryDurableWorkspaceStore {
239
249
  }
240
250
  return found.workspaceRef;
241
251
  }
252
+ evictWorkspaceOps(workspaceRef) {
253
+ for (const [key, entry] of this.startKeys) {
254
+ if (entry.workspaceRef === workspaceRef)
255
+ this.startKeys.delete(key);
256
+ }
257
+ for (const [key, value] of this.opResults) {
258
+ if (value.workspaceRef === workspaceRef)
259
+ this.opResults.delete(key);
260
+ }
261
+ }
242
262
  requireLiveWorkspace(workspaceRef) {
243
263
  const workspace = this.workspaces.get(workspaceRef);
244
264
  if (!workspace || workspace.state === 'cleaned') {
@@ -250,6 +270,21 @@ export class InMemoryDurableWorkspaceStore {
250
270
  return workspace;
251
271
  }
252
272
  }
273
+ /**
274
+ * Guards a persisted-op replay: a stored entry may only replay when it belongs
275
+ * to the same operation kind and run/session identity, otherwise the reused key
276
+ * is an `idempotency_conflict` (spec 21 §9).
277
+ */
278
+ function assertReplayMatches(op, kind, runId, sessionId) {
279
+ if (op.kind !== kind || op.runId !== runId || op.sessionId !== sessionId) {
280
+ throw new WorkspaceError(`Workspace ${kind} idempotency key reused with a different operation or run/session.`, {
281
+ reason: 'idempotency_conflict',
282
+ workspace_ref: op.workspaceRef,
283
+ run_id: runId,
284
+ session_id: sessionId
285
+ });
286
+ }
287
+ }
253
288
  function throwIfAborted(signal) {
254
289
  if (signal?.aborted) {
255
290
  throw new OperationCancelledError('Workspace operation was cancelled.', { scope: 'workspace' });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@purista/harness",
3
- "version": "1.2.6",
3
+ "version": "1.5.0",
4
4
  "description": "Self-hosted enterprise agent harness for typed tools, agents, workflows, state, sandboxing, and telemetry.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -71,6 +71,7 @@
71
71
  "@modelcontextprotocol/sdk": "^1.29.0",
72
72
  "@opentelemetry/context-async-hooks": "^2.7.1",
73
73
  "@types/node": "^25.9.1",
74
+ "@vitest/coverage-v8": "^4.1.8",
74
75
  "just-bash": "^3.0.1",
75
76
  "typescript": "^6.0.3",
76
77
  "vitest": "^4.1.8"