@purista/harness 1.2.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +59 -39
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +91 -1
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +37 -17
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.d.ts +2 -1
  25. package/dist/models/registry.js +28 -37
  26. package/dist/models/stream-pump.d.ts +16 -0
  27. package/dist/models/stream-pump.js +77 -0
  28. package/dist/ports/base-model-provider.d.ts +7 -1
  29. package/dist/ports/base-model-provider.js +384 -87
  30. package/dist/ports/capabilities.d.ts +16 -2
  31. package/dist/ports/context-checkpoints.d.ts +63 -0
  32. package/dist/ports/context-checkpoints.js +33 -0
  33. package/dist/ports/index.d.ts +1 -0
  34. package/dist/ports/index.js +1 -0
  35. package/dist/ports/model-provider.d.ts +110 -0
  36. package/dist/runtime/durable.d.ts +11 -0
  37. package/dist/runtime/durable.js +15 -2
  38. package/dist/runtime/sessionDurable.js +47 -21
  39. package/dist/sessions/index.d.ts +17 -6
  40. package/dist/sessions/index.js +337 -81
  41. package/dist/skills/index.d.ts +0 -2
  42. package/dist/skills/index.js +0 -8
  43. package/dist/state/in-memory.js +6 -6
  44. package/dist/telemetry/shim.js +2 -6
  45. package/dist/telemetry/span-attrs.d.ts +9 -0
  46. package/dist/telemetry/span-attrs.js +27 -0
  47. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  48. package/dist/testing/fakeLogger.d.ts +29 -0
  49. package/dist/testing/fakeLogger.js +47 -0
  50. package/dist/testing/fakeSandbox.d.ts +27 -0
  51. package/dist/testing/fakeSandbox.js +153 -0
  52. package/dist/testing/fakeStateStore.d.ts +36 -0
  53. package/dist/testing/fakeStateStore.js +66 -0
  54. package/dist/testing/index.d.ts +10 -4
  55. package/dist/testing/index.js +14 -4
  56. package/dist/testing/loggerContract.d.ts +9 -0
  57. package/dist/testing/loggerContract.js +62 -0
  58. package/dist/testing/modelProviderContract.d.ts +12 -0
  59. package/dist/testing/modelProviderContract.js +222 -0
  60. package/dist/testing/recordEvents.d.ts +3 -0
  61. package/dist/testing/recordEvents.js +8 -0
  62. package/dist/testing/stateStoreContract.js +27 -0
  63. package/dist/tools/index.js +26 -1
  64. package/dist/tools/mcp/http.d.ts +2 -0
  65. package/dist/tools/mcp/http.js +34 -21
  66. package/dist/tools/mcp/runner.d.ts +4 -0
  67. package/dist/tools/mcp/runner.js +75 -21
  68. package/dist/tools/mcp/stdio.d.ts +7 -1
  69. package/dist/tools/mcp/stdio.js +102 -23
  70. package/dist/version.d.ts +1 -1
  71. package/dist/version.js +1 -1
  72. package/dist/workspace/in-memory.d.ts +1 -0
  73. package/dist/workspace/in-memory.js +47 -12
  74. package/package.json +2 -1
@@ -0,0 +1,62 @@
1
+ import { describe, expect, it } from 'vitest';
2
/** Level method names the contract exercises, in the order records are emitted and checked. */
const LEVELS = [
    'trace',
    'debug',
    'info',
    'warn',
    'error',
    'fatal'
];
/** Timestamp shape required of captured records: date, `T`, time, optional fraction, `Z` or a numeric offset. */
const RFC3339 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$/;
4
/**
 * Returns the logger's `records` property when it is an array, otherwise `undefined`.
 *
 * @param logger Logger under test; only its `records` property is read.
 */
function capturedRecords(logger) {
    const { records } = logger;
    if (!Array.isArray(records)) {
        return undefined;
    }
    return records;
}
8
/**
 * Registers the shared vitest suite that every `Logger` implementation must pass.
 *
 * The record-shape tests only run against a capturing logger, i.e. one that
 * exposes its emitted records through a `records` array (e.g. `FakeLogger`).
 * For any other logger those tests return early and only the behavioral
 * checks (level methods exist, nothing throws, `child` works) apply.
 *
 * @param make Factory called at the start of each test to build the logger under test.
 */
export function loggerContract(make) {
    describe('loggerContract', () => {
        it('exposes every level method and none of them throw', () => {
            const subject = make();
            LEVELS.forEach((level) => {
                expect(typeof subject[level]).toBe('function');
                const emit = () => subject[level](`${level} message`, { level });
                expect(emit).not.toThrow();
            });
        });
        it('child(bindings) returns a logger with the full level surface', () => {
            const derived = make().child({ component: 'contract' });
            LEVELS.forEach((level) => {
                expect(typeof derived[level]).toBe('function');
            });
            const emit = () => derived.info('child message');
            expect(emit).not.toThrow();
        });
        it('emits one record per level with an RFC3339 time when records are capturable', () => {
            const subject = make();
            const sink = capturedRecords(subject);
            if (sink === undefined) {
                // Non-capturing logger: nothing to inspect.
                return;
            }
            // Drop anything recorded before this test so indexes line up with LEVELS.
            sink.length = 0;
            LEVELS.forEach((level) => {
                subject[level](`${level} message`);
            });
            expect(sink).toHaveLength(LEVELS.length);
            LEVELS.forEach((level, position) => {
                const record = sink[position];
                expect(record?.level).toBe(level);
                expect(record?.msg).toBe(`${level} message`);
                expect(String(record?.time)).toMatch(RFC3339);
            });
        });
        it('child bindings merge with and shadow parent bindings when records are capturable', () => {
            const root = make();
            const rootRecords = capturedRecords(root);
            if (rootRecords === undefined) {
                return;
            }
            const outer = root.child({ scope: 'parent', keep: true });
            const inner = outer.child({ scope: 'child' });
            // Fall back to the root's array when the child does not expose its own.
            const sink = capturedRecords(inner) ?? rootRecords;
            sink.length = 0;
            inner.info('bound message');
            expect(sink).toHaveLength(1);
            expect(sink[0]?.bindings).toMatchObject({ scope: 'child', keep: true });
        });
    });
}
@@ -0,0 +1,12 @@
1
+ import type { ModelCapability, ModelProvider } from '../ports/model-provider.js';
2
+ /**
3
+ * Shared provider contract for `ModelProvider` implementations.
4
+ *
5
+ * `make()` must return a provider wired to an offline (fake/mock) client whose
6
+ * scripted responses satisfy the requested capabilities: text content for
7
+ * `text`/`text_stream` and a JSON object matching `{ ok: boolean }` for
8
+ * `object`/`object_stream`.
9
+ */
10
+ export declare function modelProviderContract(make: () => ModelProvider, opts: {
11
+ capabilities: ModelCapability[];
12
+ }): void;
@@ -0,0 +1,222 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { OperationCancelledError } from '../errors/index.js';
3
/** Every normalized finish reason a provider response or trailing stream chunk may report. */
const FINISH_REASONS = [
    'stop', 'length', 'context_limit', 'tool_calls', 'content_filter',
    'refusal', 'pause', 'malformed', 'cancelled', 'error'
];
/** Maps an operation capability name to the provider method that implements it. */
const METHOD_BY_CAPABILITY = {
    text: 'text',
    text_stream: 'textStream',
    object: 'object',
    object_stream: 'objectStream',
    embeddings: 'embed',
    rerank: 'rerank'
};
/** JSON schema sent with the structured-output calls: an object with a required boolean `ok`. */
const CONTRACT_SCHEMA = {
    type: 'object',
    required: ['ok'],
    properties: {
        ok: { type: 'boolean' }
    }
};
28
/** Creates a fresh `AbortSignal` from a new controller that is never aborted. */
function signal() {
    const { signal: live } = new AbortController();
    return live;
}
31
/** Creates an `AbortSignal` whose controller has already been aborted (no custom reason). */
function abortedSignal() {
    const source = new AbortController();
    const { signal: cancelled } = source;
    source.abort();
    return cancelled;
}
36
/**
 * Predicate for the rejection produced by a call made with a pre-aborted signal.
 *
 * Such calls must reject. `BaseModelProvider` currently rethrows the raw abort
 * reason at the entry point, before error normalization, so the contract
 * accepts either the normalized `OperationCancelledError` or a raw error
 * named `AbortError`. Mid-flight aborts always normalize to
 * `OperationCancelledError`.
 *
 * @param error The rejection value to classify.
 * @returns `true` when the value is an accepted cancellation error.
 */
function expectAbortRejection(error) {
    if (error instanceof OperationCancelledError) {
        return true;
    }
    return error instanceof Error && error.name === 'AbortError';
}
45
/**
 * Asserts that each token counter on a usage record is greater than or equal to zero.
 *
 * @param usage Usage record carrying `inputTokens`, `outputTokens` and `totalTokens`.
 */
function expectUsage(usage) {
    for (const counter of ['inputTokens', 'outputTokens', 'totalTokens']) {
        expect(usage[counter]).toBeGreaterThanOrEqual(0);
    }
}
50
/**
 * Asserts the shape of an optional `outcome` record; a missing outcome is accepted as-is.
 *
 * @param outcome Outcome attached to a response or finish chunk, possibly absent.
 * @param finishReason Finish reason the outcome must repeat.
 */
function expectOutcome(outcome, finishReason) {
    if (!outcome) {
        return;
    }
    expect(outcome.finishReason).toBe(finishReason);
    // Both remaining fields are optional and only checked when present.
    const providerReason = outcome.providerFinishReason;
    if (providerReason !== undefined) {
        expect(typeof providerReason).toBe('string');
    }
    const retry = outcome.retryKind;
    if (retry !== undefined) {
        expect(['none', 'active', 'deferred']).toContain(retry);
    }
}
61
/**
 * Registers the shared vitest suite for `ModelProvider` implementations.
 *
 * `make()` must return a provider wired to an offline (fake/mock) client whose
 * scripted responses satisfy the requested capabilities: text content for
 * `text`/`text_stream` and a JSON object matching `{ ok: boolean }` for
 * `object`/`object_stream`.
 *
 * Two tests always run (identifiers, method presence). The per-operation tests
 * are registered only for capabilities listed in `opts.capabilities` that map
 * to a provider method.
 *
 * @param make Factory called at the start of each test to build the provider under test.
 * @param opts Options; `capabilities` lists the capabilities the provider claims.
 */
export function modelProviderContract(make, opts) {
    const operations = opts.capabilities.filter((capability) => capability in METHOD_BY_CAPABILITY);
    const has = (capability) => operations.includes(capability);
    // Request builders: every call gets a fresh payload object.
    const chatRequest = (abort) => ({
        model: 'contract-model',
        messages: [{ role: 'user', content: 'contract' }],
        signal: abort
    });
    const structuredRequest = (abort) => ({
        model: 'contract-model',
        messages: [{ role: 'user', content: 'contract' }],
        schema: CONTRACT_SCHEMA,
        signal: abort
    });
    // Reads a chunk stream to completion and returns everything it yielded.
    const drain = async (stream) => {
        const seen = [];
        for await (const chunk of stream) {
            seen.push(chunk);
        }
        return seen;
    };
    describe('modelProviderContract', () => {
        it('reports stable provider identifiers', () => {
            const provider = make();
            for (const field of ['id', 'genAiSystem']) {
                expect(typeof provider[field]).toBe('string');
                expect(provider[field].length).toBeGreaterThan(0);
            }
        });
        it('implements a method for each claimed operation capability', () => {
            const provider = make();
            operations.forEach((capability) => {
                const method = METHOD_BY_CAPABILITY[capability];
                expect(typeof provider[method]).toBe('function');
            });
        });
        if (has('text')) {
            it('text returns normalized content, usage, finish reason, and outcome shape', async () => {
                const provider = make();
                const reply = await provider.text(chatRequest(signal()));
                expect(typeof reply.content).toBe('string');
                expectUsage(reply.usage);
                expect(FINISH_REASONS).toContain(reply.finishReason);
                expectOutcome(reply.outcome, reply.finishReason);
            });
            it('text rejects an already-aborted signal with a cancellation error', async () => {
                const provider = make();
                const pending = provider.text(chatRequest(abortedSignal()));
                await expect(pending).rejects.toSatisfy(expectAbortRejection);
            });
        }
        if (has('text_stream')) {
            it('textStream yields valid chunk kinds and exactly one trailing finish', async () => {
                const provider = make();
                const chunks = await drain(provider.textStream(chatRequest(signal())));
                expect(chunks.length).toBeGreaterThan(0);
                chunks.forEach((chunk) => {
                    expect(['delta', 'tool_call', 'finish']).toContain(chunk.kind);
                });
                const finishCount = chunks.filter(({ kind }) => kind === 'finish');
                expect(finishCount).toHaveLength(1);
                const last = chunks.at(-1);
                expect(last?.kind).toBe('finish');
                if (last?.kind === 'finish') {
                    expectUsage(last.usage);
                    expect(FINISH_REASONS).toContain(last.finishReason);
                    expectOutcome(last.outcome, last.finishReason);
                }
            });
        }
        if (has('object')) {
            it('object returns the structured object with normalized usage and outcome shape', async () => {
                const provider = make();
                const reply = await provider.object(structuredRequest(signal()));
                expect(reply.object).not.toBeUndefined();
                expectUsage(reply.usage);
                expect(FINISH_REASONS).toContain(reply.finishReason);
                expectOutcome(reply.outcome, reply.finishReason);
            });
            it('object rejects an already-aborted signal with a cancellation error', async () => {
                const provider = make();
                const pending = provider.object(structuredRequest(abortedSignal()));
                await expect(pending).rejects.toSatisfy(expectAbortRejection);
            });
        }
        if (has('object_stream')) {
            it('objectStream yields valid chunk kinds and a final object', async () => {
                const provider = make();
                const chunks = await drain(provider.objectStream(structuredRequest(signal())));
                expect(chunks.length).toBeGreaterThan(0);
                chunks.forEach((chunk) => {
                    expect(['partial', 'delta', 'tool_call', 'finish']).toContain(chunk.kind);
                });
                const last = chunks.at(-1);
                expect(last?.kind).toBe('finish');
                if (last?.kind === 'finish') {
                    expect(last.object).not.toBeUndefined();
                    expectUsage(last.usage);
                    expect(FINISH_REASONS).toContain(last.finishReason);
                    expectOutcome(last.outcome, last.finishReason);
                }
            });
        }
        if (has('embeddings')) {
            it('embed returns one embedding per input', async () => {
                const provider = make();
                const reply = await provider.embed({
                    model: 'contract-model',
                    input: ['alpha', 'beta'],
                    signal: signal()
                });
                expect(reply.embeddings).toHaveLength(2);
                for (const [position, embedding] of reply.embeddings.entries()) {
                    expect(embedding.index).toBe(position);
                    expect(embedding.vector.length).toBeGreaterThan(0);
                }
                expectUsage(reply.usage);
            });
        }
        if (has('rerank')) {
            it('rerank returns scores referencing submitted documents, sorted descending', async () => {
                const provider = make();
                const documents = [
                    { id: 'doc-1', text: 'alpha' },
                    { id: 'doc-2', text: 'beta' }
                ];
                const reply = await provider.rerank({
                    model: 'contract-model',
                    query: 'contract',
                    documents,
                    signal: signal()
                });
                const knownIds = documents.map(({ id }) => id);
                for (const result of reply.results) {
                    expect(knownIds).toContain(result.id);
                }
                // Results are in descending order iff sorting a copy changes nothing.
                const scores = reply.results.map(({ score }) => score);
                const descending = [...scores].sort((left, right) => right - left);
                expect(descending).toEqual(scores);
            });
        }
    });
}
@@ -0,0 +1,3 @@
1
+ import type { RunEvent } from '../harness/defineHarness.js';
2
+ /** Collects every event from a run-event stream into an array. */
3
+ export declare function recordEvents(iter: AsyncIterable<RunEvent>): Promise<RunEvent[]>;
@@ -0,0 +1,8 @@
1
/**
 * Reads a run-event stream to completion and returns the events in arrival order.
 *
 * @param iter Async iterable of run events.
 * @returns Promise resolving to an array of every event the stream yielded.
 */
export async function recordEvents(iter) {
    const collected = [];
    for await (const next of iter) {
        collected[collected.length] = next;
    }
    return collected;
}
@@ -95,6 +95,33 @@ export function stateStoreContract(make) {
95
95
  expect.objectContaining({ id: '01EVT2' })
96
96
  ]);
97
97
  });
98
+ it('replaceMessages atomically replaces the history when supported', async () => {
99
+ const store = await make();
100
+ if (!store.replaceMessages)
101
+ return;
102
+ await store.appendMessages(session.id, [m1]);
103
+ await store.replaceMessages(session.id, [m2, m3]);
104
+ await expect(store.listMessages(session.id)).resolves.toEqual([m2, m3]);
105
+ });
106
+ it('getRun returns undefined for an unknown id', async () => {
107
+ const store = await make();
108
+ await expect(store.getRun('missing')).resolves.toBeUndefined();
109
+ });
110
+ it('listRuns honors limit', async () => {
111
+ const store = await make();
112
+ await store.createRun(run);
113
+ await store.createRun({ ...run, id: 'run_2', startedAt: '2026-01-01T00:00:05.000Z' });
114
+ await expect(store.listRuns(session.id, { limit: 1 })).resolves.toEqual([
115
+ expect.objectContaining({ id: 'run_2' })
116
+ ]);
117
+ });
118
+ it('listEvents honors limit', async () => {
119
+ const store = await make();
120
+ await store.appendEvents(run.id, [event, { ...event, id: '01EVT2' }]);
121
+ await expect(store.listEvents(run.id, { limit: 1 })).resolves.toEqual([
122
+ expect.objectContaining({ id: '01EVT' })
123
+ ]);
124
+ });
98
125
  it('duplicate message id throws StateError', async () => {
99
126
  const store = await make();
100
127
  await store.appendMessages(session.id, [m1]);
@@ -6,6 +6,18 @@ export const BUILTIN_TOOL_NAMES = ['bash', 'read', 'write', 'edit', 'glob', 'gre
6
6
  /** Per-file and total byte caps for the built-in `grep` read-and-match fallback. */
7
7
  const GREP_MAX_FILE_BYTES = 2_000_000;
8
8
  const GREP_MAX_TOTAL_BYTES = 50_000_000;
9
/** Longest model-supplied `grep` pattern, in characters, that is accepted. */
const GREP_MAX_PATTERN_LENGTH = 1_000;
/**
 * Detects a group that is itself quantified without an upper bound (`*`, `+`
 * or `{n,}`) and whose paren-free body also contains such a quantifier. These
 * are the classic catastrophic-backtracking shapes: `(x+)+`, `(x*)*`,
 * `(a+b){2,}`.
 *
 * The check is purely syntactic and deliberately conservative. Known gaps:
 * ambiguous alternations such as `(a|a)+` pass, and so do quantifiers nested
 * more than one group deep. The grep byte caps limit how much input is
 * scanned, but they cannot stop an adversarial pattern outside this check
 * from stalling the event loop.
 */
const GREP_NESTED_UNBOUNDED_QUANTIFIER = /\((?:[^()\\]|\\.)*(?:[*+]|\{\d+,\})(?:[^()\\]|\\.)*\)(?:[*+]|\{\d+,\})/;
9
21
  export const BUILTIN_ALIAS_TO_CANONICAL = {
10
22
  bash: 'bash', Bash: 'bash',
11
23
  read: 'read', Read: 'read',
@@ -64,7 +76,8 @@ export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
64
76
  const count = content.split(parsed.old_string).length - 1;
65
77
  if (count !== 1)
66
78
  throw new ValidationError('edit requires exactly one match', { where: 'tool_input', issues: { path: parsed.path, matches: count } });
67
- await session.write(parsed.path, content.replace(parsed.old_string, parsed.new_string));
79
+ // Replacer function so `$&`, `$$`, `` $` `` etc. in new_string are written literally.
80
+ await session.write(parsed.path, content.replace(parsed.old_string, () => parsed.new_string));
68
81
  return { replaced: 1 };
69
82
  }
70
83
  case 'glob': {
@@ -74,6 +87,18 @@ export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
74
87
  }
75
88
  case 'grep': {
76
89
  const parsed = schemas.grep.input.parse(input);
90
+ if (parsed.pattern.length > GREP_MAX_PATTERN_LENGTH) {
91
+ throw new ValidationError('grep pattern exceeds the maximum supported length', {
92
+ where: 'tool_input',
93
+ issues: [{ path: 'pattern', message: `Pattern must be at most ${GREP_MAX_PATTERN_LENGTH} characters.` }]
94
+ });
95
+ }
96
+ if (GREP_NESTED_UNBOUNDED_QUANTIFIER.test(parsed.pattern)) {
97
+ throw new ValidationError('grep pattern contains a nested unbounded quantifier', {
98
+ where: 'tool_input',
99
+ issues: [{ path: 'pattern', message: 'Patterns like (x+)+ can cause catastrophic backtracking and are rejected.' }]
100
+ });
101
+ }
77
102
  let rx;
78
103
  try {
79
104
  rx = new RegExp(parsed.pattern);
@@ -1,2 +1,4 @@
1
1
  import type { ResolvedMcpHttpTool, McpTransportRunner } from './runner.js';
2
2
  export declare function createHttpMcpTransportRunner(config: ResolvedMcpHttpTool): McpTransportRunner;
3
+ /** Exported for tests. Extracts an HTTP status from structured fields or explicit status phrasing only. */
4
+ export declare function statusFromError(error: unknown): number | undefined;
@@ -3,24 +3,32 @@ import { withMcpTimeout } from './runner.js';
3
3
  export function createHttpMcpTransportRunner(config) {
4
4
  let connected;
5
5
  async function connect(options) {
6
- connected ??= (async () => {
7
- const [{ Client }, { StreamableHTTPClientTransport }] = await Promise.all([
8
- import('@modelcontextprotocol/sdk/client/index.js'),
9
- import('@modelcontextprotocol/sdk/client/streamableHttp.js')
10
- ]);
11
- const transport = new StreamableHTTPClientTransport(new URL(config.url), {
12
- requestInit: { headers: buildHeaders(config.headers, config.auth) }
6
+ if (!connected) {
7
+ const promise = (async () => {
8
+ const [{ Client }, { StreamableHTTPClientTransport }] = await Promise.all([
9
+ import('@modelcontextprotocol/sdk/client/index.js'),
10
+ import('@modelcontextprotocol/sdk/client/streamableHttp.js')
11
+ ]);
12
+ const transport = new StreamableHTTPClientTransport(new URL(config.url), {
13
+ requestInit: { headers: buildHeaders(config.headers, config.auth) }
14
+ });
15
+ const client = new Client({ name: `purista-harness-${config.localToolId}`, version: '0.0.0' });
16
+ try {
17
+ await client.connect(transport, toSdkOptions(options));
18
+ }
19
+ catch (error) {
20
+ throw mapHttpError(config, 'connect', error);
21
+ }
22
+ return { client, transport };
23
+ })();
24
+ // Never cache a rejected connection (import or connect failure); the
25
+ // next call must retry from scratch.
26
+ void promise.catch(() => {
27
+ if (connected === promise)
28
+ connected = undefined;
13
29
  });
14
- const client = new Client({ name: `purista-harness-${config.localToolId}`, version: '0.0.0' });
15
- try {
16
- await client.connect(transport, toSdkOptions(options));
17
- }
18
- catch (error) {
19
- connected = undefined;
20
- throw mapHttpError(config, 'connect', error);
21
- }
22
- return { client, transport };
23
- })();
30
+ connected = promise;
31
+ }
24
32
  return connected;
25
33
  }
26
34
  return {
@@ -53,8 +61,10 @@ export function createHttpMcpTransportRunner(config) {
53
61
  return;
54
62
  const current = await connected.catch(() => undefined);
55
63
  connected = undefined;
56
- await current?.transport.close();
57
- await current?.client.close();
64
+ if (!current)
65
+ return;
66
+ // Client first per SDK guidance; close both even when one throws.
67
+ await Promise.allSettled([current.client.close(), current.transport.close()]);
58
68
  }
59
69
  };
60
70
  }
@@ -87,7 +97,8 @@ function mapHttpError(config, phase, error) {
87
97
  }
88
98
  return new McpProtocolError('MCP HTTP transport failed.', { tool_id: config.localToolId, transport: 'http', phase }, error);
89
99
  }
90
- function statusFromError(error) {
100
+ /** Exported for tests. Extracts an HTTP status from structured fields or explicit status phrasing only. */
101
+ export function statusFromError(error) {
91
102
  if (typeof error === 'object' && error !== null) {
92
103
  const maybe = error;
93
104
  if (typeof maybe.status === 'number')
@@ -101,7 +112,9 @@ function statusFromError(error) {
101
112
  if (error instanceof Error) {
102
113
  if (/unauthorized/i.test(error.message))
103
114
  return 401;
104
- const match = /\b(401|403|4\d\d|5\d\d)\b/.exec(error.message);
115
+ // Only trust explicit "HTTP 503" / "status: 503" phrasing — a bare number
116
+ // in a message (e.g. "took 401ms") must not classify as an HTTP status.
117
+ const match = /HTTP (\d{3})/.exec(error.message) ?? /status[: ] ?(\d{3})/i.exec(error.message);
105
118
  if (match?.[1])
106
119
  return Number(match[1]);
107
120
  }
@@ -12,6 +12,8 @@ export interface ResolvedMcpTool {
12
12
  upstreamToolName: string;
13
13
  timeoutMs: number;
14
14
  serverKey: string;
15
+ /** Sandbox binding key (session id) used to evict session-scoped runners. */
16
+ sandboxKey?: string;
15
17
  inputAdapter?: (input: unknown) => unknown;
16
18
  outputAdapter?: (output: unknown) => unknown;
17
19
  }
@@ -49,6 +51,8 @@ export interface McpTransportRunner {
49
51
  }
50
52
  export interface McpRunnerRegistry {
51
53
  getRunner(config: ResolvedMcpToolConfig): McpTransportRunner;
54
+ /** Closes and evicts every runner bound to the given sandbox key (e.g. when a session closes). */
55
+ closeForSandboxKey(sandboxKey: string): Promise<void>;
52
56
  close(): Promise<void>;
53
57
  }
54
58
  export interface McpFacadeContext {
@@ -3,23 +3,39 @@ import { assertMcpJsonSchema, validateMcpJsonSchema } from './schema.js';
3
3
  const discoveredCache = new WeakMap();
4
4
  export async function getMcpToolSpecs(tools, allowlist, ctx = {}) {
5
5
  const allowed = new Set(allowlist);
6
- const registry = ctx.registry ?? createMcpRunnerRegistry();
7
- const specs = await Promise.all(Object.entries(tools).map(async ([toolId, tool]) => {
8
- if (!allowed.has(toolId) || !isMcpToolDefinition(tool))
9
- return undefined;
10
- const config = resolveMcpTool(toolId, tool, ctx);
11
- return getResolvedModelToolSpec(config, registry.getRunner(config), ctx.signal, ctx.warn);
12
- }));
13
- return specs.filter((spec) => spec !== undefined);
6
+ // An ad-hoc registry can spawn persistent server processes; it must be
7
+ // closed before returning so direct library callers do not leak processes.
8
+ const localRegistry = ctx.registry ? undefined : createMcpRunnerRegistry();
9
+ const registry = ctx.registry ?? localRegistry;
10
+ try {
11
+ const specs = await Promise.all(Object.entries(tools).map(async ([toolId, tool]) => {
12
+ if (!allowed.has(toolId) || !isMcpToolDefinition(tool))
13
+ return undefined;
14
+ const config = resolveMcpTool(toolId, tool, ctx);
15
+ return getResolvedModelToolSpec(config, registry.getRunner(config), ctx.signal, ctx.warn);
16
+ }));
17
+ return specs.filter((spec) => spec !== undefined);
18
+ }
19
+ finally {
20
+ await localRegistry?.close();
21
+ }
14
22
  }
15
23
  export async function invokeMcpTool(first, second, input, fourth) {
16
24
  if (typeof first === 'string') {
17
25
  if (!isMcpToolDefinition(second))
18
26
  throw new ToolNotFoundError('Tool is not an MCP tool.', { tool_id: first, where: 'registry' });
19
27
  const ctx = isAbortSignal(fourth) ? { signal: fourth } : fourth ?? {};
20
- const registry = ctx.registry ?? createMcpRunnerRegistry();
28
+ // An ad-hoc registry can spawn persistent server processes; it must be
29
+ // closed before returning so direct library callers do not leak processes.
30
+ const localRegistry = ctx.registry ? undefined : createMcpRunnerRegistry();
31
+ const registry = ctx.registry ?? localRegistry;
21
32
  const config = resolveMcpTool(first, second, ctx);
22
- return invokeResolvedMcpTool(config, registry.getRunner(config), input, ctx.signal, ctx.warn);
33
+ try {
34
+ return await invokeResolvedMcpTool(config, registry.getRunner(config), input, ctx.signal, ctx.warn);
35
+ }
36
+ finally {
37
+ await localRegistry?.close();
38
+ }
23
39
  }
24
40
  return invokeResolvedMcpTool(first, second, input, isAbortSignal(fourth) ? fourth : fourth?.signal, isAbortSignal(fourth) ? undefined : fourth?.warn);
25
41
  }
@@ -30,18 +46,32 @@ export function createMcpRunnerRegistry() {
30
46
  const runners = new Map();
31
47
  return {
32
48
  getRunner(config) {
33
- const existing = runners.get(config.localToolId);
49
+ // Keyed by serverKey: a stdio runner binds a concrete sandbox session, so
50
+ // two sessions must never share one runner even for the same tool id.
51
+ const key = config.serverKey || config.localToolId;
52
+ const existing = runners.get(key);
34
53
  if (existing)
35
- return existing;
54
+ return existing.runner;
36
55
  const runner = config.kind === 'mcp_stdio'
37
56
  ? createDynamicStdioRunner(config)
38
57
  : createDynamicHttpRunner(config);
39
- runners.set(config.localToolId, runner);
58
+ runners.set(key, { runner, ...(config.sandboxKey !== undefined ? { sandboxKey: config.sandboxKey } : {}) });
40
59
  return runner;
41
60
  },
61
+ async closeForSandboxKey(sandboxKey) {
62
+ const evicted = [];
63
+ for (const [key, entry] of runners) {
64
+ if (entry.sandboxKey === sandboxKey) {
65
+ runners.delete(key);
66
+ evicted.push(entry.runner);
67
+ }
68
+ }
69
+ await Promise.all(evicted.map((runner) => runner.close()));
70
+ },
42
71
  async close() {
43
- await Promise.all([...runners.values()].map((runner) => runner.close()));
72
+ const open = [...runners.values()].map((entry) => entry.runner);
44
73
  runners.clear();
74
+ await Promise.all(open.map((runner) => runner.close()));
45
75
  }
46
76
  };
47
77
  }
@@ -73,7 +103,7 @@ async function invokeResolvedMcpTool(config, runner, input, signal, warn) {
73
103
  const validatedInput = validateMcpJsonSchema({ toolId: config.localToolId, where: 'mcp_input', schema: tool.inputSchema, value: adaptedInput, ...(warn ? { warn } : {}) });
74
104
  const result = await runner.callTool(config.upstreamToolName, validatedInput, { ...(signal ? { signal } : {}), timeoutMs: config.timeoutMs });
75
105
  if (isRecord(result) && result.isError === true) {
76
- throw new ToolError('MCP tool returned an error.', { tool_id: config.localToolId, tool_kind: config.kind });
106
+ throw new ToolError(`MCP tool returned an error.${describeMcpErrorResult(result)}`, { tool_id: config.localToolId, tool_kind: config.kind });
77
107
  }
78
108
  const normalized = normalizeMcpOutput(result);
79
109
  const validatedOutput = tool.outputSchema
@@ -128,6 +158,7 @@ function resolveMcpTool(toolId, tool, ctx) {
128
158
  ...base,
129
159
  kind: 'mcp_stdio',
130
160
  serverKey: `${toolId}:${ctx.sandboxKey ?? 'sandbox'}`,
161
+ sandboxKey: ctx.sandboxKey ?? 'sandbox',
131
162
  command: tool.command,
132
163
  ...(tool.args ? { args: tool.args } : {}),
133
164
  ...(tool.env ? { env: tool.env } : {}),
@@ -148,25 +179,48 @@ export function isMcpToolDefinition(tool) {
148
179
  }
149
180
  function createDynamicStdioRunner(config) {
150
181
  let runnerPromise;
151
- return dynamicRunner(() => {
152
- runnerPromise ??= import('./stdio.js').then((module) => module.createStdioMcpTransportRunner(config));
182
+ const runner = dynamicRunner(() => {
183
+ runnerPromise ??= import('./stdio.js').then((module) => module.createStdioMcpTransportRunner(config, {
184
+ // A respawned server may expose a different tool list; drop the memoized
185
+ // discovery whenever the persistent server process is discarded.
186
+ onReset: () => { discoveredCache.delete(runner); }
187
+ }));
153
188
  return runnerPromise;
154
- });
189
+ }, () => runnerPromise);
190
+ return runner;
155
191
  }
156
192
  function createDynamicHttpRunner(config) {
157
193
  let runnerPromise;
158
194
  return dynamicRunner(() => {
159
195
  runnerPromise ??= import('./http.js').then((module) => module.createHttpMcpTransportRunner(config));
160
196
  return runnerPromise;
161
- });
197
+ }, () => runnerPromise);
162
198
  }
163
- function dynamicRunner(load) {
199
+ function dynamicRunner(load, peek) {
164
200
  return {
165
201
  async listTools(options) { return (await load()).listTools(options); },
166
202
  async callTool(name, input, options) { return (await load()).callTool(name, input, options); },
167
- async close() { await (await load()).close(); }
203
+ async close() {
204
+ // Never trigger a fresh transport load just to close it, and never let
205
+ // an earlier load failure escape from registry shutdown.
206
+ const pending = peek();
207
+ if (!pending)
208
+ return;
209
+ const loaded = await pending.catch(() => undefined);
210
+ await loaded?.close();
211
+ }
168
212
  };
169
213
  }
214
/**
 * Renders a short, truncated description of an MCP `isError` result for error messages.
 *
 * The text is appended to an error message that is already being thrown, so
 * this function must never throw itself: a payload that cannot be serialized
 * yields an empty description instead of masking the original tool error.
 *
 * @param result Raw MCP tool result that reported an error.
 * @returns `''` when there is nothing to show; otherwise a leading space
 *   followed by at most 512 UTF-16 code units of text, plus `…` when cut.
 */
function describeMcpErrorResult(result) {
    const MAX_LENGTH = 512;
    const normalized = normalizeMcpOutput(result);
    if (normalized === null)
        return '';
    let text;
    if (typeof normalized === 'string') {
        text = normalized;
    }
    else {
        try {
            // JSON.stringify throws on cyclic structures and BigInt values.
            text = JSON.stringify(normalized);
        }
        catch {
            return '';
        }
    }
    if (!text)
        return '';
    if (text.length <= MAX_LENGTH)
        return ` ${text}`;
    // Do not cut between the two halves of a surrogate pair; that would leave
    // a lone surrogate at the end of the message.
    const lastKept = text.charCodeAt(MAX_LENGTH - 1);
    const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? MAX_LENGTH - 1 : MAX_LENGTH;
    return ` ${text.slice(0, end)}…`;
}
170
224
  function normalizeContentBlock(block) {
171
225
  if (!isRecord(block))
172
226
  return null;