@purista/harness 1.2.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +59 -39
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +91 -1
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +37 -17
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.d.ts +2 -1
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +110 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +337 -81
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +2 -1
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
/** Level method names every `Logger` must expose, in the order the contract emits them. */
const LEVELS = [
    'trace',
    'debug',
    'info',
    'warn',
    'error',
    'fatal'
];
/** Timestamp shape accepted for a record's `time`: date, `T`, time, optional fraction, then `Z` or a numeric offset. */
const RFC3339 = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$/;
|
|
4
|
+
/**
 * Returns the logger's `records` array when it exposes one.
 *
 * @param logger Logger under test.
 * @returns The `records` array itself (not a copy), or `undefined` when
 *   `logger.records` is not an array.
 */
function capturedRecords(logger) {
    const { records } = logger;
    if (!Array.isArray(records)) {
        return undefined;
    }
    return records;
}
|
|
8
|
+
/**
 * Registers the shared contract suite for `Logger` implementations.
 *
 * Every logger is checked for the behavioral surface: all level methods exist,
 * none of them throw, and `child()` returns a logger with the same surface.
 * The record-shape tests only assert when the logger exposes its emitted
 * records through a `records` array (e.g. `FakeLogger`); for any other logger
 * those tests return early without asserting.
 *
 * @param make Factory invoked once per test to obtain a fresh logger.
 */
export function loggerContract(make) {
    describe('loggerContract', () => {
        it('exposes every level method and none of them throw', () => {
            const logger = make();
            LEVELS.forEach((level) => {
                expect(typeof logger[level]).toBe('function');
                expect(() => logger[level](`${level} message`, { level })).not.toThrow();
            });
        });
        it('child(bindings) returns a logger with the full level surface', () => {
            const derived = make().child({ component: 'contract' });
            LEVELS.forEach((level) => {
                expect(typeof derived[level]).toBe('function');
            });
            expect(() => derived.info('child message')).not.toThrow();
        });
        it('emits one record per level with an RFC3339 time when records are capturable', () => {
            const logger = make();
            const sink = capturedRecords(logger);
            if (sink) {
                // Clear in place so only the records emitted below are counted.
                sink.length = 0;
                LEVELS.forEach((level) => {
                    logger[level](`${level} message`);
                });
                expect(sink).toHaveLength(LEVELS.length);
                LEVELS.forEach((level, position) => {
                    const entry = sink[position];
                    expect(entry?.level).toBe(level);
                    expect(entry?.msg).toBe(`${level} message`);
                    expect(String(entry?.time)).toMatch(RFC3339);
                });
            }
        });
        it('child bindings merge with and shadow parent bindings when records are capturable', () => {
            const logger = make();
            const rootSink = capturedRecords(logger);
            if (rootSink) {
                const outer = logger.child({ scope: 'parent', keep: true });
                const inner = outer.child({ scope: 'child' });
                // Prefer the child's own `records` array when it has one; otherwise use the root's.
                const sink = capturedRecords(inner) ?? rootSink;
                sink.length = 0;
                inner.info('bound message');
                expect(sink).toHaveLength(1);
                expect(sink[0]?.bindings).toMatchObject({ scope: 'child', keep: true });
            }
        });
    });
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ModelCapability, ModelProvider } from '../ports/model-provider.js';
|
|
2
|
+
/**
 * Registers the shared contract suite for `ModelProvider` implementations.
 *
 * The provider returned by `make()` must be backed by an offline (fake/mock)
 * client. Its scripted responses have to satisfy every capability listed in
 * `opts.capabilities`: text content for `text`/`text_stream`, and a JSON
 * object matching `{ ok: boolean }` for `object`/`object_stream`.
 *
 * @param make Factory returning the provider under test.
 * @param opts.capabilities Capabilities the provider claims to support.
 */
export declare function modelProviderContract(
    make: () => ModelProvider,
    opts: { capabilities: ModelCapability[] }
): void;
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { OperationCancelledError } from '../errors/index.js';
|
|
3
|
+
/** Finish reasons the contract accepts on responses and on `finish` chunks. */
const FINISH_REASONS = [
    'stop', 'length', 'context_limit', 'tool_calls', 'content_filter',
    'refusal', 'pause', 'malformed', 'cancelled', 'error'
];
/** Maps each operation capability to the provider method that implements it. */
const METHOD_BY_CAPABILITY = {
    text: 'text',
    text_stream: 'textStream',
    object: 'object',
    object_stream: 'objectStream',
    embeddings: 'embed',
    rerank: 'rerank'
};
/** Schema sent with `object`/`objectStream` requests: an object with a required boolean `ok`. */
const CONTRACT_SCHEMA = {
    type: 'object',
    required: ['ok'],
    properties: {
        ok: { type: 'boolean' }
    }
};
|
|
28
|
+
/** Returns the signal of a brand-new `AbortController`, i.e. one that has not been aborted. */
function signal() {
    const { signal: live } = new AbortController();
    return live;
}
|
|
31
|
+
/** Returns the signal of a new `AbortController` that was aborted before being handed out. */
function abortedSignal() {
    const source = new AbortController();
    source.abort();
    const { signal: cancelled } = source;
    return cancelled;
}
|
|
36
|
+
/**
 * Predicate (not an assertion) used with `rejects.toSatisfy(...)`: decides
 * whether a rejection counts as a cancellation.
 *
 * Pre-aborted signals must reject. `BaseModelProvider` currently rethrows the
 * raw abort reason at the entry point (before error normalization), so both
 * the normalized `OperationCancelledError` and a raw `AbortError` satisfy the
 * contract; mid-flight aborts always normalize to `OperationCancelledError`.
 *
 * @param error The rejection reason.
 * @returns `true` for an `OperationCancelledError` or any object whose `name`
 *   is `'AbortError'`; `false` otherwise.
 */
function expectAbortRejection(error) {
    if (error instanceof OperationCancelledError) {
        return true;
    }
    // Duck-type on `name` instead of `instanceof Error`: an abort reason created
    // in another realm (vm context, iframe) or by a polyfill is not an instance
    // of this realm's `Error`, yet it is still a genuine AbortError.
    return typeof error === 'object' && error !== null && error.name === 'AbortError';
}
|
|
45
|
+
/**
 * Asserts that each token counter on a usage object is at least zero.
 *
 * @param usage Object carrying `inputTokens`, `outputTokens` and `totalTokens`.
 */
function expectUsage(usage) {
    for (const counter of ['inputTokens', 'outputTokens', 'totalTokens']) {
        expect(usage[counter]).toBeGreaterThanOrEqual(0);
    }
}
|
|
50
|
+
/**
 * Asserts the shape of an optional `outcome` object.
 *
 * A falsy outcome is accepted without any assertion. Otherwise its
 * `finishReason` must equal the given one; `providerFinishReason`, when
 * defined, must be a string; and `retryKind`, when defined, must be one of
 * `none`, `active` or `deferred`.
 *
 * @param outcome Outcome reported next to a response or `finish` chunk, if any.
 * @param finishReason Finish reason reported alongside the outcome.
 */
function expectOutcome(outcome, finishReason) {
    if (outcome) {
        expect(outcome.finishReason).toBe(finishReason);
        const providerReason = outcome.providerFinishReason;
        if (providerReason !== undefined) {
            expect(typeof providerReason).toBe('string');
        }
        const retry = outcome.retryKind;
        if (retry !== undefined) {
            expect(['none', 'active', 'deferred']).toContain(retry);
        }
    }
}
|
|
61
|
+
/**
 * Shared provider contract for `ModelProvider` implementations.
 *
 * `make()` must return a provider wired to an offline (fake/mock) client whose
 * scripted responses satisfy the requested capabilities: text content for
 * `text`/`text_stream` and a JSON object matching `{ ok: boolean }` for
 * `object`/`object_stream`.
 *
 * Only capabilities that are own keys of `METHOD_BY_CAPABILITY` register
 * operation tests; any other entry in `opts.capabilities` is ignored here.
 *
 * @param make Factory invoked once per test to obtain the provider.
 * @param opts.capabilities Capabilities the provider claims to support.
 */
export function modelProviderContract(make, opts) {
    // Own-property check: `in` would also accept inherited keys such as
    // `toString`, which would then be treated as an operation capability.
    const operations = opts.capabilities.filter((capability) => Object.hasOwn(METHOD_BY_CAPABILITY, capability));
    const has = (capability) => operations.includes(capability);
    // Builds a fresh chat-style request per call so no test shares request objects.
    const chatRequest = (requestSignal, extra = {}) => ({
        model: 'contract-model',
        messages: [{ role: 'user', content: 'contract' }],
        ...extra,
        signal: requestSignal
    });
    // Drains an async iterable of chunks into an array.
    const collect = async (stream) => {
        const chunks = [];
        for await (const chunk of stream) {
            chunks.push(chunk);
        }
        return chunks;
    };
    describe('modelProviderContract', () => {
        it('reports stable provider identifiers', () => {
            const provider = make();
            expect(typeof provider.id).toBe('string');
            expect(provider.id.length).toBeGreaterThan(0);
            expect(typeof provider.genAiSystem).toBe('string');
            expect(provider.genAiSystem.length).toBeGreaterThan(0);
        });
        it('implements a method for each claimed operation capability', () => {
            const provider = make();
            for (const capability of operations) {
                expect(typeof provider[METHOD_BY_CAPABILITY[capability]]).toBe('function');
            }
        });
        if (has('text')) {
            it('text returns normalized content, usage, finish reason, and outcome shape', async () => {
                const provider = make();
                const response = await provider.text(chatRequest(signal()));
                expect(typeof response.content).toBe('string');
                expectUsage(response.usage);
                expect(FINISH_REASONS).toContain(response.finishReason);
                expectOutcome(response.outcome, response.finishReason);
            });
            it('text rejects an already-aborted signal with a cancellation error', async () => {
                const provider = make();
                await expect(provider.text(chatRequest(abortedSignal()))).rejects.toSatisfy(expectAbortRejection);
            });
        }
        if (has('text_stream')) {
            it('textStream yields valid chunk kinds and exactly one trailing finish', async () => {
                const provider = make();
                const chunks = await collect(provider.textStream(chatRequest(signal())));
                expect(chunks.length).toBeGreaterThan(0);
                for (const chunk of chunks) {
                    expect(['delta', 'tool_call', 'finish']).toContain(chunk.kind);
                }
                const finishes = chunks.filter((chunk) => chunk.kind === 'finish');
                expect(finishes).toHaveLength(1);
                const finish = chunks.at(-1);
                expect(finish?.kind).toBe('finish');
                if (finish?.kind === 'finish') {
                    expectUsage(finish.usage);
                    expect(FINISH_REASONS).toContain(finish.finishReason);
                    expectOutcome(finish.outcome, finish.finishReason);
                }
            });
        }
        if (has('object')) {
            it('object returns the structured object with normalized usage and outcome shape', async () => {
                const provider = make();
                const response = await provider.object(chatRequest(signal(), { schema: CONTRACT_SCHEMA }));
                expect(response.object).not.toBeUndefined();
                expectUsage(response.usage);
                expect(FINISH_REASONS).toContain(response.finishReason);
                expectOutcome(response.outcome, response.finishReason);
            });
            it('object rejects an already-aborted signal with a cancellation error', async () => {
                const provider = make();
                await expect(provider.object(chatRequest(abortedSignal(), { schema: CONTRACT_SCHEMA }))).rejects.toSatisfy(expectAbortRejection);
            });
        }
        if (has('object_stream')) {
            it('objectStream yields valid chunk kinds and a final object', async () => {
                const provider = make();
                const chunks = await collect(provider.objectStream(chatRequest(signal(), { schema: CONTRACT_SCHEMA })));
                expect(chunks.length).toBeGreaterThan(0);
                for (const chunk of chunks) {
                    expect(['partial', 'delta', 'tool_call', 'finish']).toContain(chunk.kind);
                }
                const finish = chunks.at(-1);
                expect(finish?.kind).toBe('finish');
                if (finish?.kind === 'finish') {
                    expect(finish.object).not.toBeUndefined();
                    expectUsage(finish.usage);
                    expect(FINISH_REASONS).toContain(finish.finishReason);
                    expectOutcome(finish.outcome, finish.finishReason);
                }
            });
        }
        if (has('embeddings')) {
            it('embed returns one embedding per input', async () => {
                const provider = make();
                const response = await provider.embed({
                    model: 'contract-model',
                    input: ['alpha', 'beta'],
                    signal: signal()
                });
                expect(response.embeddings).toHaveLength(2);
                for (const [index, embedding] of response.embeddings.entries()) {
                    expect(embedding.index).toBe(index);
                    expect(embedding.vector.length).toBeGreaterThan(0);
                }
                expectUsage(response.usage);
            });
        }
        if (has('rerank')) {
            it('rerank returns scores referencing submitted documents, sorted descending', async () => {
                const provider = make();
                const documents = [
                    { id: 'doc-1', text: 'alpha' },
                    { id: 'doc-2', text: 'beta' }
                ];
                const response = await provider.rerank({
                    model: 'contract-model',
                    query: 'contract',
                    documents,
                    signal: signal()
                });
                const ids = documents.map((document) => document.id);
                for (const result of response.results) {
                    expect(ids).toContain(result.id);
                }
                const scores = response.results.map((result) => result.score);
                // Sort a copy: the returned order itself must already be descending.
                expect([...scores].sort((a, b) => b - a)).toEqual(scores);
            });
        }
    });
}
|
|
@@ -95,6 +95,33 @@ export function stateStoreContract(make) {
|
|
|
95
95
|
expect.objectContaining({ id: '01EVT2' })
|
|
96
96
|
]);
|
|
97
97
|
});
|
|
98
|
+
it('replaceMessages atomically replaces the history when supported', async () => {
|
|
99
|
+
const store = await make();
|
|
100
|
+
if (!store.replaceMessages)
|
|
101
|
+
return;
|
|
102
|
+
await store.appendMessages(session.id, [m1]);
|
|
103
|
+
await store.replaceMessages(session.id, [m2, m3]);
|
|
104
|
+
await expect(store.listMessages(session.id)).resolves.toEqual([m2, m3]);
|
|
105
|
+
});
|
|
106
|
+
it('getRun returns undefined for an unknown id', async () => {
|
|
107
|
+
const store = await make();
|
|
108
|
+
await expect(store.getRun('missing')).resolves.toBeUndefined();
|
|
109
|
+
});
|
|
110
|
+
it('listRuns honors limit', async () => {
|
|
111
|
+
const store = await make();
|
|
112
|
+
await store.createRun(run);
|
|
113
|
+
await store.createRun({ ...run, id: 'run_2', startedAt: '2026-01-01T00:00:05.000Z' });
|
|
114
|
+
await expect(store.listRuns(session.id, { limit: 1 })).resolves.toEqual([
|
|
115
|
+
expect.objectContaining({ id: 'run_2' })
|
|
116
|
+
]);
|
|
117
|
+
});
|
|
118
|
+
it('listEvents honors limit', async () => {
|
|
119
|
+
const store = await make();
|
|
120
|
+
await store.appendEvents(run.id, [event, { ...event, id: '01EVT2' }]);
|
|
121
|
+
await expect(store.listEvents(run.id, { limit: 1 })).resolves.toEqual([
|
|
122
|
+
expect.objectContaining({ id: '01EVT' })
|
|
123
|
+
]);
|
|
124
|
+
});
|
|
98
125
|
it('duplicate message id throws StateError', async () => {
|
|
99
126
|
const store = await make();
|
|
100
127
|
await store.appendMessages(session.id, [m1]);
|
package/dist/tools/index.js
CHANGED
|
@@ -6,6 +6,18 @@ export const BUILTIN_TOOL_NAMES = ['bash', 'read', 'write', 'edit', 'glob', 'gre
|
|
|
6
6
|
/** Per-file and total byte caps for the built-in `grep` read-and-match fallback. */
|
|
7
7
|
const GREP_MAX_FILE_BYTES = 2_000_000;
|
|
8
8
|
const GREP_MAX_TOTAL_BYTES = 50_000_000;
|
|
9
|
+
/** Longest model-supplied `grep` pattern, in characters, that is accepted. */
const GREP_MAX_PATTERN_LENGTH = 1000;
/**
 * Detects a quantified group whose body contains no parentheses but does
 * contain an unbounded quantifier (`*`, `+` or `{n,}`), where the group itself
 * is followed by an unbounded quantifier. These are the classic
 * catastrophic-backtracking shapes: `(x+)+`, `(x*)*`, `(a+b){2,}`.
 *
 * The check is deliberately syntactic and conservative. Known gaps: ambiguous
 * alternations such as `(a|a)+`, and quantifiers nested more than one group
 * level deep, are not detected. The byte caps on scanned input limit how much
 * text is matched, but they cannot stop an adversarial pattern outside this
 * check from stalling the event loop.
 */
const GREP_NESTED_UNBOUNDED_QUANTIFIER = /\((?:[^()\\]|\\.)*(?:[*+]|\{\d+,\})(?:[^()\\]|\\.)*\)(?:[*+]|\{\d+,\})/;
|
|
9
21
|
export const BUILTIN_ALIAS_TO_CANONICAL = {
|
|
10
22
|
bash: 'bash', Bash: 'bash',
|
|
11
23
|
read: 'read', Read: 'read',
|
|
@@ -64,7 +76,8 @@ export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
|
|
|
64
76
|
const count = content.split(parsed.old_string).length - 1;
|
|
65
77
|
if (count !== 1)
|
|
66
78
|
throw new ValidationError('edit requires exactly one match', { where: 'tool_input', issues: { path: parsed.path, matches: count } });
|
|
67
|
-
|
|
79
|
+
// Replacer function so `$&`, `$$`, `` $` `` etc. in new_string are written literally.
|
|
80
|
+
await session.write(parsed.path, content.replace(parsed.old_string, () => parsed.new_string));
|
|
68
81
|
return { replaced: 1 };
|
|
69
82
|
}
|
|
70
83
|
case 'glob': {
|
|
@@ -74,6 +87,18 @@ export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
|
|
|
74
87
|
}
|
|
75
88
|
case 'grep': {
|
|
76
89
|
const parsed = schemas.grep.input.parse(input);
|
|
90
|
+
if (parsed.pattern.length > GREP_MAX_PATTERN_LENGTH) {
|
|
91
|
+
throw new ValidationError('grep pattern exceeds the maximum supported length', {
|
|
92
|
+
where: 'tool_input',
|
|
93
|
+
issues: [{ path: 'pattern', message: `Pattern must be at most ${GREP_MAX_PATTERN_LENGTH} characters.` }]
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
if (GREP_NESTED_UNBOUNDED_QUANTIFIER.test(parsed.pattern)) {
|
|
97
|
+
throw new ValidationError('grep pattern contains a nested unbounded quantifier', {
|
|
98
|
+
where: 'tool_input',
|
|
99
|
+
issues: [{ path: 'pattern', message: 'Patterns like (x+)+ can cause catastrophic backtracking and are rejected.' }]
|
|
100
|
+
});
|
|
101
|
+
}
|
|
77
102
|
let rx;
|
|
78
103
|
try {
|
|
79
104
|
rx = new RegExp(parsed.pattern);
|
package/dist/tools/mcp/http.d.ts
CHANGED
|
@@ -1,2 +1,4 @@
|
|
|
1
1
|
import type { ResolvedMcpHttpTool, McpTransportRunner } from './runner.js';
|
|
2
2
|
export declare function createHttpMcpTransportRunner(config: ResolvedMcpHttpTool): McpTransportRunner;
|
|
3
|
+
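/** Exported for tests. Extracts an HTTP status from structured fields, from an "unauthorized" message (mapped to 401), or from explicit "HTTP 503" / "status: 503" phrasing — never from a bare number in the message. */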
|
|
4
|
+
export declare function statusFromError(error: unknown): number | undefined;
|
package/dist/tools/mcp/http.js
CHANGED
|
@@ -3,24 +3,32 @@ import { withMcpTimeout } from './runner.js';
|
|
|
3
3
|
export function createHttpMcpTransportRunner(config) {
|
|
4
4
|
let connected;
|
|
5
5
|
async function connect(options) {
|
|
6
|
-
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
6
|
+
if (!connected) {
|
|
7
|
+
const promise = (async () => {
|
|
8
|
+
const [{ Client }, { StreamableHTTPClientTransport }] = await Promise.all([
|
|
9
|
+
import('@modelcontextprotocol/sdk/client/index.js'),
|
|
10
|
+
import('@modelcontextprotocol/sdk/client/streamableHttp.js')
|
|
11
|
+
]);
|
|
12
|
+
const transport = new StreamableHTTPClientTransport(new URL(config.url), {
|
|
13
|
+
requestInit: { headers: buildHeaders(config.headers, config.auth) }
|
|
14
|
+
});
|
|
15
|
+
const client = new Client({ name: `purista-harness-${config.localToolId}`, version: '0.0.0' });
|
|
16
|
+
try {
|
|
17
|
+
await client.connect(transport, toSdkOptions(options));
|
|
18
|
+
}
|
|
19
|
+
catch (error) {
|
|
20
|
+
throw mapHttpError(config, 'connect', error);
|
|
21
|
+
}
|
|
22
|
+
return { client, transport };
|
|
23
|
+
})();
|
|
24
|
+
// Never cache a rejected connection (import or connect failure); the
|
|
25
|
+
// next call must retry from scratch.
|
|
26
|
+
void promise.catch(() => {
|
|
27
|
+
if (connected === promise)
|
|
28
|
+
connected = undefined;
|
|
13
29
|
});
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
await client.connect(transport, toSdkOptions(options));
|
|
17
|
-
}
|
|
18
|
-
catch (error) {
|
|
19
|
-
connected = undefined;
|
|
20
|
-
throw mapHttpError(config, 'connect', error);
|
|
21
|
-
}
|
|
22
|
-
return { client, transport };
|
|
23
|
-
})();
|
|
30
|
+
connected = promise;
|
|
31
|
+
}
|
|
24
32
|
return connected;
|
|
25
33
|
}
|
|
26
34
|
return {
|
|
@@ -53,8 +61,10 @@ export function createHttpMcpTransportRunner(config) {
|
|
|
53
61
|
return;
|
|
54
62
|
const current = await connected.catch(() => undefined);
|
|
55
63
|
connected = undefined;
|
|
56
|
-
|
|
57
|
-
|
|
64
|
+
if (!current)
|
|
65
|
+
return;
|
|
66
|
+
// Client first per SDK guidance; close both even when one throws.
|
|
67
|
+
await Promise.allSettled([current.client.close(), current.transport.close()]);
|
|
58
68
|
}
|
|
59
69
|
};
|
|
60
70
|
}
|
|
@@ -87,7 +97,8 @@ function mapHttpError(config, phase, error) {
|
|
|
87
97
|
}
|
|
88
98
|
return new McpProtocolError('MCP HTTP transport failed.', { tool_id: config.localToolId, transport: 'http', phase }, error);
|
|
89
99
|
}
|
|
90
|
-
|
|
100
|
+
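/** Exported for tests. Extracts an HTTP status from structured fields, from an "unauthorized" message (mapped to 401), or from explicit "HTTP 503" / "status: 503" phrasing — never from a bare number in the message. */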
|
|
101
|
+
export function statusFromError(error) {
|
|
91
102
|
if (typeof error === 'object' && error !== null) {
|
|
92
103
|
const maybe = error;
|
|
93
104
|
if (typeof maybe.status === 'number')
|
|
@@ -101,7 +112,9 @@ function statusFromError(error) {
|
|
|
101
112
|
if (error instanceof Error) {
|
|
102
113
|
if (/unauthorized/i.test(error.message))
|
|
103
114
|
return 401;
|
|
104
|
-
|
|
115
|
+
// Only trust explicit "HTTP 503" / "status: 503" phrasing — a bare number
|
|
116
|
+
// in a message (e.g. "took 401ms") must not classify as an HTTP status.
|
|
117
|
+
const match = /HTTP (\d{3})/.exec(error.message) ?? /status[: ] ?(\d{3})/i.exec(error.message);
|
|
105
118
|
if (match?.[1])
|
|
106
119
|
return Number(match[1]);
|
|
107
120
|
}
|
|
@@ -12,6 +12,8 @@ export interface ResolvedMcpTool {
|
|
|
12
12
|
upstreamToolName: string;
|
|
13
13
|
timeoutMs: number;
|
|
14
14
|
serverKey: string;
|
|
15
|
+
/** Sandbox binding key (session id) used to evict session-scoped runners. */
|
|
16
|
+
sandboxKey?: string;
|
|
15
17
|
inputAdapter?: (input: unknown) => unknown;
|
|
16
18
|
outputAdapter?: (output: unknown) => unknown;
|
|
17
19
|
}
|
|
@@ -49,6 +51,8 @@ export interface McpTransportRunner {
|
|
|
49
51
|
}
|
|
50
52
|
export interface McpRunnerRegistry {
|
|
51
53
|
getRunner(config: ResolvedMcpToolConfig): McpTransportRunner;
|
|
54
|
+
/** Closes and evicts every runner bound to the given sandbox key (e.g. when a session closes). */
|
|
55
|
+
closeForSandboxKey(sandboxKey: string): Promise<void>;
|
|
52
56
|
close(): Promise<void>;
|
|
53
57
|
}
|
|
54
58
|
export interface McpFacadeContext {
|
package/dist/tools/mcp/runner.js
CHANGED
|
@@ -3,23 +3,39 @@ import { assertMcpJsonSchema, validateMcpJsonSchema } from './schema.js';
|
|
|
3
3
|
const discoveredCache = new WeakMap();
|
|
4
4
|
/**
 * Resolves the model-facing tool specs for every allow-listed MCP tool.
 *
 * Entries of `tools` that are not in `allowlist`, or that are not MCP tool
 * definitions, contribute nothing to the result.
 *
 * @param tools Map of tool id to tool definition.
 * @param allowlist Tool ids that may be exposed.
 * @param ctx Optional context; `registry`, `signal` and `warn` are read here,
 *   and the whole object is forwarded to `resolveMcpTool`.
 * @returns The resolved specs, in `Object.entries(tools)` order.
 */
export async function getMcpToolSpecs(tools, allowlist, ctx = {}) {
    const allowed = new Set(allowlist);
    // Without a caller-supplied registry a throwaway one is created. It can
    // spawn persistent server processes, so it is always closed before this
    // function settles; a caller-supplied registry is left open.
    const localRegistry = ctx.registry ? undefined : createMcpRunnerRegistry();
    const registry = ctx.registry ?? localRegistry;
    const discover = async ([toolId, tool]) => {
        if (!allowed.has(toolId) || !isMcpToolDefinition(tool)) {
            return undefined;
        }
        const config = resolveMcpTool(toolId, tool, ctx);
        return getResolvedModelToolSpec(config, registry.getRunner(config), ctx.signal, ctx.warn);
    };
    try {
        const discovered = await Promise.all(Object.entries(tools).map(discover));
        return discovered.filter((spec) => spec !== undefined);
    }
    finally {
        await localRegistry?.close();
    }
}
|
|
15
23
|
/**
 * Invokes an MCP tool. Two call shapes are supported:
 *
 * - `(toolId, toolDefinition, input, ctxOrSignal?)` — `first` is a string. The
 *   definition is resolved into a config and a runner is taken from
 *   `ctx.registry`, or from a throwaway registry that is closed before this
 *   function settles.
 * - `(config, runner, input, ctxOrSignal?)` — `first` is not a string. The
 *   arguments are passed straight through to `invokeResolvedMcpTool`.
 *
 * In both shapes the fourth argument may be an `AbortSignal` or a context
 * object carrying `signal` / `warn`.
 *
 * @throws ToolNotFoundError when `first` is a string and `second` is not an
 *   MCP tool definition.
 */
export async function invokeMcpTool(first, second, input, fourth) {
    if (typeof first !== 'string') {
        const signalOnly = isAbortSignal(fourth);
        const signal = signalOnly ? fourth : fourth?.signal;
        const warn = signalOnly ? undefined : fourth?.warn;
        return invokeResolvedMcpTool(first, second, input, signal, warn);
    }
    if (!isMcpToolDefinition(second)) {
        throw new ToolNotFoundError('Tool is not an MCP tool.', { tool_id: first, where: 'registry' });
    }
    const ctx = isAbortSignal(fourth) ? { signal: fourth } : fourth ?? {};
    // A throwaway registry can spawn persistent server processes; it is closed
    // in `finally` so direct library callers do not leak them.
    const localRegistry = ctx.registry ? undefined : createMcpRunnerRegistry();
    const registry = ctx.registry ?? localRegistry;
    const config = resolveMcpTool(first, second, ctx);
    try {
        return await invokeResolvedMcpTool(config, registry.getRunner(config), input, ctx.signal, ctx.warn);
    }
    finally {
        await localRegistry?.close();
    }
}
|
|
@@ -30,18 +46,32 @@ export function createMcpRunnerRegistry() {
|
|
|
30
46
|
const runners = new Map();
|
|
31
47
|
return {
|
|
32
48
|
getRunner(config) {
|
|
33
|
-
|
|
49
|
+
// Keyed by serverKey: a stdio runner binds a concrete sandbox session, so
|
|
50
|
+
// two sessions must never share one runner even for the same tool id.
|
|
51
|
+
const key = config.serverKey || config.localToolId;
|
|
52
|
+
const existing = runners.get(key);
|
|
34
53
|
if (existing)
|
|
35
|
-
return existing;
|
|
54
|
+
return existing.runner;
|
|
36
55
|
const runner = config.kind === 'mcp_stdio'
|
|
37
56
|
? createDynamicStdioRunner(config)
|
|
38
57
|
: createDynamicHttpRunner(config);
|
|
39
|
-
runners.set(config.
|
|
58
|
+
runners.set(key, { runner, ...(config.sandboxKey !== undefined ? { sandboxKey: config.sandboxKey } : {}) });
|
|
40
59
|
return runner;
|
|
41
60
|
},
|
|
61
|
+
async closeForSandboxKey(sandboxKey) {
|
|
62
|
+
const evicted = [];
|
|
63
|
+
for (const [key, entry] of runners) {
|
|
64
|
+
if (entry.sandboxKey === sandboxKey) {
|
|
65
|
+
runners.delete(key);
|
|
66
|
+
evicted.push(entry.runner);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
await Promise.all(evicted.map((runner) => runner.close()));
|
|
70
|
+
},
|
|
42
71
|
async close() {
|
|
43
|
-
|
|
72
|
+
const open = [...runners.values()].map((entry) => entry.runner);
|
|
44
73
|
runners.clear();
|
|
74
|
+
await Promise.all(open.map((runner) => runner.close()));
|
|
45
75
|
}
|
|
46
76
|
};
|
|
47
77
|
}
|
|
@@ -73,7 +103,7 @@ async function invokeResolvedMcpTool(config, runner, input, signal, warn) {
|
|
|
73
103
|
const validatedInput = validateMcpJsonSchema({ toolId: config.localToolId, where: 'mcp_input', schema: tool.inputSchema, value: adaptedInput, ...(warn ? { warn } : {}) });
|
|
74
104
|
const result = await runner.callTool(config.upstreamToolName, validatedInput, { ...(signal ? { signal } : {}), timeoutMs: config.timeoutMs });
|
|
75
105
|
if (isRecord(result) && result.isError === true) {
|
|
76
|
-
throw new ToolError(
|
|
106
|
+
throw new ToolError(`MCP tool returned an error.${describeMcpErrorResult(result)}`, { tool_id: config.localToolId, tool_kind: config.kind });
|
|
77
107
|
}
|
|
78
108
|
const normalized = normalizeMcpOutput(result);
|
|
79
109
|
const validatedOutput = tool.outputSchema
|
|
@@ -128,6 +158,7 @@ function resolveMcpTool(toolId, tool, ctx) {
|
|
|
128
158
|
...base,
|
|
129
159
|
kind: 'mcp_stdio',
|
|
130
160
|
serverKey: `${toolId}:${ctx.sandboxKey ?? 'sandbox'}`,
|
|
161
|
+
sandboxKey: ctx.sandboxKey ?? 'sandbox',
|
|
131
162
|
command: tool.command,
|
|
132
163
|
...(tool.args ? { args: tool.args } : {}),
|
|
133
164
|
...(tool.env ? { env: tool.env } : {}),
|
|
@@ -148,25 +179,48 @@ export function isMcpToolDefinition(tool) {
|
|
|
148
179
|
}
|
|
149
180
|
/**
 * Builds a runner that loads `./stdio.js` on first use.
 *
 * The promise for the loaded transport is created once and reused by every
 * later call. The transport is given an `onReset` hook that deletes this
 * runner's entry from `discoveredCache`.
 *
 * @param config Resolved stdio tool config handed to `createStdioMcpTransportRunner`.
 * @returns The runner produced by `dynamicRunner`.
 */
function createDynamicStdioRunner(config) {
    let runnerPromise;
    const load = () => {
        if (runnerPromise == null) {
            runnerPromise = import('./stdio.js').then((module) => module.createStdioMcpTransportRunner(config, {
                // A respawned server may expose a different tool list, so the
                // memoized discovery is dropped whenever the transport resets.
                onReset: () => { discoveredCache.delete(runner); }
            }));
        }
        return runnerPromise;
    };
    const peek = () => runnerPromise;
    const runner = dynamicRunner(load, peek);
    return runner;
}
|
|
156
192
|
/**
 * Builds a runner that loads `./http.js` on first use.
 *
 * The promise for the loaded transport is created once and reused by every
 * later call.
 *
 * @param config Resolved HTTP tool config handed to `createHttpMcpTransportRunner`.
 * @returns The runner produced by `dynamicRunner`.
 */
function createDynamicHttpRunner(config) {
    let runnerPromise;
    const load = () => {
        if (runnerPromise == null) {
            runnerPromise = import('./http.js').then((module) => module.createHttpMcpTransportRunner(config));
        }
        return runnerPromise;
    };
    const peek = () => runnerPromise;
    return dynamicRunner(load, peek);
}
|
|
163
|
-
function dynamicRunner(load) {
|
|
199
|
+
/**
 * Wraps a lazily loaded transport runner behind the runner interface.
 *
 * @param load Returns a promise for the transport; called by `listTools` and `callTool`.
 * @param peek Returns the promise from an earlier `load()`, or nothing when no
 *   load has happened yet; it must not start a load itself.
 * @returns An object with `listTools`, `callTool` and `close`.
 */
function dynamicRunner(load, peek) {
    return {
        async listTools(options) {
            const transport = await load();
            return transport.listTools(options);
        },
        async callTool(name, input, options) {
            const transport = await load();
            return transport.callTool(name, input, options);
        },
        async close() {
            // Closing never triggers a fresh load, and a load that already
            // failed is treated as "nothing to close" instead of rethrown.
            const pending = peek();
            if (pending) {
                const transport = await pending.catch(() => undefined);
                await transport?.close();
            }
        }
    };
}
|
|
214
|
+
/**
 * Renders a short, truncated description of an MCP `isError` result for error
 * messages.
 *
 * The result is normalized with `normalizeMcpOutput`; strings are used as-is
 * and anything else is JSON-encoded. Text longer than 512 UTF-16 code units is
 * cut and suffixed with `…`.
 *
 * @param result Raw MCP result whose `isError` flag was set.
 * @returns `''` when there is nothing to describe (null or empty output, or
 *   output that cannot be JSON-encoded); otherwise a leading space followed by
 *   the (possibly truncated) text, ready to append to a message.
 */
function describeMcpErrorResult(result) {
    const limit = 512;
    const normalized = normalizeMcpOutput(result);
    if (normalized === null)
        return '';
    let text;
    if (typeof normalized === 'string') {
        text = normalized;
    }
    else {
        try {
            text = JSON.stringify(normalized);
        }
        catch {
            // Cyclic or BigInt payloads cannot be encoded. This helper only
            // decorates an error message, so it must not throw and mask the
            // tool error being reported.
            return '';
        }
    }
    if (!text)
        return '';
    if (text.length <= limit)
        return ` ${text}`;
    // Do not cut between the two halves of a surrogate pair: a dangling high
    // surrogate would leave an invalid character at the end of the message.
    const lastUnit = text.charCodeAt(limit - 1);
    const end = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? limit - 1 : limit;
    return ` ${text.slice(0, end)}…`;
}
|
|
170
224
|
function normalizeContentBlock(block) {
|
|
171
225
|
if (!isRecord(block))
|
|
172
226
|
return null;
|