@intx/harness 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/package.json +19 -0
- package/src/config.ts +135 -0
- package/src/connector-router.test.ts +718 -0
- package/src/connector-router.ts +304 -0
- package/src/deploy-tree.test.ts +51 -0
- package/src/deploy-tree.ts +35 -0
- package/src/harness.test.ts +1747 -0
- package/src/harness.ts +379 -0
- package/src/index.ts +31 -0
- package/src/merge-tool-runners.test.ts +149 -0
- package/src/merge-tool-runners.ts +90 -0
- package/src/runtime-capabilities.test.ts +19 -0
- package/src/runtime-capabilities.ts +22 -0
- package/tsconfig.json +4 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,1747 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
MessageTransport,
|
|
5
|
+
CryptoProvider,
|
|
6
|
+
ContextStore,
|
|
7
|
+
AuditStore,
|
|
8
|
+
ToolRunner,
|
|
9
|
+
InboundMessage,
|
|
10
|
+
OutboundMessage,
|
|
11
|
+
SendReceipt,
|
|
12
|
+
MessageHeaders,
|
|
13
|
+
MessageRef,
|
|
14
|
+
Mailbox,
|
|
15
|
+
MailboxStatus,
|
|
16
|
+
SearchQuery,
|
|
17
|
+
Thread,
|
|
18
|
+
BodyStructure,
|
|
19
|
+
MessagePart,
|
|
20
|
+
SyncState,
|
|
21
|
+
SyncResult,
|
|
22
|
+
ListInfo,
|
|
23
|
+
MailboxEvent,
|
|
24
|
+
Unsubscribe,
|
|
25
|
+
ConversationTurn,
|
|
26
|
+
PendingOperation,
|
|
27
|
+
TokenUsage,
|
|
28
|
+
ContextCommit,
|
|
29
|
+
ToolCall,
|
|
30
|
+
ToolDefinition,
|
|
31
|
+
ToolResult,
|
|
32
|
+
InferenceEvent,
|
|
33
|
+
LastCycleSource,
|
|
34
|
+
} from "@intx/types/runtime";
|
|
35
|
+
import type { AuditRecord, ErrorRecord } from "@intx/types/audit";
|
|
36
|
+
import type { AuthzCallResult } from "@intx/inference";
|
|
37
|
+
import { createInboundMessage } from "@intx/mime";
|
|
38
|
+
import type {
|
|
39
|
+
ReactorInboundEvent,
|
|
40
|
+
ReactorDirector,
|
|
41
|
+
ReactorState,
|
|
42
|
+
ReactorCapabilities,
|
|
43
|
+
} from "@intx/types/runtime";
|
|
44
|
+
|
|
45
|
+
import { createHarness } from "./harness";
|
|
46
|
+
import { createDefaultDirector } from "@intx/inference";
|
|
47
|
+
import type { HarnessConfig } from "./config";
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Mock factory helpers
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
/** Builds a fresh TokenUsage record in which every counter is zero. */
function emptyUsage(): TokenUsage {
  const zeroed: TokenUsage = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    thinking: 0,
  };
  return zeroed;
}
|
|
56
|
+
|
|
57
|
+
/** Fixed inference-source descriptor attached to synthetic `inference.done` events. */
const TEST_SOURCE: LastCycleSource = {
  sourceId: "test-source",
  provider: "test-provider",
  model: "test-model",
};
|
|
62
|
+
|
|
63
|
+
/**
 * Builds an in-memory ContextStore stub for harness tests.
 *
 * `load()` returns an empty context, `commit()` returns a fixed-hash commit,
 * and the write methods are no-ops. Blob reads and writes only work when a
 * backing `blobs` map is supplied; without one they throw "not implemented".
 *
 * @param opts.blobs Optional map used as blob storage.
 * @returns The stubbed store.
 */
function makeContextStore(
  opts: { blobs?: Map<string, Uint8Array> } = {},
): ContextStore {
  const { blobs } = opts;

  // Hands back the backing map, or throws when the store was built without one.
  const requireBlobs = (): Map<string, Uint8Array> => {
    if (blobs === undefined) {
      throw new Error("not implemented");
    }
    return blobs;
  };

  function commit(
    options: { message: string },
    signal?: AbortSignal,
  ): Promise<ContextCommit>;
  function commit(
    turns: ConversationTurn[],
    pendingOperations: PendingOperation[],
    tokenUsage: TokenUsage,
    message: string,
    signal?: AbortSignal,
  ): Promise<ContextCommit>;
  // Single implementation behind both overloads: the commit message is either
  // the fourth positional argument (array form) or `first.message` (options form).
  async function commit(
    first: { message: string } | ConversationTurn[],
    _second?: PendingOperation[] | AbortSignal,
    _third?: TokenUsage,
    fourth?: string,
  ): Promise<ContextCommit> {
    let message: string;
    if (Array.isArray(first)) {
      message = fourth ?? "";
    } else {
      message = first.message;
    }
    return { hash: "mock-hash", message, timestamp: Date.now() };
  }

  const store: ContextStore = {
    async load() {
      const noTurns: ConversationTurn[] = [];
      const noPending: PendingOperation[] = [];
      return {
        turns: noTurns,
        pendingOperations: noPending,
        tokenUsage: emptyUsage(),
        connectorState: null,
      };
    },
    setConnectorState() {
      /* noop */
    },
    commit,
    async branch(): Promise<void> {
      /* noop */
    },
    async log(): Promise<ContextCommit[]> {
      return [];
    },
    async readAt(): Promise<ConversationTurn[]> {
      return [];
    },
    async writeBlob(key, bytes) {
      requireBlobs().set(key, bytes);
    },
    async readBlob(key) {
      const stored = requireBlobs().get(key);
      if (stored === undefined) {
        throw new Error(`Blob not found for key: ${key}`);
      }
      return stored;
    },
    async writePrompt() {
      /* noop */
    },
    async writeResponse() {
      /* noop */
    },
    async writeManifest() {
      /* noop */
    },
    async writeTurns() {
      /* noop */
    },
    async writeMetadata() {
      /* noop */
    },
    async readManifestHistory() {
      throw new Error("not implemented");
    },
  };

  return store;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Builds a CryptoProvider stub: signatures are zero-filled, verification
 * always succeeds, and the public key is one shared 32-byte zero buffer.
 */
function makeCrypto(): CryptoProvider {
  const publicKey = new Uint8Array(32);

  const sign = async (_content: Uint8Array): Promise<Uint8Array> =>
    new Uint8Array(64);

  const signSSH = async (_payload: string): Promise<string> =>
    "unused-in-this-test";

  const verify = async (
    _content: Uint8Array,
    _signature: Uint8Array,
    _publicKey: Uint8Array,
  ): Promise<boolean> => true;

  // Always hands back the same buffer instance created above.
  const getPublicKey = (): Uint8Array => publicKey;

  return { sign, signSSH, verify, getPublicKey };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Builds a ToolRunner stub exposing one generic tool definition. Every call
 * resolves to a fixed "mock-result" payload keyed by the call's id.
 */
function makeToolRunner(): ToolRunner & { definitions: ToolDefinition[] } {
  const testTool: ToolDefinition = {
    name: "test_tool",
    description: "Generic mock tool used by harness tests",
    inputSchema: { type: "object", properties: {} },
  };

  const run = async (call: ToolCall): Promise<ToolResult> => ({
    callId: call.id,
    content: "mock-result",
  });

  return { definitions: [testTool], run };
}
|
|
186
|
+
|
|
187
|
+
/** Listener signature accepted by the mock transport's `watch()`. */
type WatchCallback = (event: MailboxEvent) => void;

/** MessageTransport plus the inspection and injection hooks the tests use. */
type MockTransport = MessageTransport & {
  /** Messages handed to `send()` so far. */
  getSentMessages(): OutboundMessage[];
  /** Invokes every registered watch callback with `event`. */
  fireWatch(event: MailboxEvent): void;
  /** Currently registered watch callbacks. */
  getWatchCallbacks(): WatchCallback[];
  /** Stores `msg` so later fetches for `ref` return it. */
  enqueueMessage(ref: MessageRef, msg: InboundMessage): void;
};
|
|
195
|
+
|
|
196
|
+
/**
 * Builds an in-memory MessageTransport double.
 *
 * Sent messages are captured, watch callbacks are tracked so tests can fire
 * mailbox events by hand, and messages can be pre-loaded for the fetch
 * methods. Every other transport operation is a no-op or returns an empty
 * or fixed result.
 */
function makeMockTransport(): MockTransport {
  const outbox: OutboundMessage[] = [];
  const watchers: WatchCallback[] = [];
  const stored = new Map<string, InboundMessage>();

  // Storage key combining mailbox name and uid.
  const keyOf = (ref: MessageRef): string => `${ref.mailbox}:${ref.uid}`;

  // Placeholder headers for refs that were never enqueued or appended.
  const placeholderHeaders = (ref: MessageRef): MessageHeaders => ({
    from: "sender@test",
    to: ["agent@test"],
    date: new Date().toISOString(),
    messageId: `<${ref.uid}@test>`,
  });

  return {
    // --- test-only hooks ---------------------------------------------------
    getSentMessages() {
      return outbox;
    },
    fireWatch(event: MailboxEvent): void {
      for (const notify of watchers) {
        notify(event);
      }
    },
    getWatchCallbacks() {
      return watchers;
    },
    enqueueMessage(ref: MessageRef, msg: InboundMessage): void {
      stored.set(keyOf(ref), msg);
    },

    // --- MessageTransport implementation ------------------------------------
    async send(message: OutboundMessage): Promise<SendReceipt> {
      outbox.push(message);
      const messageId = `<msg-${Date.now()}@test>`;
      return { messageId, status: "delivered" };
    },

    async append(
      mailbox: string,
      message: InboundMessage,
    ): Promise<MessageRef> {
      // Appended messages always receive the fixed uid 999.
      const ref = { uid: 999, mailbox };
      stored.set(keyOf(ref), message);
      return ref;
    },

    async listMailboxes(): Promise<Mailbox[]> {
      return [{ name: "INBOX", role: "\\Inbox" }];
    },

    async createMailbox(name: string): Promise<Mailbox> {
      return { name };
    },

    async deleteMailbox(): Promise<void> {
      /* noop */
    },

    async getMailboxStatus(): Promise<MailboxStatus> {
      return {
        total: 0,
        unseen: 0,
        recent: 0,
        uidNext: 1,
        uidValidity: 1,
        highestModSeq: 0,
      };
    },

    async search(_mailbox: string, _query: SearchQuery): Promise<MessageRef[]> {
      return [];
    },

    async thread(): Promise<Thread[]> {
      return [];
    },

    async fetchHeaders(ref: MessageRef): Promise<MessageHeaders> {
      const known = stored.get(keyOf(ref));
      if (known === undefined) {
        return placeholderHeaders(ref);
      }
      return known.headers;
    },

    async fetchStructure(): Promise<BodyStructure> {
      return { contentType: "multipart/signed" };
    },

    async fetchPart(): Promise<MessagePart> {
      return { contentType: "text/plain", content: new Uint8Array() };
    },

    async fetchFull(ref: MessageRef): Promise<InboundMessage> {
      const known = stored.get(keyOf(ref));
      if (known !== undefined) {
        return known;
      }
      // Unknown ref: synthesize a minimal unsigned message.
      return {
        ref,
        headers: placeholderHeaders(ref),
        flags: [],
        content: "hello",
        signatureStatus: "missing",
      };
    },

    async setFlags(): Promise<void> {
      /* noop */
    },

    async clearFlags(): Promise<void> {
      /* noop */
    },

    async move(): Promise<void> {
      /* noop */
    },

    async copy(): Promise<void> {
      /* noop */
    },

    async expunge(): Promise<void> {
      /* noop */
    },

    watch(
      _mailbox: string,
      callback: (event: MailboxEvent) => void,
    ): Unsubscribe {
      watchers.push(callback);
      // The returned function removes this callback if it is still registered.
      return () => {
        const position = watchers.indexOf(callback);
        if (position !== -1) watchers.splice(position, 1);
      };
    },

    async sync(_mailbox: string, _state: SyncState): Promise<SyncResult> {
      return {
        vanished: [],
        changed: [],
        newMessages: [],
        fullResyncRequired: false,
      };
    },

    async createList(address: string, name: string): Promise<ListInfo> {
      return {
        address,
        name,
        memberCount: 0,
        createdAt: new Date().toISOString(),
      };
    },

    async listMembers(): Promise<string[]> {
      return [];
    },

    async subscribe(): Promise<void> {
      /* noop */
    },

    async unsubscribe(): Promise<void> {
      /* noop */
    },
  };
}
|
|
367
|
+
|
|
368
|
+
/**
 * Creates a canned inbound message addressed to the test agent.
 *
 * @param from Sender address; defaults to "user@test".
 */
function makeInboundMessage(from = "user@test"): InboundMessage {
  const message = createInboundMessage({
    from,
    to: "agent@local.interchange",
    subject: "Test conversation",
    content: "Hello, agent!",
  });
  return message;
}
|
|
376
|
+
|
|
377
|
+
/**
 * Assembles a HarnessConfig wired to the given mock transport and to fresh
 * stub collaborators (crypto, storage, tools).
 *
 * @param transport Mock transport the harness should use.
 * @param overrides Fields that replace the defaults; applied last.
 */
function makeConfig(
  transport: MockTransport,
  overrides: Partial<HarnessConfig> = {},
): HarnessConfig {
  const defaults: HarnessConfig = {
    address: "agent@local.interchange",
    systemPrompt: "You are a helpful agent.",
    source: {
      id: "anthropic:claude-test",
      provider: "anthropic",
      baseURL: "https://api.anthropic.com",
      apiKey: "test-key",
      model: "claude-test",
    },
    transport,
    crypto: makeCrypto(),
    storage: makeContextStore(),
    tools: makeToolRunner(),
    onEvent: () => {
      /* noop */
    },
  };

  return { ...defaults, ...overrides };
}
|
|
401
|
+
|
|
402
|
+
/**
 * Polls `events` every 10 ms until one satisfies `predicate`.
 *
 * The first check runs synchronously, so an event that is already present
 * resolves without waiting. When the deadline passes the promise rejects and
 * polling stops, leaving no timer behind. A predicate that throws rejects
 * the promise instead of escaping as an uncaught exception from a timer.
 *
 * @param events Live array that is appended to while the wait is in progress.
 * @param predicate Returns true for the event being waited for.
 * @param timeoutMs Maximum time to wait, in milliseconds (default 2000).
 * @returns The first event in `events` that matches `predicate`.
 * @throws Rejects with "Timed out waiting for event" once `timeoutMs` elapses.
 */
function waitForEvent(
  events: InferenceEvent[],
  predicate: (e: InferenceEvent) => boolean,
  timeoutMs = 2000,
): Promise<InferenceEvent> {
  return new Promise((resolve, reject) => {
    let pollTimer: ReturnType<typeof setTimeout> | undefined;

    const deadline = setTimeout(() => {
      // Cancel the pending poll so the loop does not outlive the rejection.
      if (pollTimer !== undefined) clearTimeout(pollTimer);
      reject(new Error("Timed out waiting for event"));
    }, timeoutMs);

    function check(): void {
      let found: InferenceEvent | undefined;
      try {
        found = events.find(predicate);
      } catch (err) {
        clearTimeout(deadline);
        reject(err);
        return;
      }
      if (found !== undefined) {
        clearTimeout(deadline);
        resolve(found);
        return;
      }
      pollTimer = setTimeout(check, 10);
    }
    check();
  });
}
|
|
425
|
+
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
427
|
+
// 1. Lifecycle: start and stop
|
|
428
|
+
// ---------------------------------------------------------------------------
|
|
429
|
+
|
|
430
|
+
// Lifecycle tests. Each test that starts a harness stops it in `finally`, so a
// failing assertion cannot leave a started harness behind for later tests.
describe("Harness lifecycle", () => {
  test("start registers a watch callback on INBOX", () => {
    const transport = makeMockTransport();
    const harness = createHarness(makeConfig(transport));

    expect(transport.getWatchCallbacks().length).toBe(0);

    harness.start();
    try {
      expect(transport.getWatchCallbacks().length).toBe(1);
    } finally {
      harness.stop();
    }
  });

  test("stop unsubscribes the watch callback", async () => {
    const transport = makeMockTransport();
    const events: InferenceEvent[] = [];
    const harness = createHarness(
      makeConfig(transport, { onEvent: (e) => events.push(e) }),
    );

    harness.start();
    // stop() is itself under test here, so it is only repeated in `finally`
    // when an earlier assertion prevented the explicit call from running.
    let stopped = false;
    try {
      expect(transport.getWatchCallbacks().length).toBe(1);

      harness.stop();
      stopped = true;
      expect(transport.getWatchCallbacks().length).toBe(0);

      // Reactor should receive abort signal and emit reactor.done eventually.
      await waitForEvent(events, (e) => e.type === "reactor.done");
    } finally {
      if (!stopped) harness.stop();
    }
  });

  test("start throws if called twice", () => {
    const transport = makeMockTransport();
    const harness = createHarness(makeConfig(transport));
    harness.start();

    try {
      expect(() => harness.start()).toThrow("already started");
    } finally {
      harness.stop();
    }
  });
});
|
|
470
|
+
|
|
471
|
+
// ---------------------------------------------------------------------------
|
|
472
|
+
// 2. Message delivery pipeline
|
|
473
|
+
// ---------------------------------------------------------------------------
|
|
474
|
+
|
|
475
|
+
// Delivery tests. All three cases share one counting director, and each
// started harness is stopped in `finally` so a failed assertion cannot leak it.
describe("Message delivery pipeline", () => {
  /**
   * Director that counts `message.received` events and answers each with
   * done(); every other event gets wait().
   */
  function makeCountingDirector(): {
    director: ReactorDirector;
    deliveredCount(): number;
  } {
    let delivered = 0;
    const director: ReactorDirector = {
      async decide(
        event: ReactorInboundEvent,
        _state: ReactorState,
        caps: ReactorCapabilities,
      ) {
        if (event.type === "message.received") {
          delivered++;
          return caps.done();
        }
        return caps.wait();
      },
    };
    return { director, deliveredCount: () => delivered };
  }

  test("watch 'exists' event causes harness to fetch and deliver message to reactor", async () => {
    const transport = makeMockTransport();
    const events: InferenceEvent[] = [];
    const { director, deliveredCount } = makeCountingDirector();

    const inboundMsg = makeInboundMessage();
    transport.enqueueMessage(inboundMsg.ref, inboundMsg);

    const harness = createHarness(
      makeConfig(transport, { onEvent: (e) => events.push(e), director }),
    );
    harness.start();

    try {
      // Fire a watch event simulating IMAP IDLE notification.
      transport.fireWatch({
        type: "exists",
        uid: inboundMsg.ref.uid,
        headers: inboundMsg.headers,
      });

      // reactor.done signals the director received the message.
      await waitForEvent(events, (e) => e.type === "reactor.done");
      expect(deliveredCount()).toBe(1);
    } finally {
      harness.stop();
    }
  });

  test("non-'exists' watch events are ignored", async () => {
    const transport = makeMockTransport();
    const { director, deliveredCount } = makeCountingDirector();

    const harness = createHarness(makeConfig(transport, { director }));
    harness.start();

    try {
      transport.fireWatch({ type: "flagsChanged", uid: 1, flags: ["\\Seen"] });
      transport.fireWatch({ type: "expunged", uid: 1 });

      // Give a brief window for any erroneous delivery to appear.
      await new Promise<void>((r) => setTimeout(r, 50));

      expect(deliveredCount()).toBe(0);
    } finally {
      harness.stop();
    }
  });

  test("deliver() injects a message directly into the reactor", async () => {
    const transport = makeMockTransport();
    const events: InferenceEvent[] = [];
    const { director, deliveredCount } = makeCountingDirector();

    const harness = createHarness(
      makeConfig(transport, { onEvent: (e) => events.push(e), director }),
    );
    harness.start();

    try {
      // Wait for reactor to start before delivering.
      await waitForEvent(events, (e) => e.type === "reactor.start");

      const msg = makeInboundMessage();
      harness.deliver(msg);

      // reactor.done signals the director received the message.
      await waitForEvent(events, (e) => e.type === "reactor.done");
      expect(deliveredCount()).toBe(1);
    } finally {
      harness.stop();
    }
  });
});
|
|
588
|
+
|
|
589
|
+
// ---------------------------------------------------------------------------
|
|
590
|
+
// 3. Default director decision logic
|
|
591
|
+
// ---------------------------------------------------------------------------
|
|
592
|
+
|
|
593
|
+
describe("Default director", () => {
|
|
594
|
+
/**
 * Builds a recording capabilities object for driving a director directly.
 *
 * Each capability method appends `{ type, args }` to `calls` and returns the
 * matching action object. Recording goes through a closure rather than
 * `this`, so the methods still work when destructured or passed around
 * detached from the object.
 *
 * @returns The capability methods plus the `calls` log they append to.
 */
function makeCapabilities() {
  const recorded: { type: string; args: unknown[] }[] = [];

  // Appends one entry to the call log.
  const record = (type: string, ...args: unknown[]): void => {
    recorded.push({ type, args });
  };

  return {
    calls: recorded,
    infer(options?: unknown) {
      record("infer", options);
      return { type: "infer" as const };
    },
    executeTools(calls: ToolCall[], parallel?: boolean) {
      record("execute_tools", calls, parallel);
      return {
        type: "execute_tools" as const,
        calls,
        // Tool execution is parallel unless the caller says otherwise.
        parallel: parallel ?? true,
      };
    },
    suspend(gate: {
      type: import("@intx/types/runtime").GateType;
      gateId: string;
      timeoutMs: number;
      correlationId?: string;
    }) {
      record("suspend", gate);
      return { type: "suspend" as const, gate };
    },
    fork(mode: import("@intx/types/runtime").ForkMode, forkId: string) {
      record("fork", mode, forkId);
      return { type: "fork" as const, mode, forkId };
    },
    reply(content: string) {
      record("reply", content);
      return { type: "reply" as const, content };
    },
    emit(eventType: `custom.${string}`, data: Record<string, unknown>) {
      record("emit", eventType, data);
      return { type: "emit" as const, eventType, data };
    },
    checkpoint(message?: string) {
      record("checkpoint", message);
      return {
        type: "checkpoint" as const,
        message: message ?? "checkpoint",
      };
    },
    compact(compactor: string, reason: string) {
      record("compact", compactor, reason);
      return { type: "compact" as const, compactor, reason };
    },
    wait() {
      record("wait");
      return { type: "wait" as const };
    },
    done() {
      record("done");
      return { type: "done" as const };
    },
  };
}
|
|
651
|
+
|
|
652
|
+
/** Builds an empty ReactorState: nothing in flight, zeroed usage, fixed session id. */
function makeState(): ReactorState {
  const blank: ReactorState = {
    turns: [],
    activeForks: [],
    pendingOperations: [],
    activeGates: [],
    tokenUsage: emptyUsage(),
    lastCycleUsage: null,
    lastCycleSource: null,
    sessionId: "test-session",
  };
  return blank;
}
|
|
664
|
+
|
|
665
|
+
// An inbound message should make the default director ask for inference.
test("message.received triggers infer action", async () => {
  const director = createDefaultDirector("You are helpful.");
  const received: ReactorInboundEvent = {
    type: "message.received",
    message: makeInboundMessage(),
  };

  const decided = await director.decide(
    received,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("infer")).toBe(true);
});
|
|
680
|
+
|
|
681
|
+
// A finished turn that contains a tool call should be checkpointed and executed.
test("inference.done with tool calls triggers checkpoint and execute_tools", async () => {
  const director = createDefaultDirector("You are helpful.");

  const toolCallTurn: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [
        {
          type: "tool_call",
          id: "tc1",
          name: "read_file",
          arguments: { path: "/test" },
        },
      ],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(
    toolCallTurn,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("execute_tools")).toBe(true);
});
|
|
710
|
+
|
|
711
|
+
// A plain-text turn should be checkpointed and sent back verbatim as the reply.
test("inference.done without tool calls returns checkpoint and reply", async () => {
  const director = createDefaultDirector("You are helpful.");

  const textTurn: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [{ type: "text", text: "Here is my response." }],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(
    textTurn,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("reply")).toBe(true);

  const replyAction = actionList.find((a) => a.type === "reply");
  // Narrowing guard only; the assertions above already proved a reply exists.
  if (replyAction === undefined || replyAction.type !== "reply") {
    throw new Error("unreachable");
  }
  expect(replyAction.content).toBe("Here is my response.");
});
|
|
738
|
+
|
|
739
|
+
// A completed tool call should be checkpointed and followed by another inference.
test("tool.done triggers checkpoint and re-infer", async () => {
  const director = createDefaultDirector("You are helpful.");

  const toolFinished: ReactorInboundEvent = {
    type: "tool.done",
    result: { callId: "tc1", content: "file contents" },
  };

  const decided = await director.decide(
    toolFinished,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("infer")).toBe(true);
});
|
|
754
|
+
|
|
755
|
+
// An inference failure should be checkpointed and reported to the caller in the reply.
test("inference.error returns checkpoint and reply with error message", async () => {
  const director = createDefaultDirector("You are helpful.");

  const failure: ReactorInboundEvent = {
    type: "inference.error",
    error: {
      category: "credential_failure",
      message: "invalid API key",
      statusCode: 401,
    },
    partial: { text: "" },
  };

  const decided = await director.decide(
    failure,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);

  const replyAction = actionList.find((a) => a.type === "reply");
  expect(replyAction).toBeDefined();

  // Stays undefined when no reply action was produced.
  const replyText =
    replyAction?.type === "reply" ? replyAction.content : undefined;
  expect(replyText).toContain("credential error");
  expect(replyText).toContain("invalid API key");
});
|
|
781
|
+
|
|
782
|
+
// An abort event should end the reactor with a done action.
test("abort returns done", async () => {
  const director = createDefaultDirector("You are helpful.");

  const abortEvent: ReactorInboundEvent = {
    type: "abort",
    reason: "user_disconnect",
  };

  const decided = await director.decide(
    abortEvent,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("done")).toBe(true);
});
|
|
796
|
+
|
|
797
|
+
// A turn with no content blocks should be checkpointed and then waited on,
// without a reply and without ending the reactor.
test("inference.done with empty content returns checkpoint and wait", async () => {
  const director = createDefaultDirector("You are helpful.");

  const emptyTurn: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(
    emptyTurn,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("wait")).toBe(true);
  expect(kinds.has("done")).toBe(false);
  expect(kinds.has("reply")).toBe(false);
});
|
|
821
|
+
|
|
822
|
+
test("inference.done with a refusal-only turn replies with the refusal reason", async () => {
  // A RefusalBlock is the OpenAI strict-mode policy-decline shape: the model
  // produced coherent output ("I cannot help with that") in the dedicated
  // refusal field instead of in content. The director's reply path must
  // surface that refusal text to the caller rather than route the turn
  // through the empty-response branch; otherwise the human waits
  // indefinitely for an answer the model already declined to give.
  const director = createDefaultDirector("You are helpful.");

  const refusalTurn: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "gpt-test",
      content: [{ type: "refusal", reason: "I cannot help with that." }],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(
    refusalTurn,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("reply")).toBe(true);

  const replyAction = actionList.find((a) => a.type === "reply");
  // Narrowing guard only; the assertions above already proved a reply exists.
  if (replyAction === undefined || replyAction.type !== "reply") {
    throw new Error("unreachable");
  }
  expect(replyAction.content).toBe("I cannot help with that.");
});
|
|
856
|
+
|
|
857
|
+
// Text consisting only of whitespace should get the same checkpoint-and-wait
// outcome as an empty turn: no reply and no done.
test("inference.done with whitespace-only text returns checkpoint and wait", async () => {
  const director = createDefaultDirector("You are helpful.");

  const blankTurn: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [{ type: "text", text: " \n\t " }],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(
    blankTurn,
    makeState(),
    makeCapabilities(),
  );

  // decide() may return one action or a list; treat both as a list.
  const actionList = Array.isArray(decided) ? decided : [decided];
  const kinds = new Set(actionList.map((a) => a.type));
  expect(kinds.has("checkpoint")).toBe(true);
  expect(kinds.has("wait")).toBe(true);
  expect(kinds.has("done")).toBe(false);
  expect(kinds.has("reply")).toBe(false);
});
|
|
881
|
+
|
|
882
|
+
test("reactive mode inference.done returns checkpoint and wait", async () => {
  // Default director built with the "reactive" policy and no tools.
  const director = createDefaultDirector("You are helpful.", [], {
    mode: "reactive",
  });
  const capabilities = makeCapabilities();
  const reactorState = makeState();

  const textDone: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [{ type: "text", text: "done processing" }],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };

  const decided = await director.decide(textDone, reactorState, capabilities);
  const actionList = Array.isArray(decided) ? decided : [decided];
  const has = (kind: string) => actionList.some((action) => action.type === kind);

  expect(has("checkpoint")).toBe(true);
  expect(has("wait")).toBe(true);
});
|
|
906
|
+
|
|
907
|
+
test("reactive mode tool.done returns checkpoint and wait", async () => {
  // Default director built with the "reactive" policy and no tools.
  const director = createDefaultDirector("You are helpful.", [], {
    mode: "reactive",
  });
  const capabilities = makeCapabilities();
  const reactorState = makeState();

  const toolFinished: ReactorInboundEvent = {
    type: "tool.done",
    result: { callId: "tc1", content: "result" },
  };

  const decided = await director.decide(toolFinished, reactorState, capabilities);
  const actionList = Array.isArray(decided) ? decided : [decided];
  const has = (kind: string) => actionList.some((action) => action.type === kind);

  // Expected: checkpoint + wait, with no follow-up inference requested.
  expect(has("checkpoint")).toBe(true);
  expect(has("wait")).toBe(true);
  expect(has("infer")).toBe(false);
});
|
|
925
|
+
|
|
926
|
+
test("tool.done batching waits for all results before checkpoint", async () => {
  const director = createDefaultDirector("You are helpful.");
  const capabilities = makeCapabilities();
  const reactorState = makeState();

  // Step 1: an inference.done carrying two tool calls, so the director
  // has two pending tool results to wait for.
  const twoCallsDone: ReactorInboundEvent = {
    type: "inference.done",
    turn: {
      role: "assistant",
      model: "claude-test",
      content: [
        {
          type: "tool_call",
          id: "tc1",
          name: "read_file",
          arguments: { path: "/a" },
        },
        {
          type: "tool_call",
          id: "tc2",
          name: "read_file",
          arguments: { path: "/b" },
        },
      ],
      timestamp: 1000,
    },
    usage: emptyUsage(),
    source: TEST_SOURCE,
  };
  await director.decide(twoCallsDone, reactorState, capabilities);

  // Step 2: the result for tc1 arrives; tc2 is still outstanding, so
  // no actions are expected yet.
  const firstResult: ReactorInboundEvent = {
    type: "tool.done",
    result: { callId: "tc1", content: "result1" },
  };
  const afterFirst = await director.decide(firstResult, reactorState, capabilities);
  const afterFirstList = Array.isArray(afterFirst) ? afterFirst : [afterFirst];
  expect(afterFirstList).toEqual([]);

  // Step 3: the result for tc2 completes the batch; expect a
  // checkpoint followed by another inference.
  const secondResult: ReactorInboundEvent = {
    type: "tool.done",
    result: { callId: "tc2", content: "result2" },
  };
  const afterSecond = await director.decide(secondResult, reactorState, capabilities);
  const afterSecondList = Array.isArray(afterSecond) ? afterSecond : [afterSecond];
  const has = (kind: string) => afterSecondList.some((action) => action.type === kind);
  expect(has("checkpoint")).toBe(true);
  expect(has("infer")).toBe(true);
});
|
|
977
|
+
});
|
|
978
|
+
|
|
979
|
+
// ---------------------------------------------------------------------------
|
|
980
|
+
// 4. Config validation
|
|
981
|
+
// ---------------------------------------------------------------------------
|
|
982
|
+
|
|
983
|
+
describe("Config validation", () => {
|
|
984
|
+
test("throws when address is empty", () => {
  const transport = makeMockTransport();
  // Construction is deferred into a thunk so expect() can observe the throw.
  const build = () => createHarness(makeConfig(transport, { address: "" }));
  expect(build).toThrow("address");
});
|
|
990
|
+
|
|
991
|
+
test("throws when systemPrompt is empty", () => {
  const transport = makeMockTransport();
  // Construction is deferred into a thunk so expect() can observe the throw.
  const build = () =>
    createHarness(makeConfig(transport, { systemPrompt: "" }));
  expect(build).toThrow("systemPrompt");
});
|
|
997
|
+
|
|
998
|
+
test("throws when both director and defaultDirectorPolicy are provided", () => {
  const transport = makeMockTransport();
  // Minimal custom director: always answers with a wait action.
  const director: ReactorDirector = {
    decide: async (_event, _state, caps) => caps.wait(),
  };

  // Supplying a custom director together with a default-director policy
  // must be rejected at construction time.
  const build = () =>
    createHarness(
      makeConfig(transport, {
        director,
        defaultDirectorPolicy: { mode: "reactive" },
      }),
    );

  expect(build).toThrow(
    "HarnessConfig.director and HarnessConfig.defaultDirectorPolicy are mutually exclusive",
  );
});
|
|
1016
|
+
|
|
1017
|
+
test("throws when auditStore is provided without authorize", () => {
  const transport = makeMockTransport();
  // Inert audit store: nothing is persisted, nothing is loaded.
  const auditStore: AuditStore = {
    commitAudit: async () => {
      /* noop */
    },
    loadAudit: async () => [],
    commitErrors: async () => {
      /* noop */
    },
  };

  const build = () => createHarness(makeConfig(transport, { auditStore }));
  expect(build).toThrow("authorize is required when auditStore is provided");
});
|
|
1034
|
+
|
|
1035
|
+
test("accepts auditStore with authorize", () => {
  const transport = makeMockTransport();
  // Inert audit store: nothing is persisted, nothing is loaded.
  const auditStore: AuditStore = {
    commitAudit: async () => {
      /* noop */
    },
    loadAudit: async () => [],
    commitErrors: async () => {
      /* noop */
    },
  };
  // Authorizer that allows every call with no matching grants.
  const authorize = async (): Promise<AuthzCallResult> => {
    return {
      effect: "allow",
      matchingGrants: [],
      resolvedBy: null,
    };
  };

  const build = () =>
    createHarness(makeConfig(transport, { auditStore, authorize }));
  expect(build).not.toThrow();
});
|
|
1057
|
+
|
|
1058
|
+
test("accepts authorize without auditStore", () => {
  const transport = makeMockTransport();
  // Authorizer that allows every call with no matching grants.
  const authorize = async (): Promise<AuthzCallResult> => {
    return {
      effect: "allow",
      matchingGrants: [],
      resolvedBy: null,
    };
  };

  const build = () => createHarness(makeConfig(transport, { authorize }));
  expect(build).not.toThrow();
});
|
|
1069
|
+
});
|
|
1070
|
+
|
|
1071
|
+
// ---------------------------------------------------------------------------
|
|
1072
|
+
// 5. Audit integration
|
|
1073
|
+
// ---------------------------------------------------------------------------
|
|
1074
|
+
|
|
1075
|
+
describe("Audit integration", () => {
|
|
1076
|
+
/**
 * Builds an in-memory audit store for tests.
 *
 * Every `commitAudit` call is stored as its own batch (a shallow copy of
 * the records passed in). `loadAudit` returns all batches flattened,
 * `commitErrors` does nothing, and `getCommitted` exposes the raw list of
 * batches so tests can assert on how many commits happened.
 */
function makeAuditStore(): AuditStore & {
  getCommitted(): AuditRecord[][];
} {
  const batches: AuditRecord[][] = [];
  return {
    commitAudit: async (records: AuditRecord[]) => {
      // Copy so later mutation of the caller's array cannot alter the batch.
      batches.push(records.slice());
    },
    loadAudit: async () => batches.flat(),
    commitErrors: async () => {
      /* noop */
    },
    getCommitted: () => batches,
  };
}
|
|
1095
|
+
|
|
1096
|
+
/** Authorization stub: resolves to an "allow" decision with no grants. */
async function allowAll(): Promise<AuthzCallResult> {
  return {
    effect: "allow" as const,
    matchingGrants: [],
    resolvedBy: null,
  };
}
|
|
1103
|
+
|
|
1104
|
+
/** Authorization stub: resolves to a "deny" decision with no grants. */
async function denyAll(): Promise<AuthzCallResult> {
  return {
    effect: "deny" as const,
    matchingGrants: [],
    resolvedBy: null,
  };
}
|
|
1111
|
+
|
|
1112
|
+
/**
 * Scripted director used to drive the audit pipeline without a real LLM.
 *
 * - `message.received`: requests a single tool call whose id is
 *   `call-<toolName>`, with the given name and arguments.
 * - `tool.done`: checkpoints, then finishes.
 * - any other event: finishes immediately.
 */
function makeToolExecDirector(
  toolName: string,
  toolArgs: Record<string, unknown>,
): ReactorDirector {
  return {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      switch (event.type) {
        case "message.received":
          return caps.executeTools([
            { id: `call-${toolName}`, name: toolName, arguments: toolArgs },
          ]);
        case "tool.done":
          return [caps.checkpoint(), caps.done()];
        default:
          return caps.done();
      }
    },
  };
}
|
|
1137
|
+
|
|
1138
|
+
/**
 * Resolves once `events` contains an event of type "reactor.done".
 *
 * The array is checked immediately and then every 10 ms. If no such
 * event shows up within 5000 ms the promise rejects with a timeout error.
 *
 * @param events Live array that the harness `onEvent` callback appends to.
 * @returns A promise that resolves on "reactor.done" or rejects on timeout.
 */
function waitForDone(events: InferenceEvent[]): Promise<void> {
  return new Promise((resolve, reject) => {
    // Handle of the currently scheduled poll, so the timeout can cancel it.
    let pollTimer: ReturnType<typeof setTimeout> | undefined;

    const deadline = setTimeout(() => {
      // Stop polling on timeout; otherwise `check` would keep re-arming
      // itself every 10 ms forever after the promise has been rejected.
      if (pollTimer !== undefined) {
        clearTimeout(pollTimer);
      }
      reject(new Error("Timed out waiting for reactor.done"));
    }, 5000);

    const check = () => {
      if (events.some((e) => e.type === "reactor.done")) {
        clearTimeout(deadline);
        resolve();
        return;
      }
      pollTimer = setTimeout(check, 10);
    };
    check();
  });
}
|
|
1155
|
+
|
|
1156
|
+
test("allowed tool call produces audit record with authz and result", async () => {
  const transport = makeMockTransport();
  const auditStore = makeAuditStore();
  const events: InferenceEvent[] = [];

  // Harness wired with an allow-everything authorizer and a scripted
  // director that runs a single "test_tool" call.
  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director: makeToolExecDirector("test_tool", { key: "value" }),
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const committed = auditStore.getCommitted().flat();
  expect(committed.length).toBe(1);

  const entry = committed[0];
  if (entry === undefined) throw new Error("expected record");

  // Identity of the call.
  expect(entry.callId).toBe("call-test_tool");
  expect(entry.tool).toBe("test_tool");
  expect(entry.arguments).toEqual({ key: "value" });

  // Authorization outcome.
  expect(entry.authz).not.toBeNull();
  if (entry.authz === null) throw new Error("expected authz");
  expect(entry.authz.effect).toBe("allow");
  expect(entry.authz.blocked).toBe(false);

  // Tool result and session.
  expect(entry.result.content).toBe("mock-result");
  expect(entry.result.isError).toBe(false);
  expect(entry.sessionId).toBeDefined();

  // The record's seq is taken from the reactor's tool.done event;
  // look that event up and compare.
  const matchingToolDone = events.find(
    (evt) =>
      evt.type === "tool.done" &&
      evt.data.result.callId === "call-test_tool" &&
      !evt.data.result.isError,
  );
  if (matchingToolDone === undefined)
    throw new Error("expected tool.done event");
  expect(entry.seq).toBe(matchingToolDone.seq);
});
|
|
1201
|
+
|
|
1202
|
+
test("blocked tool call produces audit record with denied authz", async () => {
  const transport = makeMockTransport();
  const auditStore = makeAuditStore();
  const events: InferenceEvent[] = [];

  // Same scripted director as the allowed case, but every call is denied.
  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => denyAll(),
      onEvent: (evt) => events.push(evt),
      director: makeToolExecDirector("secret_tool", { path: "/etc/shadow" }),
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const committed = auditStore.getCommitted().flat();
  expect(committed.length).toBe(1);

  const entry = committed[0];
  if (entry === undefined) throw new Error("expected record");

  expect(entry.callId).toBe("call-secret_tool");
  expect(entry.tool).toBe("secret_tool");
  // A blocked call never reaches tool.start, so its arguments are not
  // captured and the record carries an empty object.
  expect(entry.arguments).toEqual({});

  expect(entry.authz).not.toBeNull();
  if (entry.authz === null) throw new Error("expected authz");
  expect(entry.authz.effect).toBe("deny");
  expect(entry.authz.blocked).toBe(true);
  expect(entry.authz.blockReason).toBeDefined();
  expect(entry.result.isError).toBe(true);
});
|
|
1237
|
+
|
|
1238
|
+
test("audit records are flushed at shutdown for unflushed records", async () => {
  const transport = makeMockTransport();
  const auditStore = makeAuditStore();
  const events: InferenceEvent[] = [];

  // This director runs one tool and then finishes WITHOUT checkpointing,
  // so the only remaining chance to persist the record is the
  // onShutdown flush.
  const director: ReactorDirector = {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      switch (event.type) {
        case "message.received":
          return caps.executeTools([
            { id: "call-1", name: "test_tool", arguments: {} },
          ]);
        case "tool.done":
          return caps.done();
        default:
          return caps.done();
      }
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  // Exactly one batch with one record, committed via onShutdown since
  // no checkpoint took place.
  const committedBatches = auditStore.getCommitted();
  expect(committedBatches.length).toBe(1);
  expect(committedBatches[0]?.length).toBe(1);
  expect(committedBatches[0]?.[0]?.callId).toBe("call-1");
});
|
|
1282
|
+
|
|
1283
|
+
test("checkpoint then shutdown does not double-commit audit records", async () => {
  const transport = makeMockTransport();
  const auditStore = makeAuditStore();
  const events: InferenceEvent[] = [];

  // The scripted director checkpoints before finishing, so both the
  // afterCheckpoint and the onShutdown flush run. The second one is
  // expected to be a no-op.
  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director: makeToolExecDirector("test_tool", { x: 1 }),
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  // A single commitAudit call (at checkpoint); the shutdown flush sees
  // an empty buffer and commits nothing.
  const committedBatches = auditStore.getCommitted();
  expect(committedBatches.length).toBe(1);
  expect(committedBatches[0]?.length).toBe(1);
});
|
|
1309
|
+
|
|
1310
|
+
test("authorize throwing produces blocked audit record", async () => {
  const transport = makeMockTransport();
  const auditStore = makeAuditStore();
  const events: InferenceEvent[] = [];

  // The authorizer itself fails (throws synchronously) instead of
  // returning a decision.
  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => {
        throw new Error("authz service unavailable");
      },
      onEvent: (evt) => events.push(evt),
      director: makeToolExecDirector("risky_tool", { cmd: "rm -rf /" }),
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const committed = auditStore.getCommitted().flat();
  expect(committed.length).toBe(1);

  const entry = committed[0];
  if (entry === undefined) throw new Error("expected record");

  // The call is recorded as blocked with no effect, and its result is
  // an error.
  expect(entry.tool).toBe("risky_tool");
  expect(entry.authz).not.toBeNull();
  if (entry.authz === null) throw new Error("expected authz");
  expect(entry.authz.blocked).toBe(true);
  expect(entry.authz.effect).toBeNull();
  expect(entry.result.isError).toBe(true);
});
|
|
1343
|
+
});
|
|
1344
|
+
|
|
1345
|
+
// ---------------------------------------------------------------------------
|
|
1346
|
+
// 6. Error flushing
|
|
1347
|
+
// ---------------------------------------------------------------------------
|
|
1348
|
+
|
|
1349
|
+
describe("Error flushing", () => {
|
|
1350
|
+
/**
 * Builds an in-memory audit store that only tracks error commits.
 *
 * `commitAudit` does nothing and `loadAudit` always yields an empty list.
 * Every `commitErrors` call is stored as its own batch (a shallow copy of
 * the records passed in), and `getCommittedErrors` exposes those batches.
 */
function makeErrorAuditStore(): AuditStore & {
  getCommittedErrors(): ErrorRecord[][];
} {
  const errorBatches: ErrorRecord[][] = [];
  return {
    commitAudit: async () => {
      /* noop */
    },
    loadAudit: async () => [],
    commitErrors: async (records: ErrorRecord[]) => {
      // Copy so later mutation of the caller's array cannot alter the batch.
      errorBatches.push(records.slice());
    },
    getCommittedErrors: () => errorBatches,
  };
}
|
|
1369
|
+
|
|
1370
|
+
/** Authorization stub: resolves to an "allow" decision with no grants. */
async function allowAll(): Promise<AuthzCallResult> {
  return {
    effect: "allow" as const,
    matchingGrants: [],
    resolvedBy: null,
  };
}
|
|
1377
|
+
|
|
1378
|
+
/**
 * Resolves once `events` contains an event of type "reactor.done".
 *
 * The array is checked immediately and then every 10 ms. If no such
 * event shows up within 5000 ms the promise rejects with a timeout error.
 *
 * @param events Live array that the harness `onEvent` callback appends to.
 * @returns A promise that resolves on "reactor.done" or rejects on timeout.
 */
function waitForDone(events: InferenceEvent[]): Promise<void> {
  return new Promise((resolve, reject) => {
    // Handle of the currently scheduled poll, so the timeout can cancel it.
    let pollTimer: ReturnType<typeof setTimeout> | undefined;

    const deadline = setTimeout(() => {
      // Stop polling on timeout; otherwise `check` would keep re-arming
      // itself every 10 ms forever after the promise has been rejected.
      if (pollTimer !== undefined) {
        clearTimeout(pollTimer);
      }
      reject(new Error("Timed out waiting for reactor.done"));
    }, 5000);

    const check = () => {
      if (events.some((e) => e.type === "reactor.done")) {
        clearTimeout(deadline);
        resolve();
        return;
      }
      pollTimer = setTimeout(check, 10);
    };
    check();
  });
}
|
|
1395
|
+
|
|
1396
|
+
test("inference.error events are accumulated and flushed at checkpoint", async () => {
  const transport = makeMockTransport();
  const auditStore = makeErrorAuditStore();
  const events: InferenceEvent[] = [];

  // Scripted director: start inference on the inbound message (it will
  // fail because the provider URL below is unreachable), then checkpoint
  // and finish once inference.error arrives.
  const director: ReactorDirector = {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      switch (event.type) {
        case "message.received":
          return caps.infer();
        case "inference.error":
          return [caps.checkpoint("after-error"), caps.done()];
        default:
          return caps.done();
      }
    },
  };

  // The base URL points at an unreachable address so inference fails
  // right away with a network error and the reactor emits inference.error.
  const harness = createHarness(
    makeConfig(transport, {
      source: {
        id: "anthropic:claude-test",
        provider: "anthropic",
        baseURL: "http://localhost:1",
        apiKey: "test-key",
        model: "claude-test",
      },
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const errorBatches = auditStore.getCommittedErrors();
  expect(errorBatches.length).toBe(1);
  const firstError = errorBatches[0]?.[0];
  if (firstError === undefined) throw new Error("expected error record");
  expect(firstError.source).toBe("inference");
  expect(firstError.category).toBeDefined();
  expect(firstError.message).toBeDefined();
  expect(firstError.fatal).toBe(false);
  expect(firstError.sessionId).toBeDefined();
});
|
|
1451
|
+
|
|
1452
|
+
test("reactor.error (fatal) events are accumulated and flushed", async () => {
  const transport = makeMockTransport();
  const auditStore = makeErrorAuditStore();
  const events: InferenceEvent[] = [];

  // The director blows up on the inbound message, which surfaces as a
  // fatal reactor.error.
  const director: ReactorDirector = {
    async decide(event: { type: string }, _state: ReactorState) {
      if (event.type !== "message.received") {
        return { type: "done" as const };
      }
      throw new Error("director explosion");
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const errorBatches = auditStore.getCommittedErrors();
  expect(errorBatches.length).toBe(1);
  const firstError = errorBatches[0]?.[0];
  if (firstError === undefined) throw new Error("expected error record");
  expect(firstError.source).toBe("reactor");
  expect(firstError.category).toBe("reactor_error");
  expect(firstError.fatal).toBe(true);
  expect(firstError.message).toContain("director explosion");
  expect(firstError.sessionId).toBeDefined();
});
|
|
1490
|
+
|
|
1491
|
+
test("no commitErrors call when no errors occurred", async () => {
  const transport = makeMockTransport();
  const auditStore = makeErrorAuditStore();
  const events: InferenceEvent[] = [];

  // Error-free run: checkpoint and finish on the inbound message,
  // finish on anything else.
  const director: ReactorDirector = {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      return event.type === "message.received"
        ? [caps.checkpoint(), caps.done()]
        : caps.done();
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  // Nothing went wrong, so no error batch may have been committed.
  expect(auditStore.getCommittedErrors().length).toBe(0);
});
|
|
1525
|
+
|
|
1526
|
+
test("non-fatal reactor.error events are recorded in the error audit trail", async () => {
  const transport = makeMockTransport();
  const auditStore = makeErrorAuditStore();
  const events: InferenceEvent[] = [];

  // NOTE: despite the title, this test does not provoke a reactor.error
  // directly. It uses a failing inference as a stand-in for a non-fatal
  // error: the director starts inference (which fails because the
  // provider URL is unreachable), then checkpoints and finishes on
  // inference.error. The assertions below only look at records whose
  // source is "inference".
  const director: ReactorDirector = {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      switch (event.type) {
        case "message.received":
          return caps.infer();
        case "inference.error":
          return [caps.checkpoint("after-error"), caps.done()];
        default:
          return caps.done();
      }
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      source: {
        id: "anthropic:claude-test",
        provider: "anthropic",
        baseURL: "http://localhost:1",
        apiKey: "test-key",
        model: "claude-test",
      },
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  // At least one inference error must be on record, flagged non-fatal.
  const fromInference = auditStore
    .getCommittedErrors()
    .flat()
    .filter((rec) => rec.source === "inference");
  expect(fromInference.length).toBeGreaterThanOrEqual(1);
  const firstError = fromInference[0];
  if (firstError === undefined) throw new Error("expected inference error");
  expect(firstError.fatal).toBe(false);
});
|
|
1579
|
+
|
|
1580
|
+
test("errors survive a commitErrors failure", async () => {
  const transport = makeMockTransport();
  const committedErrors: ErrorRecord[][] = [];
  // One-shot failure switch: the first commitErrors call throws and
  // disarms the switch; later calls record their batch.
  let shouldFail = true;
  const auditStore: AuditStore & { getCommittedErrors(): ErrorRecord[][] } = {
    commitAudit: async () => {
      /* noop */
    },
    loadAudit: async () => [],
    commitErrors: async (records: ErrorRecord[]) => {
      if (shouldFail) {
        shouldFail = false;
        throw new Error("simulated storage failure");
      }
      committedErrors.push(records.slice());
    },
    getCommittedErrors: () => committedErrors,
  };
  const events: InferenceEvent[] = [];

  // Flow the scripted director sets up: inference fails (bad URL), the
  // checkpoint that follows hits the throwing commitErrors, and the
  // final shutdown flush is expected to succeed with the records that
  // were retained.
  const director: ReactorDirector = {
    async decide(
      event: { type: string },
      _state: ReactorState,
      caps: ReactorCapabilities,
    ) {
      switch (event.type) {
        case "message.received":
          return caps.infer();
        case "inference.error":
          return [caps.checkpoint("will-fail"), caps.done()];
        default:
          return caps.done();
      }
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      source: {
        id: "anthropic:claude-test",
        provider: "anthropic",
        baseURL: "http://localhost:1",
        apiKey: "test-key",
        model: "claude-test",
      },
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  // Exactly one successful batch: the retry at shutdown, carrying the
  // record that the failed first flush could not persist.
  expect(committedErrors.length).toBe(1);
  const firstError = committedErrors[0]?.[0];
  if (firstError === undefined) throw new Error("expected error record");
  expect(firstError.source).toBe("inference");
});
|
|
1650
|
+
|
|
1651
|
+
test("errors are flushed at shutdown when no checkpoint occurred", async () => {
  const transport = makeMockTransport();
  const auditStore = makeErrorAuditStore();
  const events: InferenceEvent[] = [];

  // The director throws on the inbound message, so a reactor.error is
  // emitted and the run shuts down without any explicit checkpoint.
  // The error therefore has to be persisted by the onShutdown flush.
  const director: ReactorDirector = {
    async decide(event: { type: string }, _state: ReactorState) {
      if (event.type !== "message.received") {
        return { type: "done" as const };
      }
      throw new Error("shutdown flush test");
    },
  };

  const harness = createHarness(
    makeConfig(transport, {
      auditStore,
      authorize: () => allowAll(),
      onEvent: (evt) => events.push(evt),
      director,
    }),
  );

  harness.start();
  harness.deliver(makeInboundMessage());
  await waitForDone(events);

  const errorBatches = auditStore.getCommittedErrors();
  expect(errorBatches.length).toBe(1);
  expect(errorBatches[0]?.[0]?.source).toBe("reactor");
});
|
|
1685
|
+
});
|
|
1686
|
+
|
|
1687
|
+
// ---------------------------------------------------------------------------
|
|
1688
|
+
// 7. BlobReader
|
|
1689
|
+
// ---------------------------------------------------------------------------
|
|
1690
|
+
|
|
1691
|
+
describe("Harness blobReader", () => {
  /**
   * Runs `action` and returns whatever it throws or rejects with,
   * normalised to an Error. Returns undefined when nothing was thrown.
   * Taking a thunk keeps synchronous throws inside the try block as well.
   */
  const captureFailure = async (
    action: () => unknown,
  ): Promise<Error | undefined> => {
    try {
      await action();
    } catch (cause) {
      return cause instanceof Error ? cause : new Error(String(cause));
    }
    return undefined;
  };

  test("resolves a tool-output URI through the wrapped context store", async () => {
    // Seed the backing map with a single blob under the key "abc123".
    const blobs = new Map<string, Uint8Array>([
      ["abc123", new TextEncoder().encode("spilled bytes")],
    ]);

    const transport = makeMockTransport();
    const storage = makeContextStore({ blobs });
    const harness = createHarness(makeConfig(transport, { storage }));

    const payload = await harness.blobReader.read("tool-output:///abc123");
    const decoded = new TextDecoder().decode(payload);
    expect(decoded).toBe("spilled bytes");
  });

  test("throws when the underlying store has no matching blob", async () => {
    // Empty map: no key can resolve.
    const blobs = new Map<string, Uint8Array>();
    const transport = makeMockTransport();
    const storage = makeContextStore({ blobs });
    const harness = createHarness(makeConfig(transport, { storage }));

    const thrown = await captureFailure(() =>
      harness.blobReader.read("tool-output:///missing"),
    );
    expect(thrown?.message).toContain("Blob not found");
  });

  test("throws on malformed tool-output URIs without reading from the store", async () => {
    let storeReads = 0;
    const blobs = new Map<string, Uint8Array>();
    const wrapped = makeContextStore({ blobs });

    // Replace readBlob with a counting pass-through so the test can
    // observe whether the harness touched the store at all.
    const delegate = wrapped.readBlob.bind(wrapped);
    wrapped.readBlob = async (key, signal) => {
      storeReads += 1;
      return delegate(key, signal);
    };

    const transport = makeMockTransport();
    const harness = createHarness(makeConfig(transport, { storage: wrapped }));

    // A file:// URI uses the wrong scheme and must be rejected up front.
    const thrown = await captureFailure(() =>
      harness.blobReader.read("file:///abc"),
    );
    expect(thrown?.message).toContain("invalid tool-output URI scheme");
    expect(storeReads).toBe(0);
  });
});
|