@flowcodex/core 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97):
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/index-LbxYtxxS.d.ts +560 -0
  4. package/dist/index.d.ts +995 -0
  5. package/dist/index.js +3840 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/kernel/index.d.ts +1 -0
  8. package/dist/kernel/index.js +551 -0
  9. package/dist/kernel/index.js.map +1 -0
  10. package/package.json +39 -0
  11. package/src/agent/agent-loop.ts +254 -0
  12. package/src/agent/context.ts +99 -0
  13. package/src/agent/conversation-state.ts +44 -0
  14. package/src/agent/provider-runner.ts +241 -0
  15. package/src/agent/system-prompt-builder.ts +193 -0
  16. package/src/execution/compactor.ts +256 -0
  17. package/src/execution/index.ts +7 -0
  18. package/src/execution/output-serializer.ts +90 -0
  19. package/src/execution/schema-validator.ts +124 -0
  20. package/src/execution/tool-executor.ts +276 -0
  21. package/src/execution/tool-registry.ts +104 -0
  22. package/src/index.ts +215 -0
  23. package/src/infrastructure/catalog-parser.ts +218 -0
  24. package/src/infrastructure/index.ts +16 -0
  25. package/src/infrastructure/path-resolver.ts +123 -0
  26. package/src/infrastructure/provider-factory.ts +116 -0
  27. package/src/infrastructure/provider-presets.ts +19 -0
  28. package/src/infrastructure/retry-policy.ts +50 -0
  29. package/src/infrastructure/secret-scrubber.ts +67 -0
  30. package/src/infrastructure/token-counter.ts +156 -0
  31. package/src/infrastructure/tracer.ts +23 -0
  32. package/src/kernel/container.ts +166 -0
  33. package/src/kernel/events.ts +323 -0
  34. package/src/kernel/index.ts +18 -0
  35. package/src/kernel/pipeline.ts +152 -0
  36. package/src/kernel/run-controller.ts +85 -0
  37. package/src/kernel/tokens.ts +21 -0
  38. package/src/security/index.ts +13 -0
  39. package/src/security/permission-policy.ts +273 -0
  40. package/src/session/audit-log.ts +201 -0
  41. package/src/session/auth-service.ts +178 -0
  42. package/src/session/index.ts +26 -0
  43. package/src/session/secret-vault.ts +183 -0
  44. package/src/session/session-store.ts +339 -0
  45. package/src/session/types.ts +100 -0
  46. package/src/types/blocks.ts +56 -0
  47. package/src/types/context.ts +54 -0
  48. package/src/types/errors.ts +359 -0
  49. package/src/types/index.ts +34 -0
  50. package/src/types/provider.ts +58 -0
  51. package/src/types/tool.ts +39 -0
  52. package/src/utils/error.ts +3 -0
  53. package/src/utils/fs.ts +185 -0
  54. package/src/utils/image-resize.ts +76 -0
  55. package/src/utils/ssrf-guard.ts +133 -0
  56. package/src/utils/ulid.ts +72 -0
  57. package/src/utils/version-check.ts +59 -0
  58. package/tests/agent-loop.test.ts +490 -0
  59. package/tests/audit-log.test.ts +199 -0
  60. package/tests/auth-service.test.ts +170 -0
  61. package/tests/blocks.test.ts +79 -0
  62. package/tests/catalog-parser.test.ts +174 -0
  63. package/tests/compactor.test.ts +180 -0
  64. package/tests/container.test.ts +224 -0
  65. package/tests/conversation-state.test.ts +75 -0
  66. package/tests/errors.test.ts +429 -0
  67. package/tests/events-v021.test.ts +60 -0
  68. package/tests/events-v022.test.ts +75 -0
  69. package/tests/events.test.ts +340 -0
  70. package/tests/fixtures/large-image.png +0 -0
  71. package/tests/fixtures/small-image.png +0 -0
  72. package/tests/fs-utils.test.ts +164 -0
  73. package/tests/image-resize.test.ts +51 -0
  74. package/tests/output-serializer.test.ts +79 -0
  75. package/tests/path-resolver.test.ts +91 -0
  76. package/tests/permission-policy.test.ts +174 -0
  77. package/tests/pipeline.test.ts +193 -0
  78. package/tests/provider-factory.test.ts +245 -0
  79. package/tests/provider-runner.test.ts +535 -0
  80. package/tests/retry-policy.test.ts +104 -0
  81. package/tests/run-controller.test.ts +115 -0
  82. package/tests/sanity.test.ts +26 -0
  83. package/tests/schema-validator.test.ts +109 -0
  84. package/tests/secret-scrubber.test.ts +133 -0
  85. package/tests/secret-vault.test.ts +130 -0
  86. package/tests/session-store.test.ts +429 -0
  87. package/tests/ssrf-guard.test.ts +112 -0
  88. package/tests/system-prompt-builder.test.ts +116 -0
  89. package/tests/token-counter.test.ts +163 -0
  90. package/tests/tokens.test.ts +42 -0
  91. package/tests/tool-executor.test.ts +452 -0
  92. package/tests/tool-registry.test.ts +143 -0
  93. package/tests/tracer.test.ts +32 -0
  94. package/tests/ulid.test.ts +53 -0
  95. package/tests/version-check.test.ts +57 -0
  96. package/tsconfig.json +11 -0
  97. package/tsup.config.ts +16 -0
package/tests/agent-loop.test.ts
@@ -0,0 +1,490 @@
1
+ import { describe, expect, it, vi } from 'vitest';
2
+ import { runAgentLoop } from '../src/agent/agent-loop.js';
3
+ import { AgentContext } from '../src/agent/context.js';
4
+ import { DefaultRetryPolicy } from '../src/infrastructure/retry-policy.js';
5
+ import { EventBus } from '../src/kernel/events.js';
6
+ import type { Context } from '../src/types/context.js';
7
+ import type { LLMEvent, LLMRequest, LLMResponse, Provider } from '../src/types/provider.js';
8
+
9
/**
 * Builds a minimal in-memory {@link Provider} for tests.
 *
 * `stream()` ignores the request and replays `events` in order on every call.
 * `complete()` always rejects with `'not implemented'`.
 *
 * @param events - Events yielded, in order, by each `stream()` call.
 * @returns A provider whose `name` is `'mock'`.
 */
function mockProvider(events: LLMEvent[]): Provider {
  return {
    name: 'mock',
    async *stream(_req: LLMRequest): AsyncIterable<LLMEvent> {
      // Delegate to the array's iterator instead of a manual loop.
      yield* events;
    },
    async complete(_req: LLMRequest): Promise<LLMResponse> {
      throw new Error('not implemented');
    },
  };
}
23
+
24
+ describe('AgentContext', () => {
25
it('creates with defaults', () => {
  // Only the model and project root are supplied.
  const agentCtx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
  });

  // Values that mirror the init options.
  expect(agentCtx.model.model).toBe('claude-sonnet-4-6');
  expect(agentCtx.workingDir).toBe('/tmp/project');

  // Values that were not supplied.
  expect(agentCtx.budget.maxIterations).toBe(50);
  expect(agentCtx.messages).toHaveLength(0);
  expect(agentCtx.btwNotes).toHaveLength(0);
  expect(agentCtx.tools).toHaveLength(0);
});
37
+
38
it('setWorkingDir validates containment', () => {
  const agentCtx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'anthropic', model: 'test' },
  });

  // A relative directory is accepted and reflected in workingDir.
  agentCtx.setWorkingDir('src');
  expect(agentCtx.workingDir).toContain('src');

  // A path that climbs out of the root must be rejected.
  const escapeRoot = (): void => {
    agentCtx.setWorkingDir('../../etc');
  };
  expect(escapeRoot).toThrow(/outside project root/);
});
47
+
48
it('registerAbortHook returns unsubscribe', async () => {
  const ctx = new AgentContext({
    model: { provider: 'anthropic', model: 'test' },
    projectRoot: '/tmp/project',
  });
  const hook = vi.fn();

  const unsub = ctx.registerAbortHook(hook);
  expect(typeof unsub).toBe('function');

  // The previous assertion (`ctx.abortHooks ?? fn`) was always truthy and
  // proved nothing. Verify the observable contract instead: once
  // unsubscribed, the hook must not run when abort hooks are drained.
  unsub();
  await ctx.drainAbortHooks();
  expect(hook).not.toHaveBeenCalled();
});
58
+
59
it('drainAbortHooks fires in LIFO and swallows errors', async () => {
  const ctx = new AgentContext({
    model: { provider: 'anthropic', model: 'test' },
    projectRoot: '/tmp/project',
  });
  const order: string[] = [];
  ctx.registerAbortHook(() => {
    order.push('first');
  });
  // A failing hook in the middle exercises the "swallows errors" half of the
  // title: draining must neither reject nor skip the remaining hooks.
  ctx.registerAbortHook(() => {
    throw new Error('hook failure');
  });
  ctx.registerAbortHook(() => {
    order.push('second');
  });

  // A rejection here fails the test, so no explicit assertion is needed.
  await ctx.drainAbortHooks();

  // The last-registered hook runs first.
  expect(order).toEqual(['second', 'first']);
});
74
+
75
it('defaults readOnly to false', () => {
  // No readOnly option is passed.
  const { readOnly } = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'anthropic', model: 'test' },
  });
  expect(readOnly).toBe(false);
});

it('sets readOnly from init option', () => {
  // An explicit readOnly: true must be carried onto the context.
  const { readOnly } = new AgentContext({
    readOnly: true,
    projectRoot: '/tmp/project',
    model: { provider: 'anthropic', model: 'test' },
  });
  expect(readOnly).toBe(true);
});
91
+ });
92
+
93
+ describe('runAgentLoop', () => {
94
it('completes a text-only response (no tool_use)', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test-model' },
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'hello' }] });

  // One turn: two text deltas followed by an end_turn finish.
  const scripted: LLMEvent[] = [
    { type: 'text_delta', text: 'Hello, ' },
    { type: 'text_delta', text: 'world!' },
    { type: 'finish', usage: { input: 10, output: 5 }, stopReason: 'end_turn' },
  ];

  const outcome = await runAgentLoop({
    ctx,
    provider: mockProvider(scripted),
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  expect(outcome.status).toBe('completed');
  expect(outcome.iterations).toBe(1);
  expect(outcome.finalText).toBe('Hello, world!');

  // The assistant reply is appended after the original user message.
  expect(ctx.messages).toHaveLength(2);
  const [, reply] = ctx.messages;
  expect(reply!.role).toBe('assistant');
});
122
+
123
it('continues on tool_use and then completes', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test-model' },
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'read file' }] });

  // First stream() call asks for a tool; every later call answers with text.
  let streamCalls = 0;
  const provider: Provider = {
    name: 'mock',
    async *stream(_req: LLMRequest): AsyncIterable<LLMEvent> {
      streamCalls += 1;
      if (streamCalls !== 1) {
        yield { type: 'text_delta', text: 'Done reading.' };
        yield { type: 'finish', usage: { input: 20, output: 3 }, stopReason: 'end_turn' };
        return;
      }
      yield { type: 'tool_use_start', id: 'toolu_1', name: 'read' };
      yield { type: 'tool_use_input_delta', id: 'toolu_1', partialJson: '{"path":"file.txt"}' };
      yield { type: 'tool_use_stop', id: 'toolu_1' };
      yield { type: 'finish', usage: { input: 10, output: 5 }, stopReason: 'tool_use' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not implemented');
    },
  };

  let toolRan = false;
  const outcome = await runAgentLoop({
    ctx,
    provider,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
    // Stand-in executor: records the call and appends a tool_result message.
    executeTools: async (toolUses) => {
      toolRan = true;
      const [firstUse] = toolUses;
      ctx.messages.push({
        role: 'user',
        content: [
          { type: 'tool_result', tool_use_id: firstUse!.id, content: 'file contents here' },
        ],
      });
    },
  });

  expect(outcome.status).toBe('completed');
  expect(outcome.iterations).toBe(2);
  expect(toolRan).toBe(true);
  expect(outcome.finalText).toBe('Done reading.');
});
178
+
179
it('aborts cleanly on signal abort', async () => {
  // The signal is already aborted before the loop is started.
  const aborter = new AbortController();
  aborter.abort();

  const ctx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test' },
  });

  const { status } = await runAgentLoop({
    ctx,
    signal: aborter.signal,
    provider: mockProvider([]),
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
  });

  expect(status).toBe('aborted');
});
197
+
198
it('hits iteration limit', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test' },
    budget: { maxIterations: 1, maxTokens: 1000, maxCost: 10 },
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'hi' }] });

  // Every turn requests another tool call, so the loop never ends on its own.
  const provider: Provider = {
    name: 'mock',
    async *stream(): AsyncIterable<LLMEvent> {
      const turn: LLMEvent[] = [
        { type: 'tool_use_start', id: 't1', name: 'loop' },
        { type: 'tool_use_input_delta', id: 't1', partialJson: '{}' },
        { type: 'tool_use_stop', id: 't1' },
        { type: 'finish', usage: { input: 1, output: 1 }, stopReason: 'tool_use' },
      ];
      yield* turn;
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  const outcome = await runAgentLoop({
    ctx,
    provider,
    maxIterations: 1,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
    executeTools: async () => {
      ctx.messages.push({
        role: 'user',
        content: [{ type: 'tool_result', tool_use_id: 't1', content: 'ok' }],
      });
    },
  });

  expect(outcome.status).toBe('limit_reached');
});
237
+
238
it('filters mutating tools when ctx.readOnly is true', async () => {
  const ctx = new AgentContext({
    readOnly: true,
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test' },
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'hi' }] });

  // Two non-mutating tools followed by two mutating ones.
  ctx.tools = [
    { name: 'read', mutating: false },
    { name: 'grep', mutating: false },
    { name: 'write', mutating: true },
    { name: 'bash', mutating: true },
  ];

  // Capture the request the loop actually hands to the provider.
  let seenRequest: LLMRequest | undefined;
  const provider: Provider = {
    name: 'mock',
    async *stream(req: LLMRequest): AsyncIterable<LLMEvent> {
      seenRequest = req;
      yield { type: 'text_delta', text: 'done' };
      yield { type: 'finish', usage: { input: 1, output: 1 }, stopReason: 'end_turn' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  await runAgentLoop({
    ctx,
    provider,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  const offered = seenRequest!.tools as Array<{ name: string; mutating: boolean }>;
  expect(offered.map(({ name }) => name)).toEqual(['read', 'grep']);

  // The context's own tool list is left intact.
  expect(ctx.tools).toHaveLength(4);
});
280
+
281
it('sends all tools when ctx.readOnly is false (default)', async () => {
  // readOnly is not passed to the constructor.
  const ctx = new AgentContext({
    projectRoot: '/tmp/project',
    model: { provider: 'mock', model: 'test' },
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'hi' }] });
  ctx.tools = [
    { name: 'read', mutating: false },
    { name: 'write', mutating: true },
  ];

  let seenRequest: LLMRequest | undefined;
  const provider: Provider = {
    name: 'mock',
    async *stream(req: LLMRequest): AsyncIterable<LLMEvent> {
      seenRequest = req;
      yield { type: 'text_delta', text: 'done' };
      yield { type: 'finish', usage: { input: 1, output: 1 }, stopReason: 'end_turn' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  await runAgentLoop({
    ctx,
    provider,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  // Both tools, mutating or not, reach the provider in their original order.
  const offered = seenRequest!.tools as Array<{ name: string }>;
  expect(offered.map(({ name }) => name)).toEqual(['read', 'write']);
});
317
+
318
it('injects /btw notes at iteration boundary', async () => {
  const ctx = new AgentContext({
    model: { provider: 'mock', model: 'test' },
    projectRoot: '/tmp/project',
  });
  ctx.messages.push({ role: 'user', content: [{ type: 'text', text: 'task' }] });
  ctx.btwNotes.push('also check tests');

  await runAgentLoop({
    ctx,
    // Reuse the shared helper rather than hand-rolling another inline provider.
    provider: mockProvider([
      { type: 'text_delta', text: 'ok' },
      { type: 'finish', usage: { input: 1, output: 1 }, stopReason: 'end_turn' },
    ]),
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  // The queued note is consumed by the loop.
  expect(ctx.btwNotes).toHaveLength(0);

  const userMsg = ctx.messages[0]!;
  expect(userMsg.role).toBe('user');

  // Assert unconditionally. The previous version only asserted inside an
  // `if (Array.isArray(content))`, so it passed vacuously for string content.
  const content = userMsg.content;
  const hasBtw =
    typeof content === 'string'
      ? content.includes('BY THE WAY')
      : content.some((b) => 'text' in b && b.text.includes('BY THE WAY'));
  expect(hasBtw).toBe(true);
});
355
+ });
356
+
357
+ describe('agent-loop v0.2.2 structured output', () => {
358
it('hides tools and sets tool_choice when structuredOutput is set', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp',
    model: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
    structuredOutput: {
      name: 'structured_output',
      schema: { type: 'object', properties: { answer: { type: 'number' } } },
    },
  });
  // A regular tool is registered; the assertions below expect it to be absent
  // from the outgoing request.
  ctx.tools = [{ name: 'read', mutating: false } as unknown];

  // The provider records the request and answers by calling the structured tool.
  let seenRequest: LLMRequest | null = null;
  const provider: Provider = {
    name: 'mock',
    async *stream(req: LLMRequest): AsyncIterable<LLMEvent> {
      seenRequest = req;
      yield { type: 'tool_use_start', id: 't1', name: 'structured_output' };
      yield { type: 'tool_use_input_delta', id: 't1', partialJson: '{"answer":42}' };
      yield { type: 'tool_use_stop', id: 't1' };
      yield { type: 'finish', usage: { input: 10, output: 5 }, stopReason: 'tool_use' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  const outcome = await runAgentLoop({
    ctx,
    provider,
    maxIterations: 5,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  // Request side: exactly one tool, and the model is forced to use it.
  expect(seenRequest).not.toBeNull();
  expect(seenRequest!.tools).toHaveLength(1);
  expect((seenRequest!.tools![0] as { name: string }).name).toBe('structured_output');
  expect(seenRequest!.tool_choice).toEqual({ type: 'tool', name: 'structured_output' });

  // Result side: the parsed tool input is returned as the structured result.
  expect(outcome.status).toBe('structured');
  expect(outcome.structuredResult).toEqual({ answer: 42 });
});
396
+
397
it('uses ctx.maxTokens instead of hardcoded 8192', async () => {
  const ctx = new AgentContext({
    maxTokens: 4096,
    projectRoot: '/tmp',
    model: { provider: 'anthropic', model: 'test' },
  });
  ctx.tools = [];

  // Record the outgoing request so its max_tokens can be inspected.
  let seenRequest: LLMRequest | null = null;
  const provider: Provider = {
    name: 'mock',
    async *stream(req: LLMRequest): AsyncIterable<LLMEvent> {
      seenRequest = req;
      yield { type: 'text_delta', text: 'done' };
      yield { type: 'finish', usage: { input: 5, output: 5 }, stopReason: 'end_turn' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  await runAgentLoop({
    ctx,
    provider,
    maxIterations: 5,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  expect(seenRequest!.max_tokens).toBe(4096);
});
425
+
426
it('throws AGENT_STRUCTURED_OUTPUT_NOT_PRODUCED when model does not call structured tool', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp',
    model: { provider: 'anthropic', model: 'test' },
    structuredOutput: { name: 'structured_output', schema: {} },
  });
  ctx.tools = [];

  // The model replies with plain text and never calls the structured tool.
  const provider: Provider = {
    name: 'mock',
    async *stream(): AsyncIterable<LLMEvent> {
      yield { type: 'text_delta', text: 'no tool call' };
      yield { type: 'finish', usage: { input: 5, output: 5 }, stopReason: 'end_turn' };
    },
    async complete(): Promise<LLMResponse> {
      throw new Error('not impl');
    },
  };

  const outcome = await runAgentLoop({
    ctx,
    provider,
    maxIterations: 5,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
  });

  // The call is awaited normally; the failure is inspected on the result.
  expect(outcome.status).toBe('failed');
  expect(outcome.error).toBeInstanceOf(Error);
});
453
+ });
454
+
455
+ describe('runAgentLoop — fallback forwarding', () => {
456
it('accepts a fallback chain option and forwards it without crashing', async () => {
  const ctx = new AgentContext({
    projectRoot: '/tmp/p',
    model: { provider: 'anthropic', model: 'claude-sonnet-4-6' },
  });
  ctx.tools = [];

  // Primary provider that succeeds on the first turn.
  const provider: Provider = {
    name: 'anthropic',
    async *stream() {
      const textEvent = { type: 'text_delta', text: 'done' } as LLMEvent;
      const finishEvent = {
        type: 'finish',
        usage: { input: 1, output: 1 },
        stopReason: 'end_turn',
      } as LLMEvent;
      yield textEvent;
      yield finishEvent;
    },
    async complete() {
      return { content: [], usage: { input: 0, output: 0 }, stopReason: 'end_turn' };
    },
  };

  const outcome = await runAgentLoop({
    ctx: ctx as Context,
    provider,
    maxIterations: 1,
    events: new EventBus(),
    retry: new DefaultRetryPolicy(),
    signal: new AbortController().signal,
    fallback: [{ providerId: 'openai', model: 'gpt-4o-mini', providerFactory: () => undefined }],
  });

  expect(outcome.status).toBe('completed');
});
490
+ });
package/tests/audit-log.test.ts
@@ -0,0 +1,199 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2
+ import { promises as fsp } from 'node:fs';
3
+ import * as path from 'node:path';
4
+ import * as os from 'node:os';
5
+ import { DefaultAuditLog } from '../src/session/audit-log.js';
6
+ import { stableStringify } from '../src/session/audit-log.js';
7
+ import type { AuditEntry } from '../src/session/types.js';
8
+
9
+ let tmpDir: string;
10
+
11
+ describe('DefaultAuditLog', () => {
12
// Each test gets its own temporary directory.
beforeEach(async () => {
  const prefix = path.join(os.tmpdir(), 'fcx-audit-');
  tmpDir = await fsp.mkdtemp(prefix);
});

// Best-effort cleanup: a removal failure must not fail the test.
afterEach(async () => {
  try {
    await fsp.rm(tmpDir, { recursive: true, force: true });
  } catch {
    // Intentionally ignored.
  }
});
18
+
19
it('records an entry and returns it with hash', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });

  const recorded = await auditLog.record({
    sessionId: 'session-1',
    toolName: 'read',
    toolUseId: 'tu1',
    input: { path: 'test.ts' },
    output: 'file contents',
    isError: false,
  });

  // First entry of a fresh log: index 0, previous hash is 64 zeros.
  const zeroHash = '0'.repeat(64);
  expect(recorded.index).toBe(0);
  expect(recorded.prevHash).toBe(zeroHash);
  expect(recorded.hash).toHaveLength(64);
  expect(recorded.toolName).toBe('read');
});
34
+
35
it('chains entries via prevHash', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });

  const first = await auditLog.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu1',
    input: {},
    output: 'ok',
    isError: false,
  });
  const second = await auditLog.record({
    sessionId: 's1',
    toolName: 'write',
    toolUseId: 'tu2',
    input: {},
    output: 'ok',
    isError: false,
  });

  // The second entry links back to the first and takes the next index.
  expect(second.prevHash).toBe(first.hash);
  expect(second.index).toBe(1);
});
48
+
49
it('verifies a valid chain', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });

  // Append five entries sequentially so each one chains onto the previous.
  for (const i of [0, 1, 2, 3, 4]) {
    await auditLog.record({
      sessionId: 's1',
      toolName: 'read',
      toolUseId: `tu${i}`,
      input: { i },
      output: `result-${i}`,
      isError: false,
    });
  }

  const verdict = await auditLog.verify('s1');
  expect(verdict.ok).toBe(true);
  if (verdict.ok) {
    expect(verdict.entries).toBe(5);
  }
});
61
+
62
it('detects tampered entry (hash mismatch)', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });
  await auditLog.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu1',
    input: { data: 'original' },
    output: 'ok',
    isError: false,
  });
  await auditLog.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu2',
    input: { data: 'original2' },
    output: 'ok',
    isError: false,
  });

  // Rewrite the first JSONL line with a changed input but its old hash.
  const auditFile = path.join(tmpDir, 's1.audit.jsonl');
  const jsonl = (await fsp.readFile(auditFile, 'utf8')).trim().split('\n');
  const forged = JSON.parse(jsonl[0]!) as AuditEntry;
  forged.input = { data: 'TAMPERED' };
  jsonl[0] = JSON.stringify(forged);
  await fsp.writeFile(auditFile, jsonl.join('\n') + '\n', 'utf8');

  const verdict = await auditLog.verify('s1');
  expect(verdict.ok).toBe(false);
  if (!verdict.ok) {
    expect(verdict.brokenAt).toBe(0);
    expect(verdict.reason).toContain('hash mismatch');
  }
});
88
+
89
it('detects broken chain (prevHash mismatch)', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });
  for (const toolUseId of ['tu1', 'tu2']) {
    await auditLog.record({
      sessionId: 's1',
      toolName: 'read',
      toolUseId,
      input: {},
      output: 'ok',
      isError: false,
    });
  }

  // Point the second entry at a previous hash that does not exist.
  const auditFile = path.join(tmpDir, 's1.audit.jsonl');
  const jsonl = (await fsp.readFile(auditFile, 'utf8')).trim().split('\n');
  const forged = JSON.parse(jsonl[1]!) as AuditEntry;
  forged.prevHash = 'a'.repeat(64);
  jsonl[1] = JSON.stringify(forged);
  await fsp.writeFile(auditFile, jsonl.join('\n') + '\n', 'utf8');

  const verdict = await auditLog.verify('s1');
  expect(verdict.ok).toBe(false);
  if (!verdict.ok) {
    expect(verdict.reason).toContain('prevHash mismatch');
  }
});
112
+
113
it('loads all entries', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });
  for (const i of [0, 1, 2]) {
    await auditLog.record({
      sessionId: 's1',
      toolName: 'bash',
      toolUseId: `tu${i}`,
      input: { cmd: `echo ${i}` },
      output: `${i}`,
      isError: false,
    });
  }

  // All three entries come back, in the order they were recorded.
  const loaded = await auditLog.load('s1');
  expect(loaded).toHaveLength(3);
  const [head, , tail] = loaded;
  expect(head?.toolName).toBe('bash');
  expect(tail?.output).toBe('2');
});
126
+
127
it('returns empty for missing session', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });
  const unknownSession = 'nonexistent';

  // Nothing was ever recorded for this session id.
  const loaded = await auditLog.load(unknownSession);
  expect(loaded).toHaveLength(0);

  // An absent log still verifies as ok.
  const verdict = await auditLog.verify(unknownSession);
  expect(verdict.ok).toBe(true);
});
134
+
135
it('handles error results', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });

  // Record a failed tool call; the error flag must be preserved.
  const { isError } = await auditLog.record({
    sessionId: 's1',
    toolName: 'bash',
    toolUseId: 'tu1',
    input: { cmd: 'rm -rf /' },
    output: 'permission denied',
    isError: true,
  });

  expect(isError).toBe(true);
});
144
+
145
it('isolates different sessions', async () => {
  const auditLog = new DefaultAuditLog({ dir: tmpDir });
  await auditLog.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu1',
    input: {},
    output: 'a',
    isError: false,
  });
  await auditLog.record({
    sessionId: 's2',
    toolName: 'read',
    toolUseId: 'tu1',
    input: {},
    output: 'b',
    isError: false,
  });

  const fromS1 = await auditLog.load('s1');
  const fromS2 = await auditLog.load('s2');

  // Each session holds only its own entry, and indexing restarts at 0.
  expect(fromS1).toHaveLength(1);
  expect(fromS2).toHaveLength(1);
  expect(fromS1[0]?.index).toBe(0);
  expect(fromS2[0]?.index).toBe(0);
});
162
+
163
it('survives instance recreation (reads from disk)', async () => {
  // The first instance writes one entry.
  const writer = new DefaultAuditLog({ dir: tmpDir });
  await writer.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu1',
    input: {},
    output: 'first',
    isError: false,
  });

  // A second instance on the same directory must continue the existing chain.
  const reopened = new DefaultAuditLog({ dir: tmpDir });
  const appended = await reopened.record({
    sessionId: 's1',
    toolName: 'read',
    toolUseId: 'tu2',
    input: {},
    output: 'second',
    isError: false,
  });
  expect(appended.index).toBe(1);

  const verdict = await reopened.verify('s1');
  expect(verdict.ok).toBe(true);
});
179
+ });
180
+
181
describe('stableStringify', () => {
  it('sorts object keys', () => {
    const serialized = stableStringify({ b: 1, a: 2 });
    expect(serialized).toBe('{"a":2,"b":1}');
  });

  it('sorts nested keys', () => {
    const serialized = stableStringify({ z: { y: 1, x: 2 } });
    expect(serialized).toBe('{"z":{"x":2,"y":1}}');
  });

  it('preserves array order', () => {
    // Arrays are ordered data and are not sorted.
    const serialized = stableStringify([3, 1, 2]);
    expect(serialized).toBe('[3,1,2]');
  });

  it('is deterministic regardless of insertion order', () => {
    // Same data, keys inserted in a different order at both nesting levels.
    const shuffled = stableStringify({ c: 3, a: 1, b: { z: 26, a: 1 } });
    const ordered = stableStringify({ a: 1, b: { a: 1, z: 26 }, c: 3 });
    expect(shuffled).toBe(ordered);
  });
});