@renseiai/agentfactory 0.8.21 → 0.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/src/config/repository-config.d.ts +3 -3
  2. package/dist/src/orchestrator/index.d.ts +1 -0
  3. package/dist/src/orchestrator/index.d.ts.map +1 -1
  4. package/dist/src/orchestrator/index.js +2 -0
  5. package/dist/src/orchestrator/null-issue-tracker-client.d.ts +34 -0
  6. package/dist/src/orchestrator/null-issue-tracker-client.d.ts.map +1 -0
  7. package/dist/src/orchestrator/null-issue-tracker-client.js +72 -0
  8. package/dist/src/orchestrator/orchestrator.d.ts +19 -0
  9. package/dist/src/orchestrator/orchestrator.d.ts.map +1 -1
  10. package/dist/src/orchestrator/orchestrator.js +134 -15
  11. package/dist/src/orchestrator/state-types.d.ts +3 -0
  12. package/dist/src/orchestrator/state-types.d.ts.map +1 -1
  13. package/dist/src/providers/codex-app-server-provider.d.ts +87 -0
  14. package/dist/src/providers/codex-app-server-provider.d.ts.map +1 -1
  15. package/dist/src/providers/codex-app-server-provider.integration.test.d.ts +14 -0
  16. package/dist/src/providers/codex-app-server-provider.integration.test.d.ts.map +1 -0
  17. package/dist/src/providers/codex-app-server-provider.integration.test.js +909 -0
  18. package/dist/src/providers/codex-app-server-provider.js +339 -52
  19. package/dist/src/providers/codex-app-server-provider.test.js +838 -10
  20. package/dist/src/providers/codex-provider.d.ts +2 -0
  21. package/dist/src/providers/codex-provider.d.ts.map +1 -1
  22. package/dist/src/providers/codex-provider.js +36 -6
  23. package/dist/src/providers/codex-provider.test.js +12 -3
  24. package/dist/src/providers/types.d.ts +17 -0
  25. package/dist/src/providers/types.d.ts.map +1 -1
  26. package/dist/src/workflow/workflow-types.d.ts +5 -5
  27. package/package.json +2 -2
@@ -0,0 +1,909 @@
1
+ /**
2
+ * Integration tests for the Codex App Server Provider
3
+ *
4
+ * Exercises the full AppServerAgentHandle lifecycle by mocking the
5
+ * `codex app-server` child process. Unlike the unit tests which test individual
6
+ * functions in isolation, these tests verify the end-to-end flow:
7
+ * process start -> handshake -> thread creation -> turn execution
8
+ * -> notification streaming -> event mapping -> message injection -> shutdown
9
+ *
10
+ * Uses a MockAppServer helper that captures JSON-RPC stdin writes and pushes
11
+ * fake stdout lines (responses + notifications) to exercise the real code path.
12
+ */
13
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
14
+ import { EventEmitter } from 'events';
15
+ // ---------------------------------------------------------------------------
16
+ // Mock child_process and readline BEFORE importing the module under test
17
+ // ---------------------------------------------------------------------------
18
/**
 * Builds a minimal stand-in for a child process's stdin: an object that
 * reports itself as writable and records every `write` call in a vitest spy.
 */
function createMockStdin() {
    const write = vi.fn();
    return { writable: true, write };
}
25
/**
 * Builds a fake ChildProcess: an EventEmitter decorated with stdin, stdout,
 * pid, killed and a spied kill().
 *
 * kill('SIGKILL') only flips `killed` to true. kill('SIGTERM') schedules an
 * `exit` event (code 0, signal 'SIGTERM') on a zero-delay timer and leaves
 * `killed` untouched. Any other signal is a no-op.
 */
function createMockChildProcess() {
    const child = Object.assign(new EventEmitter(), {
        stdin: createMockStdin(),
        stdout: new EventEmitter(),
        pid: 99999,
        killed: false,
    });
    child.kill = vi.fn((signal) => {
        switch (signal) {
            case 'SIGKILL':
                child.killed = true;
                break;
            case 'SIGTERM':
                // Simulate graceful exit
                setTimeout(() => child.emit('exit', 0, 'SIGTERM'), 0);
                break;
            default:
                break;
        }
    });
    return child;
}
44
// Mutable fixtures shared between the module mocks below and the tests.
// Both are read lazily, so tests may reassign them at any time.
let mockProc;
let mockLineEmitter;
vi.mock('child_process', () => {
    // spawn() hands back whatever `mockProc` points at when it is invoked
    const spawn = vi.fn(() => mockProc);
    return { spawn };
});
vi.mock('readline', () => {
    // Every createInterface() call installs a fresh emitter as `mockLineEmitter`
    const createInterface = vi.fn(() => {
        const lines = new EventEmitter();
        lines.close = vi.fn();
        mockLineEmitter = lines;
        return lines;
    });
    return { createInterface };
});
56
+ // ---------------------------------------------------------------------------
57
+ // MockAppServer — reusable test helper
58
+ // ---------------------------------------------------------------------------
59
/**
 * JSON-RPC request methods that the mock answers on its own, immediately,
 * with an empty result (`{ id, result: {} }`).
 */
const AUTO_RESPOND_METHODS = new Set(['turn/interrupt', 'thread/unsubscribe', 'turn/steer', 'model/list']);
69
/**
 * MockAppServer wraps the mocked child_process to provide a high-level API
 * for integration tests. It captures JSON-RPC messages written to stdin,
 * can push responses/notifications to the fake stdout, and auto-responds
 * to every request whose method is listed in AUTO_RESPOND_METHODS
 * (turn/interrupt, thread/unsubscribe, turn/steer, model/list) so that tests
 * don't hang waiting for responses.
 */
class MockAppServer {
    /** All JSON-RPC messages written to stdin, parsed, in write order */
    requests = [];
    /** The fake child process this helper drives */
    proc;
    /**
     * @param proc Fake child process; its `stdin.write` is replaced with a
     *   capturing spy.
     */
    constructor(proc) {
        this.proc = proc;
        // Intercept stdin writes to capture requests and auto-respond to housekeeping
        this.proc.stdin.write = vi.fn((data) => {
            let parsed;
            try {
                parsed = JSON.parse(data.trim());
            }
            catch {
                // Non-JSON data — ignore
                return;
            }
            this.requests.push(parsed);
            // A message is a request (as opposed to a notification) when it carries
            // an id. Compare against null/undefined rather than truthiness so that
            // a numeric id of 0 is still answered.
            const id = parsed?.id;
            const method = parsed?.method;
            if (id != null && method && AUTO_RESPOND_METHODS.has(method)) {
                // Use queueMicrotask so the response arrives after the write completes
                queueMicrotask(() => {
                    this.pushResponse(id, {});
                });
            }
        });
    }
    /**
     * Wait until at least `count` messages have been captured.
     * Polls via vi.waitFor and rejects once `timeoutMs` elapses.
     */
    async waitForRequests(count, timeoutMs = 3000) {
        await vi.waitFor(() => {
            expect(this.requests.length).toBeGreaterThanOrEqual(count);
        }, { timeout: timeoutMs });
    }
    /** Get all captured messages with a specific method */
    requestsByMethod(method) {
        return this.requests.filter((r) => r.method === method);
    }
    /** Get the last captured message with a specific method (undefined if none) */
    lastRequestByMethod(method) {
        const matches = this.requestsByMethod(method);
        return matches[matches.length - 1];
    }
    /** Push a JSON-RPC response to the mock stdout (readline line event) */
    pushResponse(id, result) {
        mockLineEmitter.emit('line', JSON.stringify({ id, result }));
    }
    /** Push a JSON-RPC error response */
    pushErrorResponse(id, code, message) {
        mockLineEmitter.emit('line', JSON.stringify({ id, error: { code, message } }));
    }
    /** Push a JSON-RPC notification (no id) */
    pushNotification(method, params) {
        mockLineEmitter.emit('line', JSON.stringify({ method, params }));
    }
    /**
     * Complete the initialize handshake automatically.
     * Waits for the `initialize` request, sends back a success response,
     * then waits for the `initialized` notification and the `model/list`
     * request (which is auto-responded).
     */
    async completeHandshake() {
        // Wait for the initialize request (first message written)
        await this.waitForRequests(1);
        const initReq = this.requests[0];
        expect(initReq?.method).toBe('initialize');
        this.pushResponse(initReq.id, { capabilities: {} });
        // Wait for the `initialized` notification + model/list request (auto-responded)
        await this.waitForRequests(3);
        expect(this.requests[1]?.method).toBe('initialized');
        expect(this.requests[2]?.method).toBe('model/list');
    }
    /**
     * Complete the handshake + respond to thread/start + turn/start.
     * Returns the threadId used.
     */
    async completeHandshakeAndThreadStart(threadId = 'thr_integ_001') {
        return this.#completeHandshakeAndThread('thread/start', threadId);
    }
    /**
     * Complete the handshake + respond to thread/resume + turn/start.
     * Returns the threadId used.
     */
    async completeHandshakeAndThreadResume(threadId) {
        return this.#completeHandshakeAndThread('thread/resume', threadId);
    }
    /**
     * Shared driver for the start/resume flows: completes the handshake,
     * answers the 4th message (expected to be `threadMethod`) with the given
     * thread id, then answers the 5th message (expected to be turn/start).
     */
    async #completeHandshakeAndThread(threadMethod, threadId) {
        await this.completeHandshake();
        // Wait for the thread/start or thread/resume request
        await this.waitForRequests(4);
        const threadReq = this.requests[3];
        expect(threadReq?.method).toBe(threadMethod);
        this.pushResponse(threadReq.id, { thread: { id: threadId } });
        // Wait for turn/start request
        await this.waitForRequests(5);
        const turnReq = this.requests[4];
        expect(turnReq?.method).toBe('turn/start');
        this.pushResponse(turnReq.id, {});
        return threadId;
    }
    /** Push a turn/started notification */
    pushTurnStarted(threadId, turnId) {
        this.pushNotification('turn/started', {
            threadId,
            turn: { id: turnId },
        });
    }
    /** Push an item/agentMessage/delta notification */
    pushAgentMessageDelta(threadId, text) {
        this.pushNotification('item/agentMessage/delta', { threadId, text });
    }
    /** Push an item/started notification */
    pushItemStarted(threadId, item) {
        this.pushNotification('item/started', { threadId, item });
    }
    /** Push an item/completed notification */
    pushItemCompleted(threadId, item) {
        this.pushNotification('item/completed', { threadId, item });
    }
    /**
     * Push a turn/completed notification.
     * `options.status` defaults to 'completed'; `options.usage` and
     * `options.error` are attached to the turn only when provided.
     */
    pushTurnCompleted(threadId, turnId, options = {}) {
        const turn = {
            id: turnId,
            status: options.status ?? 'completed',
        };
        if (options.usage) {
            turn.usage = options.usage;
        }
        if (options.error) {
            turn.error = options.error;
        }
        this.pushNotification('turn/completed', { threadId, turn });
    }
    /** Simulate the process exiting */
    emitExit(code, signal = null) {
        this.proc.emit('exit', code, signal);
    }
    /** Simulate a process error */
    emitError(err) {
        this.proc.emit('error', err);
    }
}
217
+ // ---------------------------------------------------------------------------
218
+ // Helpers
219
+ // ---------------------------------------------------------------------------
220
/**
 * Builds a spawn config for the tests: an autonomous, sandbox-disabled
 * session with a fresh AbortController. Any key in `overrides` replaces the
 * matching default; `overrides` may be omitted.
 */
function makeConfig(overrides) {
    const defaults = {
        prompt: 'Implement the feature',
        cwd: '/project/workspace',
        env: {},
        abortController: new AbortController(),
        autonomous: true,
        sandboxEnabled: false,
    };
    return { ...defaults, ...overrides };
}
231
/**
 * Drains an async iterable into an array, stopping early once `maxEvents`
 * items have been gathered. The item that reaches the cap is still included.
 */
async function collectEvents(stream, maxEvents = 100) {
    const collected = [];
    for await (const item of stream) {
        // push() returns the new length, which is what the cap is compared against
        if (collected.push(item) >= maxEvents) {
            break;
        }
    }
    return collected;
}
240
/**
 * Collect events with a timeout — prevents hanging on infinite streams.
 *
 * Pulls from `stream` until it ends, `maxEvents` items have been gathered, or
 * `timeoutMs` milliseconds have passed since the call, whichever comes first.
 * Returns whatever was collected so far; a timeout is not an error.
 *
 * @param stream Async iterable to drain.
 * @param timeoutMs Overall time budget in milliseconds (default 2000).
 * @param maxEvents Upper bound on the number of collected items (default 100).
 * @returns Promise resolving to the array of collected items.
 */
async function collectEventsWithTimeout(stream, timeoutMs = 2000, maxEvents = 100) {
    const events = [];
    const iter = stream[Symbol.asyncIterator]();
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline && events.length < maxEvents) {
        let timer;
        // Resolves with a synthetic "done" result once the remaining budget is spent
        const timedOut = new Promise((resolve) => {
            timer = setTimeout(() => resolve({ done: true, value: undefined }), Math.max(0, deadline - Date.now()));
        });
        let result;
        try {
            result = await Promise.race([iter.next(), timedOut]);
        }
        finally {
            // Always disarm the per-iteration timer; otherwise one pending timer per
            // collected event lingers until the deadline and keeps the event loop alive.
            clearTimeout(timer);
        }
        if (result.done) {
            break;
        }
        events.push(result.value);
    }
    return events;
}
256
+ // ---------------------------------------------------------------------------
257
+ // Tests
258
+ // ---------------------------------------------------------------------------
259
+ describe('CodexAppServerProvider integration', () => {
260
// Per-test fixtures: the provider class under test and the mock server driver.
let CodexAppServerProvider;
let mock;
beforeEach(async () => {
    vi.clearAllMocks();
    // The fake process must exist before the provider module is loaded and used
    mockProc = createMockChildProcess();
    ({ CodexAppServerProvider } = await import('./codex-app-server-provider.js'));
    mock = new MockAppServer(mockProc);
});
afterEach(() => {
    // Undo vi.useFakeTimers() from any test that enabled it
    vi.useRealTimers();
});
272
+ // -------------------------------------------------------------------------
273
+ // 1. Happy Path — Full Session Lifecycle
274
+ // -------------------------------------------------------------------------
275
describe('1. Happy Path — Full Session Lifecycle', () => {
    it('exercises spawn -> handshake -> thread/start -> turn/start -> notifications -> stop -> result', async () => {
        const handle = new CodexAppServerProvider().spawn(makeConfig());
        // Begin draining the stream before any traffic is produced
        const pendingEvents = collectEventsWithTimeout(handle.stream, 5000);
        // Drive initialize / thread/start / turn/start to completion
        const threadId = await mock.completeHandshakeAndThreadStart('thr_happy_001');
        // One full turn: text delta, a shell command, then completion with usage
        const lsCommand = { id: 'cmd_001', type: 'commandExecution', command: 'ls -la' };
        mock.pushTurnStarted(threadId, 'turn_001');
        mock.pushAgentMessageDelta(threadId, 'Analyzing the codebase...');
        mock.pushItemStarted(threadId, lsCommand);
        mock.pushItemCompleted(threadId, {
            ...lsCommand,
            text: 'total 16\ndrwxr-xr-x 4 user staff 128 Jan 1 00:00 .',
            exitCode: 0,
            status: 'completed',
        });
        mock.pushTurnCompleted(threadId, 'turn_001', {
            status: 'completed',
            usage: { input_tokens: 500, output_tokens: 150 },
        });
        // Let the provider process the notifications
        await new Promise((resolve) => {
            setTimeout(resolve, 100);
        });
        // Stopping the handle ends the stream
        await handle.stop();
        const events = await pendingEvents;
        const firstOfType = (type) => events.find((e) => e.type === type);
        // init carries the thread id from the thread/start response
        expect(events.map((e) => e.type)).toContain('init');
        expect(firstOfType('init')).toMatchObject({ type: 'init', sessionId: threadId });
        // system event announcing the turn
        const turnStarted = events.find((e) => e.type === 'system' && 'subtype' in e && e.subtype === 'turn_started');
        expect(turnStarted).toBeDefined();
        // streamed assistant text
        expect(firstOfType('assistant_text')).toMatchObject({
            type: 'assistant_text',
            text: 'Analyzing the codebase...',
        });
        // command execution surfaces as a tool_use / tool_result pair
        expect(firstOfType('tool_use')).toMatchObject({
            type: 'tool_use',
            toolName: 'shell',
            toolUseId: 'cmd_001',
            input: { command: 'ls -la' },
        });
        expect(firstOfType('tool_result')).toMatchObject({
            type: 'tool_result',
            toolName: 'shell',
            toolUseId: 'cmd_001',
            isError: false,
        });
        // In autonomous mode, turn/completed emits a real result event directly
        // (stream ends without needing stop()). No turn_result system event.
        expect(firstOfType('result')).toMatchObject({
            type: 'result',
            success: true,
            cost: {
                inputTokens: 500,
                outputTokens: 150,
                numTurns: 1,
            },
        });
        // JSON-RPC traffic written to stdin
        expect(mock.requestsByMethod('thread/start')).toHaveLength(1);
        expect(mock.lastRequestByMethod('thread/start').params).toMatchObject({
            cwd: '/project/workspace',
            approvalPolicy: 'on-request',
        });
        expect(mock.requestsByMethod('turn/start')).toHaveLength(1);
        expect(mock.lastRequestByMethod('turn/start').params?.input).toEqual([{ type: 'text', text: 'Implement the feature' }]);
        // stop() should have sent turn/interrupt and thread/unsubscribe
        expect(mock.requestsByMethod('turn/interrupt').length).toBeGreaterThanOrEqual(1);
        expect(mock.requestsByMethod('thread/unsubscribe').length).toBeGreaterThanOrEqual(1);
    });
});
358
+ // -------------------------------------------------------------------------
359
+ // 2. Message Injection — Mid-Turn Steering
360
+ // -------------------------------------------------------------------------
361
describe('2. Message Injection — Mid-Turn Steering', () => {
    it('calls turn/steer when injectMessage() is called during an active turn', async () => {
        const steerText = 'additional context for the current task';
        const handle = new CodexAppServerProvider().spawn(makeConfig());
        const drained = collectEventsWithTimeout(handle.stream, 5000);
        const threadId = await mock.completeHandshakeAndThreadStart('thr_steer_001');
        // turn/started gives the handle an active turn id to steer
        mock.pushTurnStarted(threadId, 'turn_steer_1');
        // Let the provider consume the notification
        await new Promise((resolve) => {
            setTimeout(resolve, 100);
        });
        // turn/steer is auto-responded by the mock, so this resolves on its own
        await handle.injectMessage(steerText);
        const steerReq = mock.lastRequestByMethod('turn/steer');
        expect(steerReq).toBeDefined();
        expect(steerReq.params).toMatchObject({
            threadId: 'thr_steer_001',
            expectedTurnId: 'turn_steer_1',
            input: [{ type: 'text', text: steerText }],
        });
        // Cleanup — turn/interrupt and thread/unsubscribe are auto-responded
        await handle.stop();
        await drained;
    });
});
385
+ // -------------------------------------------------------------------------
386
+ // 3. Message Injection — Between-Turn New Turn
387
+ // -------------------------------------------------------------------------
388
describe('3. Message Injection — Between-Turn New Turn', () => {
    it('starts a new turn via turn/start when injectMessage() is called after turn completion', async () => {
        const pause = () => new Promise((resolve) => {
            setTimeout(resolve, 100);
        });
        const handle = new CodexAppServerProvider().spawn(makeConfig({ autonomous: false }));
        const drained = collectEventsWithTimeout(handle.stream, 5000);
        const threadId = await mock.completeHandshakeAndThreadStart('thr_inject_001');
        // First turn runs to completion
        mock.pushTurnStarted(threadId, 'turn_first');
        mock.pushTurnCompleted(threadId, 'turn_first', {
            status: 'completed',
            usage: { input_tokens: 100, output_tokens: 50 },
        });
        await pause();
        // Injecting between turns must issue a fresh turn/start. That method is
        // NOT auto-responded, so injectMessage() stays pending until the test
        // answers the request by hand.
        const seenBefore = mock.requests.length;
        const injecting = handle.injectMessage('follow-up question');
        await mock.waitForRequests(seenBefore + 1);
        // Two turn/start requests in total: the initial one and the injected one
        const turnStarts = mock.requestsByMethod('turn/start');
        expect(turnStarts.length).toBe(2);
        const [, injectedTurn] = turnStarts;
        expect(injectedTurn.params).toMatchObject({
            threadId: 'thr_inject_001',
            input: [{ type: 'text', text: 'follow-up question' }],
            cwd: '/project/workspace',
            approvalPolicy: 'untrusted',
        });
        // Answer the injected turn/start so injectMessage() can settle
        mock.pushResponse(injectedTurn.id, {});
        await injecting;
        // Second turn: text delta followed by completion with usage
        mock.pushTurnStarted(threadId, 'turn_second');
        mock.pushAgentMessageDelta(threadId, 'Addressing follow-up...');
        mock.pushTurnCompleted(threadId, 'turn_second', {
            status: 'completed',
            usage: { input_tokens: 200, output_tokens: 80 },
        });
        await pause();
        await handle.stop();
        const events = await drained;
        // Text from the second turn made it into the stream
        const sawFollowUpText = events
            .filter((e) => e.type === 'assistant_text')
            .some((e) => e.type === 'assistant_text' && e.text === 'Addressing follow-up...');
        expect(sawFollowUpText).toBe(true);
        // Final result accumulates usage across both turns
        expect(events.find((e) => e.type === 'result')).toMatchObject({
            type: 'result',
            success: true,
            cost: {
                inputTokens: 300, // 100 + 200
                outputTokens: 130, // 50 + 80
                numTurns: 2,
            },
        });
    });
});
451
+ // -------------------------------------------------------------------------
452
+ // 4. Session Resume
453
+ // -------------------------------------------------------------------------
454
describe('4. Session Resume', () => {
    it('sends thread/resume (not thread/start) when provider.resume() is called', async () => {
        const sessionId = 'thr_existing_session';
        const provider = new CodexAppServerProvider();
        const handle = provider.resume(sessionId, makeConfig());
        const eventsPromise = collectEventsWithTimeout(handle.stream, 5000);
        // Use the shared MockAppServer helper instead of re-implementing it here:
        // it completes the handshake, asserts the 4th message is thread/resume,
        // answers it with the session id, then answers turn/start.
        await mock.completeHandshakeAndThreadResume(sessionId);
        // The resume request must target the existing session
        const threadResumeReq = mock.lastRequestByMethod('thread/resume');
        expect(threadResumeReq?.params?.threadId).toBe(sessionId);
        // Push some notifications and stop
        mock.pushTurnStarted(sessionId, 'turn_resume_1');
        mock.pushTurnCompleted(sessionId, 'turn_resume_1', { status: 'completed' });
        await new Promise((r) => setTimeout(r, 100));
        await handle.stop();
        const events = await eventsPromise;
        // Verify init event has the resumed session ID
        const initEvent = events.find((e) => e.type === 'init');
        expect(initEvent).toMatchObject({ type: 'init', sessionId });
        // Verify no thread/start was sent
        expect(mock.requestsByMethod('thread/start')).toHaveLength(0);
        expect(mock.requestsByMethod('thread/resume')).toHaveLength(1);
    });
});
486
+ // -------------------------------------------------------------------------
487
+ // 5. Error Handling — Turn Failure
488
+ // -------------------------------------------------------------------------
489
describe('5. Error Handling — Turn Failure', () => {
    it('emits result event with failure info when turn/completed has status failed (autonomous)', async () => {
        const handle = new CodexAppServerProvider().spawn(makeConfig());
        const drained = collectEventsWithTimeout(handle.stream, 5000);
        const threadId = await mock.completeHandshakeAndThreadStart('thr_fail_001');
        // The turn starts and then completes with a failure payload
        const failure = { message: 'Context window exceeded', codexErrorInfo: 'ContextWindowExceeded' };
        mock.pushTurnStarted(threadId, 'turn_fail_1');
        mock.pushTurnCompleted(threadId, 'turn_fail_1', { status: 'failed', error: failure });
        await new Promise((resolve) => {
            setTimeout(resolve, 100);
        });
        const events = await drained;
        // In autonomous mode, turn failure emits a direct result event
        const failedResult = events.find((e) => e.type === 'result');
        expect(failedResult).toMatchObject({
            type: 'result',
            success: false,
            errors: ['Context window exceeded'],
        });
    });
});
512
+ // -------------------------------------------------------------------------
513
+ // 6. Error Handling — Process Crash
514
+ // -------------------------------------------------------------------------
515
describe('6. Error Handling — Process Crash', () => {
    it('emits error and result events when the process exits unexpectedly', async () => {
        const provider = new CodexAppServerProvider();
        const handle = provider.spawn(makeConfig());
        const eventsPromise = collectEventsWithTimeout(handle.stream, 5000);
        // Complete handshake (initialize, initialized, model/list = 3 messages)
        await mock.completeHandshake();
        // thread/start is the 4th message. Waiting for only 3 would be satisfied
        // by the handshake alone and would not guarantee thread/start was sent.
        await mock.waitForRequests(4);
        expect(mock.requests[3]?.method).toBe('thread/start');
        // Simulate process crash before thread/start can be responded to
        mock.emitExit(1);
        const events = await eventsPromise;
        // Should have an error event from the crashed process
        const errorEvent = events.find((e) => e.type === 'error');
        expect(errorEvent).toBeDefined();
        expect(errorEvent).toMatchObject({
            type: 'error',
            message: expect.stringContaining('App Server error'),
        });
        // Should have a result event indicating failure
        const resultEvent = events.find((e) => e.type === 'result');
        expect(resultEvent).toMatchObject({
            type: 'result',
            success: false,
        });
    });
});
542
+ // -------------------------------------------------------------------------
543
+ // 7. Error Handling — Request Timeout
544
+ // -------------------------------------------------------------------------
545
describe('7. Error Handling — Request Timeout', () => {
    it('emits error events when thread/start request times out', async () => {
        vi.useFakeTimers();
        const handle = new CodexAppServerProvider().spawn(makeConfig());
        const drained = collectEvents(handle.stream);
        // Answer initialize by hand; model/list is auto-responded
        await mock.waitForRequests(1);
        const [initializeReq] = mock.requests;
        mock.pushResponse(initializeReq.id, { capabilities: {} });
        await mock.waitForRequests(3);
        // thread/start arrives next and is deliberately left unanswered
        await mock.waitForRequests(4);
        expect(mock.requests[3]?.method).toBe('thread/start');
        // Advance past the timeout (default 30s)
        vi.advanceTimersByTime(31000);
        const events = await drained;
        vi.useRealTimers();
        const firstOfType = (type) => events.find((e) => e.type === type);
        // The timeout surfaces as an error event
        const timeoutError = firstOfType('error');
        expect(timeoutError).toBeDefined();
        expect(timeoutError).toMatchObject({
            type: 'error',
            message: expect.stringContaining('timed out'),
        });
        // followed by a failed result
        expect(firstOfType('result')).toMatchObject({
            type: 'result',
            success: false,
            errorSubtype: 'app_server_error',
        });
    });
});
579
+ // -------------------------------------------------------------------------
580
+ // 8. Approval Bridge Mapping
581
+ // -------------------------------------------------------------------------
582
describe('8. Approval Bridge Mapping', () => {
    /**
     * Helper to drive the stream far enough that thread/start is written,
     * then return the parsed request params.
     *
     * The stream is started by pulling its first item; that pull is left
     * pending (thread/start is never answered) and only guarded against an
     * unhandled rejection.
     */
    async function getThreadStartParams(handle) {
        const iter = handle.stream[Symbol.asyncIterator]();
        // Kick off the generator
        const firstNext = iter.next();
        // Complete the handshake
        await mock.waitForRequests(1);
        mock.pushResponse(mock.requests[0].id, { capabilities: {} });
        // model/list is auto-responded
        await mock.waitForRequests(3);
        // Wait for thread/start
        await mock.waitForRequests(4);
        const threadStartReq = mock.requests[3];
        expect(threadStartReq?.method).toBe('thread/start');
        // Clean up (don't wait — just prevent unhandled rejection)
        firstNext.catch(() => { });
        return threadStartReq.params;
    }
    // Titles quote the value actually asserted on thread/start.
    it('autonomous: true resolves approvalPolicy to "on-request"', async () => {
        const provider = new CodexAppServerProvider();
        const handle = provider.spawn(makeConfig({ autonomous: true, sandboxEnabled: false }));
        const params = await getThreadStartParams(handle);
        expect(params.approvalPolicy).toBe('on-request');
    });
    it('autonomous: false resolves approvalPolicy to "untrusted"', async () => {
        const provider = new CodexAppServerProvider();
        // Reset mock for a fresh process
        mockProc = createMockChildProcess();
        mock = new MockAppServer(mockProc);
        const handle = provider.spawn(makeConfig({ autonomous: false }));
        const params = await getThreadStartParams(handle);
        expect(params.approvalPolicy).toBe('untrusted');
    });
    it('sandboxEnabled: true includes sandbox mode string on thread/start', async () => {
        const provider = new CodexAppServerProvider();
        mockProc = createMockChildProcess();
        mock = new MockAppServer(mockProc);
        const handle = provider.spawn(makeConfig({ sandboxEnabled: true, cwd: '/my/project' }));
        const params = await getThreadStartParams(handle);
        expect(params.sandbox).toBe('workspace-write');
    });
    it('sandboxEnabled: false does not include sandbox on thread/start', async () => {
        const provider = new CodexAppServerProvider();
        mockProc = createMockChildProcess();
        mock = new MockAppServer(mockProc);
        const handle = provider.spawn(makeConfig({ sandboxEnabled: false }));
        const params = await getThreadStartParams(handle);
        expect(params.sandbox).toBeUndefined();
    });
});
636
+ // -------------------------------------------------------------------------
637
+ // 9. MCP Tool Event Mapping (End-to-End)
638
+ // -------------------------------------------------------------------------
639
describe('9. MCP Tool Event Mapping (End-to-End)', () => {
    it('maps mcpToolCall item/started -> tool_use and item/completed -> tool_result', async () => {
        const handle = new CodexAppServerProvider().spawn(makeConfig());
        const drained = collectEventsWithTimeout(handle.stream, 5000);
        const threadId = await mock.completeHandshakeAndThreadStart('thr_mcp_001');
        mock.pushTurnStarted(threadId, 'turn_mcp_1');
        // Fields shared by the started and completed forms of the MCP call
        const mcpCall = {
            id: 'mcp_001',
            type: 'mcpToolCall',
            server: 'linear',
            tool: 'create_issue',
            arguments: { title: 'New feature', description: 'Build it' },
        };
        // Tool call begins
        mock.pushItemStarted(threadId, { ...mcpCall, status: 'in_progress' });
        // Tool call finishes with a result payload
        mock.pushItemCompleted(threadId, {
            ...mcpCall,
            result: { content: [{ text: 'Issue LIN-123 created' }] },
            status: 'completed',
        });
        mock.pushTurnCompleted(threadId, 'turn_mcp_1', { status: 'completed' });
        await new Promise((resolve) => {
            setTimeout(resolve, 100);
        });
        await handle.stop();
        const events = await drained;
        const firstOfType = (type) => events.find((e) => e.type === type);
        // tool_use is named mcp__<server>__<tool> and carries the call arguments
        expect(firstOfType('tool_use')).toMatchObject({
            type: 'tool_use',
            toolName: 'mcp__linear__create_issue',
            toolUseId: 'mcp_001',
            input: { title: 'New feature', description: 'Build it' },
        });
        // tool_result carries the result content as a JSON string
        expect(firstOfType('tool_result')).toMatchObject({
            type: 'tool_result',
            toolName: 'mcp__linear__create_issue',
            toolUseId: 'mcp_001',
            content: '[{"text":"Issue LIN-123 created"}]',
            isError: false,
        });
    });
});
688
+ // -------------------------------------------------------------------------
689
+ // 10. Concurrent Thread Multiplexing
690
+ // -------------------------------------------------------------------------
691
describe('10. Concurrent Thread Multiplexing', () => {
    /**
     * Spawns two handles from one provider, starts a thread for each, then
     * interleaves notifications for both threads and checks each stream only
     * sees the assistant text addressed to its own thread.
     */
    it('each handle only receives events for its own thread when notifications are interleaved', async () => {
        const appServerProvider = new CodexAppServerProvider();
        const onlyAssistantText = (list) => list.filter((evt) => evt.type === 'assistant_text');

        // --- Handle A: spawn, handshake, thread/start, turn/start ---
        const handleA = appServerProvider.spawn(makeConfig({ prompt: 'Task A' }));
        const pendingEventsA = collectEventsWithTimeout(handleA.stream, 5000);
        await mock.completeHandshake();
        await mock.waitForRequests(4);
        const threadStartA = mock.requests[3];
        expect(threadStartA?.method).toBe('thread/start');
        mock.pushResponse(threadStartA.id, { thread: { id: 'thr_A' } });
        await mock.waitForRequests(5);
        const turnStartA = mock.requests[4];
        expect(turnStartA?.method).toBe('turn/start');
        mock.pushResponse(mock.requests[4].id, {});

        // --- Handle B: the test expects no second handshake, so its
        // thread/start is the very next request after handle A's turn/start ---
        const handleB = appServerProvider.spawn(makeConfig({ prompt: 'Task B' }));
        const pendingEventsB = collectEventsWithTimeout(handleB.stream, 5000);
        await mock.waitForRequests(6);
        const threadStartB = mock.requests[5];
        expect(threadStartB?.method).toBe('thread/start');
        mock.pushResponse(threadStartB.id, { thread: { id: 'thr_B' } });
        await mock.waitForRequests(7);
        const turnStartB = mock.requests[6];
        expect(turnStartB?.method).toBe('turn/start');
        mock.pushResponse(mock.requests[6].id, {});

        // --- Interleave notifications: A, B, A, B, A, B ---
        mock.pushTurnStarted('thr_A', 'turn_A1');
        mock.pushTurnStarted('thr_B', 'turn_B1');
        mock.pushAgentMessageDelta('thr_A', 'Working on Task A');
        mock.pushAgentMessageDelta('thr_B', 'Working on Task B');
        mock.pushTurnCompleted('thr_A', 'turn_A1', { status: 'completed' });
        mock.pushTurnCompleted('thr_B', 'turn_B1', { status: 'completed' });

        // Short pause before stopping so the pushed notifications can be processed.
        await new Promise((resolve) => setTimeout(resolve, 200));
        await handleA.stop();
        await handleB.stop();
        const eventsA = await pendingEventsA;
        const eventsB = await pendingEventsB;

        // Stream A: exactly one assistant_text, and it is thread A's.
        const textsA = onlyAssistantText(eventsA);
        expect(textsA).toHaveLength(1);
        expect(textsA[0]).toMatchObject({ text: 'Working on Task A' });

        // Stream B: exactly one assistant_text, and it is thread B's.
        const textsB = onlyAssistantText(eventsB);
        expect(textsB).toHaveLength(1);
        expect(textsB[0]).toMatchObject({ text: 'Working on Task B' });
    });
});
741
+ // -------------------------------------------------------------------------
742
+ // 11. Provider Resolution Paths
743
+ // -------------------------------------------------------------------------
744
describe('11. Provider Resolution Paths', () => {
    // Imports the CodexProvider class lazily, once per test that needs it.
    const loadCodexProvider = async () => {
        const module = await import('./codex-provider.js');
        return module.CodexProvider;
    };

    // Flag value '1' in the spawn config: an internal CodexAppServerProvider is created.
    it('CODEX_USE_APP_SERVER=1 delegates to CodexAppServerProvider', async () => {
        const CodexProvider = await loadCodexProvider();
        const codexProvider = new CodexProvider();
        const spawned = codexProvider.spawn(makeConfig({
            env: { CODEX_USE_APP_SERVER: '1' },
        }));
        // Inspect the provider's internal appServerProvider field.
        const inner = codexProvider.appServerProvider;
        expect(inner).toBeDefined();
        expect(inner).toBeInstanceOf(CodexAppServerProvider);
        expect(spawned).toBeDefined();
        expect(spawned.stream).toBeDefined();
    });

    // Flag value '0' in the spawn config: the internal provider stays null.
    it('CODEX_USE_APP_SERVER=0 delegates to exec mode (no appServerProvider created)', async () => {
        const CodexProvider = await loadCodexProvider();
        const codexProvider = new CodexProvider();
        const spawned = codexProvider.spawn(makeConfig({
            env: { CODEX_USE_APP_SERVER: '0' },
        }));
        expect(codexProvider.appServerProvider).toBeNull();
        expect(spawned).toBeDefined();
    });

    // Flag absent from both the config env and process.env: the internal provider stays null.
    it('CODEX_USE_APP_SERVER not set defaults to exec mode', async () => {
        const CodexProvider = await loadCodexProvider();
        const codexProvider = new CodexProvider();
        // Temporarily strip the flag from the ambient environment.
        const savedFlag = process.env.CODEX_USE_APP_SERVER;
        delete process.env.CODEX_USE_APP_SERVER;
        try {
            const spawned = codexProvider.spawn(makeConfig({ env: {} }));
            expect(codexProvider.appServerProvider).toBeNull();
            expect(spawned).toBeDefined();
        }
        finally {
            // Put the flag back only if it existed before the test ran.
            if (savedFlag !== undefined) {
                process.env.CODEX_USE_APP_SERVER = savedFlag;
            }
        }
    });
});
792
+ // -------------------------------------------------------------------------
793
+ // Additional: Thread/start returns no thread ID
794
+ // -------------------------------------------------------------------------
795
describe('Additional: thread/start returns no thread ID', () => {
    /**
     * Answers the thread/start request with an empty `thread` object and
     * expects the stream to surface an error event mentioning "no thread ID".
     */
    it('emits an error event when thread/start response has no thread ID', async () => {
        const appServerProvider = new CodexAppServerProvider();
        const session = appServerProvider.spawn(makeConfig());
        const collected = collectEventsWithTimeout(session.stream, 5000);

        await mock.completeHandshake();
        // The fourth request (after initialize, initialized, model/list) is thread/start.
        await mock.waitForRequests(4);
        const threadStartRequest = mock.requests[3];
        // Reply with a thread object that lacks an id.
        mock.pushResponse(threadStartRequest.id, { thread: {} });

        const emitted = await collected;
        const firstError = emitted.find(({ type }) => type === 'error');
        expect(firstError).toBeDefined();
        expect(firstError).toMatchObject({
            type: 'error',
            message: expect.stringContaining('no thread ID'),
        });
    });
});
815
+ // -------------------------------------------------------------------------
816
+ // Additional: Stop sends turn/interrupt and thread/unsubscribe
817
+ // -------------------------------------------------------------------------
818
describe('Additional: stop() cleanup', () => {
    /**
     * After a turn has started, stop() must issue both a turn/interrupt and a
     * thread/unsubscribe request addressed to the active thread.
     */
    it('sends turn/interrupt and thread/unsubscribe when stop() is called', async () => {
        const expectedThreadId = 'thr_stop_test';
        const appServerProvider = new CodexAppServerProvider();
        const session = appServerProvider.spawn(makeConfig());
        const collected = collectEventsWithTimeout(session.stream, 5000);

        const threadId = await mock.completeHandshakeAndThreadStart(expectedThreadId);
        mock.pushTurnStarted(threadId, 'turn_stop_1');
        // Short pause after the turn-started notification, before stopping.
        await new Promise((resolve) => setTimeout(resolve, 50));

        // stop() sends turn/interrupt and thread/unsubscribe — both are auto-responded
        await session.stop();

        // Each cleanup method must have been requested at least once for our thread.
        for (const method of ['turn/interrupt', 'thread/unsubscribe']) {
            const matching = mock.requestsByMethod(method);
            expect(matching.length).toBeGreaterThanOrEqual(1);
            expect(matching[0].params?.threadId).toBe(expectedThreadId);
        }

        await collected;
    });
});
839
+ // -------------------------------------------------------------------------
840
+ // Additional: InjectMessage before session is active throws
841
+ // -------------------------------------------------------------------------
842
describe('Additional: injectMessage before session', () => {
    // The stream is never consumed here, so no session exists when injectMessage() runs.
    it('throws when injectMessage() is called before session is established', async () => {
        const session = new CodexAppServerProvider().spawn(makeConfig());
        const injection = session.injectMessage('too early');
        await expect(injection).rejects.toThrow('No active session for message injection');
    });
});
850
+ // -------------------------------------------------------------------------
851
+ // Additional: Multiple event types in a turn
852
+ // -------------------------------------------------------------------------
853
describe('Additional: Rich notification sequence', () => {
    /**
     * Sends a reasoning delta, an agent message delta and a completed
     * fileChange item within one turn, then checks the reasoning system event
     * and the file_change tool_result that come out of the stream.
     */
    it('maps reasoning, file changes, and agent messages correctly', async () => {
        const turnId = 'turn_rich_1';
        const isReasoning = (evt) => evt.type === 'system' && 'subtype' in evt && evt.subtype === 'reasoning';
        const isFileChangeResult = (evt) => evt.type === 'tool_result' && 'toolName' in evt && evt.toolName === 'file_change';

        const appServerProvider = new CodexAppServerProvider();
        const session = appServerProvider.spawn(makeConfig());
        const collected = collectEventsWithTimeout(session.stream, 5000);
        const threadId = await mock.completeHandshakeAndThreadStart('thr_rich_001');

        mock.pushTurnStarted(threadId, turnId);
        // 1) reasoning summary delta
        mock.pushNotification('item/reasoning/summaryTextDelta', {
            threadId,
            text: 'Analyzing project structure...',
        });
        // 2) assistant text delta
        mock.pushAgentMessageDelta(threadId, 'I will create the file.');
        // 3) completed file change touching two paths
        mock.pushItemCompleted(threadId, {
            id: 'fc_001',
            type: 'fileChange',
            changes: [
                { path: 'src/new-file.ts', kind: 'add' },
                { path: 'src/existing.ts', kind: 'update' },
            ],
            status: 'completed',
        });
        mock.pushTurnCompleted(threadId, turnId, { status: 'completed' });

        // Short pause before stopping so the pushed notifications can be processed.
        await new Promise((resolve) => setTimeout(resolve, 100));
        await session.stop();
        const emitted = await collected;

        // The reasoning delta is expected as a system event with subtype "reasoning".
        const reasoning = emitted.find(isReasoning);
        expect(reasoning).toBeDefined();
        expect(reasoning).toMatchObject({
            type: 'system',
            subtype: 'reasoning',
            message: 'Analyzing project structure...',
        });

        // The file change is expected as one "kind: path" line per change.
        const fileChangeResult = emitted.find(isFileChangeResult);
        expect(fileChangeResult).toMatchObject({
            type: 'tool_result',
            toolName: 'file_change',
            content: 'add: src/new-file.ts\nupdate: src/existing.ts',
            isError: false,
        });
    });
});
899
+ // -------------------------------------------------------------------------
900
+ // Additional: Provider capabilities
901
+ // -------------------------------------------------------------------------
902
describe('Additional: Provider capabilities', () => {
    // Both capability flags must read as true on a freshly constructed provider.
    it('reports supportsMessageInjection and supportsSessionResume as true', () => {
        const appServerProvider = new CodexAppServerProvider();
        for (const flag of ['supportsMessageInjection', 'supportsSessionResume']) {
            expect(appServerProvider.capabilities[flag]).toBe(true);
        }
    });
});
909
+ });