@renseiai/agentfactory 0.8.20 → 0.8.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,64 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { mapAppServerNotification, mapAppServerItemEvent, normalizeMcpToolName, } from './codex-app-server-provider.js';
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
+ import { EventEmitter } from 'events';
3
+ import { mapAppServerNotification, mapAppServerItemEvent, normalizeMcpToolName, resolveSandboxPolicy, resolveCodexModel, calculateCostUsd, CODEX_MODEL_MAP, CODEX_DEFAULT_MODEL, } from './codex-app-server-provider.js';
4
+ // ---------------------------------------------------------------------------
5
+ // Mock child_process and readline for AppServerProcessManager tests
6
+ // ---------------------------------------------------------------------------
7
+ /** Fake writable stdin stream */
8
+ function createMockStdin() {
9
+ return {
10
+ writable: true,
11
+ write: vi.fn(),
12
+ };
13
+ }
14
+ /** Fake ChildProcess — an EventEmitter with stdin, stdout, pid, killed, kill() */
15
+ function createMockChildProcess() {
16
+ const stdout = new EventEmitter();
17
+ const proc = new EventEmitter();
18
+ proc.stdin = createMockStdin();
19
+ proc.stdout = stdout;
20
+ proc.pid = 12345;
21
+ proc.killed = false;
22
+ proc.kill = vi.fn((signal) => {
23
+ // For SIGKILL during shutdown tests
24
+ if (signal === 'SIGKILL') {
25
+ proc.killed = true;
26
+ }
27
+ });
28
+ return proc;
29
+ }
30
+ let mockProc;
31
+ let mockLineEmitter;
32
+ vi.mock('child_process', () => ({
33
+ spawn: vi.fn(() => mockProc),
34
+ }));
35
+ vi.mock('readline', () => ({
36
+ createInterface: vi.fn(() => {
37
+ mockLineEmitter = new EventEmitter();
38
+ mockLineEmitter.close = vi.fn();
39
+ return mockLineEmitter;
40
+ }),
41
+ }));
42
+ import { spawn } from 'child_process';
43
+ const mockSpawn = vi.mocked(spawn);
3
44
  function freshState() {
4
45
  return {
5
46
  sessionId: null,
47
+ model: null,
6
48
  totalInputTokens: 0,
7
49
  totalOutputTokens: 0,
50
+ totalCachedInputTokens: 0,
8
51
  turnCount: 0,
9
52
  };
10
53
  }
54
+ /** Emit a JSON line on mock stdout (via the readline 'line' event) */
55
+ function emitLine(obj) {
56
+ mockLineEmitter.emit('line', JSON.stringify(obj));
57
+ }
58
+ /** Emit a raw string line on mock stdout */
59
+ function emitRawLine(text) {
60
+ mockLineEmitter.emit('line', text);
61
+ }
11
62
  // ---------------------------------------------------------------------------
12
63
  // mapAppServerNotification
13
64
  // ---------------------------------------------------------------------------
@@ -97,13 +148,14 @@ describe('mapAppServerNotification', () => {
97
148
  it('maps turn/completed (success) with usage', () => {
98
149
  const state = freshState();
99
150
  state.turnCount = 1;
151
+ state.model = 'gpt-5-codex';
100
152
  const notification = {
101
153
  method: 'turn/completed',
102
154
  params: {
103
155
  turn: {
104
156
  id: 'turn_1',
105
157
  status: 'completed',
106
- usage: { input_tokens: 100, output_tokens: 50 },
158
+ usage: { input_tokens: 100, output_tokens: 50, cached_input_tokens: 20 },
107
159
  },
108
160
  },
109
161
  };
@@ -115,28 +167,34 @@ describe('mapAppServerNotification', () => {
115
167
  cost: {
116
168
  inputTokens: 100,
117
169
  outputTokens: 50,
170
+ cachedInputTokens: 20,
118
171
  numTurns: 1,
119
172
  },
120
173
  });
174
+ expect(result[0].cost.totalCostUsd).toBeGreaterThan(0);
121
175
  expect(state.totalInputTokens).toBe(100);
122
176
  expect(state.totalOutputTokens).toBe(50);
177
+ expect(state.totalCachedInputTokens).toBe(20);
123
178
  });
124
179
  it('accumulates usage across multiple turns', () => {
125
180
  const state = freshState();
126
181
  state.turnCount = 2;
182
+ state.model = 'gpt-5-codex';
127
183
  mapAppServerNotification({
128
184
  method: 'turn/completed',
129
- params: { turn: { id: 't1', status: 'completed', usage: { input_tokens: 100, output_tokens: 50 } } },
185
+ params: { turn: { id: 't1', status: 'completed', usage: { input_tokens: 100, output_tokens: 50, cached_input_tokens: 10 } } },
130
186
  }, state);
131
187
  const result = mapAppServerNotification({
132
188
  method: 'turn/completed',
133
- params: { turn: { id: 't2', status: 'completed', usage: { input_tokens: 200, output_tokens: 80 } } },
189
+ params: { turn: { id: 't2', status: 'completed', usage: { input_tokens: 200, output_tokens: 80, cached_input_tokens: 30 } } },
134
190
  }, state);
135
191
  expect(result[0]).toMatchObject({
136
192
  type: 'result',
137
193
  success: true,
138
- cost: { inputTokens: 300, outputTokens: 130 },
194
+ cost: { inputTokens: 300, outputTokens: 130, cachedInputTokens: 40 },
139
195
  });
196
+ expect(result[0].cost.totalCostUsd).toBeGreaterThan(0);
197
+ expect(state.totalCachedInputTokens).toBe(40);
140
198
  });
141
199
  it('maps turn/completed (failed) with error', () => {
142
200
  const state = freshState();
@@ -520,6 +578,67 @@ describe('mapAppServerItemEvent', () => {
520
578
  });
521
579
  });
522
580
  // ---------------------------------------------------------------------------
581
+ // Token Accumulation Edge Cases
582
+ // ---------------------------------------------------------------------------
583
+ describe('mapAppServerNotification — token edge cases', () => {
584
+ it('cached_input_tokens is present in usage but does not affect totalInputTokens accumulation', () => {
585
+ const state = freshState();
586
+ state.turnCount = 1;
587
+ const result = mapAppServerNotification({
588
+ method: 'turn/completed',
589
+ params: {
590
+ turn: {
591
+ id: 'turn_1',
592
+ status: 'completed',
593
+ usage: { input_tokens: 100, output_tokens: 50, cached_input_tokens: 30 },
594
+ },
595
+ },
596
+ }, state);
597
+ // totalInputTokens should only accumulate input_tokens, not cached_input_tokens
598
+ expect(state.totalInputTokens).toBe(100);
599
+ expect(state.totalOutputTokens).toBe(50);
600
+ expect(result[0]).toMatchObject({
601
+ type: 'result',
602
+ success: true,
603
+ cost: { inputTokens: 100, outputTokens: 50 },
604
+ });
605
+ });
606
+ it('turn/completed without usage does not crash, tokens stay at 0', () => {
607
+ const state = freshState();
608
+ state.turnCount = 1;
609
+ // turn object present but no usage field
610
+ const result = mapAppServerNotification({
611
+ method: 'turn/completed',
612
+ params: { turn: { id: 'turn_1', status: 'completed' } },
613
+ }, state);
614
+ expect(state.totalInputTokens).toBe(0);
615
+ expect(state.totalOutputTokens).toBe(0);
616
+ expect(result[0]).toMatchObject({
617
+ type: 'result',
618
+ success: true,
619
+ });
620
+ });
621
+ it('zero-token turns result in undefined cost fields (falsy to undefined)', () => {
622
+ const state = freshState();
623
+ state.turnCount = 0; // 0 turns → falsy → undefined
624
+ const result = mapAppServerNotification({
625
+ method: 'turn/completed',
626
+ params: {
627
+ turn: {
628
+ id: 'turn_1',
629
+ status: 'completed',
630
+ usage: { input_tokens: 0, output_tokens: 0 },
631
+ },
632
+ },
633
+ }, state);
634
+ // 0 || undefined → undefined due to falsy conversion
635
+ const cost = result[0].cost;
636
+ expect(cost.inputTokens).toBeUndefined();
637
+ expect(cost.outputTokens).toBeUndefined();
638
+ expect(cost.numTurns).toBeUndefined();
639
+ });
640
+ });
641
+ // ---------------------------------------------------------------------------
523
642
  // CodexAppServerProvider class
524
643
  // ---------------------------------------------------------------------------
525
644
  describe('CodexAppServerProvider', () => {
@@ -537,15 +656,593 @@ describe('CodexAppServerProvider', () => {
537
656
  });
538
657
  });
539
658
  // ---------------------------------------------------------------------------
540
- // AppServerProcessManager
659
+ // AppServerProcessManager — Lifecycle
541
660
  // ---------------------------------------------------------------------------
542
- describe('AppServerProcessManager', () => {
543
- it('exports AppServerProcessManager class', async () => {
544
- const { AppServerProcessManager } = await import('./codex-app-server-provider.js');
661
+ describe('AppServerProcessManager — lifecycle', () => {
662
+ let AppServerProcessManager;
663
+ beforeEach(async () => {
664
+ vi.clearAllMocks();
665
+ mockProc = createMockChildProcess();
666
+ const mod = await import('./codex-app-server-provider.js');
667
+ AppServerProcessManager = mod.AppServerProcessManager;
668
+ });
669
+ it('exports AppServerProcessManager class', () => {
545
670
  const manager = new AppServerProcessManager({ cwd: '/tmp' });
546
671
  expect(manager.isHealthy()).toBe(false);
547
672
  expect(manager.pid).toBeUndefined();
548
673
  });
674
+ it('start() spawns a process and completes initialize handshake', async () => {
675
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
676
+ const startPromise = manager.start();
677
+ // The start() method calls request('initialize', ...) which writes to stdin
678
+ // and waits for a response. We need to send back the response.
679
+ // Wait a tick for the spawn + request to be set up
680
+ await vi.waitFor(() => {
681
+ expect(mockProc.stdin.write).toHaveBeenCalled();
682
+ });
683
+ // Parse the initialize request that was written to stdin
684
+ const initializeCall = mockProc.stdin.write.mock.calls[0][0];
685
+ const initReq = JSON.parse(initializeCall.trim());
686
+ expect(initReq.method).toBe('initialize');
687
+ expect(initReq.id).toBe(1);
688
+ // Send back a successful response
689
+ emitLine({ id: 1, result: { capabilities: {} } });
690
+ // After initialize, the `initialized` notification is sent, then model/list request
691
+ await vi.waitFor(() => {
692
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
693
+ });
694
+ const initializedCall = mockProc.stdin.write.mock.calls[1][0];
695
+ const initializedMsg = JSON.parse(initializedCall.trim());
696
+ expect(initializedMsg.method).toBe('initialized');
697
+ // Respond to model/list request
698
+ emitLine({ id: 2, result: { models: [] } });
699
+ await startPromise;
700
+ expect(manager.isHealthy()).toBe(true);
701
+ expect(manager.pid).toBe(12345);
702
+ });
703
+ it('start() is idempotent — calling twice does not spawn a second process', async () => {
704
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
705
+ const startPromise = manager.start();
706
+ await vi.waitFor(() => {
707
+ expect(mockProc.stdin.write).toHaveBeenCalled();
708
+ });
709
+ emitLine({ id: 1, result: {} });
710
+ await vi.waitFor(() => {
711
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
712
+ });
713
+ emitLine({ id: 2, result: { models: [] } });
714
+ await startPromise;
715
+ expect(mockSpawn).toHaveBeenCalledTimes(1);
716
+ // Second call should be a no-op
717
+ await manager.start();
718
+ expect(mockSpawn).toHaveBeenCalledTimes(1);
719
+ });
720
+ it('request() correlates responses by id', async () => {
721
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
722
+ // Start the manager
723
+ const startPromise = manager.start();
724
+ await vi.waitFor(() => {
725
+ expect(mockProc.stdin.write).toHaveBeenCalled();
726
+ });
727
+ emitLine({ id: 1, result: {} });
728
+ await vi.waitFor(() => {
729
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
730
+ });
731
+ emitLine({ id: 2, result: { models: [] } });
732
+ await startPromise;
733
+ // Send a request
734
+ const reqPromise = manager.request('thread/start', { cwd: '/project' });
735
+ await vi.waitFor(() => {
736
+ // The request should be the 4th write (initialize, initialized, model/list, thread/start)
737
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
738
+ });
739
+ const reqCall = mockProc.stdin.write.mock.calls[3][0];
740
+ const req = JSON.parse(reqCall.trim());
741
+ expect(req.method).toBe('thread/start');
742
+ expect(req.id).toBe(3);
743
+ // Respond
744
+ emitLine({ id: 3, result: { thread: { id: 'thr_123' } } });
745
+ const result = await reqPromise;
746
+ expect(result).toEqual({ thread: { id: 'thr_123' } });
747
+ });
748
+ it('request() times out when no response is received', async () => {
749
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
750
+ const startPromise = manager.start();
751
+ await vi.waitFor(() => {
752
+ expect(mockProc.stdin.write).toHaveBeenCalled();
753
+ });
754
+ emitLine({ id: 1, result: {} });
755
+ await vi.waitFor(() => {
756
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
757
+ });
758
+ emitLine({ id: 2, result: { models: [] } });
759
+ await startPromise;
760
+ // Now switch to fake timers for the timeout test
761
+ vi.useFakeTimers();
762
+ try {
763
+ // Send a request with a short timeout
764
+ const reqPromise = manager.request('thread/start', {}, 5000);
765
+ // Advance time past the timeout
766
+ vi.advanceTimersByTime(5001);
767
+ await expect(reqPromise).rejects.toThrow(/timed out/);
768
+ }
769
+ finally {
770
+ vi.useRealTimers();
771
+ }
772
+ });
773
+ it('request() rejects with formatted message on JSON-RPC error response', async () => {
774
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
775
+ const startPromise = manager.start();
776
+ await vi.waitFor(() => {
777
+ expect(mockProc.stdin.write).toHaveBeenCalled();
778
+ });
779
+ emitLine({ id: 1, result: {} });
780
+ await vi.waitFor(() => {
781
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
782
+ });
783
+ emitLine({ id: 2, result: { models: [] } });
784
+ await startPromise;
785
+ const reqPromise = manager.request('thread/start', { cwd: '/project' });
786
+ await vi.waitFor(() => {
787
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
788
+ });
789
+ // Respond with an error
790
+ emitLine({ id: 3, error: { code: -32600, message: 'Invalid request' } });
791
+ await expect(reqPromise).rejects.toThrow('JSON-RPC error (-32600): Invalid request');
792
+ });
793
+ it('non-JSON stdout lines are silently ignored (no crash)', async () => {
794
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
795
+ const startPromise = manager.start();
796
+ await vi.waitFor(() => {
797
+ expect(mockProc.stdin.write).toHaveBeenCalled();
798
+ });
799
+ // Emit non-JSON lines — should not throw
800
+ emitRawLine('Some debug output from the binary');
801
+ emitRawLine('WARNING: something happened');
802
+ emitRawLine('');
803
+ emitRawLine('not valid json {{{');
804
+ // Now send the valid response
805
+ emitLine({ id: 1, result: {} });
806
+ // Respond to model/list request
807
+ await vi.waitFor(() => {
808
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
809
+ });
810
+ emitLine({ id: 2, result: { models: [] } });
811
+ await startPromise;
812
+ expect(manager.isHealthy()).toBe(true);
813
+ });
814
+ it('process exit event rejects all pending requests and sets healthy to false', async () => {
815
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
816
+ const startPromise = manager.start();
817
+ await vi.waitFor(() => {
818
+ expect(mockProc.stdin.write).toHaveBeenCalled();
819
+ });
820
+ emitLine({ id: 1, result: {} });
821
+ await vi.waitFor(() => {
822
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
823
+ });
824
+ emitLine({ id: 2, result: { models: [] } });
825
+ await startPromise;
826
+ expect(manager.isHealthy()).toBe(true);
827
+ // Send a request but don't respond
828
+ const reqPromise = manager.request('thread/start', {});
829
+ await vi.waitFor(() => {
830
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
831
+ });
832
+ // Simulate process exit
833
+ mockProc.emit('exit', 1, null);
834
+ await expect(reqPromise).rejects.toThrow(/App server exited/);
835
+ expect(manager.isHealthy()).toBe(false);
836
+ });
837
+ it('process error event rejects all pending requests', async () => {
838
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
839
+ const startPromise = manager.start();
840
+ await vi.waitFor(() => {
841
+ expect(mockProc.stdin.write).toHaveBeenCalled();
842
+ });
843
+ emitLine({ id: 1, result: {} });
844
+ await vi.waitFor(() => {
845
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
846
+ });
847
+ emitLine({ id: 2, result: { models: [] } });
848
+ await startPromise;
849
+ // Send a request but don't respond
850
+ const reqPromise = manager.request('some/method', {});
851
+ await vi.waitFor(() => {
852
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
853
+ });
854
+ // Simulate process error
855
+ mockProc.emit('error', new Error('ENOENT'));
856
+ await expect(reqPromise).rejects.toThrow(/App server process error: ENOENT/);
857
+ });
858
+ });
859
+ // ---------------------------------------------------------------------------
860
+ // Thread Notification Routing
861
+ // ---------------------------------------------------------------------------
862
+ describe('AppServerProcessManager — thread notification routing', () => {
863
+ let AppServerProcessManager;
864
+ beforeEach(async () => {
865
+ vi.clearAllMocks();
866
+ mockProc = createMockChildProcess();
867
+ const mod = await import('./codex-app-server-provider.js');
868
+ AppServerProcessManager = mod.AppServerProcessManager;
869
+ });
870
+ async function startManager() {
871
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
872
+ const startPromise = manager.start();
873
+ await vi.waitFor(() => {
874
+ expect(mockProc.stdin.write).toHaveBeenCalled();
875
+ });
876
+ emitLine({ id: 1, result: {} });
877
+ // Respond to model/list request (called by start() after handshake)
878
+ await vi.waitFor(() => {
879
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
880
+ });
881
+ emitLine({ id: 2, result: { models: [] } });
882
+ await startPromise;
883
+ return manager;
884
+ }
885
+ it('subscribeThread() routes notifications with matching threadId to the listener', async () => {
886
+ const manager = await startManager();
887
+ const received = [];
888
+ manager.subscribeThread('thr_1', (n) => received.push(n));
889
+ // Emit a notification with threadId = thr_1
890
+ emitLine({ method: 'turn/started', params: { threadId: 'thr_1', turn: { id: 't1' } } });
891
+ expect(received).toHaveLength(1);
892
+ expect(received[0].method).toBe('turn/started');
893
+ });
894
+ it('unsubscribeThread() stops delivering notifications after unsubscribe', async () => {
895
+ const manager = await startManager();
896
+ const received = [];
897
+ manager.subscribeThread('thr_1', (n) => received.push(n));
898
+ emitLine({ method: 'turn/started', params: { threadId: 'thr_1' } });
899
+ expect(received).toHaveLength(1);
900
+ manager.unsubscribeThread('thr_1');
901
+ emitLine({ method: 'turn/completed', params: { threadId: 'thr_1' } });
902
+ // Should not receive new notifications
903
+ expect(received).toHaveLength(1);
904
+ });
905
+ it('notifications without threadId dispatch to global listeners', async () => {
906
+ const manager = await startManager();
907
+ const globalReceived = [];
908
+ manager.globalListeners.add((n) => globalReceived.push(n));
909
+ // Notification without threadId
910
+ emitLine({ method: 'thread/closed', params: {} });
911
+ expect(globalReceived).toHaveLength(1);
912
+ expect(globalReceived[0].method).toBe('thread/closed');
913
+ });
914
+ it('notifications with unknown threadId fall through to global listeners', async () => {
915
+ const manager = await startManager();
916
+ const globalReceived = [];
917
+ const threadReceived = [];
918
+ manager.subscribeThread('thr_1', (n) => threadReceived.push(n));
919
+ manager.globalListeners.add((n) => globalReceived.push(n));
920
+ // Notification with unknown threadId (not subscribed)
921
+ emitLine({ method: 'turn/started', params: { threadId: 'thr_unknown' } });
922
+ expect(threadReceived).toHaveLength(0);
923
+ expect(globalReceived).toHaveLength(1);
924
+ });
925
+ it('multiple concurrent threads each receive only their own notifications', async () => {
926
+ const manager = await startManager();
927
+ const received1 = [];
928
+ const received2 = [];
929
+ manager.subscribeThread('thr_A', (n) => received1.push(n));
930
+ manager.subscribeThread('thr_B', (n) => received2.push(n));
931
+ emitLine({ method: 'turn/started', params: { threadId: 'thr_A', turn: { id: 'tA1' } } });
932
+ emitLine({ method: 'turn/started', params: { threadId: 'thr_B', turn: { id: 'tB1' } } });
933
+ emitLine({ method: 'turn/completed', params: { threadId: 'thr_A', turn: { id: 'tA1' } } });
934
+ expect(received1).toHaveLength(2); // turn/started + turn/completed for thr_A
935
+ expect(received2).toHaveLength(1); // turn/started for thr_B
936
+ expect(received1[0].params?.threadId).toBe('thr_A');
937
+ expect(received2[0].params?.threadId).toBe('thr_B');
938
+ });
939
+ });
940
+ // ---------------------------------------------------------------------------
941
+ // Approval & Sandbox Policy Resolution
942
+ // ---------------------------------------------------------------------------
943
+ describe('Approval & Sandbox Policy Resolution (via spawn params)', () => {
944
+ let CodexAppServerProvider;
945
+ beforeEach(async () => {
946
+ vi.clearAllMocks();
947
+ mockProc = createMockChildProcess();
948
+ const mod = await import('./codex-app-server-provider.js');
949
+ CodexAppServerProvider = mod.CodexAppServerProvider;
950
+ });
951
+ // resolveApprovalPolicy is module-private, and resolveSandboxPolicy (though exported) is only
951
+ // unit-tested in isolation, so here we check both end-to-end via what AppServerAgentHandle passes to
953
+ // the processManager when starting a thread. We can observe this through
954
+ // the stdin writes after the handle's stream is consumed.
955
+ function makeConfig(overrides = {}) {
956
+ return {
957
+ prompt: 'Test prompt',
958
+ cwd: '/project',
959
+ env: {},
960
+ abortController: new AbortController(),
961
+ autonomous: false,
962
+ sandboxEnabled: false,
963
+ ...overrides,
964
+ };
965
+ }
966
+ /**
967
+ * Helper: start consuming a handle's stream, complete the initialize
968
+ * handshake, and wait for the thread/start request to be written to stdin.
969
+ * Returns the parsed thread/start request.
970
+ * Does NOT attempt to drain the full stream — the test should not need to.
971
+ */
972
+ async function driveToThreadStart(handle) {
973
+ const iter = handle.stream[Symbol.asyncIterator]();
974
+ // Kick off the generator — it will call processManager.start()
975
+ const firstNext = iter.next();
976
+ // Wait for the initialize request
977
+ await vi.waitFor(() => {
978
+ expect(mockProc.stdin.write).toHaveBeenCalled();
979
+ }, { timeout: 3000 });
980
+ // Respond to initialize
981
+ emitLine({ id: 1, result: {} });
982
+ // Respond to model/list request (called by start() after handshake)
983
+ await vi.waitFor(() => {
984
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
985
+ }, { timeout: 3000 });
986
+ emitLine({ id: 2, result: { models: [] } });
987
+ // Wait for thread/start request (4 writes: initialize, initialized, model/list, thread/start)
988
+ await vi.waitFor(() => {
989
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
990
+ }, { timeout: 3000 });
991
+ const threadStartCall = mockProc.stdin.write.mock.calls[3][0];
992
+ const threadStartReq = JSON.parse(threadStartCall.trim());
993
+ // Do NOT respond — we only need to inspect the request params.
994
+ // The generator is left pending (never drained); just make sure its eventual rejection is handled.
995
+ firstNext.catch(() => { }); // suppress unhandled rejection from the pending generator
996
+ return threadStartReq;
997
+ }
998
+ it('autonomous: true resolves approval policy to on-request', async () => {
999
+ const provider = new CodexAppServerProvider();
1000
+ const handle = provider.spawn(makeConfig({ autonomous: true, sandboxEnabled: false }));
1001
+ const threadStartReq = await driveToThreadStart(handle);
1002
+ expect(threadStartReq.params.approvalPolicy).toBe('on-request');
1003
+ });
1004
+ it('autonomous: false resolves approval policy to untrusted', async () => {
1005
+ const provider = new CodexAppServerProvider();
1006
+ const handle = provider.spawn(makeConfig({ autonomous: false }));
1007
+ const threadStartReq = await driveToThreadStart(handle);
1008
+ expect(threadStartReq.params.approvalPolicy).toBe('untrusted');
1009
+ });
1010
+ it('sandboxEnabled: false does not include sandbox on thread/start', async () => {
1011
+ const provider = new CodexAppServerProvider();
1012
+ const handle = provider.spawn(makeConfig({ sandboxEnabled: false }));
1013
+ const threadStartReq = await driveToThreadStart(handle);
1014
+ expect(threadStartReq.params.sandbox).toBeUndefined();
1015
+ });
1016
+ it('sandboxEnabled: true includes sandbox mode string on thread/start', async () => {
1017
+ const provider = new CodexAppServerProvider();
1018
+ const handle = provider.spawn(makeConfig({ sandboxEnabled: true, cwd: '/my/project' }));
1019
+ const threadStartReq = await driveToThreadStart(handle);
1020
+ expect(threadStartReq.params.sandbox).toBe('workspace-write');
1021
+ });
1022
+ });
1023
+ // ---------------------------------------------------------------------------
1024
+ // Shutdown
1025
+ // ---------------------------------------------------------------------------
1026
+ describe('AppServerProcessManager — shutdown', () => {
1027
+ let AppServerProcessManager;
1028
+ beforeEach(async () => {
1029
+ vi.clearAllMocks();
1030
+ mockProc = createMockChildProcess();
1031
+ const mod = await import('./codex-app-server-provider.js');
1032
+ AppServerProcessManager = mod.AppServerProcessManager;
1033
+ });
1034
+ afterEach(() => {
1035
+ vi.useRealTimers();
1036
+ });
1037
+ async function startManager() {
1038
+ const manager = new AppServerProcessManager({ cwd: '/tmp' });
1039
+ const startPromise = manager.start();
1040
+ await vi.waitFor(() => {
1041
+ expect(mockProc.stdin.write).toHaveBeenCalled();
1042
+ });
1043
+ emitLine({ id: 1, result: {} });
1044
+ // Respond to model/list request (called by start() after handshake)
1045
+ await vi.waitFor(() => {
1046
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
1047
+ });
1048
+ emitLine({ id: 2, result: { models: [] } });
1049
+ await startPromise;
1050
+ return manager;
1051
+ }
1052
+ it('shutdown() is idempotent — calling twice does not spawn additional shutdown logic', async () => {
1053
+ const manager = await startManager();
1054
+ // Make process exit immediately when SIGTERM is received
1055
+ mockProc.kill = vi.fn((signal) => {
1056
+ if (signal === 'SIGTERM') {
1057
+ setTimeout(() => mockProc.emit('exit', 0, 'SIGTERM'), 0);
1058
+ }
1059
+ });
1060
+ const promise1 = manager.shutdown();
1061
+ const promise2 = manager.shutdown();
1062
+ await Promise.all([promise1, promise2]);
1063
+ // SIGTERM should only have been called once (not twice)
1064
+ const sigtermCalls = mockProc.kill.mock.calls
1065
+ .filter((c) => c[0] === 'SIGTERM');
1066
+ expect(sigtermCalls).toHaveLength(1);
1067
+ });
1068
+ it('graceful SIGTERM is sent first on shutdown', async () => {
1069
+ const manager = await startManager();
1070
+ // Make process exit immediately when SIGTERM is received
1071
+ mockProc.kill = vi.fn((signal) => {
1072
+ if (signal === 'SIGTERM') {
1073
+ setTimeout(() => mockProc.emit('exit', 0, 'SIGTERM'), 0);
1074
+ }
1075
+ });
1076
+ await manager.shutdown();
1077
+ expect(mockProc.kill).toHaveBeenCalledWith('SIGTERM');
1078
+ });
1079
+ it('force SIGKILL fires after 5s if process does not exit after SIGTERM', async () => {
1080
+ vi.useFakeTimers();
1081
+ const manager = await startManager();
1082
+ // Process ignores SIGTERM (doesn't exit)
1083
+ mockProc.kill = vi.fn((signal) => {
1084
+ if (signal === 'SIGKILL') {
1085
+ mockProc.killed = true;
1086
+ // SIGKILL triggers exit
1087
+ mockProc.emit('exit', null, 'SIGKILL');
1088
+ }
1089
+ // SIGTERM is ignored — no exit emitted
1090
+ });
1091
+ const shutdownPromise = manager.shutdown();
1092
+ // At this point SIGTERM has been called but process hasn't exited
1093
+ expect(mockProc.kill).toHaveBeenCalledWith('SIGTERM');
1094
+ expect(mockProc.kill).not.toHaveBeenCalledWith('SIGKILL');
1095
+ // Advance time by 5 seconds to trigger SIGKILL
1096
+ vi.advanceTimersByTime(5000);
1097
+ await shutdownPromise;
1098
+ expect(mockProc.kill).toHaveBeenCalledWith('SIGKILL');
1099
+ });
1100
+ it('pending requests are rejected with shutting down error on shutdown', async () => {
1101
+ const manager = await startManager();
1102
+ // Send a request but don't respond
1103
+ const reqPromise = manager.request('thread/start', {});
1104
+ // Attach a catch handler immediately to prevent unhandled rejection
1105
+ const rejectionPromise = reqPromise.catch((err) => err);
1106
+ await vi.waitFor(() => {
1107
+ // 4 writes: initialize, initialized, model/list, thread/start
1108
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(4);
1109
+ });
1110
+ // Make process exit on SIGTERM
1111
+ mockProc.kill = vi.fn((signal) => {
1112
+ if (signal === 'SIGTERM') {
1113
+ setTimeout(() => mockProc.emit('exit', 0, 'SIGTERM'), 0);
1114
+ }
1115
+ });
1116
+ await manager.shutdown();
1117
+ const error = await rejectionPromise;
1118
+ expect(error).toBeInstanceOf(Error);
1119
+ expect(error.message).toMatch(/shutting down/);
1120
+ });
1121
+ it('after shutdown, isHealthy() is false, pid is undefined, thread listeners cleared', async () => {
1122
+ const manager = await startManager();
1123
+ // Subscribe a thread before shutdown
1124
+ manager.subscribeThread('thr_1', () => { });
1125
+ // Make process exit on SIGTERM
1126
+ mockProc.kill = vi.fn((signal) => {
1127
+ if (signal === 'SIGTERM') {
1128
+ setTimeout(() => mockProc.emit('exit', 0, 'SIGTERM'), 0);
1129
+ }
1130
+ });
1131
+ await manager.shutdown();
1132
+ expect(manager.isHealthy()).toBe(false);
1133
+ expect(manager.pid).toBeUndefined();
1134
+ // Thread listeners should be cleared
1135
+ expect(manager.threadListeners.size).toBe(0);
1136
+ expect(manager.globalListeners.size).toBe(0);
1137
+ });
1138
+ });
1139
+ // ---------------------------------------------------------------------------
1140
+ // Provider Class (CodexAppServerProvider)
1141
+ // ---------------------------------------------------------------------------
1142
+ describe('CodexAppServerProvider — spawn/resume/shutdown', () => {
1143
+ let CodexAppServerProvider;
1144
+ beforeEach(async () => {
1145
+ vi.clearAllMocks();
1146
+ mockProc = createMockChildProcess();
1147
+ const mod = await import('./codex-app-server-provider.js');
1148
+ CodexAppServerProvider = mod.CodexAppServerProvider;
1149
+ });
1150
+ function makeConfig(overrides = {}) {
1151
+ return {
1152
+ prompt: 'Test prompt',
1153
+ cwd: '/project',
1154
+ env: {},
1155
+ abortController: new AbortController(),
1156
+ autonomous: true,
1157
+ sandboxEnabled: false,
1158
+ ...overrides,
1159
+ };
1160
+ }
1161
+ it('spawn() creates a handle with stream and sessionId', () => {
1162
+ const provider = new CodexAppServerProvider();
1163
+ const handle = provider.spawn(makeConfig());
1164
+ expect(handle).toBeDefined();
1165
+ expect(handle.sessionId).toBeNull(); // Not set until stream is consumed
1166
+ expect(handle.stream).toBeDefined();
1167
+ });
1168
+ it('resume() creates a handle with the given sessionId passed through', () => {
1169
+ const provider = new CodexAppServerProvider();
1170
+ const handle = provider.resume('thr_existing', makeConfig());
1171
+ expect(handle).toBeDefined();
1172
+ expect(handle.stream).toBeDefined();
1173
+ // The resumeThreadId is stored internally and used when the stream starts
1174
+ });
1175
+ it('singleton process manager — multiple spawn() calls reuse the same processManager when healthy', async () => {
1176
+ const provider = new CodexAppServerProvider();
1177
+ const handle1 = provider.spawn(makeConfig());
1178
+ const handle2 = provider.spawn(makeConfig());
1179
+ // Neither spawn() call above starts the app-server process, so the provider's
1180
+ // processManager exists at this point but is not healthy yet.
1181
+ expect(provider.processManager).toBeDefined();
1182
+ // Capture the manager that the provider currently holds.
1183
+ const pm1 = provider.processManager;
1184
+ expect(pm1).toBeDefined();
1185
+ // NOTE: getOrCreateProcessManager checks isHealthy(), which returns false before start(),
1186
+ // so each spawn() creates a new PM while the previous one is not healthy.
1187
+ // Actual reuse of a healthy PM is asserted in 'singleton process manager — reuses PM when healthy',
1188
+ // and recreation of an unhealthy one in the 'process manager recreation' test.
1189
+ // No spawn() happens between the two reads here, so this only re-checks the field:
1190
+ const pm2 = provider.processManager;
1191
+ // pm2 should be the same reference as pm1 (nothing has replaced it since the read above)
1192
+ expect(pm2).toBeDefined();
1193
+ });
1194
+ it('process manager recreation — if isHealthy() returns false, creates a new process manager', () => {
1195
+ const provider = new CodexAppServerProvider();
1196
+ // First spawn creates a PM
1197
+ provider.spawn(makeConfig());
1198
+ const pm1 = provider.processManager;
1199
+ // PM is not healthy (not started), so next spawn should create a new one
1200
+ expect(pm1.isHealthy()).toBe(false);
1201
+ provider.spawn(makeConfig());
1202
+ const pm2 = provider.processManager;
1203
+ expect(pm2).not.toBe(pm1);
1204
+ });
1205
+ it('singleton process manager — reuses PM when healthy', async () => {
1206
+ const provider = new CodexAppServerProvider();
1207
+ // Manually make the PM healthy by starting it
1208
+ provider.spawn(makeConfig());
1209
+ const pm = provider.processManager;
1210
+ // Start the PM so it becomes healthy
1211
+ const startPromise = pm.start();
1212
+ await vi.waitFor(() => {
1213
+ expect(mockProc.stdin.write).toHaveBeenCalled();
1214
+ });
1215
+ emitLine({ id: 1, result: {} });
1216
+ await vi.waitFor(() => {
1217
+ expect(mockProc.stdin.write).toHaveBeenCalledTimes(3);
1218
+ });
1219
+ emitLine({ id: 2, result: { models: [] } });
1220
+ await startPromise;
1221
+ expect(pm.isHealthy()).toBe(true);
1222
+ // Now spawn again — should reuse the same PM
1223
+ provider.spawn(makeConfig());
1224
+ const pm2 = provider.processManager;
1225
+ expect(pm2).toBe(pm);
1226
+ });
1227
+ it('shutdown() delegates to processManager.shutdown() and nullifies', async () => {
1228
+ const provider = new CodexAppServerProvider();
1229
+ provider.spawn(makeConfig());
1230
+ const pm = provider.processManager;
1231
+ const shutdownSpy = vi.spyOn(pm, 'shutdown').mockResolvedValue(undefined);
1232
+ await provider.shutdown();
1233
+ expect(shutdownSpy).toHaveBeenCalledTimes(1);
1234
+ expect(provider.processManager).toBeNull();
1235
+ });
1236
+ it('shutdown() is safe to call when no process manager exists', async () => {
1237
+ const provider = new CodexAppServerProvider();
1238
+ // No spawn() called, so no processManager
1239
+ await expect(provider.shutdown()).resolves.toBeUndefined();
1240
+ });
1241
+ });
1242
+ // ---------------------------------------------------------------------------
1243
+ // AppServerProcessManager — MCP server configuration (SUP-1733)
1244
+ // ---------------------------------------------------------------------------
1245
+ describe('AppServerProcessManager — MCP server configuration', () => {
549
1246
  it('configureMcpServers is a no-op when not initialized', async () => {
550
1247
  const { AppServerProcessManager } = await import('./codex-app-server-provider.js');
551
1248
  const manager = new AppServerProcessManager({ cwd: '/tmp' });
@@ -587,3 +1284,134 @@ describe('normalizeMcpToolName', () => {
587
1284
  .toBe('mcp:unknown/unknown');
588
1285
  });
589
1286
  });
1287
+ // ---------------------------------------------------------------------------
1288
+ // resolveSandboxPolicy
1289
+ // ---------------------------------------------------------------------------
1290
+ describe('resolveSandboxPolicy', () => { // Pins the policy object returned for each sandboxLevel / sandboxEnabled combination; every case uses cwd '/work'.
1291
+ it('returns readOnly policy with network access for read-only level', () => {
1292
+ const config = { sandboxLevel: 'read-only', sandboxEnabled: false, cwd: '/work' };
1293
+ expect(resolveSandboxPolicy(config)).toEqual({ type: 'readOnly', networkAccess: true }); // no writableRoots expected for read-only
1294
+ });
1295
+ it('returns workspaceWrite policy with writableRoots and network for workspace-write level', () => {
1296
+ const config = { sandboxLevel: 'workspace-write', sandboxEnabled: false, cwd: '/work' };
1297
+ expect(resolveSandboxPolicy(config)).toEqual({ type: 'workspaceWrite', writableRoots: ['/work'], networkAccess: true }); // expected writable root matches config.cwd
1298
+ });
1299
+ it('returns dangerFullAccess policy for full-access level', () => {
1300
+ const config = { sandboxLevel: 'full-access', sandboxEnabled: false, cwd: '/work' };
1301
+ expect(resolveSandboxPolicy(config)).toEqual({ type: 'dangerFullAccess' }); // expected policy carries only the type
1302
+ });
1303
+ it('falls back to sandboxEnabled boolean when no level set', () => {
1304
+ const config = { sandboxEnabled: true, cwd: '/work' }; // sandboxLevel deliberately omitted
1305
+ expect(resolveSandboxPolicy(config)).toEqual({ type: 'workspaceWrite', writableRoots: ['/work'], networkAccess: true }); // same policy as the explicit workspace-write case above
1306
+ });
1307
+ it('returns undefined when sandbox disabled and no level set', () => {
1308
+ const config = { sandboxEnabled: false, cwd: '/work' }; // sandboxLevel deliberately omitted
1309
+ expect(resolveSandboxPolicy(config)).toBeUndefined(); // no policy object at all in this case
1310
+ });
1311
+ it('sandboxLevel takes precedence over sandboxEnabled boolean', () => {
1312
+ const config = { sandboxLevel: 'read-only', sandboxEnabled: true, cwd: '/work' }; // the two settings disagree on purpose
1313
+ expect(resolveSandboxPolicy(config)).toEqual({ type: 'readOnly', networkAccess: true }); // the level wins: readOnly rather than the workspaceWrite fallback
1314
+ });
1315
+ });
1316
+ // ---------------------------------------------------------------------------
1317
+ // resolveCodexModel (SUP-1749)
1318
+ // ---------------------------------------------------------------------------
1319
+ describe('resolveCodexModel', () => { // Pins model resolution; the cases below exercise the order: config.model, then env.CODEX_MODEL_TIER, then env.CODEX_MODEL, then the default.
1320
+ it('returns explicit model when set', () => {
1321
+ const config = { model: 'custom-model', env: {} };
1322
+ expect(resolveCodexModel(config)).toBe('custom-model'); // returned verbatim
1323
+ });
1324
+ it('maps opus tier to gpt-5-codex', () => {
1325
+ const config = { env: { CODEX_MODEL_TIER: 'opus' } };
1326
+ expect(resolveCodexModel(config)).toBe('gpt-5-codex');
1327
+ });
1328
+ it('maps sonnet tier to gpt-5.2-codex', () => {
1329
+ const config = { env: { CODEX_MODEL_TIER: 'sonnet' } };
1330
+ expect(resolveCodexModel(config)).toBe('gpt-5.2-codex');
1331
+ });
1332
+ it('maps haiku tier to gpt-5.3-codex', () => {
1333
+ const config = { env: { CODEX_MODEL_TIER: 'haiku' } };
1334
+ expect(resolveCodexModel(config)).toBe('gpt-5.3-codex');
1335
+ });
1336
+ it('uses CODEX_MODEL env var as fallback', () => {
1337
+ const config = { env: { CODEX_MODEL: 'gpt-5.5-codex' } }; // neither config.model nor a tier is set
1338
+ expect(resolveCodexModel(config)).toBe('gpt-5.5-codex');
1339
+ });
1340
+ it('returns default model when nothing configured', () => {
1341
+ const config = { env: {} };
1342
+ expect(resolveCodexModel(config)).toBe('gpt-5-codex'); // same value the CODEX_DEFAULT_MODEL test in this file asserts
1343
+ });
1344
+ it('explicit model takes precedence over tier', () => {
1345
+ const config = { model: 'my-model', env: { CODEX_MODEL_TIER: 'opus' } }; // both set on purpose
1346
+ expect(resolveCodexModel(config)).toBe('my-model'); // not the opus mapping
1347
+ });
1348
+ it('tier takes precedence over CODEX_MODEL env', () => {
1349
+ const config = { env: { CODEX_MODEL_TIER: 'haiku', CODEX_MODEL: 'custom' } }; // both set on purpose
1350
+ expect(resolveCodexModel(config)).toBe('gpt-5.3-codex'); // the haiku mapping, not 'custom'
1351
+ });
1352
+ });
1353
+ // ---------------------------------------------------------------------------
1354
+ // calculateCostUsd (SUP-1750)
1355
+ // ---------------------------------------------------------------------------
1356
+ describe('calculateCostUsd', () => { // Pins cost arithmetic; per the comments below the arguments are (input tokens, cached tokens, output tokens, optional model) and rates are per 1M tokens. toBeCloseTo(x, 6) compares to 6 decimal digits.
1357
+ it('calculates cost with default pricing (gpt-5-codex)', () => {
1358
+ // 1000 input (800 fresh + 200 cached), 500 output
1359
+ const cost = calculateCostUsd(1000, 200, 500); // no model argument: default pricing applies
1360
+ // fresh: 800/1M * 2.00 = 0.0016
1361
+ // cached: 200/1M * 0.50 = 0.0001
1362
+ // output: 500/1M * 8.00 = 0.004
1363
+ expect(cost).toBeCloseTo(0.0057, 6); // 0.0016 + 0.0001 + 0.004
1364
+ });
1365
+ it('calculates cost with specific model pricing', () => {
1366
+ const cost = calculateCostUsd(1000, 200, 500, 'gpt-5.2-codex');
1367
+ // fresh: 800/1M * 1.00 = 0.0008
1368
+ // cached: 200/1M * 0.25 = 0.00005
1369
+ // output: 500/1M * 4.00 = 0.002
1370
+ expect(cost).toBeCloseTo(0.00285, 6); // 0.0008 + 0.00005 + 0.002
1371
+ });
1372
+ it('calculates cost with haiku model pricing', () => {
1373
+ const cost = calculateCostUsd(1000, 200, 500, 'gpt-5.3-codex');
1374
+ // fresh: 800/1M * 0.50 = 0.0004
1375
+ // cached: 200/1M * 0.125 = 0.000025
1376
+ // output: 500/1M * 2.00 = 0.001
1377
+ expect(cost).toBeCloseTo(0.001425, 6); // 0.0004 + 0.000025 + 0.001
1378
+ });
1379
+ it('uses default pricing for unknown models', () => {
1380
+ const cost = calculateCostUsd(1000, 0, 500, 'unknown-model'); // zero cached tokens, so all 1000 input tokens are fresh
1381
+ // fresh: 1000/1M * 2.00 = 0.002
1382
+ // output: 500/1M * 8.00 = 0.004
1383
+ expect(cost).toBeCloseTo(0.006, 6); // 0.002 + 0.004, the same rates as the default-pricing case
1384
+ });
1385
+ it('handles zero tokens', () => {
1386
+ expect(calculateCostUsd(0, 0, 0)).toBe(0); // strict equality here, unlike the toBeCloseTo checks elsewhere
1387
+ });
1388
+ it('handles all cached tokens (fresh = 0)', () => {
1389
+ const cost = calculateCostUsd(1000, 1000, 0); // cached equals input, no output tokens
1390
+ // fresh: 0
1391
+ // cached: 1000/1M * 0.50 = 0.0005
1392
+ expect(cost).toBeCloseTo(0.0005, 6); // cached portion only
1393
+ });
1394
+ it('clamps fresh tokens to zero when cached exceeds input', () => {
1395
+ const cost = calculateCostUsd(100, 200, 0); // cached (200) is larger than input (100) on purpose
1396
+ // fresh: max(0, 100 - 200) = 0
1397
+ // cached: 200/1M * 0.50 = 0.0001
1398
+ expect(cost).toBeCloseTo(0.0001, 6); // a negative fresh count must not reduce the total
1399
+ });
1400
+ });
1401
+ // ---------------------------------------------------------------------------
1402
+ // CODEX_MODEL_MAP and CODEX_DEFAULT_MODEL (SUP-1749)
1403
+ // ---------------------------------------------------------------------------
1404
+ describe('CODEX_MODEL_MAP', () => { // Pins the exported tier-to-model table.
1405
+ it('maps all three tiers', () => {
1406
+ expect(CODEX_MODEL_MAP).toEqual({ // deep equality against the full expected object
1407
+ opus: 'gpt-5-codex',
1408
+ sonnet: 'gpt-5.2-codex',
1409
+ haiku: 'gpt-5.3-codex',
1410
+ });
1411
+ });
1412
+ });
1413
+ describe('CODEX_DEFAULT_MODEL', () => { // Pins the exported default model identifier.
1414
+ it('is gpt-5-codex', () => {
1415
+ expect(CODEX_DEFAULT_MODEL).toBe('gpt-5-codex'); // same string as the opus entry asserted in the CODEX_MODEL_MAP test
1416
+ });
1417
+ });