onbuzz 4.8.2 → 4.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "onbuzz",
3
- "version": "4.8.2",
3
+ "version": "4.8.3",
4
4
  "description": "Loxia OnBuzz - Your AI Fleet",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -19,11 +19,11 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
19
19
  };
20
20
 
21
21
  const mockModelsService = {
22
- getAvailableModelNames: jest.fn().mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']),
22
+ getAvailableModelNames: jest.fn().mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']),
23
23
  getModels: jest.fn().mockReturnValue([
24
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 },
25
- { name: 'gpt-5-mini', type: 'chat', contextWindow: 400000 },
26
- { name: 'gpt-5-nano', type: 'chat', contextWindow: 400000 },
24
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
25
+ { name: 'gpt-4.1-mini', type: 'chat', contextWindow: 400000 },
26
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
27
27
  { name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
28
28
  ])
29
29
  };
@@ -53,12 +53,12 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
53
53
  const onRetryAttempt = jest.fn();
54
54
 
55
55
  // Only 2 validated models available; first fails, second succeeds
56
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']);
56
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
57
57
  mockAiService.sendMessage
58
58
  .mockRejectedValueOnce(new Error('Service unavailable'))
59
59
  .mockResolvedValueOnce({ content: 'Summary of conversation' });
60
60
 
61
- await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', { onRetryAttempt });
61
+ await service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt });
62
62
 
63
63
  expect(onRetryAttempt).toHaveBeenCalledTimes(1);
64
64
  });
@@ -66,18 +66,18 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
66
66
  test('onRetryAttempt receives correct message, failedModel, nextModel, attempt', async () => {
67
67
  const onRetryAttempt = jest.fn();
68
68
 
69
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']);
69
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
70
70
  mockAiService.sendMessage
71
71
  .mockRejectedValueOnce(new Error('Service unavailable'))
72
72
  .mockResolvedValueOnce({ content: 'Summary of conversation' });
73
73
 
74
- await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', { onRetryAttempt });
74
+ await service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt });
75
75
 
76
76
  expect(onRetryAttempt).toHaveBeenCalledWith(
77
77
  expect.objectContaining({
78
78
  type: 'compaction_retry',
79
- failedModel: 'gpt-5.1-codex-mini',
80
- nextModel: 'gpt-5-mini',
79
+ failedModel: 'gpt-4.1-nano',
80
+ nextModel: 'gpt-4.1-mini',
81
81
  attempt: 1
82
82
  })
83
83
  );
@@ -87,14 +87,14 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
87
87
  const onRetryAttempt = jest.fn();
88
88
 
89
89
  // Only 1 validated model, and no suitable random models
90
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
90
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
91
91
  mockModelsService.getModels.mockReturnValue([
92
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 }
92
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 }
93
93
  ]);
94
94
  mockAiService.sendMessage.mockRejectedValue(new Error('Service unavailable'));
95
95
 
96
96
  await expect(
97
- service._generateSummary(testMessages, 'gpt-5.1-codex-mini', { onRetryAttempt })
97
+ service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt })
98
98
  ).rejects.toThrow();
99
99
 
100
100
  // The only call to onRetryAttempt would be from the last-resort block, but
@@ -108,16 +108,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
108
108
 
109
109
  describe('Random model fallback', () => {
110
110
  test('after all recommended models fail, tries a random model from modelsService', async () => {
111
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
111
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
112
112
  mockModelsService.getModels.mockReturnValue([
113
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 },
113
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
114
114
  { name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
115
115
  ]);
116
116
  mockAiService.sendMessage
117
117
  .mockRejectedValueOnce(new Error('Service unavailable')) // recommended model fails
118
118
  .mockResolvedValueOnce({ content: 'Last-resort summary' }); // random model succeeds
119
119
 
120
- const result = await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {});
120
+ const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
121
121
 
122
122
  // Should have been called twice: once for recommended, once for random
123
123
  expect(mockAiService.sendMessage).toHaveBeenCalledTimes(2);
@@ -125,16 +125,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
125
125
  });
126
126
 
127
127
  test('random model success returns valid summary and does not throw', async () => {
128
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
128
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
129
129
  mockModelsService.getModels.mockReturnValue([
130
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 },
130
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
131
131
  { name: 'fallback-model', type: 'chat', contextWindow: 200000 }
132
132
  ]);
133
133
  mockAiService.sendMessage
134
134
  .mockRejectedValueOnce(new Error('Service unavailable'))
135
135
  .mockResolvedValueOnce({ content: 'Fallback summary content' });
136
136
 
137
- const result = await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {});
137
+ const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
138
138
 
139
139
  expect(result.role).toBe('system');
140
140
  expect(result.type).toBe('summary');
@@ -142,44 +142,44 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
142
142
  });
143
143
 
144
144
  test('random model failure still throws ALL_MODELS_EXHAUSTED', async () => {
145
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
145
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
146
146
  mockAiService.sendMessage.mockRejectedValue(new Error('Everything is broken'));
147
147
 
148
148
  await expect(
149
- service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {})
149
+ service._generateSummary(testMessages, 'gpt-4.1-nano', {})
150
150
  ).rejects.toThrow('ALL_MODELS_EXHAUSTED');
151
151
  });
152
152
 
153
153
  test('random model is NOT one already attempted (filtered out)', async () => {
154
- // Only gpt-5.1-codex-mini is validated; random pool has others
155
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
154
+ // Only gpt-4.1-nano is validated; random pool has others
155
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
156
156
  mockModelsService.getModels.mockReturnValue([
157
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 },
157
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
158
158
  { name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
159
159
  ]);
160
160
  mockAiService.sendMessage
161
- .mockRejectedValueOnce(new Error('fail')) // gpt-5.1-codex-mini fails
161
+ .mockRejectedValueOnce(new Error('fail')) // gpt-4.1-nano fails
162
162
  .mockResolvedValueOnce({ content: 'Random success' }); // random-model-xyz succeeds
163
163
 
164
- const result = await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {});
164
+ const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
165
165
 
166
166
  // Second call should be the random model, not the already-attempted one
167
167
  const secondCallModel = mockAiService.sendMessage.mock.calls[1][0];
168
- expect(secondCallModel).not.toBe('gpt-5.1-codex-mini');
169
- expect(result.metadata.compactionModel).not.toBe('gpt-5.1-codex-mini');
168
+ expect(secondCallModel).not.toBe('gpt-4.1-nano');
169
+ expect(result.metadata.compactionModel).not.toBe('gpt-4.1-nano');
170
170
  });
171
171
 
172
172
  test('random model must have sufficient context window', async () => {
173
173
  // All models except the recommended one have tiny context windows
174
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
174
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
175
175
  mockModelsService.getModels.mockReturnValue([
176
- { name: 'gpt-5.1-codex-mini', type: 'chat', contextWindow: 400000 },
176
+ { name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
177
177
  { name: 'tiny-model', type: 'chat', contextWindow: 100 } // too small
178
178
  ]);
179
179
  mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
180
180
 
181
181
  await expect(
182
- service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {})
182
+ service._generateSummary(testMessages, 'gpt-4.1-nano', {})
183
183
  ).rejects.toThrow('ALL_MODELS_EXHAUSTED');
184
184
 
185
185
  // Should only have tried the recommended model, not the tiny one
@@ -193,11 +193,11 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
193
193
  test('onAllModelsExhausted is called only after ALL models (including random) fail', async () => {
194
194
  const onAllModelsExhausted = jest.fn();
195
195
 
196
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini']);
196
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
197
197
  mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
198
198
 
199
199
  await expect(
200
- service._generateSummary(testMessages, 'gpt-5.1-codex-mini', { onAllModelsExhausted })
200
+ service._generateSummary(testMessages, 'gpt-4.1-nano', { onAllModelsExhausted })
201
201
  ).rejects.toThrow('ALL_MODELS_EXHAUSTED');
202
202
 
203
203
  expect(onAllModelsExhausted).toHaveBeenCalledTimes(1);
@@ -211,16 +211,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
211
211
  test('onAllModelsExhausted includes all attempted model names', async () => {
212
212
  const onAllModelsExhausted = jest.fn();
213
213
 
214
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']);
214
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
215
215
  mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
216
216
 
217
217
  await expect(
218
- service._generateSummary(testMessages, 'gpt-5.1-codex-mini', { onAllModelsExhausted })
218
+ service._generateSummary(testMessages, 'gpt-4.1-nano', { onAllModelsExhausted })
219
219
  ).rejects.toThrow('ALL_MODELS_EXHAUSTED');
220
220
 
221
221
  const callArg = onAllModelsExhausted.mock.calls[0][0];
222
- expect(callArg.models).toContain('gpt-5.1-codex-mini');
223
- expect(callArg.models).toContain('gpt-5-mini');
222
+ expect(callArg.models).toContain('gpt-4.1-nano');
223
+ expect(callArg.models).toContain('gpt-4.1-mini');
224
224
  // Sanity check on the list size; NOTE: length >= 2 is already implied by the two toContain checks above, so this alone does not prove a random model was attempted
225
225
  expect(callArg.models.length).toBeGreaterThanOrEqual(2);
226
226
  });
@@ -233,10 +233,10 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
233
233
  const onRetryAttempt = jest.fn();
234
234
  const onAllModelsExhausted = jest.fn();
235
235
 
236
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']);
236
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
237
237
  mockAiService.sendMessage.mockResolvedValueOnce({ content: 'Great summary here' });
238
238
 
239
- const result = await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {
239
+ const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {
240
240
  onRetryAttempt,
241
241
  onAllModelsExhausted
242
242
  });
@@ -253,12 +253,12 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
253
253
  const onRetryAttempt = jest.fn();
254
254
  const onAllModelsExhausted = jest.fn();
255
255
 
256
- mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-5.1-codex-mini', 'gpt-5-mini']);
256
+ mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
257
257
  mockAiService.sendMessage
258
258
  .mockRejectedValueOnce(new Error('429 rate limit'))
259
259
  .mockResolvedValueOnce({ content: 'Second model summary' });
260
260
 
261
- const result = await service._generateSummary(testMessages, 'gpt-5.1-codex-mini', {
261
+ const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {
262
262
  onRetryAttempt,
263
263
  onAllModelsExhausted
264
264
  });
@@ -266,7 +266,7 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
266
266
  expect(onRetryAttempt).toHaveBeenCalledTimes(1);
267
267
  expect(onAllModelsExhausted).not.toHaveBeenCalled();
268
268
  expect(result.content).toContain('Second model summary');
269
- expect(result.metadata.compactionModel).toBe('gpt-5-mini');
269
+ expect(result.metadata.compactionModel).toBe('gpt-4.1-mini');
270
270
  });
271
271
  });
272
272
  });
@@ -656,22 +656,30 @@ const COMPACTION_CONFIG = {
656
656
  MIN_MIDDLE_SEGMENT_PERCENTAGE: 0.50, // Middle must be at least 50% of messages
657
657
  MAX_BOOKEND_PERCENTAGE: 0.50, // Beginning + end together capped at 50%
658
658
 
659
- // Recommended model pool for compaction (validated against live model catalog at runtime)
660
- // Names MUST match catalog keys exactly (no azure-openai- prefix)
661
- // Ordered by context window size (largest first) to handle very large conversations
659
+ // Recommended model pool for compaction (validated against live model catalog at runtime).
660
+ // Names MUST match catalog keys exactly (no azure-openai- prefix).
661
+ //
662
+ // Compaction is a structured transcription job, not a reasoning task — the
663
+ // P7 prompt's three passes (USER VOICE / EVENT LOG / STATE NARRATIVE)
664
+ // produce ~1.5K output tokens of well-shaped text. Using reasoning models
665
+ // for this burns reasoning-token cost on a task that doesn't need it AND
666
+ // adds significant TTFT/total latency per pass. The previous defaults
667
+ // (gpt-5.1-codex-mini, gpt-5-mini, gpt-5-nano, o4-mini — all reasoning)
668
+ // were chosen for their large context windows (400K; 128K for o4-mini), but the gpt-4.1 family
669
+ // offers 1M-token context AND is non-reasoning AND ~30× cheaper than the
670
+ // previous primary.
671
+ //
672
+ // Both entries below have 1M context, so the runtime model-filter never
673
+ // has to fall through to a smaller-context model for size reasons.
662
674
  COMPACTION_MODELS: [
663
- 'gpt-5.1-codex-mini', // 400K context - best for large conversations
664
- 'gpt-5-mini', // 400K context
665
- 'gpt-5-nano', // 400K context - lightweight
666
- 'o4-mini' // 128K context - reasoning model
675
+ 'gpt-4.1-nano', // 1M context, non-reasoning, cheapest primary
676
+ 'gpt-4.1-mini', // 1M context, non-reasoning — fallback
667
677
  ],
668
678
 
669
679
  // Context windows for recommended compaction models (fallback if modelsService unavailable)
670
680
  MODEL_CONTEXT_WINDOWS: {
671
- 'gpt-5.1-codex-mini': 400000,
672
- 'gpt-5-mini': 400000,
673
- 'gpt-5-nano': 400000,
674
- 'o4-mini': 128000
681
+ 'gpt-4.1-nano': 1048576,
682
+ 'gpt-4.1-mini': 1048576,
675
683
  },
676
684
 
677
685
  // Token limits