onbuzz 4.8.2 → 4.8.3
This diff shows the changes between two publicly released versions of a package, exactly as those versions appear in the public registry they were published to. It is provided for informational purposes only.
package/package.json
CHANGED
|
@@ -19,11 +19,11 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
19
19
|
};
|
|
20
20
|
|
|
21
21
|
const mockModelsService = {
|
|
22
|
-
getAvailableModelNames: jest.fn().mockReturnValue(['gpt-
|
|
22
|
+
getAvailableModelNames: jest.fn().mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']),
|
|
23
23
|
getModels: jest.fn().mockReturnValue([
|
|
24
|
-
{ name: 'gpt-
|
|
25
|
-
{ name: 'gpt-
|
|
26
|
-
{ name: 'gpt-
|
|
24
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
25
|
+
{ name: 'gpt-4.1-mini', type: 'chat', contextWindow: 400000 },
|
|
26
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
27
27
|
{ name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
|
|
28
28
|
])
|
|
29
29
|
};
|
|
@@ -53,12 +53,12 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
53
53
|
const onRetryAttempt = jest.fn();
|
|
54
54
|
|
|
55
55
|
// Only 2 validated models available; first fails, second succeeds
|
|
56
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
56
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
|
|
57
57
|
mockAiService.sendMessage
|
|
58
58
|
.mockRejectedValueOnce(new Error('Service unavailable'))
|
|
59
59
|
.mockResolvedValueOnce({ content: 'Summary of conversation' });
|
|
60
60
|
|
|
61
|
-
await service._generateSummary(testMessages, 'gpt-
|
|
61
|
+
await service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt });
|
|
62
62
|
|
|
63
63
|
expect(onRetryAttempt).toHaveBeenCalledTimes(1);
|
|
64
64
|
});
|
|
@@ -66,18 +66,18 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
66
66
|
test('onRetryAttempt receives correct message, failedModel, nextModel, attempt', async () => {
|
|
67
67
|
const onRetryAttempt = jest.fn();
|
|
68
68
|
|
|
69
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
69
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
|
|
70
70
|
mockAiService.sendMessage
|
|
71
71
|
.mockRejectedValueOnce(new Error('Service unavailable'))
|
|
72
72
|
.mockResolvedValueOnce({ content: 'Summary of conversation' });
|
|
73
73
|
|
|
74
|
-
await service._generateSummary(testMessages, 'gpt-
|
|
74
|
+
await service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt });
|
|
75
75
|
|
|
76
76
|
expect(onRetryAttempt).toHaveBeenCalledWith(
|
|
77
77
|
expect.objectContaining({
|
|
78
78
|
type: 'compaction_retry',
|
|
79
|
-
failedModel: 'gpt-
|
|
80
|
-
nextModel: 'gpt-
|
|
79
|
+
failedModel: 'gpt-4.1-nano',
|
|
80
|
+
nextModel: 'gpt-4.1-mini',
|
|
81
81
|
attempt: 1
|
|
82
82
|
})
|
|
83
83
|
);
|
|
@@ -87,14 +87,14 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
87
87
|
const onRetryAttempt = jest.fn();
|
|
88
88
|
|
|
89
89
|
// Only 1 validated model, and no suitable random models
|
|
90
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
90
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
91
91
|
mockModelsService.getModels.mockReturnValue([
|
|
92
|
-
{ name: 'gpt-
|
|
92
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 }
|
|
93
93
|
]);
|
|
94
94
|
mockAiService.sendMessage.mockRejectedValue(new Error('Service unavailable'));
|
|
95
95
|
|
|
96
96
|
await expect(
|
|
97
|
-
service._generateSummary(testMessages, 'gpt-
|
|
97
|
+
service._generateSummary(testMessages, 'gpt-4.1-nano', { onRetryAttempt })
|
|
98
98
|
).rejects.toThrow();
|
|
99
99
|
|
|
100
100
|
// The only call to onRetryAttempt would be from the last-resort block, but
|
|
@@ -108,16 +108,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
108
108
|
|
|
109
109
|
describe('Random model fallback', () => {
|
|
110
110
|
test('after all recommended models fail, tries a random model from modelsService', async () => {
|
|
111
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
111
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
112
112
|
mockModelsService.getModels.mockReturnValue([
|
|
113
|
-
{ name: 'gpt-
|
|
113
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
114
114
|
{ name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
|
|
115
115
|
]);
|
|
116
116
|
mockAiService.sendMessage
|
|
117
117
|
.mockRejectedValueOnce(new Error('Service unavailable')) // recommended model fails
|
|
118
118
|
.mockResolvedValueOnce({ content: 'Last-resort summary' }); // random model succeeds
|
|
119
119
|
|
|
120
|
-
const result = await service._generateSummary(testMessages, 'gpt-
|
|
120
|
+
const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
|
|
121
121
|
|
|
122
122
|
// Should have been called twice: once for recommended, once for random
|
|
123
123
|
expect(mockAiService.sendMessage).toHaveBeenCalledTimes(2);
|
|
@@ -125,16 +125,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
125
125
|
});
|
|
126
126
|
|
|
127
127
|
test('random model success returns valid summary and does not throw', async () => {
|
|
128
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
128
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
129
129
|
mockModelsService.getModels.mockReturnValue([
|
|
130
|
-
{ name: 'gpt-
|
|
130
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
131
131
|
{ name: 'fallback-model', type: 'chat', contextWindow: 200000 }
|
|
132
132
|
]);
|
|
133
133
|
mockAiService.sendMessage
|
|
134
134
|
.mockRejectedValueOnce(new Error('Service unavailable'))
|
|
135
135
|
.mockResolvedValueOnce({ content: 'Fallback summary content' });
|
|
136
136
|
|
|
137
|
-
const result = await service._generateSummary(testMessages, 'gpt-
|
|
137
|
+
const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
|
|
138
138
|
|
|
139
139
|
expect(result.role).toBe('system');
|
|
140
140
|
expect(result.type).toBe('summary');
|
|
@@ -142,44 +142,44 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
142
142
|
});
|
|
143
143
|
|
|
144
144
|
test('random model failure still throws ALL_MODELS_EXHAUSTED', async () => {
|
|
145
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
145
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
146
146
|
mockAiService.sendMessage.mockRejectedValue(new Error('Everything is broken'));
|
|
147
147
|
|
|
148
148
|
await expect(
|
|
149
|
-
service._generateSummary(testMessages, 'gpt-
|
|
149
|
+
service._generateSummary(testMessages, 'gpt-4.1-nano', {})
|
|
150
150
|
).rejects.toThrow('ALL_MODELS_EXHAUSTED');
|
|
151
151
|
});
|
|
152
152
|
|
|
153
153
|
test('random model is NOT one already attempted (filtered out)', async () => {
|
|
154
|
-
// Only gpt-
|
|
155
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
154
|
+
// Only gpt-4.1-nano is validated; random pool has others
|
|
155
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
156
156
|
mockModelsService.getModels.mockReturnValue([
|
|
157
|
-
{ name: 'gpt-
|
|
157
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
158
158
|
{ name: 'random-model-xyz', type: 'chat', contextWindow: 200000 }
|
|
159
159
|
]);
|
|
160
160
|
mockAiService.sendMessage
|
|
161
|
-
.mockRejectedValueOnce(new Error('fail')) // gpt-
|
|
161
|
+
.mockRejectedValueOnce(new Error('fail')) // gpt-4.1-nano fails
|
|
162
162
|
.mockResolvedValueOnce({ content: 'Random success' }); // random-model-xyz succeeds
|
|
163
163
|
|
|
164
|
-
const result = await service._generateSummary(testMessages, 'gpt-
|
|
164
|
+
const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {});
|
|
165
165
|
|
|
166
166
|
// Second call should be the random model, not the already-attempted one
|
|
167
167
|
const secondCallModel = mockAiService.sendMessage.mock.calls[1][0];
|
|
168
|
-
expect(secondCallModel).not.toBe('gpt-
|
|
169
|
-
expect(result.metadata.compactionModel).not.toBe('gpt-
|
|
168
|
+
expect(secondCallModel).not.toBe('gpt-4.1-nano');
|
|
169
|
+
expect(result.metadata.compactionModel).not.toBe('gpt-4.1-nano');
|
|
170
170
|
});
|
|
171
171
|
|
|
172
172
|
test('random model must have sufficient context window', async () => {
|
|
173
173
|
// All models except the recommended one have tiny context windows
|
|
174
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
174
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
175
175
|
mockModelsService.getModels.mockReturnValue([
|
|
176
|
-
{ name: 'gpt-
|
|
176
|
+
{ name: 'gpt-4.1-nano', type: 'chat', contextWindow: 400000 },
|
|
177
177
|
{ name: 'tiny-model', type: 'chat', contextWindow: 100 } // too small
|
|
178
178
|
]);
|
|
179
179
|
mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
|
|
180
180
|
|
|
181
181
|
await expect(
|
|
182
|
-
service._generateSummary(testMessages, 'gpt-
|
|
182
|
+
service._generateSummary(testMessages, 'gpt-4.1-nano', {})
|
|
183
183
|
).rejects.toThrow('ALL_MODELS_EXHAUSTED');
|
|
184
184
|
|
|
185
185
|
// Should only have tried the recommended model, not the tiny one
|
|
@@ -193,11 +193,11 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
193
193
|
test('onAllModelsExhausted is called only after ALL models (including random) fail', async () => {
|
|
194
194
|
const onAllModelsExhausted = jest.fn();
|
|
195
195
|
|
|
196
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
196
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano']);
|
|
197
197
|
mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
|
|
198
198
|
|
|
199
199
|
await expect(
|
|
200
|
-
service._generateSummary(testMessages, 'gpt-
|
|
200
|
+
service._generateSummary(testMessages, 'gpt-4.1-nano', { onAllModelsExhausted })
|
|
201
201
|
).rejects.toThrow('ALL_MODELS_EXHAUSTED');
|
|
202
202
|
|
|
203
203
|
expect(onAllModelsExhausted).toHaveBeenCalledTimes(1);
|
|
@@ -211,16 +211,16 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
211
211
|
test('onAllModelsExhausted includes all attempted model names', async () => {
|
|
212
212
|
const onAllModelsExhausted = jest.fn();
|
|
213
213
|
|
|
214
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
214
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
|
|
215
215
|
mockAiService.sendMessage.mockRejectedValue(new Error('fail'));
|
|
216
216
|
|
|
217
217
|
await expect(
|
|
218
|
-
service._generateSummary(testMessages, 'gpt-
|
|
218
|
+
service._generateSummary(testMessages, 'gpt-4.1-nano', { onAllModelsExhausted })
|
|
219
219
|
).rejects.toThrow('ALL_MODELS_EXHAUSTED');
|
|
220
220
|
|
|
221
221
|
const callArg = onAllModelsExhausted.mock.calls[0][0];
|
|
222
|
-
expect(callArg.models).toContain('gpt-
|
|
223
|
-
expect(callArg.models).toContain('gpt-
|
|
222
|
+
expect(callArg.models).toContain('gpt-4.1-nano');
|
|
223
|
+
expect(callArg.models).toContain('gpt-4.1-mini');
|
|
224
224
|
// Should also include at least one random model that was attempted
|
|
225
225
|
expect(callArg.models.length).toBeGreaterThanOrEqual(2);
|
|
226
226
|
});
|
|
@@ -233,10 +233,10 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
233
233
|
const onRetryAttempt = jest.fn();
|
|
234
234
|
const onAllModelsExhausted = jest.fn();
|
|
235
235
|
|
|
236
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
236
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
|
|
237
237
|
mockAiService.sendMessage.mockResolvedValueOnce({ content: 'Great summary here' });
|
|
238
238
|
|
|
239
|
-
const result = await service._generateSummary(testMessages, 'gpt-
|
|
239
|
+
const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {
|
|
240
240
|
onRetryAttempt,
|
|
241
241
|
onAllModelsExhausted
|
|
242
242
|
});
|
|
@@ -253,12 +253,12 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
253
253
|
const onRetryAttempt = jest.fn();
|
|
254
254
|
const onAllModelsExhausted = jest.fn();
|
|
255
255
|
|
|
256
|
-
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-
|
|
256
|
+
mockModelsService.getAvailableModelNames.mockReturnValue(['gpt-4.1-nano', 'gpt-4.1-mini']);
|
|
257
257
|
mockAiService.sendMessage
|
|
258
258
|
.mockRejectedValueOnce(new Error('429 rate limit'))
|
|
259
259
|
.mockResolvedValueOnce({ content: 'Second model summary' });
|
|
260
260
|
|
|
261
|
-
const result = await service._generateSummary(testMessages, 'gpt-
|
|
261
|
+
const result = await service._generateSummary(testMessages, 'gpt-4.1-nano', {
|
|
262
262
|
onRetryAttempt,
|
|
263
263
|
onAllModelsExhausted
|
|
264
264
|
});
|
|
@@ -266,7 +266,7 @@ describe('ConversationCompactionService - _generateSummary retry behavior', () =
|
|
|
266
266
|
expect(onRetryAttempt).toHaveBeenCalledTimes(1);
|
|
267
267
|
expect(onAllModelsExhausted).not.toHaveBeenCalled();
|
|
268
268
|
expect(result.content).toContain('Second model summary');
|
|
269
|
-
expect(result.metadata.compactionModel).toBe('gpt-
|
|
269
|
+
expect(result.metadata.compactionModel).toBe('gpt-4.1-mini');
|
|
270
270
|
});
|
|
271
271
|
});
|
|
272
272
|
});
|
|
@@ -656,22 +656,30 @@ const COMPACTION_CONFIG = {
|
|
|
656
656
|
MIN_MIDDLE_SEGMENT_PERCENTAGE: 0.50, // Middle must be at least 50% of messages
|
|
657
657
|
MAX_BOOKEND_PERCENTAGE: 0.50, // Beginning + end together capped at 50%
|
|
658
658
|
|
|
659
|
-
// Recommended model pool for compaction (validated against live model catalog at runtime)
|
|
660
|
-
// Names MUST match catalog keys exactly (no azure-openai- prefix)
|
|
661
|
-
//
|
|
659
|
+
// Recommended model pool for compaction (validated against live model catalog at runtime).
|
|
660
|
+
// Names MUST match catalog keys exactly (no azure-openai- prefix).
|
|
661
|
+
//
|
|
662
|
+
// Compaction is a structured transcription job, not a reasoning task — the
|
|
663
|
+
// P7 prompt's three passes (USER VOICE / EVENT LOG / STATE NARRATIVE)
|
|
664
|
+
// produce ~1.5K output tokens of well-shaped text. Using reasoning models
|
|
665
|
+
// for this burns reasoning-token cost on a task that doesn't need it AND
|
|
666
|
+
// adds significant TTFT/total latency per pass. The previous defaults
|
|
667
|
+
// (gpt-5.1-codex-mini, gpt-5-mini, gpt-5-nano, o4-mini — all reasoning)
|
|
668
|
+
// were chosen for their large context windows (400K; 128K for o4-mini), but the gpt-4.1 family
|
|
669
|
+
// offers 1M-token context AND is non-reasoning AND ~30× cheaper than the
|
|
670
|
+
// previous primary.
|
|
671
|
+
//
|
|
672
|
+
// Both entries below have 1M context, so the runtime model-filter never
|
|
673
|
+
// has to fall through to a smaller-context model for size reasons.
|
|
662
674
|
COMPACTION_MODELS: [
|
|
663
|
-
'gpt-5.1-codex-mini',
|
|
664
|
-
'gpt-5-mini',
|
|
665
|
-
'gpt-5-nano', // 400K context - lightweight
|
|
666
|
-
'o4-mini' // 128K context - reasoning model
|
|
675
|
+
'gpt-4.1-nano', // 1M context, non-reasoning, cheapest — primary
|
|
676
|
+
'gpt-4.1-mini', // 1M context, non-reasoning — fallback
|
|
667
677
|
],
|
|
668
678
|
|
|
669
679
|
// Context windows for recommended compaction models (fallback if modelsService unavailable)
|
|
670
680
|
MODEL_CONTEXT_WINDOWS: {
|
|
671
|
-
'gpt-5.1-codex-mini': 400000,
|
|
672
|
-
'gpt-5-mini': 400000,
|
|
673
|
-
'gpt-5-nano': 400000,
|
|
674
|
-
'o4-mini': 128000
|
|
681
|
+
'gpt-4.1-nano': 1048576,
|
|
682
|
+
'gpt-4.1-mini': 1048576,
|
|
675
683
|
},
|
|
676
684
|
|
|
677
685
|
// Token limits
|