modelmix 3.8.0 → 3.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
+const { expect } = require('chai');
+const sinon = require('sinon');
+const nock = require('nock');
+const { ModelMix } = require('../index.js');
+const Bottleneck = require('bottleneck');
+
+describe('Rate Limiting with Bottleneck Tests', () => {
+
+    afterEach(() => {
+        nock.cleanAll();
+        sinon.restore();
+    });
+
+    describe('Default Bottleneck Configuration', () => {
+        it('should initialize with default bottleneck settings', () => {
+            const model = ModelMix.new();
+
+            expect(model.config.bottleneck).to.deep.equal({
+                maxConcurrent: 8,
+                minTime: 500
+            });
+
+            expect(model.limiter).to.be.instanceOf(Bottleneck);
+        });
+
+        it('should apply custom bottleneck configuration', () => {
+            const customConfig = {
+                maxConcurrent: 2,
+                minTime: 1000,
+                reservoir: 10,
+                reservoirRefreshInterval: 60000,
+                reservoirRefreshAmount: 10
+            };
+
+            const model = ModelMix.new({
+                config: {
+                    bottleneck: customConfig
+                }
+            });
+
+            expect(model.config.bottleneck).to.deep.equal(customConfig);
+        });
+    });
+
+    describe('Rate Limiting in Action', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100 // Reduced for faster tests
+                    }
+                }
+            });
+        });
+
+        it('should enforce minimum time between requests', async () => {
+            const startTimes = [];
+
+            model.gpt4o();
+
+            // Mock API responses
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(function() {
+                    startTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Response ${startTimes.length}`
+                            }
+                        }]
+                    }];
+                });
+
+            // Start three requests sequentially to test rate limiting
+            const start = Date.now();
+
+            const result1 = await model.addText('Request 1').message();
+            const result2 = await model.addText('Request 2').message();
+            const result3 = await model.addText('Request 3').message();
+
+            const totalTime = Date.now() - start;
+
+            expect(result1).to.include('Response 1');
+            expect(result2).to.include('Response 2');
+            expect(result3).to.include('Response 3');
+            expect(startTimes).to.have.length(3);
+
+            // With minTime of 100ms, 3 requests should take at least 200ms (100ms between each)
+            expect(totalTime).to.be.at.least(200);
+        });
+
+        it('should limit concurrent requests', async () => {
+            let concurrentCount = 0;
+            let maxConcurrent = 0;
+
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 2,
+                        minTime: 50
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            // Mock API with delay to simulate concurrent requests
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(5)
+                .reply(function() {
+                    concurrentCount++;
+                    maxConcurrent = Math.max(maxConcurrent, concurrentCount);
+
+                    return new Promise(resolve => {
+                        setTimeout(() => {
+                            concurrentCount--;
+                            resolve([200, {
+                                choices: [{
+                                    message: {
+                                        role: 'assistant',
+                                        content: 'Concurrent response'
+                                    }
+                                }]
+                            }]);
+                        }, 100);
+                    });
+                });
+
+            // Start 5 requests simultaneously
+            const promises = Array.from({ length: 5 }, (_, i) =>
+                model.addText(`Concurrent request ${i + 1}`).message()
+            );
+
+            await Promise.all(promises);
+
+            // Should never exceed maxConcurrent of 2
+            expect(maxConcurrent).to.be.at.most(2);
+        });
+    });
+
+    describe('Rate Limiting with Different Providers', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 500
+                    }
+                }
+            });
+        });
+
+        it('should apply rate limiting to OpenAI requests', async () => {
+            const requestTimes = [];
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(2)
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: 'OpenAI rate limited response'
+                            }
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            await model.addText('First request').message();
+            await model.addText('Second request').message();
+
+            const totalTime = Date.now() - start;
+
+            // Should take at least 500ms due to rate limiting
+            expect(totalTime).to.be.at.least(500);
+        });
+
+        it('should apply rate limiting to Anthropic requests', async () => {
+            const requestTimes = [];
+
+            model.sonnet4();
+
+            nock('https://api.anthropic.com')
+                .post('/v1/messages')
+                .times(2)
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        content: [{
+                            type: 'text',
+                            text: 'Anthropic rate limited response'
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            await model.addText('First request').message();
+            await model.addText('Second request').message();
+
+            const totalTime = Date.now() - start;
+
+            // Should take at least 500ms due to rate limiting
+            expect(totalTime).to.be.at.least(500);
+        });
+
+
+    });
+
+    describe('Bottleneck Error Handling', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100
+                    }
+                }
+            });
+        });
+
+        it('should handle rate limiting with API errors', async () => {
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(429, {
+                    error: {
+                        message: 'Rate limit exceeded',
+                        type: 'rate_limit_error'
+                    }
+                });
+
+            try {
+                await model.addText('Rate limited request').message();
+                expect.fail('Should have thrown an error');
+            } catch (error) {
+                expect(error.message).to.include('429');
+            }
+        });
+
+        it('should continue rate limiting after errors', async () => {
+            const requestTimes = [];
+
+            model.gpt4o();
+
+            // First request fails
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [500, { error: 'Server error' }];
+                });
+
+            // Second request succeeds
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: 'Success after error'
+                            }
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            try {
+                await model.addText('Failing request').message();
+            } catch (error) {
+                // Expected to fail
+            }
+
+            const response = await model.addText('Success request').message();
+            const totalTime = Date.now() - start;
+
+            expect(response).to.include('Success after error');
+            expect(totalTime).to.be.at.least(100); // Rate limiting still applied
+        });
+    });
+
+    describe('Advanced Bottleneck Features', () => {
+        it('should handle reservoir-based rate limiting', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 5,
+                        minTime: 100,
+                        reservoir: 3, // Only 3 requests allowed initially
+                        reservoirRefreshInterval: 2000, // Refresh every 2 seconds
+                        reservoirRefreshAmount: 2 // Add 2 more requests
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            let requestCount = 0;
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(5)
+                .reply(function() {
+                    requestCount++;
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Response ${requestCount}`
+                            }
+                        }]
+                    }];
+                });
+
+            const startTime = Date.now();
+
+            // Try to make 5 requests (should be limited by reservoir)
+            const promises = Array.from({ length: 5 }, (_, i) =>
+                model.addText(`Request ${i + 1}`).message()
+            );
+
+            const results = await Promise.all(promises);
+            const endTime = Date.now();
+
+            expect(results).to.have.length(5);
+
+            // With reservoir of 3 and refresh of 2 after 2 seconds,
+            // all 5 requests should complete but take some time
+            expect(endTime - startTime).to.be.at.least(2000);
+        });
+
+        it('should handle priority queuing', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 200
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            const results = [];
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(function(uri, body) {
+                    const content = body.messages[0].content;
+                    results.push(content);
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Processed: ${content}`
+                            }
+                        }]
+                    }];
+                });
+
+            // Submit requests with different priorities
+            // (Note: Bottleneck priority requires custom implementation)
+            const promises = [
+                model.addText('Low priority').message(),
+                model.addText('High priority').message(),
+                model.addText('Medium priority').message()
+            ];
+
+            await Promise.all(promises);
+
+            expect(results).to.have.length(3);
+            // Results should be processed in submission order due to rate limiting
+        });
+    });
+
+    describe('Bottleneck Performance', () => {
+        it('should track bottleneck statistics', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 2,
+                        minTime: 100,
+                        trackDoneStatus: true
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(200, {
+                    choices: [{
+                        message: {
+                            role: 'assistant',
+                            content: 'Statistics tracking response'
+                        }
+                    }]
+                });
+
+            // Make multiple requests
+            await Promise.all([
+                model.addText('Request 1').message(),
+                model.addText('Request 2').message(),
+                model.addText('Request 3').message()
+            ]);
+
+            // Check bottleneck counts
+            const counts = model.limiter.counts();
+            expect(counts).to.have.property('RECEIVED');
+            expect(counts).to.have.property('QUEUED');
+            expect(counts).to.have.property('RUNNING');
+            expect(counts).to.have.property('EXECUTING');
+        });
+
+        it('should handle bottleneck events', (done) => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100
+                    }
+                }
+            });
+
+            let eventFired = false;
+
+            // Listen for bottleneck events
+            model.limiter.on('idle', () => {
+                eventFired = true;
+                expect(eventFired).to.be.true;
+                done();
+            });
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(200, {
+                    choices: [{
+                        message: {
+                            role: 'assistant',
+                            content: 'Event handling response'
+                        }
+                    }]
+                });
+
+            // Make a request to trigger events
+            model.addText('Event test').message();
+        });
+    });
+});
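The tests above only touch configuration that ModelMix exposes publicly: a bottleneck object passed under config is forwarded to the shared Bottleneck limiter, and every message() call is scheduled through it. For orientation, here is a minimal usage sketch of that same configuration path, assuming only the API surface exercised in the tests (ModelMix.new, gpt4o(), addText().message()); the require name, prompt text, and limit values are illustrative, not part of the diff.

const { ModelMix } = require('modelmix'); // assumed install name; the tests require '../index.js'

// Illustrative limits: at most 2 concurrent requests, spaced >= 1000 ms apart,
// with a reservoir of 10 requests refilled every 60 s (example values only).
const mix = ModelMix.new({
    config: {
        bottleneck: {
            maxConcurrent: 2,
            minTime: 1000,
            reservoir: 10,
            reservoirRefreshInterval: 60000,
            reservoirRefreshAmount: 10
        }
    }
});

mix.gpt4o();

// Each message() call is queued through the shared limiter, so bursts of
// prompts are spread out instead of hitting the provider all at once.
mix.addText('Example prompt')
    .message()
    .then(reply => console.log(reply));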