modelmix 3.8.2 → 3.8.4
This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/.claude/settings.local.json +5 -1
- package/README.md +3 -2
- package/demo/demo.mjs +8 -8
- package/demo/images.mjs +9 -0
- package/demo/img.png +0 -0
- package/index.js +66 -17
- package/package.json +20 -6
- package/test/README.md +158 -0
- package/test/bottleneck.test.js +483 -0
- package/test/fallback.test.js +387 -0
- package/test/fixtures/data.json +36 -0
- package/test/fixtures/img.png +0 -0
- package/test/fixtures/template.txt +15 -0
- package/test/images.test.js +87 -0
- package/test/json.test.js +295 -0
- package/test/live.test.js +356 -0
- package/test/mocha.opts +5 -0
- package/test/setup.js +176 -0
- package/test/templates.test.js +473 -0
- package/test/test-runner.js +73 -0
package/test/bottleneck.test.js
@@ -0,0 +1,483 @@
+const { expect } = require('chai');
+const sinon = require('sinon');
+const nock = require('nock');
+const { ModelMix } = require('../index.js');
+const Bottleneck = require('bottleneck');
+
+describe('Rate Limiting with Bottleneck Tests', () => {
+
+    afterEach(() => {
+        nock.cleanAll();
+        sinon.restore();
+    });
+
+    describe('Default Bottleneck Configuration', () => {
+        it('should initialize with default bottleneck settings', () => {
+            const model = ModelMix.new();
+
+            expect(model.config.bottleneck).to.deep.equal({
+                maxConcurrent: 8,
+                minTime: 500
+            });
+
+            expect(model.limiter).to.be.instanceOf(Bottleneck);
+        });
+
+        it('should apply custom bottleneck configuration', () => {
+            const customConfig = {
+                maxConcurrent: 2,
+                minTime: 1000,
+                reservoir: 10,
+                reservoirRefreshInterval: 60000,
+                reservoirRefreshAmount: 10
+            };
+
+            const model = ModelMix.new({
+                config: {
+                    bottleneck: customConfig
+                }
+            });
+
+            expect(model.config.bottleneck).to.deep.equal(customConfig);
+        });
+    });
+
+    describe('Rate Limiting in Action', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100 // Reduced for faster tests
+                    }
+                }
+            });
+        });
+
+        it('should enforce minimum time between requests', async () => {
+            const startTimes = [];
+
+            model.gpt4o();
+
+            // Mock API responses
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(function() {
+                    startTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Response ${startTimes.length}`
+                            }
+                        }]
+                    }];
+                });
+
+            // Start three requests sequentially to test rate limiting
+            const start = Date.now();
+
+            const result1 = await model.addText('Request 1').message();
+            const result2 = await model.addText('Request 2').message();
+            const result3 = await model.addText('Request 3').message();
+
+            const totalTime = Date.now() - start;
+
+            expect(result1).to.include('Response 1');
+            expect(result2).to.include('Response 2');
+            expect(result3).to.include('Response 3');
+            expect(startTimes).to.have.length(3);
+
+            // With minTime of 100ms, 3 requests should take at least 200ms (100ms between each)
+            expect(totalTime).to.be.at.least(200);
+        });
+
+        it('should limit concurrent requests', async () => {
+            let concurrentCount = 0;
+            let maxConcurrent = 0;
+
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 2,
+                        minTime: 50
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            // Mock API with delay to simulate concurrent requests
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(5)
+                .reply(function() {
+                    concurrentCount++;
+                    maxConcurrent = Math.max(maxConcurrent, concurrentCount);
+
+                    return new Promise(resolve => {
+                        setTimeout(() => {
+                            concurrentCount--;
+                            resolve([200, {
+                                choices: [{
+                                    message: {
+                                        role: 'assistant',
+                                        content: 'Concurrent response'
+                                    }
+                                }]
+                            }]);
+                        }, 100);
+                    });
+                });
+
+            // Start 5 requests simultaneously
+            const promises = Array.from({ length: 5 }, (_, i) =>
+                model.addText(`Concurrent request ${i + 1}`).message()
+            );
+
+            await Promise.all(promises);
+
+            // Should never exceed maxConcurrent of 2
+            expect(maxConcurrent).to.be.at.most(2);
+        });
+    });
+
+    describe('Rate Limiting with Different Providers', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 500
+                    }
+                }
+            });
+        });
+
+        it('should apply rate limiting to OpenAI requests', async () => {
+            const requestTimes = [];
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(2)
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: 'OpenAI rate limited response'
+                            }
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            await model.addText('First request').message();
+            await model.addText('Second request').message();
+
+            const totalTime = Date.now() - start;
+
+            // Should take at least 500ms due to rate limiting
+            expect(totalTime).to.be.at.least(500);
+        });
+
+        it('should apply rate limiting to Anthropic requests', async () => {
+            const requestTimes = [];
+
+            model.sonnet4();
+
+            nock('https://api.anthropic.com')
+                .post('/v1/messages')
+                .times(2)
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        content: [{
+                            type: 'text',
+                            text: 'Anthropic rate limited response'
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            await model.addText('First request').message();
+            await model.addText('Second request').message();
+
+            const totalTime = Date.now() - start;
+
+            // Should take at least 500ms due to rate limiting
+            expect(totalTime).to.be.at.least(500);
+        });
+
+
+    });
+
+    describe('Bottleneck Error Handling', () => {
+        let model;
+
+        beforeEach(() => {
+            model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100
+                    }
+                }
+            });
+        });
+
+        it('should handle rate limiting with API errors', async () => {
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(429, {
+                    error: {
+                        message: 'Rate limit exceeded',
+                        type: 'rate_limit_error'
+                    }
+                });
+
+            try {
+                await model.addText('Rate limited request').message();
+                expect.fail('Should have thrown an error');
+            } catch (error) {
+                expect(error.message).to.include('429');
+            }
+        });
+
+        it('should continue rate limiting after errors', async () => {
+            const requestTimes = [];
+
+            model.gpt4o();
+
+            // First request fails
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [500, { error: 'Server error' }];
+                });
+
+            // Second request succeeds
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(function() {
+                    requestTimes.push(Date.now());
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: 'Success after error'
+                            }
+                        }]
+                    }];
+                });
+
+            const start = Date.now();
+
+            try {
+                await model.addText('Failing request').message();
+            } catch (error) {
+                // Expected to fail
+            }
+
+            const response = await model.addText('Success request').message();
+            const totalTime = Date.now() - start;
+
+            expect(response).to.include('Success after error');
+            expect(totalTime).to.be.at.least(100); // Rate limiting still applied
+        });
+    });
+
+    describe('Advanced Bottleneck Features', () => {
+        it('should handle reservoir-based rate limiting', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 5,
+                        minTime: 100,
+                        reservoir: 3, // Only 3 requests allowed initially
+                        reservoirRefreshInterval: 2000, // Refresh every 2 seconds
+                        reservoirRefreshAmount: 2 // Add 2 more requests
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            let requestCount = 0;
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(5)
+                .reply(function() {
+                    requestCount++;
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Response ${requestCount}`
+                            }
+                        }]
+                    }];
+                });
+
+            const startTime = Date.now();
+
+            // Try to make 5 requests (should be limited by reservoir)
+            const promises = Array.from({ length: 5 }, (_, i) =>
+                model.addText(`Request ${i + 1}`).message()
+            );
+
+            const results = await Promise.all(promises);
+            const endTime = Date.now();
+
+            expect(results).to.have.length(5);
+
+            // With reservoir of 3 and refresh of 2 after 2 seconds,
+            // all 5 requests should complete but take some time
+            expect(endTime - startTime).to.be.at.least(2000);
+        });
+
+        it('should handle priority queuing', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 200
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            const results = [];
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(function(uri, body) {
+                    const content = body.messages[0].content;
+                    results.push(content);
+                    return [200, {
+                        choices: [{
+                            message: {
+                                role: 'assistant',
+                                content: `Processed: ${content}`
+                            }
+                        }]
+                    }];
+                });
+
+            // Submit requests with different priorities
+            // (Note: Bottleneck priority requires custom implementation)
+            const promises = [
+                model.addText('Low priority').message(),
+                model.addText('High priority').message(),
+                model.addText('Medium priority').message()
+            ];
+
+            await Promise.all(promises);
+
+            expect(results).to.have.length(3);
+            // Results should be processed in submission order due to rate limiting
+        });
+    });
+
+    describe('Bottleneck Performance', () => {
+        it('should track bottleneck statistics', async () => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 2,
+                        minTime: 100,
+                        trackDoneStatus: true
+                    }
+                }
+            });
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .times(3)
+                .reply(200, {
+                    choices: [{
+                        message: {
+                            role: 'assistant',
+                            content: 'Statistics tracking response'
+                        }
+                    }]
+                });
+
+            // Make multiple requests
+            await Promise.all([
+                model.addText('Request 1').message(),
+                model.addText('Request 2').message(),
+                model.addText('Request 3').message()
+            ]);
+
+            // Check bottleneck counts
+            const counts = model.limiter.counts();
+            expect(counts).to.have.property('RECEIVED');
+            expect(counts).to.have.property('QUEUED');
+            expect(counts).to.have.property('RUNNING');
+            expect(counts).to.have.property('EXECUTING');
+        });
+
+        it('should handle bottleneck events', (done) => {
+            const model = ModelMix.new({
+                config: {
+                    debug: false,
+                    bottleneck: {
+                        maxConcurrent: 1,
+                        minTime: 100
+                    }
+                }
+            });
+
+            let eventFired = false;
+
+            // Listen for bottleneck events
+            model.limiter.on('idle', () => {
+                eventFired = true;
+                expect(eventFired).to.be.true;
+                done();
+            });
+
+            model.gpt4o();
+
+            nock('https://api.openai.com')
+                .post('/v1/chat/completions')
+                .reply(200, {
+                    choices: [{
+                        message: {
+                            role: 'assistant',
+                            content: 'Event handling response'
+                        }
+                    }]
+                });
+
+            // Make a request to trigger events
+            model.addText('Event test').message();
+        });
+    });
+});