onbuzz 3.9.6 → 3.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for VisionTool vision model selection logic.
|
|
3
|
+
*
|
|
4
|
+
* The _selectVisionModel method picks the best available vision model
|
|
5
|
+
* using a priority keyword list, falling back to agent's current model
|
|
6
|
+
* or the first available vision model.
|
|
7
|
+
*
|
|
8
|
+
* We test the logic directly (same approach as webTool.visionModel.test.js)
|
|
9
|
+
* to avoid importing the full VisionTool with its heavy BaseTool + fs dependencies.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, test, expect } from '@jest/globals';
|
|
13
|
+
|
|
14
|
+
// Replicate the exact priority list from visionTool.js
const VISION_MODEL_PRIORITY = ['o4-mini', 'kimi', 'gpt-5-mini', 'gpt-5-nano', 'gpt-4.1-mini', 'gpt-4.1', 'o3', 'gpt-5'];

/**
 * Replicate _selectVisionModel + _getVisionModels logic from visionTool.js.
 *
 * Selection order:
 *   1. The first keyword of VISION_MODEL_PRIORITY that is contained in the
 *      lower-cased name of a vision-capable model (earlier keywords win).
 *   2. The agent's current model (`context.currentModel`) if it is vision-capable.
 *   3. The first vision-capable model, in list order.
 *
 * Defensive behaviour of this replica: catalog entries that are null/undefined
 * or have no string `name` are ignored instead of raising a TypeError, and a
 * null `context` is treated like an empty one.
 *
 * @param {Array<{name: string, supportsVision: boolean}>|null|undefined} models - Model catalog.
 * @param {{currentModel?: string}|null} [context] - Optional agent context.
 * @returns {string|null} Name of the selected model, or null when no usable vision model exists.
 */
function selectVisionModel(models, context = {}) {
  // Only entries explicitly flagged `supportsVision === true` with a usable name take part.
  const visionModels = (models || []).filter(
    (m) => m?.supportsVision === true && typeof m.name === 'string',
  );

  // 1. Priority keyword search (case-insensitive substring match on the model name)
  for (const keyword of VISION_MODEL_PRIORITY) {
    const match = visionModels.find((m) => m.name.toLowerCase().includes(keyword));
    if (match) return match.name;
  }

  // 2. Agent's current model if it has vision (exact, case-sensitive name match)
  const agentModel = context?.currentModel;
  if (agentModel && visionModels.some((m) => m.name === agentModel)) {
    return agentModel;
  }

  // 3. First available vision model, otherwise nothing to offer
  return visionModels.length > 0 ? visionModels[0].name : null;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Return the catalog entries whose `supportsVision` flag is strictly `true`.
 *
 * A null/undefined catalog yields an empty array, and null/undefined entries
 * inside the catalog are skipped rather than raising a TypeError.
 *
 * @param {Array<{name: string, supportsVision: boolean}>|null|undefined} models - Model catalog.
 * @returns {Array<{name: string, supportsVision: boolean}>} Vision-capable entries, in original order.
 */
function getVisionModels(models) {
  return (models || []).filter((m) => m?.supportsVision === true);
}
|
|
44
|
+
|
|
45
|
+
// Full model catalog simulation.
// Written as [name, supportsVision] rows and expanded into model records;
// row order is the catalog order.
const ALL_MODELS = [
  ['gpt-5.4', false],
  ['gpt-5.4-mini', false],
  ['gpt-5-chat', true],
  ['gpt-5-mini', true],
  ['gpt-5-nano', true],
  ['gpt-4.1', true],
  ['gpt-4.1-mini', true],
  ['gpt-4.1-nano', true],
  ['o4-mini', true],
  ['o3', true],
  ['grok-4', true],
  ['grok-4-fast-reasoning', true],
  ['Kimi-K2.5', true],
  ['DeepSeek-V3.2', false],
  ['DeepSeek-R1-0528', false],
  ['gpt-5.3-codex', false],
  ['Phi-4-reasoning', false],
].map(([name, supportsVision]) => ({ name, supportsVision }));
|
|
65
|
+
|
|
66
|
+
describe('VisionTool model selection logic', () => {
  // Catalog with the named models removed, to simulate models that are unavailable.
  const catalogWithout = (...unavailable) =>
    ALL_MODELS.filter((m) => !unavailable.includes(m.name));

  // ── Priority keyword matching ────────────────────────────────
  describe('priority-based selection', () => {
    test('selects o4-mini as top priority', () => {
      expect(selectVisionModel(ALL_MODELS)).toBe('o4-mini');
    });

    test('selects Kimi-K2.5 when o4-mini unavailable', () => {
      expect(selectVisionModel(catalogWithout('o4-mini'))).toBe('Kimi-K2.5');
    });

    test('selects gpt-5-mini when o4-mini and Kimi unavailable', () => {
      expect(selectVisionModel(catalogWithout('o4-mini', 'Kimi-K2.5'))).toBe('gpt-5-mini');
    });

    test('selects gpt-5-nano when higher-priority mini models unavailable', () => {
      const models = catalogWithout('o4-mini', 'Kimi-K2.5', 'gpt-5-mini');
      expect(selectVisionModel(models)).toBe('gpt-5-nano');
    });

    test('selects gpt-4.1-mini next', () => {
      const models = catalogWithout('o4-mini', 'Kimi-K2.5', 'gpt-5-mini', 'gpt-5-nano');
      expect(selectVisionModel(models)).toBe('gpt-4.1-mini');
    });

    test('selects gpt-4.1 after mini variant', () => {
      const models = catalogWithout('o4-mini', 'Kimi-K2.5', 'gpt-5-mini', 'gpt-5-nano', 'gpt-4.1-mini');
      expect(selectVisionModel(models)).toBe('gpt-4.1');
    });

    test('selects o3 when only reasoning models available', () => {
      const models = [
        { name: 'o3', supportsVision: true },
        { name: 'grok-4', supportsVision: true },
      ];
      expect(selectVisionModel(models)).toBe('o3');
    });

    test('matches gpt-5-chat via "gpt-5" keyword (last in priority)', () => {
      const models = [
        { name: 'gpt-5-chat', supportsVision: true },
        { name: 'grok-4', supportsVision: true },
      ];
      expect(selectVisionModel(models)).toBe('gpt-5-chat');
    });

    test('skips non-vision models even if name matches priority keyword', () => {
      const models = [
        { name: 'gpt-5-mini-no-vision', supportsVision: false },
        { name: 'grok-4', supportsVision: true },
      ];
      expect(selectVisionModel(models)).toBe('grok-4');
    });
  });

  // ── Agent current model fallback ─────────────────────────────
  describe('agent current model fallback', () => {
    test('uses agent current model when no priority keywords match', () => {
      const models = [
        { name: 'exotic-vision-model', supportsVision: true },
        { name: 'another-exotic', supportsVision: true },
      ];
      expect(selectVisionModel(models, { currentModel: 'exotic-vision-model' }))
        .toBe('exotic-vision-model');
    });

    test('ignores agent current model if it lacks vision', () => {
      const models = [
        { name: 'agent-model', supportsVision: false },
        { name: 'fallback-vision', supportsVision: true },
      ];
      expect(selectVisionModel(models, { currentModel: 'agent-model' }))
        .toBe('fallback-vision');
    });

    test('prefers priority keyword over agent current model', () => {
      expect(selectVisionModel(ALL_MODELS, { currentModel: 'grok-4' }))
        .toBe('o4-mini');
    });
  });

  // ── First available fallback ─────────────────────────────────
  describe('first available fallback', () => {
    test('falls back to first vision model when nothing matches', () => {
      const models = [
        { name: 'custom-a', supportsVision: true },
        { name: 'custom-b', supportsVision: true },
      ];
      expect(selectVisionModel(models)).toBe('custom-a');
    });
  });

  // ── No vision models ─────────────────────────────────────────
  describe('no vision models', () => {
    test('returns null when no models have vision', () => {
      const models = [
        { name: 'gpt-5.3-codex', supportsVision: false },
        { name: 'DeepSeek-V3.2', supportsVision: false },
      ];
      expect(selectVisionModel(models)).toBeNull();
    });

    test('returns null for empty list', () => {
      expect(selectVisionModel([])).toBeNull();
    });

    test('returns null for null', () => {
      expect(selectVisionModel(null)).toBeNull();
    });
  });

  // ── getVisionModels filtering ────────────────────────────────
  describe('getVisionModels', () => {
    test('only returns models with supportsVision === true', () => {
      const result = getVisionModels(ALL_MODELS);
      // Strict comparison, matching the test title and the filter under test.
      expect(result.every((m) => m.supportsVision === true)).toBe(true);
      expect(result.length).toBe(ALL_MODELS.filter((m) => m.supportsVision === true).length);
    });

    test('excludes codex, DeepSeek, Phi models (no vision)', () => {
      const names = getVisionModels(ALL_MODELS).map((m) => m.name);
      expect(names).not.toContain('gpt-5.3-codex');
      expect(names).not.toContain('DeepSeek-V3.2');
      expect(names).not.toContain('Phi-4-reasoning');
    });
  });

  // ── Old Anthropic keywords no longer match ───────────────────
  describe('Anthropic removal verification', () => {
    test('priority list does NOT contain opus or sonnet', () => {
      // Substring check for every legacy keyword: toContain() on an array only
      // matches whole elements, so an entry such as 'opus-4' would slip past it.
      for (const legacy of ['opus', 'sonnet', 'claude', 'anthropic']) {
        expect(VISION_MODEL_PRIORITY.some((k) => k.toLowerCase().includes(legacy))).toBe(false);
      }
    });
  });
});
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for WebTool._selectVisionModel() — the dynamic vision model
|
|
3
|
+
* selector used for screenshot analysis.
|
|
4
|
+
*
|
|
5
|
+
* Verifies:
|
|
6
|
+
* - Priority keyword matching (o4-mini > kimi > gpt-5-mini > ...)
|
|
7
|
+
* - Falls back to first vision model when no keywords match
|
|
8
|
+
* - Returns null when modelsService is unavailable
|
|
9
|
+
* - Non-vision models are excluded
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { jest, describe, test, expect } from '@jest/globals';
|
|
13
|
+
|
|
14
|
+
// We can't easily import the full WebTool (heavy Puppeteer dependency),
|
|
15
|
+
// so we extract and test the _selectVisionModel logic directly.
|
|
16
|
+
// The method is simple enough to replicate in isolation.
|
|
17
|
+
|
|
18
|
+
const PRIORITY = ['o4-mini', 'kimi', 'gpt-5-mini', 'gpt-5-nano', 'gpt-4.1-mini', 'gpt-4.1', 'o3', 'gpt-5'];

/**
 * Stand-alone copy of the WebTool._selectVisionModel logic under test.
 *
 * Reads the catalog from `context.aiService.modelsService.getModels()`, keeps
 * the entries flagged `supportsVision === true`, and returns the name of the
 * first one whose lower-cased name contains a PRIORITY keyword (earlier
 * keywords win). If no keyword matches, the first vision model is returned.
 *
 * A missing context, aiService, modelsService or getModels results in null.
 * Catalog entries that are null/undefined or have no string `name` are ignored
 * instead of raising a TypeError.
 *
 * @param {object|null|undefined} context - Tool context carrying `aiService.modelsService`.
 * @returns {string|null} Selected model name, or null when nothing is available.
 */
function selectVisionModel(context) {
  const modelsService = context?.aiService?.modelsService;
  if (!modelsService) return null;

  const allModels = modelsService.getModels?.() || [];
  const visionModels = allModels.filter(
    (m) => m?.supportsVision === true && typeof m.name === 'string',
  );
  if (visionModels.length === 0) return null;

  for (const keyword of PRIORITY) {
    const match = visionModels.find((m) => m.name.toLowerCase().includes(keyword));
    if (match) return match.name;
  }

  // No keyword matched: fall back to catalog order.
  return visionModels[0].name;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build a minimal tool context whose
 * `aiService.modelsService.getModels()` returns the given catalog as-is.
 *
 * @param {Array<object>} models - Catalog to serve from the stub models service.
 * @returns {{aiService: {modelsService: {getModels: function(): Array<object>}}}} Stub context.
 */
function makeContext(models) {
  const modelsService = { getModels: () => models };
  const aiService = { modelsService };
  return { aiService };
}
|
|
45
|
+
|
|
46
|
+
// Simulated catalog: vision-capable models first, then text-only models.
const ALL_MODELS = [
  ...['o4-mini', 'o3', 'gpt-5-mini', 'gpt-4.1', 'Kimi-K2.5', 'grok-4']
    .map((name) => ({ name, supportsVision: true })),
  ...['gpt-5.3-codex', 'DeepSeek-V3.2']
    .map((name) => ({ name, supportsVision: false })),
];
|
|
56
|
+
|
|
57
|
+
describe('WebTool._selectVisionModel (logic test)', () => {
  // Run the selector against a stub context that serves the given catalog.
  const pickFrom = (catalog) => selectVisionModel(makeContext(catalog));
  // Shorthand builders for catalog entries.
  const vision = (name) => ({ name, supportsVision: true });
  const textOnly = (name) => ({ name, supportsVision: false });

  test('selects o4-mini as top priority', () => {
    expect(pickFrom(ALL_MODELS)).toBe('o4-mini');
  });

  test('selects Kimi-K2.5 when o4-mini unavailable', () => {
    const remaining = ALL_MODELS.filter(({ name }) => name !== 'o4-mini');
    expect(pickFrom(remaining)).toBe('Kimi-K2.5');
  });

  test('selects gpt-5-mini when o4-mini and Kimi unavailable', () => {
    const unavailable = ['o4-mini', 'Kimi-K2.5'];
    const remaining = ALL_MODELS.filter(({ name }) => !unavailable.includes(name));
    expect(pickFrom(remaining)).toBe('gpt-5-mini');
  });

  test('selects o3 when only o3 and grok have vision', () => {
    expect(pickFrom([vision('o3'), vision('grok-4')])).toBe('o3');
  });

  test('falls back to first vision model when no keywords match', () => {
    expect(pickFrom([vision('exotic-vision-1'), vision('exotic-vision-2')]))
      .toBe('exotic-vision-1');
  });

  test('skips non-vision models', () => {
    expect(pickFrom([textOnly('gpt-5.3-codex'), vision('grok-4')])).toBe('grok-4');
  });

  test('returns null when no vision models exist', () => {
    expect(pickFrom([textOnly('gpt-5.3-codex')])).toBeNull();
  });

  test('returns null when models list is empty', () => {
    expect(pickFrom([])).toBeNull();
  });

  test('returns null when aiService has no modelsService', () => {
    expect(selectVisionModel({ aiService: {} })).toBeNull();
  });

  test('returns null when context has no aiService', () => {
    expect(selectVisionModel({})).toBeNull();
  });

  test('analyzeScreenshot would use o4-mini as fallback when _selectVisionModel returns null', () => {
    // The actual code does: this._selectVisionModel(context) || 'o4-mini'
    expect(selectVisionModel({}) || 'o4-mini').toBe('o4-mini');
  });
});
|
package/src/tools/visionTool.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Purpose:
|
|
5
5
|
* - Analyze images from disk with natural language questions
|
|
6
6
|
* - Session-based: first call uploads image, subsequent calls reuse it
|
|
7
|
-
* - Uses the best available vision model (prefers
|
|
7
|
+
* - Uses the best available vision model (prefers o4-mini for best MMMU/cost ratio)
|
|
8
8
|
* - Returns structured answers only (no filler text)
|
|
9
9
|
*
|
|
10
10
|
* Actions:
|
|
@@ -18,12 +18,12 @@ import { BaseTool } from './baseTool.js';
|
|
|
18
18
|
import { promises as fs } from 'fs';
|
|
19
19
|
import path from 'path';
|
|
20
20
|
|
|
21
|
-
//
|
|
22
|
-
// 1.
|
|
23
|
-
// 2.
|
|
24
|
-
// 3.
|
|
25
|
-
//
|
|
26
|
-
const VISION_MODEL_PRIORITY = ['
|
|
21
|
+
// Vision model selection priority (best MMMU-Pro score / cost ratio).
// Resolution order:
//   1. Keyword match from the priority list below (filtered to vision-capable only)
//   2. Agent's current model if it has vision capability
//   3. First available vision model
// Scores: o4-mini (79.2%, cheapest), Kimi (78.5%, cheap),
//         gpt-5-mini (75.3%, very cheap), gpt-4.1 (69.3%, solid)
const VISION_MODEL_PRIORITY = [
  'o4-mini',
  'kimi',
  'gpt-5-mini',
  'gpt-5-nano',
  'gpt-4.1-mini',
  'gpt-4.1',
  'o3',
  'gpt-5',
];
|
|
27
27
|
|
|
28
28
|
const MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024; // 20MB
|
|
29
29
|
const SESSION_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
|
package/src/tools/webTool.js
CHANGED
|
@@ -2832,6 +2832,28 @@ If blocked (CAPTCHA, access denied), use stealthLevel: "maximum" (visible browse
|
|
|
2832
2832
|
};
|
|
2833
2833
|
}
|
|
2834
2834
|
|
|
2835
|
+
/**
|
|
2836
|
+
* Select the best available vision model for screenshot analysis.
|
|
2837
|
+
* Priority: o4-mini > kimi > gpt-5-mini > gpt-4.1 > o3 > first vision model
|
|
2838
|
+
* @private
|
|
2839
|
+
*/
|
|
2840
|
+
_selectVisionModel(context) {
|
|
2841
|
+
const modelsService = context.aiService?.modelsService;
|
|
2842
|
+
if (!modelsService) return null;
|
|
2843
|
+
|
|
2844
|
+
const allModels = modelsService.getModels?.() || [];
|
|
2845
|
+
const visionModels = allModels.filter(m => m.supportsVision === true);
|
|
2846
|
+
if (visionModels.length === 0) return null;
|
|
2847
|
+
|
|
2848
|
+
const priority = ['o4-mini', 'kimi', 'gpt-5-mini', 'gpt-5-nano', 'gpt-4.1-mini', 'gpt-4.1', 'o3', 'gpt-5'];
|
|
2849
|
+
for (const keyword of priority) {
|
|
2850
|
+
const match = visionModels.find(m => m.name.toLowerCase().includes(keyword));
|
|
2851
|
+
if (match) return match.name;
|
|
2852
|
+
}
|
|
2853
|
+
|
|
2854
|
+
return visionModels[0].name;
|
|
2855
|
+
}
|
|
2856
|
+
|
|
2835
2857
|
/**
|
|
2836
2858
|
* Analyze screenshot using AI vision model
|
|
2837
2859
|
* @param {Page} page - Puppeteer page
|
|
@@ -2856,8 +2878,8 @@ If blocked (CAPTCHA, access denied), use stealthLevel: "maximum" (visible browse
|
|
|
2856
2878
|
});
|
|
2857
2879
|
|
|
2858
2880
|
try {
|
|
2859
|
-
//
|
|
2860
|
-
const model = '
|
|
2881
|
+
// Select best available vision model dynamically
|
|
2882
|
+
const model = this._selectVisionModel(context) || 'o4-mini';
|
|
2861
2883
|
|
|
2862
2884
|
// Create message with image
|
|
2863
2885
|
const response = await aiService.sendMessage(
|