@arclabs561/ai-visual-test 0.5.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +102 -11
- package/DEPLOYMENT.md +225 -9
- package/README.md +71 -80
- package/index.d.ts +862 -3
- package/package.json +10 -51
- package/src/batch-optimizer.mjs +39 -0
- package/src/cache.mjs +241 -16
- package/src/config.mjs +33 -91
- package/src/constants.mjs +54 -0
- package/src/convenience.mjs +113 -10
- package/src/cost-optimization.mjs +1 -0
- package/src/cost-tracker.mjs +134 -2
- package/src/data-extractor.mjs +36 -7
- package/src/dynamic-few-shot.mjs +69 -11
- package/src/errors.mjs +6 -2
- package/src/experience-propagation.mjs +12 -0
- package/src/experience-tracer.mjs +12 -3
- package/src/game-player.mjs +222 -43
- package/src/graceful-shutdown.mjs +126 -0
- package/src/helpers/playwright.mjs +22 -8
- package/src/human-validation-manager.mjs +99 -2
- package/src/index.mjs +48 -3
- package/src/integrations/playwright.mjs +140 -0
- package/src/judge.mjs +697 -24
- package/src/load-env.mjs +2 -1
- package/src/logger.mjs +31 -3
- package/src/model-tier-selector.mjs +1 -221
- package/src/natural-language-specs.mjs +31 -3
- package/src/persona-enhanced.mjs +4 -2
- package/src/persona-experience.mjs +1 -1
- package/src/pricing.mjs +28 -0
- package/src/prompt-composer.mjs +162 -5
- package/src/provider-data.mjs +115 -0
- package/src/render-change-detector.mjs +5 -0
- package/src/research-enhanced-validation.mjs +7 -5
- package/src/retry.mjs +21 -7
- package/src/rubrics.mjs +4 -0
- package/src/safe-logger.mjs +71 -0
- package/src/session-cost-tracker.mjs +320 -0
- package/src/smart-validator.mjs +8 -8
- package/src/spec-templates.mjs +52 -6
- package/src/startup-validation.mjs +127 -0
- package/src/temporal-adaptive.mjs +2 -2
- package/src/temporal-decision-manager.mjs +1 -271
- package/src/temporal-logic.mjs +104 -0
- package/src/temporal-note-pruner.mjs +119 -0
- package/src/temporal-preprocessor.mjs +1 -543
- package/src/temporal.mjs +681 -79
- package/src/utils/action-hallucination-detector.mjs +301 -0
- package/src/utils/baseline-validator.mjs +82 -0
- package/src/utils/cache-stats.mjs +104 -0
- package/src/utils/cached-llm.mjs +164 -0
- package/src/utils/capability-stratifier.mjs +108 -0
- package/src/utils/counterfactual-tester.mjs +83 -0
- package/src/utils/error-recovery.mjs +117 -0
- package/src/utils/explainability-scorer.mjs +119 -0
- package/src/utils/exploratory-automation.mjs +131 -0
- package/src/utils/index.mjs +10 -0
- package/src/utils/intent-recognizer.mjs +201 -0
- package/src/utils/log-sanitizer.mjs +165 -0
- package/src/utils/path-validator.mjs +88 -0
- package/src/utils/performance-logger.mjs +316 -0
- package/src/utils/performance-measurement.mjs +280 -0
- package/src/utils/prompt-sanitizer.mjs +213 -0
- package/src/utils/rate-limiter.mjs +144 -0
- package/src/validation-framework.mjs +24 -20
- package/src/validation-result-normalizer.mjs +27 -1
- package/src/validation.mjs +75 -25
- package/src/validators/accessibility-validator.mjs +144 -0
- package/src/validators/hybrid-validator.mjs +48 -4
- package/api/health.js +0 -34
- package/api/validate.js +0 -252
- package/public/index.html +0 -149
- package/vercel.json +0 -27
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arclabs561/ai-visual-test",
|
|
3
|
-
"version": "0.5.1",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.7.3",
|
|
4
|
+
"description": "Visual testing framework for web applications using Vision Language Models",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.mjs",
|
|
7
7
|
"exports": {
|
|
@@ -16,11 +16,8 @@
|
|
|
16
16
|
"./package.json": "./package.json"
|
|
17
17
|
},
|
|
18
18
|
"files": [
|
|
19
|
-
"src
|
|
19
|
+
"src/**/*",
|
|
20
20
|
"index.d.ts",
|
|
21
|
-
"api/**/*.js",
|
|
22
|
-
"public/**/*.html",
|
|
23
|
-
"vercel.json",
|
|
24
21
|
"README.md",
|
|
25
22
|
"CHANGELOG.md",
|
|
26
23
|
"CONTRIBUTING.md",
|
|
@@ -29,42 +26,6 @@
|
|
|
29
26
|
"LICENSE",
|
|
30
27
|
".secretsignore.example"
|
|
31
28
|
],
|
|
32
|
-
"scripts": {
|
|
33
|
-
"test": "node --test test/*.test.mjs",
|
|
34
|
-
"test:validation": "node --test test/validation-*.test.mjs",
|
|
35
|
-
"test:unit": "node --test test/unit/*.test.mjs",
|
|
36
|
-
"test:integration": "node --test test/integration/*.test.mjs",
|
|
37
|
-
"test:e2e": "node --test test/e2e/*.test.mjs",
|
|
38
|
-
"test:datasets": "node --test test/dataset-*.test.mjs",
|
|
39
|
-
"playwright:check": "node scripts/ensure-playwright.mjs",
|
|
40
|
-
"playwright:install": "node scripts/ensure-playwright.mjs --install",
|
|
41
|
-
"playwright:setup": "node scripts/ensure-playwright.mjs --install --install-browsers",
|
|
42
|
-
"annotate": "node evaluation/utils/invoke-human-annotation.mjs",
|
|
43
|
-
"annotate:quick": "node evaluation/utils/quick-start-annotation.mjs",
|
|
44
|
-
"annotate:full": "node evaluation/utils/start-human-annotation.mjs",
|
|
45
|
-
"validate:annotations": "node evaluation/utils/validate-annotation-quality.mjs",
|
|
46
|
-
"validate:dataset": "node evaluation/utils/validate-dataset-quality.mjs",
|
|
47
|
-
"match:vllm": "node evaluation/utils/match-annotations-with-vllm.mjs",
|
|
48
|
-
"datasets:download": "node evaluation/utils/download-all-datasets.mjs",
|
|
49
|
-
"datasets:parse": "node evaluation/utils/parse-all-datasets.mjs",
|
|
50
|
-
"datasets:setup": "npm run datasets:download && npm run datasets:parse",
|
|
51
|
-
"docs": "node scripts/generate-docs.mjs",
|
|
52
|
-
"lint": "echo 'No linter configured'",
|
|
53
|
-
"prepublishOnly": "npm test",
|
|
54
|
-
"check:secrets": "node scripts/detect-secrets.mjs",
|
|
55
|
-
"check:quality": "node node_modules/@arclabs561/hookwise/src/hooks/code-quality.mjs || echo 'Hookwise not available'",
|
|
56
|
-
"check:docs": "node node_modules/@arclabs561/hookwise/src/hooks/doc-bloat.mjs || echo 'Hookwise not available'",
|
|
57
|
-
"check:security": "npm run check:secrets",
|
|
58
|
-
"check:test-performance": "node node_modules/@arclabs561/hookwise/src/hooks/test-performance.mjs || node scripts/analyze-test-performance.mjs",
|
|
59
|
-
"check:commit": "node node_modules/@arclabs561/hookwise/src/hooks/commit-msg.mjs .git/COMMIT_EDITMSG || echo 'Hookwise not available'",
|
|
60
|
-
"check:all": "npm run check:secrets && npx hookwise garden && npm run check:test-performance",
|
|
61
|
-
"garden": "npx hookwise garden",
|
|
62
|
-
"garden:enhanced": "node scripts/enhanced-garden.mjs",
|
|
63
|
-
"deprecate:old": "node scripts/deprecate-old-package.mjs",
|
|
64
|
-
"garden:watch": "node scripts/watch-garden.mjs",
|
|
65
|
-
"test:performance": "npm run check:test-performance",
|
|
66
|
-
"test:slow": "npm test 2>&1 | grep -E '✔.*\\([0-9]+\\.[0-9]+ms\\)' | sort -t'(' -k2 -nr | head -20"
|
|
67
|
-
},
|
|
68
29
|
"keywords": [
|
|
69
30
|
"visual-testing",
|
|
70
31
|
"ai",
|
|
@@ -78,7 +39,11 @@
|
|
|
78
39
|
"author": "arclabs561 <henry@henrywallace.io>",
|
|
79
40
|
"license": "MIT",
|
|
80
41
|
"dependencies": {
|
|
81
|
-
"
|
|
42
|
+
"@anthropic-ai/sdk": "0.70.0",
|
|
43
|
+
"@google/generative-ai": "0.24.1",
|
|
44
|
+
"async-mutex": "0.5.0",
|
|
45
|
+
"dotenv": "^16.4.5",
|
|
46
|
+
"openai": "6.9.1"
|
|
82
47
|
},
|
|
83
48
|
"peerDependencies": {
|
|
84
49
|
"@arclabs561/llm-utils": "*",
|
|
@@ -92,21 +57,15 @@
|
|
|
92
57
|
"optional": true
|
|
93
58
|
}
|
|
94
59
|
},
|
|
95
|
-
"devDependencies": {
|
|
96
|
-
"@types/node": "^22.10.1",
|
|
97
|
-
"async-mutex": "0.5.0",
|
|
98
|
-
"fast-check": "4.3.0",
|
|
99
|
-
"proper-lockfile": "4.1.2"
|
|
100
|
-
},
|
|
101
60
|
"engines": {
|
|
102
61
|
"node": ">=18.0.0"
|
|
103
62
|
},
|
|
104
63
|
"repository": {
|
|
105
64
|
"type": "git",
|
|
106
|
-
"url": "
|
|
65
|
+
"url": "https://github.com/arclabs561/ai-visual-test.git"
|
|
107
66
|
},
|
|
108
67
|
"homepage": "https://github.com/arclabs561/ai-visual-test#readme",
|
|
109
68
|
"bugs": {
|
|
110
69
|
"url": "https://github.com/arclabs561/ai-visual-test/issues"
|
|
111
70
|
}
|
|
112
|
-
}
|
|
71
|
+
}
|
package/src/batch-optimizer.mjs
CHANGED
|
@@ -218,6 +218,25 @@ export class BatchOptimizer {
|
|
|
218
218
|
} catch (metricsError) {
|
|
219
219
|
warn(`[BatchOptimizer] Error updating rejection metrics: ${metricsError.message}`);
|
|
220
220
|
}
|
|
221
|
+
|
|
222
|
+
// Log batch optimizer rejection (weighted: rejections are critical)
|
|
223
|
+
// Use dynamic import with proper error handling to prevent unhandled promise rejections
|
|
224
|
+
import('./utils/performance-logger.mjs')
|
|
225
|
+
.then(({ logBatchOptimizer }) => {
|
|
226
|
+
logBatchOptimizer({
|
|
227
|
+
event: 'reject',
|
|
228
|
+
queueDepth: this.queue.length,
|
|
229
|
+
maxQueueSize: this.maxQueueSize,
|
|
230
|
+
activeRequests: this.activeRequests,
|
|
231
|
+
maxConcurrency: this.maxConcurrency,
|
|
232
|
+
reason: 'Queue full - preventing memory leak'
|
|
233
|
+
});
|
|
234
|
+
})
|
|
235
|
+
.catch((importError) => {
|
|
236
|
+
// Log to console if performance logger unavailable (better than silent failure)
|
|
237
|
+
warn(`[BatchOptimizer] Performance logger unavailable: ${importError.message}`);
|
|
238
|
+
});
|
|
239
|
+
|
|
221
240
|
warn(`[BatchOptimizer] Queue is full (${this.queue.length}/${this.maxQueueSize}). Rejecting request to prevent memory leak. Total rejections: ${this.metrics.queueRejections}`);
|
|
222
241
|
throw new TimeoutError(
|
|
223
242
|
`Queue is full (${this.queue.length}/${this.maxQueueSize}). Too many concurrent requests.`,
|
|
@@ -236,6 +255,26 @@ export class BatchOptimizer {
|
|
|
236
255
|
|
|
237
256
|
const timeoutId = setTimeout(() => {
|
|
238
257
|
timeoutFired = true;
|
|
258
|
+
|
|
259
|
+
// Log batch optimizer timeout (weighted: timeouts are critical)
|
|
260
|
+
// Use dynamic import with proper error handling to prevent unhandled promise rejections
|
|
261
|
+
import('./utils/performance-logger.mjs')
|
|
262
|
+
.then(({ logBatchOptimizer }) => {
|
|
263
|
+
logBatchOptimizer({
|
|
264
|
+
event: 'timeout',
|
|
265
|
+
queueDepth: this.queue.length,
|
|
266
|
+
maxQueueSize: this.maxQueueSize,
|
|
267
|
+
activeRequests: this.activeRequests,
|
|
268
|
+
maxConcurrency: this.maxConcurrency,
|
|
269
|
+
waitTime: Date.now() - queueStartTime,
|
|
270
|
+
reason: 'Request timeout - queue wait exceeded limit'
|
|
271
|
+
});
|
|
272
|
+
})
|
|
273
|
+
.catch((importError) => {
|
|
274
|
+
// Log to console if performance logger unavailable (better than silent failure)
|
|
275
|
+
warn(`[BatchOptimizer] Performance logger unavailable: ${importError.message}`);
|
|
276
|
+
});
|
|
277
|
+
|
|
239
278
|
// Remove from queue if still waiting
|
|
240
279
|
// CRITICAL FIX: Use stored queueEntry reference instead of searching by resolve function
|
|
241
280
|
// The resolve function is wrapped, so direct comparison might not work
|
package/src/cache.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* VLLM Cache
|
|
3
3
|
*
|
|
4
|
-
* Provides persistent caching for VLLM API calls to reduce costs and improve performance.
|
|
4
|
+
* Provides persistent caching for VLLM API calls (vision) and text-only LLM calls to reduce costs and improve performance.
|
|
5
5
|
* Uses file-based storage for cache persistence across test runs.
|
|
6
6
|
*
|
|
7
7
|
* BUGS FIXED (2025-01):
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
* - Why separate: Different persistence strategy (file vs memory), different lifetime (7 days vs process lifetime),
|
|
16
16
|
* different failure domain (disk errors don't affect in-memory batching), minimal data overlap (<5%)
|
|
17
17
|
* - No coordination with BatchOptimizer cache or TemporalPreprocessing cache (by design - they serve different purposes)
|
|
18
|
+
* - Supports both vision LLM calls (with images) and text-only LLM calls (no images)
|
|
18
19
|
*/
|
|
19
20
|
|
|
20
21
|
import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'fs';
|
|
@@ -73,36 +74,106 @@ export function initCache(cacheDir) {
|
|
|
73
74
|
}
|
|
74
75
|
|
|
75
76
|
/**
|
|
76
|
-
* Generate cache key from image path, prompt, and context
|
|
77
|
+
* Generate cache key from image path, prompt, and context (for vision LLM calls)
|
|
77
78
|
*
|
|
78
79
|
* @param {string} imagePath - Path to image file
|
|
79
80
|
* @param {string} prompt - Validation prompt
|
|
80
81
|
* @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
|
|
81
82
|
* @returns {string} SHA-256 hash of cache key
|
|
82
83
|
*/
|
|
84
|
+
/**
|
|
85
|
+
* Normalize and sort object keys for deterministic JSON serialization
|
|
86
|
+
*/
|
|
87
|
+
function deterministicStringify(obj) {
|
|
88
|
+
if (obj === null || typeof obj !== 'object') {
|
|
89
|
+
return JSON.stringify(obj);
|
|
90
|
+
}
|
|
91
|
+
if (Array.isArray(obj)) {
|
|
92
|
+
return '[' + obj.map(deterministicStringify).join(',') + ']';
|
|
93
|
+
}
|
|
94
|
+
const sortedKeys = Object.keys(obj).sort();
|
|
95
|
+
const pairs = sortedKeys.map(key => {
|
|
96
|
+
return JSON.stringify(key) + ':' + deterministicStringify(obj[key]);
|
|
97
|
+
});
|
|
98
|
+
return '{' + pairs.join(',') + '}';
|
|
99
|
+
}
|
|
100
|
+
|
|
83
101
|
export function generateCacheKey(imagePath, prompt, context = {}) {
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
//
|
|
87
|
-
// - Different prompts with same first 1000 chars = same cache key = wrong cache hit
|
|
88
|
-
// - Different game states with same first 500 chars = same cache key = wrong cache hit
|
|
102
|
+
// Content-addressed: hash image bytes, not the file path.
|
|
103
|
+
// This ensures cache invalidation when a screenshot is regenerated
|
|
104
|
+
// to the same path (e.g. /tmp/vlm_magic.png).
|
|
89
105
|
//
|
|
90
|
-
//
|
|
91
|
-
//
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
106
|
+
// For multi-image keys, imagePath may be a pipe-delimited string
|
|
107
|
+
// like "path1|path2" from judge.mjs.
|
|
108
|
+
const pathStr = imagePath || '';
|
|
109
|
+
const paths = pathStr.includes('|') ? pathStr.split('|') : [pathStr];
|
|
110
|
+
const imageHashes = paths.map(p => {
|
|
111
|
+
try {
|
|
112
|
+
const bytes = readFileSync(p);
|
|
113
|
+
return createHash('sha256').update(bytes).digest('hex');
|
|
114
|
+
} catch (error) {
|
|
115
|
+
// File unreadable (deleted, permissions) -- fall back to path hash
|
|
116
|
+
// so the key is still deterministic for error cases.
|
|
117
|
+
warn(`Cannot read image for cache key, falling back to path hash: ${p}`);
|
|
118
|
+
return createHash('sha256').update(p).digest('hex');
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
const imageDigest = imageHashes.length === 1
|
|
122
|
+
? imageHashes[0]
|
|
123
|
+
: createHash('sha256').update(imageHashes.join(':')).digest('hex');
|
|
124
|
+
|
|
125
|
+
// Build key data with deterministic structure
|
|
95
126
|
const keyData = {
|
|
96
|
-
|
|
127
|
+
type: 'vision', // Distinguish from text-only calls
|
|
128
|
+
imageDigest, // SHA-256 of image bytes (content-addressed)
|
|
97
129
|
prompt, // Full prompt, not truncated
|
|
98
130
|
testType: context.testType || '',
|
|
99
131
|
frame: context.frame || '',
|
|
100
132
|
score: context.score || '',
|
|
101
|
-
|
|
102
|
-
|
|
133
|
+
// Use deterministic stringify for nested objects to ensure consistent keys
|
|
134
|
+
viewport: context.viewport ? deterministicStringify(context.viewport) : '',
|
|
135
|
+
gameState: context.gameState ? deterministicStringify(context.gameState) : '' // Full game state, not truncated
|
|
103
136
|
};
|
|
104
137
|
|
|
105
|
-
|
|
138
|
+
// Use deterministic stringify to ensure consistent key generation
|
|
139
|
+
// even if object property order varies
|
|
140
|
+
const keyString = deterministicStringify(keyData);
|
|
141
|
+
return createHash('sha256').update(keyString).digest('hex');
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Generate cache key for text-only LLM calls
|
|
146
|
+
*
|
|
147
|
+
* @param {string} prompt - Text prompt
|
|
148
|
+
* @param {string} provider - LLM provider (e.g., 'gemini', 'openai', 'claude')
|
|
149
|
+
* @param {{
|
|
150
|
+
* model?: string | null;
|
|
151
|
+
* temperature?: number;
|
|
152
|
+
* maxTokens?: number;
|
|
153
|
+
* tier?: string;
|
|
154
|
+
* }} [options={}] - LLM call options
|
|
155
|
+
* @returns {string} SHA-256 hash of cache key
|
|
156
|
+
*/
|
|
157
|
+
export function generateTextLLMCacheKey(prompt, provider, options = {}) {
|
|
158
|
+
const {
|
|
159
|
+
model = null,
|
|
160
|
+
temperature = 0.1,
|
|
161
|
+
maxTokens = 1000,
|
|
162
|
+
tier = null
|
|
163
|
+
} = options;
|
|
164
|
+
|
|
165
|
+
const keyData = {
|
|
166
|
+
type: 'text', // Distinguish from vision calls
|
|
167
|
+
prompt, // Full prompt, not truncated
|
|
168
|
+
provider,
|
|
169
|
+
model,
|
|
170
|
+
temperature,
|
|
171
|
+
maxTokens,
|
|
172
|
+
tier
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
// Use deterministic stringify for consistent cache keys
|
|
176
|
+
const keyString = deterministicStringify(keyData);
|
|
106
177
|
return createHash('sha256').update(keyString).digest('hex');
|
|
107
178
|
}
|
|
108
179
|
|
|
@@ -208,6 +279,33 @@ async function saveCache(cache) {
|
|
|
208
279
|
|
|
209
280
|
// Apply size limits (LRU eviction: keep most recently accessed)
|
|
210
281
|
const entriesToKeep = entries.slice(-MAX_CACHE_SIZE);
|
|
282
|
+
const evictedCount = entries.length - entriesToKeep.length;
|
|
283
|
+
|
|
284
|
+
// Log cache eviction (weighted: evictions are important for cache health)
|
|
285
|
+
if (evictedCount > 0) {
|
|
286
|
+
// Use dynamic import with proper error handling to prevent unhandled promise rejections
|
|
287
|
+
import('./utils/performance-logger.mjs')
|
|
288
|
+
.then(({ logCacheOperation }) => {
|
|
289
|
+
logCacheOperation({
|
|
290
|
+
operation: 'evict',
|
|
291
|
+
cacheSize: entriesToKeep.length,
|
|
292
|
+
maxSize: MAX_CACHE_SIZE,
|
|
293
|
+
reason: `LRU eviction: ${evictedCount} entries removed`
|
|
294
|
+
});
|
|
295
|
+
})
|
|
296
|
+
.catch(async (importError) => {
|
|
297
|
+
// Log to logger if performance logger unavailable (better than silent failure)
|
|
298
|
+
if (process.env.DEBUG_CACHE) {
|
|
299
|
+
try {
|
|
300
|
+
const { warn } = await import('./logger.mjs');
|
|
301
|
+
warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
302
|
+
} catch {
|
|
303
|
+
// Fallback to console if logger also unavailable
|
|
304
|
+
console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
});
|
|
308
|
+
}
|
|
211
309
|
|
|
212
310
|
for (const { key, value, timestamp } of entriesToKeep) {
|
|
213
311
|
const entry = {
|
|
@@ -346,6 +444,32 @@ export function getCached(imagePath, prompt, context = {}) {
|
|
|
346
444
|
const age = Date.now() - originalTimestamp;
|
|
347
445
|
if (age > MAX_CACHE_AGE) {
|
|
348
446
|
cache.delete(key); // Remove expired entry
|
|
447
|
+
|
|
448
|
+
// Log cache expiration (weighted: expirations are important for cache health)
|
|
449
|
+
// Use dynamic import with proper error handling to prevent unhandled promise rejections
|
|
450
|
+
import('./utils/performance-logger.mjs')
|
|
451
|
+
.then(({ logCacheOperation }) => {
|
|
452
|
+
const currentCache = getCache();
|
|
453
|
+
logCacheOperation({
|
|
454
|
+
operation: 'expire',
|
|
455
|
+
cacheSize: currentCache.size,
|
|
456
|
+
maxSize: MAX_CACHE_SIZE,
|
|
457
|
+
reason: `Entry expired (age: ${Math.floor(age / (1000 * 60 * 60 * 24))} days)`
|
|
458
|
+
});
|
|
459
|
+
})
|
|
460
|
+
.catch(async (importError) => {
|
|
461
|
+
// Log to logger if performance logger unavailable (better than silent failure)
|
|
462
|
+
if (process.env.DEBUG_CACHE) {
|
|
463
|
+
try {
|
|
464
|
+
const { warn } = await import('./logger.mjs');
|
|
465
|
+
warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
466
|
+
} catch {
|
|
467
|
+
// Fallback to console if logger also unavailable
|
|
468
|
+
console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
});
|
|
472
|
+
|
|
349
473
|
return null;
|
|
350
474
|
}
|
|
351
475
|
}
|
|
@@ -404,6 +528,107 @@ export function clearCache() {
|
|
|
404
528
|
});
|
|
405
529
|
}
|
|
406
530
|
|
|
531
|
+
/**
|
|
532
|
+
* Get cached text-only LLM response
|
|
533
|
+
*
|
|
534
|
+
* @param {string} prompt - Text prompt
|
|
535
|
+
* @param {string} provider - LLM provider
|
|
536
|
+
* @param {{
|
|
537
|
+
* model?: string | null;
|
|
538
|
+
* temperature?: number;
|
|
539
|
+
* maxTokens?: number;
|
|
540
|
+
* tier?: string;
|
|
541
|
+
* }} [options={}] - LLM call options
|
|
542
|
+
* @returns {string | null} Cached response or null if not found
|
|
543
|
+
*/
|
|
544
|
+
export function getCachedTextLLM(prompt, provider, options = {}) {
|
|
545
|
+
const cache = getCache();
|
|
546
|
+
const key = generateTextLLMCacheKey(prompt, provider, options);
|
|
547
|
+
const cached = cache.get(key);
|
|
548
|
+
|
|
549
|
+
if (cached) {
|
|
550
|
+
// Update access time for LRU eviction
|
|
551
|
+
cached._lastAccessed = Date.now();
|
|
552
|
+
|
|
553
|
+
// Check expiration based on original timestamp
|
|
554
|
+
const originalTimestamp = cached._originalTimestamp || cached._lastAccessed;
|
|
555
|
+
const age = Date.now() - originalTimestamp;
|
|
556
|
+
if (age > MAX_CACHE_AGE) {
|
|
557
|
+
cache.delete(key); // Remove expired entry
|
|
558
|
+
|
|
559
|
+
// Log cache expiration
|
|
560
|
+
// Use dynamic import with proper error handling to prevent unhandled promise rejections
|
|
561
|
+
import('./utils/performance-logger.mjs')
|
|
562
|
+
.then(({ logCacheOperation }) => {
|
|
563
|
+
const currentCache = getCache();
|
|
564
|
+
logCacheOperation({
|
|
565
|
+
operation: 'expire',
|
|
566
|
+
cacheSize: currentCache.size,
|
|
567
|
+
maxSize: MAX_CACHE_SIZE,
|
|
568
|
+
reason: `Text LLM entry expired (age: ${Math.floor(age / (1000 * 60 * 60 * 24))} days)`
|
|
569
|
+
});
|
|
570
|
+
})
|
|
571
|
+
.catch(async (importError) => {
|
|
572
|
+
// Log to logger if performance logger unavailable (better than silent failure)
|
|
573
|
+
if (process.env.DEBUG_CACHE) {
|
|
574
|
+
try {
|
|
575
|
+
const { warn } = await import('./logger.mjs');
|
|
576
|
+
warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
577
|
+
} catch {
|
|
578
|
+
// Fallback to console if logger also unavailable
|
|
579
|
+
console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
});
|
|
583
|
+
|
|
584
|
+
return null;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
// Return the cached response (stored as 'response' field for text-only calls)
|
|
588
|
+
return cached.response || null;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
return null;
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
/**
|
|
595
|
+
* Set cached text-only LLM response
|
|
596
|
+
*
|
|
597
|
+
* @param {string} prompt - Text prompt
|
|
598
|
+
* @param {string} provider - LLM provider
|
|
599
|
+
* @param {{
|
|
600
|
+
* model?: string | null;
|
|
601
|
+
* temperature?: number;
|
|
602
|
+
* maxTokens?: number;
|
|
603
|
+
* tier?: string;
|
|
604
|
+
* }} [options={}] - LLM call options
|
|
605
|
+
* @param {string} response - LLM response to cache
|
|
606
|
+
* @returns {void}
|
|
607
|
+
*/
|
|
608
|
+
export function setCachedTextLLM(prompt, provider, options, response) {
|
|
609
|
+
const cache = getCache();
|
|
610
|
+
const key = generateTextLLMCacheKey(prompt, provider, options);
|
|
611
|
+
const now = Date.now();
|
|
612
|
+
|
|
613
|
+
// Check if this is a new entry or updating existing
|
|
614
|
+
const existing = cache.get(key);
|
|
615
|
+
const originalTimestamp = existing?._originalTimestamp || now;
|
|
616
|
+
|
|
617
|
+
// Store response with metadata for cache management
|
|
618
|
+
const resultWithMetadata = {
|
|
619
|
+
response, // Store the text response
|
|
620
|
+
_lastAccessed: now,
|
|
621
|
+
_originalTimestamp: originalTimestamp
|
|
622
|
+
};
|
|
623
|
+
|
|
624
|
+
cache.set(key, resultWithMetadata);
|
|
625
|
+
|
|
626
|
+
// Save cache (async, fire-and-forget)
|
|
627
|
+
saveCache(cache).catch(error => {
|
|
628
|
+
warn(`[VLLM Cache] Failed to save text LLM cache (non-blocking): ${error.message}`);
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
|
|
407
632
|
/**
|
|
408
633
|
* Get cache statistics
|
|
409
634
|
*
|
package/src/config.mjs
CHANGED
|
@@ -8,97 +8,11 @@
|
|
|
8
8
|
import { ConfigError } from './errors.mjs';
|
|
9
9
|
import { loadEnv } from './load-env.mjs';
|
|
10
10
|
import { API_CONSTANTS } from './constants.mjs';
|
|
11
|
+
import { MODEL_TIERS, PROVIDER_CONFIGS } from './provider-data.mjs';
|
|
11
12
|
|
|
12
|
-
// Load .env file
|
|
13
|
+
// Load .env file on module load
|
|
13
14
|
loadEnv();
|
|
14
15
|
|
|
15
|
-
/**
|
|
16
|
-
* Model tiers for each provider
|
|
17
|
-
* Updated January 2025: Latest models - Gemini 2.5 Pro, GPT-5, Claude 4.5 Sonnet
|
|
18
|
-
*
|
|
19
|
-
* GROQ INTEGRATION (2025):
|
|
20
|
-
* - Groq added for high-frequency decisions (10-60Hz temporal decisions)
|
|
21
|
-
* - ~0.22s latency (vs 1-3s for other providers)
|
|
22
|
-
* - 185-276 tokens/sec throughput
|
|
23
|
-
* - OpenAI-compatible API
|
|
24
|
-
* - Cost-competitive, free tier available
|
|
25
|
-
* - Best for: Fast tier decisions, high-Hz temporal decisions, real-time applications
|
|
26
|
-
*/
|
|
27
|
-
const MODEL_TIERS = {
|
|
28
|
-
gemini: {
|
|
29
|
-
fast: 'gemini-2.0-flash-exp', // Fast, outperforms 1.5 Pro (2x speed)
|
|
30
|
-
balanced: 'gemini-2.5-pro', // Best balance (2025 leader, released June 2025)
|
|
31
|
-
best: 'gemini-2.5-pro' // Best quality (top vision-language model, 1M+ context)
|
|
32
|
-
},
|
|
33
|
-
openai: {
|
|
34
|
-
fast: 'gpt-4o-mini', // Fast, cheaper
|
|
35
|
-
balanced: 'gpt-5', // Best balance (released August 2025, unified reasoning)
|
|
36
|
-
best: 'gpt-5' // Best quality (state-of-the-art multimodal, August 2025)
|
|
37
|
-
},
|
|
38
|
-
claude: {
|
|
39
|
-
fast: 'claude-3-5-haiku-20241022', // Fast, cheaper
|
|
40
|
-
balanced: 'claude-sonnet-4-5', // Best balance (released September 2025, enhanced vision)
|
|
41
|
-
best: 'claude-sonnet-4-5' // Best quality (latest flagship, September 2025)
|
|
42
|
-
},
|
|
43
|
-
groq: {
|
|
44
|
-
// NOTE: Groq vision support requires different model
|
|
45
|
-
// For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
|
|
46
|
-
// For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
|
|
47
|
-
fast: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, fastest Groq option
|
|
48
|
-
balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
|
|
49
|
-
best: 'meta-llama/llama-4-scout-17b-16e-instruct' // Vision-capable, best quality (preview)
|
|
50
|
-
// WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
|
|
54
|
-
/**
|
|
55
|
-
* Default provider configurations
|
|
56
|
-
*
|
|
57
|
-
* GROQ INTEGRATION:
|
|
58
|
-
* - OpenAI-compatible API (easy migration)
|
|
59
|
-
* - ~0.22s latency (10x faster than typical providers)
|
|
60
|
-
* - Best for high-frequency decisions (10-60Hz temporal decisions)
|
|
61
|
-
* - Free tier available for testing
|
|
62
|
-
*/
|
|
63
|
-
const PROVIDER_CONFIGS = {
|
|
64
|
-
gemini: {
|
|
65
|
-
name: 'gemini',
|
|
66
|
-
apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
|
|
67
|
-
model: 'gemini-2.5-pro', // Latest: Released June 2025, top vision-language model, 1M+ context
|
|
68
|
-
freeTier: true,
|
|
69
|
-
pricing: { input: 1.25, output: 5.00 }, // Updated pricing for 2.5 Pro
|
|
70
|
-
priority: 1 // Higher priority = preferred
|
|
71
|
-
},
|
|
72
|
-
openai: {
|
|
73
|
-
name: 'openai',
|
|
74
|
-
apiUrl: 'https://api.openai.com/v1',
|
|
75
|
-
model: 'gpt-5', // Latest: Released August 2025, state-of-the-art multimodal
|
|
76
|
-
freeTier: false,
|
|
77
|
-
pricing: { input: 5.00, output: 15.00 }, // Updated pricing for gpt-5
|
|
78
|
-
priority: 2
|
|
79
|
-
},
|
|
80
|
-
claude: {
|
|
81
|
-
name: 'claude',
|
|
82
|
-
apiUrl: 'https://api.anthropic.com/v1',
|
|
83
|
-
model: 'claude-sonnet-4-5', // Latest: Released September 2025, enhanced vision capabilities
|
|
84
|
-
freeTier: false,
|
|
85
|
-
pricing: { input: 3.00, output: 15.00 }, // Updated pricing for 4.5
|
|
86
|
-
priority: 3
|
|
87
|
-
},
|
|
88
|
-
groq: {
|
|
89
|
-
name: 'groq',
|
|
90
|
-
apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
|
|
91
|
-
model: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable (preview), ~0.22s latency
|
|
92
|
-
freeTier: true, // Free tier available
|
|
93
|
-
pricing: { input: 0.59, output: 0.79 }, // Actual 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
|
|
94
|
-
priority: 0, // Highest priority for high-frequency decisions
|
|
95
|
-
latency: 220, // ~0.22s latency in ms (10x faster than typical)
|
|
96
|
-
throughput: 200, // ~200 tokens/sec average
|
|
97
|
-
visionSupported: true // llama-4-scout-17b-16e-instruct supports vision (preview)
|
|
98
|
-
// Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
|
|
99
|
-
}
|
|
100
|
-
};
|
|
101
|
-
|
|
102
16
|
/**
|
|
103
17
|
* Create configuration from environment or options
|
|
104
18
|
*
|
|
@@ -111,12 +25,13 @@ export function createConfig(options = {}) {
|
|
|
111
25
|
apiKey = null,
|
|
112
26
|
env = process.env,
|
|
113
27
|
cacheDir = null,
|
|
114
|
-
cacheEnabled = true,
|
|
28
|
+
cacheEnabled = process.env.DISABLE_LLM_CACHE !== 'true',
|
|
115
29
|
maxConcurrency = API_CONSTANTS.DEFAULT_MAX_CONCURRENCY,
|
|
116
30
|
timeout = API_CONSTANTS.DEFAULT_TIMEOUT_MS,
|
|
117
31
|
verbose = false,
|
|
118
32
|
modelTier = null, // 'fast', 'balanced', 'best', or null for default
|
|
119
|
-
model = null
|
|
33
|
+
model = null, // Explicit model override
|
|
34
|
+
anchors = null // Domain visual anchors: { domain?, positive?: string[], negative?: string[] }
|
|
120
35
|
} = options;
|
|
121
36
|
|
|
122
37
|
// Auto-detect provider if not specified
|
|
@@ -154,11 +69,39 @@ export function createConfig(options = {}) {
|
|
|
154
69
|
providerConfig.model = env.VLM_MODEL;
|
|
155
70
|
}
|
|
156
71
|
|
|
72
|
+
// Normalize anchors: ensure arrays, filter empty/invalid entries.
|
|
73
|
+
// Each entry can be a plain string or { text?, image?, label?, dimension? }.
|
|
74
|
+
let normalizedAnchors = null;
|
|
75
|
+
if (anchors && typeof anchors === 'object') {
|
|
76
|
+
const normalizeEntries = (arr) => {
|
|
77
|
+
if (!Array.isArray(arr)) return [];
|
|
78
|
+
return arr.filter(entry => {
|
|
79
|
+
if (typeof entry === 'string') return entry.trim().length > 0;
|
|
80
|
+
if (entry && typeof entry === 'object') {
|
|
81
|
+
return (entry.text && typeof entry.text === 'string' && entry.text.trim()) ||
|
|
82
|
+
(entry.image && typeof entry.image === 'string' && entry.image.trim());
|
|
83
|
+
}
|
|
84
|
+
return false;
|
|
85
|
+
});
|
|
86
|
+
};
|
|
87
|
+
const pos = normalizeEntries(anchors.positive);
|
|
88
|
+
const neg = normalizeEntries(anchors.negative);
|
|
89
|
+
const hasDomain = anchors.domain && typeof anchors.domain === 'string' && anchors.domain.trim();
|
|
90
|
+
|
|
91
|
+
if (pos.length > 0 || neg.length > 0 || hasDomain) {
|
|
92
|
+
normalizedAnchors = {};
|
|
93
|
+
if (hasDomain) normalizedAnchors.domain = anchors.domain.trim();
|
|
94
|
+
if (pos.length > 0) normalizedAnchors.positive = pos;
|
|
95
|
+
if (neg.length > 0) normalizedAnchors.negative = neg;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
157
99
|
return {
|
|
158
100
|
provider: selectedProvider,
|
|
159
101
|
apiKey: selectedApiKey,
|
|
160
102
|
providerConfig,
|
|
161
103
|
enabled: !!selectedApiKey,
|
|
104
|
+
anchors: normalizedAnchors,
|
|
162
105
|
cache: {
|
|
163
106
|
enabled: cacheEnabled,
|
|
164
107
|
dir: cacheDir
|
|
@@ -265,4 +208,3 @@ export function getProvider(providerName = null) {
|
|
|
265
208
|
const provider = providerName || config.provider;
|
|
266
209
|
return PROVIDER_CONFIGS[provider] || PROVIDER_CONFIGS.gemini;
|
|
267
210
|
}
|
|
268
|
-
|
package/src/constants.mjs
CHANGED
|
@@ -78,3 +78,57 @@ export const UNCERTAINTY_CONSTANTS = {
|
|
|
78
78
|
EDGE_CASE_SELF_CONSISTENCY_N: 3
|
|
79
79
|
};
|
|
80
80
|
|
|
81
|
+
/**
|
|
82
|
+
* API Endpoint Configuration (for serverless functions)
|
|
83
|
+
*/
|
|
84
|
+
export const API_ENDPOINT_CONSTANTS = {
|
|
85
|
+
/** Maximum image size in bytes (10MB) */
|
|
86
|
+
MAX_IMAGE_SIZE: 10 * 1024 * 1024,
|
|
87
|
+
|
|
88
|
+
/** Maximum prompt length in characters */
|
|
89
|
+
MAX_PROMPT_LENGTH: 5000,
|
|
90
|
+
|
|
91
|
+
/** Maximum context size in bytes */
|
|
92
|
+
MAX_CONTEXT_SIZE: 10000,
|
|
93
|
+
|
|
94
|
+
/** Default rate limit window in milliseconds (1 minute) */
|
|
95
|
+
RATE_LIMIT_WINDOW_MS: 60 * 1000,
|
|
96
|
+
|
|
97
|
+
/** Default maximum requests per window */
|
|
98
|
+
RATE_LIMIT_MAX_REQUESTS: 10
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Retry Configuration
|
|
103
|
+
*/
|
|
104
|
+
export const RETRY_CONSTANTS = {
|
|
105
|
+
/** Default base delay for exponential backoff in milliseconds (1 second) */
|
|
106
|
+
DEFAULT_BASE_DELAY_MS: 1000,
|
|
107
|
+
|
|
108
|
+
/** Default maximum delay for exponential backoff in milliseconds (30 seconds) */
|
|
109
|
+
DEFAULT_MAX_DELAY_MS: 30000,
|
|
110
|
+
|
|
111
|
+
/** Default maximum number of retries */
|
|
112
|
+
DEFAULT_MAX_RETRIES: 3,
|
|
113
|
+
|
|
114
|
+
/** Jitter amount as percentage of delay (±25%) */
|
|
115
|
+
JITTER_PERCENTAGE: 0.25
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Validation Configuration
|
|
120
|
+
*/
|
|
121
|
+
export const VALIDATION_CONSTANTS = {
|
|
122
|
+
/** Maximum prompt length for validation (10k characters) */
|
|
123
|
+
MAX_PROMPT_LENGTH: 10000,
|
|
124
|
+
|
|
125
|
+
/** Maximum context size in bytes */
|
|
126
|
+
MAX_CONTEXT_SIZE: 50000,
|
|
127
|
+
|
|
128
|
+
/** Minimum timeout in milliseconds */
|
|
129
|
+
MIN_TIMEOUT_MS: 1000,
|
|
130
|
+
|
|
131
|
+
/** Maximum timeout in milliseconds (5 minutes) */
|
|
132
|
+
MAX_TIMEOUT_MS: 300000
|
|
133
|
+
};
|
|
134
|
+
|