@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +102 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +862 -3
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +697 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +27 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@arclabs561/ai-visual-test",
3
- "version": "0.5.1",
4
- "description": "AI-powered visual testing framework for web applications using Vision Language Models",
3
+ "version": "0.7.3",
4
+ "description": "Visual testing framework for web applications using Vision Language Models",
5
5
  "type": "module",
6
6
  "main": "src/index.mjs",
7
7
  "exports": {
@@ -16,11 +16,8 @@
16
16
  "./package.json": "./package.json"
17
17
  },
18
18
  "files": [
19
- "src/**/*.mjs",
19
+ "src/**/*",
20
20
  "index.d.ts",
21
- "api/**/*.js",
22
- "public/**/*.html",
23
- "vercel.json",
24
21
  "README.md",
25
22
  "CHANGELOG.md",
26
23
  "CONTRIBUTING.md",
@@ -29,42 +26,6 @@
29
26
  "LICENSE",
30
27
  ".secretsignore.example"
31
28
  ],
32
- "scripts": {
33
- "test": "node --test test/*.test.mjs",
34
- "test:validation": "node --test test/validation-*.test.mjs",
35
- "test:unit": "node --test test/unit/*.test.mjs",
36
- "test:integration": "node --test test/integration/*.test.mjs",
37
- "test:e2e": "node --test test/e2e/*.test.mjs",
38
- "test:datasets": "node --test test/dataset-*.test.mjs",
39
- "playwright:check": "node scripts/ensure-playwright.mjs",
40
- "playwright:install": "node scripts/ensure-playwright.mjs --install",
41
- "playwright:setup": "node scripts/ensure-playwright.mjs --install --install-browsers",
42
- "annotate": "node evaluation/utils/invoke-human-annotation.mjs",
43
- "annotate:quick": "node evaluation/utils/quick-start-annotation.mjs",
44
- "annotate:full": "node evaluation/utils/start-human-annotation.mjs",
45
- "validate:annotations": "node evaluation/utils/validate-annotation-quality.mjs",
46
- "validate:dataset": "node evaluation/utils/validate-dataset-quality.mjs",
47
- "match:vllm": "node evaluation/utils/match-annotations-with-vllm.mjs",
48
- "datasets:download": "node evaluation/utils/download-all-datasets.mjs",
49
- "datasets:parse": "node evaluation/utils/parse-all-datasets.mjs",
50
- "datasets:setup": "npm run datasets:download && npm run datasets:parse",
51
- "docs": "node scripts/generate-docs.mjs",
52
- "lint": "echo 'No linter configured'",
53
- "prepublishOnly": "npm test",
54
- "check:secrets": "node scripts/detect-secrets.mjs",
55
- "check:quality": "node node_modules/@arclabs561/hookwise/src/hooks/code-quality.mjs || echo 'Hookwise not available'",
56
- "check:docs": "node node_modules/@arclabs561/hookwise/src/hooks/doc-bloat.mjs || echo 'Hookwise not available'",
57
- "check:security": "npm run check:secrets",
58
- "check:test-performance": "node node_modules/@arclabs561/hookwise/src/hooks/test-performance.mjs || node scripts/analyze-test-performance.mjs",
59
- "check:commit": "node node_modules/@arclabs561/hookwise/src/hooks/commit-msg.mjs .git/COMMIT_EDITMSG || echo 'Hookwise not available'",
60
- "check:all": "npm run check:secrets && npx hookwise garden && npm run check:test-performance",
61
- "garden": "npx hookwise garden",
62
- "garden:enhanced": "node scripts/enhanced-garden.mjs",
63
- "deprecate:old": "node scripts/deprecate-old-package.mjs",
64
- "garden:watch": "node scripts/watch-garden.mjs",
65
- "test:performance": "npm run check:test-performance",
66
- "test:slow": "npm test 2>&1 | grep -E '✔.*\\([0-9]+\\.[0-9]+ms\\)' | sort -t'(' -k2 -nr | head -20"
67
- },
68
29
  "keywords": [
69
30
  "visual-testing",
70
31
  "ai",
@@ -78,7 +39,11 @@
78
39
  "author": "arclabs561 <henry@henrywallace.io>",
79
40
  "license": "MIT",
80
41
  "dependencies": {
81
- "dotenv": "^16.4.5"
42
+ "@anthropic-ai/sdk": "0.70.0",
43
+ "@google/generative-ai": "0.24.1",
44
+ "async-mutex": "0.5.0",
45
+ "dotenv": "^16.4.5",
46
+ "openai": "6.9.1"
82
47
  },
83
48
  "peerDependencies": {
84
49
  "@arclabs561/llm-utils": "*",
@@ -92,21 +57,15 @@
92
57
  "optional": true
93
58
  }
94
59
  },
95
- "devDependencies": {
96
- "@types/node": "^22.10.1",
97
- "async-mutex": "0.5.0",
98
- "fast-check": "4.3.0",
99
- "proper-lockfile": "4.1.2"
100
- },
101
60
  "engines": {
102
61
  "node": ">=18.0.0"
103
62
  },
104
63
  "repository": {
105
64
  "type": "git",
106
- "url": "git+https://github.com/arclabs561/ai-visual-test.git"
65
+ "url": "https://github.com/arclabs561/ai-visual-test.git"
107
66
  },
108
67
  "homepage": "https://github.com/arclabs561/ai-visual-test#readme",
109
68
  "bugs": {
110
69
  "url": "https://github.com/arclabs561/ai-visual-test/issues"
111
70
  }
112
- }
71
+ }
@@ -218,6 +218,25 @@ export class BatchOptimizer {
218
218
  } catch (metricsError) {
219
219
  warn(`[BatchOptimizer] Error updating rejection metrics: ${metricsError.message}`);
220
220
  }
221
+
222
+ // Log batch optimizer rejection (weighted: rejections are critical)
223
+ // Use dynamic import with proper error handling to prevent unhandled promise rejections
224
+ import('./utils/performance-logger.mjs')
225
+ .then(({ logBatchOptimizer }) => {
226
+ logBatchOptimizer({
227
+ event: 'reject',
228
+ queueDepth: this.queue.length,
229
+ maxQueueSize: this.maxQueueSize,
230
+ activeRequests: this.activeRequests,
231
+ maxConcurrency: this.maxConcurrency,
232
+ reason: 'Queue full - preventing memory leak'
233
+ });
234
+ })
235
+ .catch((importError) => {
236
+ // Log to console if performance logger unavailable (better than silent failure)
237
+ warn(`[BatchOptimizer] Performance logger unavailable: ${importError.message}`);
238
+ });
239
+
221
240
  warn(`[BatchOptimizer] Queue is full (${this.queue.length}/${this.maxQueueSize}). Rejecting request to prevent memory leak. Total rejections: ${this.metrics.queueRejections}`);
222
241
  throw new TimeoutError(
223
242
  `Queue is full (${this.queue.length}/${this.maxQueueSize}). Too many concurrent requests.`,
@@ -236,6 +255,26 @@ export class BatchOptimizer {
236
255
 
237
256
  const timeoutId = setTimeout(() => {
238
257
  timeoutFired = true;
258
+
259
+ // Log batch optimizer timeout (weighted: timeouts are critical)
260
+ // Use dynamic import with proper error handling to prevent unhandled promise rejections
261
+ import('./utils/performance-logger.mjs')
262
+ .then(({ logBatchOptimizer }) => {
263
+ logBatchOptimizer({
264
+ event: 'timeout',
265
+ queueDepth: this.queue.length,
266
+ maxQueueSize: this.maxQueueSize,
267
+ activeRequests: this.activeRequests,
268
+ maxConcurrency: this.maxConcurrency,
269
+ waitTime: Date.now() - queueStartTime,
270
+ reason: 'Request timeout - queue wait exceeded limit'
271
+ });
272
+ })
273
+ .catch((importError) => {
274
+ // Log to console if performance logger unavailable (better than silent failure)
275
+ warn(`[BatchOptimizer] Performance logger unavailable: ${importError.message}`);
276
+ });
277
+
239
278
  // Remove from queue if still waiting
240
279
  // CRITICAL FIX: Use stored queueEntry reference instead of searching by resolve function
241
280
  // The resolve function is wrapped, so direct comparison might not work
package/src/cache.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * VLLM Cache
3
3
  *
4
- * Provides persistent caching for VLLM API calls to reduce costs and improve performance.
4
+ * Provides persistent caching for VLLM API calls (vision) and text-only LLM calls to reduce costs and improve performance.
5
5
  * Uses file-based storage for cache persistence across test runs.
6
6
  *
7
7
  * BUGS FIXED (2025-01):
@@ -15,6 +15,7 @@
15
15
  * - Why separate: Different persistence strategy (file vs memory), different lifetime (7 days vs process lifetime),
16
16
  * different failure domain (disk errors don't affect in-memory batching), minimal data overlap (<5%)
17
17
  * - No coordination with BatchOptimizer cache or TemporalPreprocessing cache (by design - they serve different purposes)
18
+ * - Supports both vision LLM calls (with images) and text-only LLM calls (no images)
18
19
  */
19
20
 
20
21
  import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'fs';
@@ -73,36 +74,106 @@ export function initCache(cacheDir) {
73
74
  }
74
75
 
75
76
  /**
76
- * Generate cache key from image path, prompt, and context
77
+ * Generate cache key from image path, prompt, and context (for vision LLM calls)
77
78
  *
78
79
  * @param {string} imagePath - Path to image file
79
80
  * @param {string} prompt - Validation prompt
80
81
  * @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
81
82
  * @returns {string} SHA-256 hash of cache key
82
83
  */
84
+ /**
85
+ * Normalize and sort object keys for deterministic JSON serialization
86
+ */
87
+ function deterministicStringify(obj) {
88
+ if (obj === null || typeof obj !== 'object') {
89
+ return JSON.stringify(obj);
90
+ }
91
+ if (Array.isArray(obj)) {
92
+ return '[' + obj.map(deterministicStringify).join(',') + ']';
93
+ }
94
+ const sortedKeys = Object.keys(obj).sort();
95
+ const pairs = sortedKeys.map(key => {
96
+ return JSON.stringify(key) + ':' + deterministicStringify(obj[key]);
97
+ });
98
+ return '{' + pairs.join(',') + '}';
99
+ }
100
+
83
101
  export function generateCacheKey(imagePath, prompt, context = {}) {
84
- // NOTE: Don't truncate cache keys - it causes collisions!
85
- //
86
- // The bug: Truncating prompt (1000 chars) and gameState (500 chars) means:
87
- // - Different prompts with same first 1000 chars = same cache key = wrong cache hit
88
- // - Different game states with same first 500 chars = same cache key = wrong cache hit
102
+ // Content-addressed: hash image bytes, not the file path.
103
+ // This ensures cache invalidation when a screenshot is regenerated
104
+ // to the same path (e.g. /tmp/vlm_magic.png).
89
105
  //
90
- // The fix: Hash the FULL content, don't truncate
91
- // SHA-256 handles arbitrary length, so there's no reason to truncate
92
- //
93
- // Why truncation existed: Probably to keep keys "manageable", but it's dangerous
94
- // Better approach: Hash full content, collisions are cryptographically unlikely
106
+ // For multi-image keys, imagePath may be a pipe-delimited string
107
+ // like "path1|path2" from judge.mjs.
108
+ const pathStr = imagePath || '';
109
+ const paths = pathStr.includes('|') ? pathStr.split('|') : [pathStr];
110
+ const imageHashes = paths.map(p => {
111
+ try {
112
+ const bytes = readFileSync(p);
113
+ return createHash('sha256').update(bytes).digest('hex');
114
+ } catch (error) {
115
+ // File unreadable (deleted, permissions) -- fall back to path hash
116
+ // so the key is still deterministic for error cases.
117
+ warn(`Cannot read image for cache key, falling back to path hash: ${p}`);
118
+ return createHash('sha256').update(p).digest('hex');
119
+ }
120
+ });
121
+ const imageDigest = imageHashes.length === 1
122
+ ? imageHashes[0]
123
+ : createHash('sha256').update(imageHashes.join(':')).digest('hex');
124
+
125
+ // Build key data with deterministic structure
95
126
  const keyData = {
96
- imagePath,
127
+ type: 'vision', // Distinguish from text-only calls
128
+ imageDigest, // SHA-256 of image bytes (content-addressed)
97
129
  prompt, // Full prompt, not truncated
98
130
  testType: context.testType || '',
99
131
  frame: context.frame || '',
100
132
  score: context.score || '',
101
- viewport: context.viewport ? JSON.stringify(context.viewport) : '',
102
- gameState: context.gameState ? JSON.stringify(context.gameState) : '' // Full game state, not truncated
133
+ // Use deterministic stringify for nested objects to ensure consistent keys
134
+ viewport: context.viewport ? deterministicStringify(context.viewport) : '',
135
+ gameState: context.gameState ? deterministicStringify(context.gameState) : '' // Full game state, not truncated
103
136
  };
104
137
 
105
- const keyString = JSON.stringify(keyData);
138
+ // Use deterministic stringify to ensure consistent key generation
139
+ // even if object property order varies
140
+ const keyString = deterministicStringify(keyData);
141
+ return createHash('sha256').update(keyString).digest('hex');
142
+ }
143
+
144
+ /**
145
+ * Generate cache key for text-only LLM calls
146
+ *
147
+ * @param {string} prompt - Text prompt
148
+ * @param {string} provider - LLM provider (e.g., 'gemini', 'openai', 'claude')
149
+ * @param {{
150
+ * model?: string | null;
151
+ * temperature?: number;
152
+ * maxTokens?: number;
153
+ * tier?: string;
154
+ * }} [options={}] - LLM call options
155
+ * @returns {string} SHA-256 hash of cache key
156
+ */
157
+ export function generateTextLLMCacheKey(prompt, provider, options = {}) {
158
+ const {
159
+ model = null,
160
+ temperature = 0.1,
161
+ maxTokens = 1000,
162
+ tier = null
163
+ } = options;
164
+
165
+ const keyData = {
166
+ type: 'text', // Distinguish from vision calls
167
+ prompt, // Full prompt, not truncated
168
+ provider,
169
+ model,
170
+ temperature,
171
+ maxTokens,
172
+ tier
173
+ };
174
+
175
+ // Use deterministic stringify for consistent cache keys
176
+ const keyString = deterministicStringify(keyData);
106
177
  return createHash('sha256').update(keyString).digest('hex');
107
178
  }
108
179
 
@@ -208,6 +279,33 @@ async function saveCache(cache) {
208
279
 
209
280
  // Apply size limits (LRU eviction: keep most recently accessed)
210
281
  const entriesToKeep = entries.slice(-MAX_CACHE_SIZE);
282
+ const evictedCount = entries.length - entriesToKeep.length;
283
+
284
+ // Log cache eviction (weighted: evictions are important for cache health)
285
+ if (evictedCount > 0) {
286
+ // Use dynamic import with proper error handling to prevent unhandled promise rejections
287
+ import('./utils/performance-logger.mjs')
288
+ .then(({ logCacheOperation }) => {
289
+ logCacheOperation({
290
+ operation: 'evict',
291
+ cacheSize: entriesToKeep.length,
292
+ maxSize: MAX_CACHE_SIZE,
293
+ reason: `LRU eviction: ${evictedCount} entries removed`
294
+ });
295
+ })
296
+ .catch(async (importError) => {
297
+ // Log to logger if performance logger unavailable (better than silent failure)
298
+ if (process.env.DEBUG_CACHE) {
299
+ try {
300
+ const { warn } = await import('./logger.mjs');
301
+ warn(`[Cache] Performance logger unavailable: ${importError.message}`);
302
+ } catch {
303
+ // Fallback to console if logger also unavailable
304
+ console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
305
+ }
306
+ }
307
+ });
308
+ }
211
309
 
212
310
  for (const { key, value, timestamp } of entriesToKeep) {
213
311
  const entry = {
@@ -346,6 +444,32 @@ export function getCached(imagePath, prompt, context = {}) {
346
444
  const age = Date.now() - originalTimestamp;
347
445
  if (age > MAX_CACHE_AGE) {
348
446
  cache.delete(key); // Remove expired entry
447
+
448
+ // Log cache expiration (weighted: expirations are important for cache health)
449
+ // Use dynamic import with proper error handling to prevent unhandled promise rejections
450
+ import('./utils/performance-logger.mjs')
451
+ .then(({ logCacheOperation }) => {
452
+ const currentCache = getCache();
453
+ logCacheOperation({
454
+ operation: 'expire',
455
+ cacheSize: currentCache.size,
456
+ maxSize: MAX_CACHE_SIZE,
457
+ reason: `Entry expired (age: ${Math.floor(age / (1000 * 60 * 60 * 24))} days)`
458
+ });
459
+ })
460
+ .catch(async (importError) => {
461
+ // Log to logger if performance logger unavailable (better than silent failure)
462
+ if (process.env.DEBUG_CACHE) {
463
+ try {
464
+ const { warn } = await import('./logger.mjs');
465
+ warn(`[Cache] Performance logger unavailable: ${importError.message}`);
466
+ } catch {
467
+ // Fallback to console if logger also unavailable
468
+ console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
469
+ }
470
+ }
471
+ });
472
+
349
473
  return null;
350
474
  }
351
475
  }
@@ -404,6 +528,107 @@ export function clearCache() {
404
528
  });
405
529
  }
406
530
 
531
+ /**
532
+ * Get cached text-only LLM response
533
+ *
534
+ * @param {string} prompt - Text prompt
535
+ * @param {string} provider - LLM provider
536
+ * @param {{
537
+ * model?: string | null;
538
+ * temperature?: number;
539
+ * maxTokens?: number;
540
+ * tier?: string;
541
+ * }} [options={}] - LLM call options
542
+ * @returns {string | null} Cached response or null if not found
543
+ */
544
+ export function getCachedTextLLM(prompt, provider, options = {}) {
545
+ const cache = getCache();
546
+ const key = generateTextLLMCacheKey(prompt, provider, options);
547
+ const cached = cache.get(key);
548
+
549
+ if (cached) {
550
+ // Update access time for LRU eviction
551
+ cached._lastAccessed = Date.now();
552
+
553
+ // Check expiration based on original timestamp
554
+ const originalTimestamp = cached._originalTimestamp || cached._lastAccessed;
555
+ const age = Date.now() - originalTimestamp;
556
+ if (age > MAX_CACHE_AGE) {
557
+ cache.delete(key); // Remove expired entry
558
+
559
+ // Log cache expiration
560
+ // Use dynamic import with proper error handling to prevent unhandled promise rejections
561
+ import('./utils/performance-logger.mjs')
562
+ .then(({ logCacheOperation }) => {
563
+ const currentCache = getCache();
564
+ logCacheOperation({
565
+ operation: 'expire',
566
+ cacheSize: currentCache.size,
567
+ maxSize: MAX_CACHE_SIZE,
568
+ reason: `Text LLM entry expired (age: ${Math.floor(age / (1000 * 60 * 60 * 24))} days)`
569
+ });
570
+ })
571
+ .catch(async (importError) => {
572
+ // Log to logger if performance logger unavailable (better than silent failure)
573
+ if (process.env.DEBUG_CACHE) {
574
+ try {
575
+ const { warn } = await import('./logger.mjs');
576
+ warn(`[Cache] Performance logger unavailable: ${importError.message}`);
577
+ } catch {
578
+ // Fallback to console if logger also unavailable
579
+ console.warn(`[Cache] Performance logger unavailable: ${importError.message}`);
580
+ }
581
+ }
582
+ });
583
+
584
+ return null;
585
+ }
586
+
587
+ // Return the cached response (stored as 'response' field for text-only calls)
588
+ return cached.response || null;
589
+ }
590
+
591
+ return null;
592
+ }
593
+
594
+ /**
595
+ * Set cached text-only LLM response
596
+ *
597
+ * @param {string} prompt - Text prompt
598
+ * @param {string} provider - LLM provider
599
+ * @param {{
600
+ * model?: string | null;
601
+ * temperature?: number;
602
+ * maxTokens?: number;
603
+ * tier?: string;
604
+ * }} [options={}] - LLM call options
605
+ * @param {string} response - LLM response to cache
606
+ * @returns {void}
607
+ */
608
+ export function setCachedTextLLM(prompt, provider, options, response) {
609
+ const cache = getCache();
610
+ const key = generateTextLLMCacheKey(prompt, provider, options);
611
+ const now = Date.now();
612
+
613
+ // Check if this is a new entry or updating existing
614
+ const existing = cache.get(key);
615
+ const originalTimestamp = existing?._originalTimestamp || now;
616
+
617
+ // Store response with metadata for cache management
618
+ const resultWithMetadata = {
619
+ response, // Store the text response
620
+ _lastAccessed: now,
621
+ _originalTimestamp: originalTimestamp
622
+ };
623
+
624
+ cache.set(key, resultWithMetadata);
625
+
626
+ // Save cache (async, fire-and-forget)
627
+ saveCache(cache).catch(error => {
628
+ warn(`[VLLM Cache] Failed to save text LLM cache (non-blocking): ${error.message}`);
629
+ });
630
+ }
631
+
407
632
  /**
408
633
  * Get cache statistics
409
634
  *
package/src/config.mjs CHANGED
@@ -8,97 +8,11 @@
8
8
  import { ConfigError } from './errors.mjs';
9
9
  import { loadEnv } from './load-env.mjs';
10
10
  import { API_CONSTANTS } from './constants.mjs';
11
+ import { MODEL_TIERS, PROVIDER_CONFIGS } from './provider-data.mjs';
11
12
 
12
- // Load .env file automatically on module load
13
+ // Load .env file on module load
13
14
  loadEnv();
14
15
 
15
- /**
16
- * Model tiers for each provider
17
- * Updated January 2025: Latest models - Gemini 2.5 Pro, GPT-5, Claude 4.5 Sonnet
18
- *
19
- * GROQ INTEGRATION (2025):
20
- * - Groq added for high-frequency decisions (10-60Hz temporal decisions)
21
- * - ~0.22s latency (vs 1-3s for other providers)
22
- * - 185-276 tokens/sec throughput
23
- * - OpenAI-compatible API
24
- * - Cost-competitive, free tier available
25
- * - Best for: Fast tier decisions, high-Hz temporal decisions, real-time applications
26
- */
27
- const MODEL_TIERS = {
28
- gemini: {
29
- fast: 'gemini-2.0-flash-exp', // Fast, outperforms 1.5 Pro (2x speed)
30
- balanced: 'gemini-2.5-pro', // Best balance (2025 leader, released June 2025)
31
- best: 'gemini-2.5-pro' // Best quality (top vision-language model, 1M+ context)
32
- },
33
- openai: {
34
- fast: 'gpt-4o-mini', // Fast, cheaper
35
- balanced: 'gpt-5', // Best balance (released August 2025, unified reasoning)
36
- best: 'gpt-5' // Best quality (state-of-the-art multimodal, August 2025)
37
- },
38
- claude: {
39
- fast: 'claude-3-5-haiku-20241022', // Fast, cheaper
40
- balanced: 'claude-sonnet-4-5', // Best balance (released September 2025, enhanced vision)
41
- best: 'claude-sonnet-4-5' // Best quality (latest flagship, September 2025)
42
- },
43
- groq: {
44
- // NOTE: Groq vision support requires different model
45
- // For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
46
- // For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
47
- fast: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, fastest Groq option
48
- balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
49
- best: 'meta-llama/llama-4-scout-17b-16e-instruct' // Vision-capable, best quality (preview)
50
- // WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
51
- }
52
- };
53
-
54
- /**
55
- * Default provider configurations
56
- *
57
- * GROQ INTEGRATION:
58
- * - OpenAI-compatible API (easy migration)
59
- * - ~0.22s latency (10x faster than typical providers)
60
- * - Best for high-frequency decisions (10-60Hz temporal decisions)
61
- * - Free tier available for testing
62
- */
63
- const PROVIDER_CONFIGS = {
64
- gemini: {
65
- name: 'gemini',
66
- apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
67
- model: 'gemini-2.5-pro', // Latest: Released June 2025, top vision-language model, 1M+ context
68
- freeTier: true,
69
- pricing: { input: 1.25, output: 5.00 }, // Updated pricing for 2.5 Pro
70
- priority: 1 // Higher priority = preferred
71
- },
72
- openai: {
73
- name: 'openai',
74
- apiUrl: 'https://api.openai.com/v1',
75
- model: 'gpt-5', // Latest: Released August 2025, state-of-the-art multimodal
76
- freeTier: false,
77
- pricing: { input: 5.00, output: 15.00 }, // Updated pricing for gpt-5
78
- priority: 2
79
- },
80
- claude: {
81
- name: 'claude',
82
- apiUrl: 'https://api.anthropic.com/v1',
83
- model: 'claude-sonnet-4-5', // Latest: Released September 2025, enhanced vision capabilities
84
- freeTier: false,
85
- pricing: { input: 3.00, output: 15.00 }, // Updated pricing for 4.5
86
- priority: 3
87
- },
88
- groq: {
89
- name: 'groq',
90
- apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
91
- model: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable (preview), ~0.22s latency
92
- freeTier: true, // Free tier available
93
- pricing: { input: 0.59, output: 0.79 }, // Actual 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
94
- priority: 0, // Highest priority for high-frequency decisions
95
- latency: 220, // ~0.22s latency in ms (10x faster than typical)
96
- throughput: 200, // ~200 tokens/sec average
97
- visionSupported: true // llama-4-scout-17b-16e-instruct supports vision (preview)
98
- // Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
99
- }
100
- };
101
-
102
16
  /**
103
17
  * Create configuration from environment or options
104
18
  *
@@ -111,12 +25,13 @@ export function createConfig(options = {}) {
111
25
  apiKey = null,
112
26
  env = process.env,
113
27
  cacheDir = null,
114
- cacheEnabled = true,
28
+ cacheEnabled = process.env.DISABLE_LLM_CACHE !== 'true',
115
29
  maxConcurrency = API_CONSTANTS.DEFAULT_MAX_CONCURRENCY,
116
30
  timeout = API_CONSTANTS.DEFAULT_TIMEOUT_MS,
117
31
  verbose = false,
118
32
  modelTier = null, // 'fast', 'balanced', 'best', or null for default
119
- model = null // Explicit model override
33
+ model = null, // Explicit model override
34
+ anchors = null // Domain visual anchors: { domain?, positive?: string[], negative?: string[] }
120
35
  } = options;
121
36
 
122
37
  // Auto-detect provider if not specified
@@ -154,11 +69,39 @@ export function createConfig(options = {}) {
154
69
  providerConfig.model = env.VLM_MODEL;
155
70
  }
156
71
 
72
+ // Normalize anchors: ensure arrays, filter empty/invalid entries.
73
+ // Each entry can be a plain string or { text?, image?, label?, dimension? }.
74
+ let normalizedAnchors = null;
75
+ if (anchors && typeof anchors === 'object') {
76
+ const normalizeEntries = (arr) => {
77
+ if (!Array.isArray(arr)) return [];
78
+ return arr.filter(entry => {
79
+ if (typeof entry === 'string') return entry.trim().length > 0;
80
+ if (entry && typeof entry === 'object') {
81
+ return (entry.text && typeof entry.text === 'string' && entry.text.trim()) ||
82
+ (entry.image && typeof entry.image === 'string' && entry.image.trim());
83
+ }
84
+ return false;
85
+ });
86
+ };
87
+ const pos = normalizeEntries(anchors.positive);
88
+ const neg = normalizeEntries(anchors.negative);
89
+ const hasDomain = anchors.domain && typeof anchors.domain === 'string' && anchors.domain.trim();
90
+
91
+ if (pos.length > 0 || neg.length > 0 || hasDomain) {
92
+ normalizedAnchors = {};
93
+ if (hasDomain) normalizedAnchors.domain = anchors.domain.trim();
94
+ if (pos.length > 0) normalizedAnchors.positive = pos;
95
+ if (neg.length > 0) normalizedAnchors.negative = neg;
96
+ }
97
+ }
98
+
157
99
  return {
158
100
  provider: selectedProvider,
159
101
  apiKey: selectedApiKey,
160
102
  providerConfig,
161
103
  enabled: !!selectedApiKey,
104
+ anchors: normalizedAnchors,
162
105
  cache: {
163
106
  enabled: cacheEnabled,
164
107
  dir: cacheDir
@@ -265,4 +208,3 @@ export function getProvider(providerName = null) {
265
208
  const provider = providerName || config.provider;
266
209
  return PROVIDER_CONFIGS[provider] || PROVIDER_CONFIGS.gemini;
267
210
  }
268
-
package/src/constants.mjs CHANGED
@@ -78,3 +78,57 @@ export const UNCERTAINTY_CONSTANTS = {
78
78
  EDGE_CASE_SELF_CONSISTENCY_N: 3
79
79
  };
80
80
 
81
+ /**
82
+ * API Endpoint Configuration (for serverless functions)
83
+ */
84
+ export const API_ENDPOINT_CONSTANTS = {
85
+ /** Maximum image size in bytes (10MB) */
86
+ MAX_IMAGE_SIZE: 10 * 1024 * 1024,
87
+
88
+ /** Maximum prompt length in characters */
89
+ MAX_PROMPT_LENGTH: 5000,
90
+
91
+ /** Maximum context size in bytes */
92
+ MAX_CONTEXT_SIZE: 10000,
93
+
94
+ /** Default rate limit window in milliseconds (1 minute) */
95
+ RATE_LIMIT_WINDOW_MS: 60 * 1000,
96
+
97
+ /** Default maximum requests per window */
98
+ RATE_LIMIT_MAX_REQUESTS: 10
99
+ };
100
+
101
+ /**
102
+ * Retry Configuration
103
+ */
104
+ export const RETRY_CONSTANTS = {
105
+ /** Default base delay for exponential backoff in milliseconds (1 second) */
106
+ DEFAULT_BASE_DELAY_MS: 1000,
107
+
108
+ /** Default maximum delay for exponential backoff in milliseconds (30 seconds) */
109
+ DEFAULT_MAX_DELAY_MS: 30000,
110
+
111
+ /** Default maximum number of retries */
112
+ DEFAULT_MAX_RETRIES: 3,
113
+
114
+ /** Jitter amount as percentage of delay (±25%) */
115
+ JITTER_PERCENTAGE: 0.25
116
+ };
117
+
118
+ /**
119
+ * Validation Configuration
120
+ */
121
+ export const VALIDATION_CONSTANTS = {
122
+ /** Maximum prompt length for validation (10k characters) */
123
+ MAX_PROMPT_LENGTH: 10000,
124
+
125
+ /** Maximum context size in bytes */
126
+ MAX_CONTEXT_SIZE: 50000,
127
+
128
+ /** Minimum timeout in milliseconds */
129
+ MIN_TIMEOUT_MS: 1000,
130
+
131
+ /** Maximum timeout in milliseconds (5 minutes) */
132
+ MAX_TIMEOUT_MS: 300000
133
+ };
134
+