@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
package/src/cache.mjs ADDED
@@ -0,0 +1,433 @@
1
+ /**
2
+ * VLLM Cache
3
+ *
4
+ * Provides persistent caching for VLLM API calls to reduce costs and improve performance.
5
+ * Uses file-based storage for cache persistence across test runs.
6
+ *
7
+ * BUGS FIXED (2025-01):
8
+ * 1. Timestamp reset on save - was resetting ALL timestamps to `now`, breaking 7-day expiration
9
+ * 2. Cache key truncation - was truncating prompts/gameState, causing collisions
10
+ *
11
+ * ARCHITECTURE NOTES:
12
+ * - This is ONE of THREE cache systems in the codebase (see docs/CACHE_ARCHITECTURE_DEEP_DIVE.md)
13
+ * - File-based, persistent across runs (7-day TTL, LRU eviction)
14
+ * - Purpose: Long-term persistence of API responses across restarts
15
+ * - Why separate: Different persistence strategy (file vs memory), different lifetime (7 days vs process lifetime),
16
+ * different failure domain (disk errors don't affect in-memory batching), minimal data overlap (<5%)
17
+ * - No coordination with BatchOptimizer cache or TemporalPreprocessing cache (by design - they serve different purposes)
18
+ */
19
+
20
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'fs';
21
+ import { join, dirname, normalize, resolve } from 'path';
22
+ import { createHash } from 'crypto';
23
+ import { fileURLToPath } from 'url';
24
+ import { Mutex } from 'async-mutex';
25
+ import { CacheError, FileError } from './errors.mjs';
26
+ import { warn, log } from './logger.mjs';
27
+
28
+ const __filename = fileURLToPath(import.meta.url);
29
+ const __dirname = dirname(__filename);
30
+
31
+ import { CACHE_CONSTANTS } from './constants.mjs';
32
+
33
+ // Default cache directory (can be overridden)
34
+ let CACHE_DIR = null;
35
+ let CACHE_FILE = null;
36
+ const MAX_CACHE_AGE = CACHE_CONSTANTS.MAX_CACHE_AGE_MS;
37
+ const MAX_CACHE_SIZE = CACHE_CONSTANTS.MAX_CACHE_SIZE;
38
+ const MAX_CACHE_SIZE_BYTES = CACHE_CONSTANTS.MAX_CACHE_SIZE_BYTES;
39
+
40
+ // Cache instance
41
+ let cacheInstance = null;
42
+ // Cache write mutex to prevent race conditions (proper async mutex)
43
+ const cacheWriteMutex = new Mutex();
44
+ // VERIFIABLE: Track cache metrics to verify claims about atomic writes
45
+ // Initialize to empty object so metrics are always available (even before first save)
46
+ let cacheMetrics = { atomicWrites: 0, atomicWriteFailures: 0, tempFileCleanups: 0 };
47
+
48
/**
 * Initialize cache with directory
 *
 * @param {string | undefined} [cacheDir] - Cache directory path, or undefined for default
 * @returns {void}
 */
export function initCache(cacheDir) {
  if (cacheDir) {
    // SECURITY: resolve + normalize the caller-supplied path before use.
    // NOTE(review): resolve()/normalize() already collapse '..' segments, so
    // this includes-check mostly rejects odd directory NAMES containing '..'
    // rather than blocking traversal — confirm the intended guarantee.
    const resolvedDir = normalize(resolve(cacheDir));
    if (resolvedDir.includes('..')) {
      throw new CacheError('Invalid cache directory: path traversal detected', { cacheDir });
    }
    CACHE_DIR = resolvedDir;
  } else {
    // Default location: <repo-root>/test-results/vllm-cache, relative to this module
    CACHE_DIR = join(__dirname, '..', '..', '..', 'test-results', 'vllm-cache');
  }

  CACHE_FILE = join(CACHE_DIR, 'cache.json');

  // Make sure the directory tree exists before the first read/write
  if (!existsSync(CACHE_DIR)) {
    mkdirSync(CACHE_DIR, { recursive: true });
  }

  // Drop the in-memory singleton so the next getCache() reloads from the new location
  cacheInstance = null;
}
74
+
75
/**
 * Generate cache key from image path, prompt, and context
 *
 * Hashes the FULL prompt and game state — no truncation. (Truncating caused
 * distinct inputs sharing a prefix to collide on the same key; SHA-256
 * handles arbitrary length, so there is no reason to truncate.)
 *
 * BUG FIX: context fields now use `??` instead of `||` so falsy-but-valid
 * values (frame 0, score 0) are no longer collapsed into '' and keyed
 * identically to a missing value. Keys change for those inputs, which only
 * causes a one-time cache miss — safe.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
 * @returns {string} SHA-256 hex digest of the cache key
 */
export function generateCacheKey(imagePath, prompt, context = {}) {
  const keyData = {
    imagePath,
    prompt, // Full prompt, not truncated
    testType: context.testType ?? '',
    frame: context.frame ?? '', // ?? keeps frame 0 distinct from "no frame"
    score: context.score ?? '', // ?? keeps score 0 distinct from "no score"
    viewport: context.viewport ? JSON.stringify(context.viewport) : '',
    gameState: context.gameState ? JSON.stringify(context.gameState) : '' // Full game state, not truncated
  };

  return createHash('sha256').update(JSON.stringify(keyData)).digest('hex');
}
108
+
109
/**
 * Load cache from file
 *
 * Returns a Map of key -> result object. Entries older than MAX_CACHE_AGE
 * (measured against their ORIGINAL creation timestamp) are dropped at load.
 *
 * On-disk format: { key: { data: {...}, timestamp: number } }
 *  - `timestamp`: creation time of the entry, used for expiration
 *  - `data._lastAccessed`: last access time, used for LRU eviction
 *
 * The creation timestamp is re-attached to each loaded entry as
 * `_originalTimestamp` so later expiration checks and saves can use it.
 *
 * Any read/parse failure is logged and an empty Map is returned — a
 * missing or corrupt cache file must never crash the caller.
 */
function loadCache() {
  // No file yet (or cache never initialized): start empty
  if (!CACHE_FILE || !existsSync(CACHE_FILE)) {
    return new Map();
  }

  try {
    let cacheData;
    try {
      // NOTE: readFileSync errors are also caught here and reported as parse failures
      cacheData = JSON.parse(readFileSync(CACHE_FILE, 'utf8'));
    } catch (parseError) {
      // SECURITY: Handle malformed JSON gracefully to prevent DoS
      warn(`[VLLM Cache] Failed to parse cache file (corrupted?): ${parseError.message}`);
      // Recover by starting with empty cache
      return new Map();
    }
    const cache = new Map();
    const now = Date.now();

    // Filter out expired entries based on the ORIGINAL creation timestamp;
    // entries without a timestamp are dropped as well
    for (const [key, value] of Object.entries(cacheData)) {
      if (value.timestamp && (now - value.timestamp) < MAX_CACHE_AGE) {
        // Re-attach the creation timestamp as in-memory metadata
        const entry = {
          ...value.data,
          _originalTimestamp: value.timestamp // Preserve for expiration checks
        };
        cache.set(key, entry);
      }
    }

    return cache;
  } catch (error) {
    // Catches structural surprises from the loop above (e.g. non-object JSON)
    warn(`[VLLM Cache] Failed to load cache: ${error.message}`);
    return new Map();
  }
}
159
+
160
/**
 * Save cache to file with size limits and race condition protection.
 *
 * Serializes the in-memory Map to `{ key: { data, timestamp } }`, applying:
 *  - entry-count cap (MAX_CACHE_SIZE) via LRU: least-recently-accessed evicted
 *  - byte cap (MAX_CACHE_SIZE_BYTES), also evicting least-recently-accessed
 *  - atomic write (temp file + rename) to avoid a corrupt cache file on crash
 *
 * Two timestamps serve different purposes and are both preserved:
 *  - `timestamp` (creation): drives 7-day expiration; NEVER reset on save
 *  - `_lastAccessed`: drives LRU eviction
 *
 * Concurrency: guarded by an async mutex so only one save runs at a time.
 * Failures are logged (never thrown to callers) and temp files cleaned up.
 *
 * @param {Map<string, object>} cache - In-memory cache whose entries carry
 *   `_originalTimestamp` / `_lastAccessed` metadata
 * @returns {Promise<void>}
 */
async function saveCache(cache) {
  if (!CACHE_FILE) return; // Cache never initialized — nothing to persist

  // Serialize writers: only one save at a time, even across awaits
  const release = await cacheWriteMutex.acquire();

  try {
    const cacheData = {};
    const now = Date.now();
    let totalSize = 0;

    // Flatten the Map into sortable records, preserving creation timestamps
    const entries = Array.from(cache.entries())
      .map(([key, value]) => {
        // Preserve original creation time if present; entries without one are new
        const originalTimestamp = value._originalTimestamp || now;
        // Strip internal metadata before persisting the result payload
        const { _originalTimestamp, ...dataWithoutMetadata } = value;

        return {
          key,
          value: dataWithoutMetadata,
          timestamp: originalTimestamp, // Preserve original, don't reset!
          lastAccessed: value._lastAccessed || originalTimestamp
        };
      })
      .sort((a, b) => a.lastAccessed - b.lastAccessed); // oldest access first

    // Count cap: keep the MAX_CACHE_SIZE most recently accessed entries
    const entriesToKeep = entries.slice(-MAX_CACHE_SIZE);

    // Byte cap — BUG FIX: iterate newest-accessed first so that when the byte
    // budget runs out we drop the LEAST recently used entries. The previous
    // loop filled from the oldest side, evicting the most recent entries.
    for (let i = entriesToKeep.length - 1; i >= 0; i--) {
      const { key, value, timestamp } = entriesToKeep[i];
      const entry = {
        data: value,
        timestamp // Original timestamp preserved for expiration
      };
      const entrySize = JSON.stringify(entry).length;

      // Budget exhausted — everything older (less recently used) is evicted
      if (totalSize + entrySize > MAX_CACHE_SIZE_BYTES) {
        break;
      }

      cacheData[key] = entry;
      totalSize += entrySize;
    }

    // Mirror the persisted set back into the in-memory Map, restoring the
    // _originalTimestamp metadata used by expiration checks
    cache.clear();
    for (const [key, entry] of Object.entries(cacheData)) {
      cache.set(key, {
        ...entry.data,
        _originalTimestamp: entry.timestamp // Restore for expiration checks
      });
    }

    // ATOMIC WRITE: write to a temp file, then rename over the real file.
    // rename is atomic on most filesystems, so readers never see a torn file.
    // (No fsync: OS-buffer durability is an accepted trade-off for a cache.)
    const tempFile = CACHE_FILE + '.tmp';
    const writeStartTime = Date.now();
    let writeSucceeded = false;
    let renameSucceeded = false;

    try {
      writeFileSync(tempFile, JSON.stringify(cacheData, null, 2), 'utf8');
      writeSucceeded = true;
      renameSync(tempFile, CACHE_FILE); // Atomic operation on most filesystems
      renameSucceeded = true;
      const writeDuration = Date.now() - writeStartTime;

      // Verifiable metric (surfaced by getCacheStats)
      cacheMetrics.atomicWrites++;

      if (process.env.DEBUG_CACHE) {
        log(`[VLLM Cache] Atomic write completed in ${writeDuration}ms (${Object.keys(cacheData).length} entries)`);
      }
    } catch (writeOrRenameError) {
      // If the write landed but the rename failed, remove the orphan temp file
      if (writeSucceeded && !renameSucceeded) {
        try {
          if (existsSync(tempFile)) {
            unlinkSync(tempFile);
            cacheMetrics.tempFileCleanups++;
            if (process.env.DEBUG_CACHE) {
              log(`[VLLM Cache] Cleaned up temp file after renameSync failure`);
            }
          }
        } catch (cleanupError) {
          // Best-effort cleanup: log and continue to the outer handler
          warn(`[VLLM Cache] Failed to clean up temp file after rename failure: ${cleanupError.message}`);
        }
      }
      throw writeOrRenameError; // Let the outer catch record the failure
    }
  } catch (error) {
    cacheMetrics.atomicWriteFailures++; // Verifiable metric

    warn(`[VLLM Cache] Failed to save cache: ${error.message}`);
    // Clean up any leftover temp file from the failed attempt
    try {
      const tempFile = CACHE_FILE + '.tmp';
      if (existsSync(tempFile)) {
        unlinkSync(tempFile);
        cacheMetrics.tempFileCleanups++;
        if (process.env.DEBUG_CACHE) {
          log(`[VLLM Cache] Cleaned up temp file after failed atomic write`);
        }
      }
    } catch (cleanupError) {
      // Swallow: cleanup is best-effort and must not mask the original error
    }
  } finally {
    release(); // Always release the mutex, even on failure
  }
}
311
+
312
/**
 * Get the singleton in-memory cache Map, creating it on first use.
 * Falls back to the default cache directory when initCache() was never called.
 */
function getCache() {
  if (cacheInstance) {
    return cacheInstance;
  }
  if (!CACHE_DIR) {
    initCache(); // Lazy init with the default directory
  }
  cacheInstance = loadCache();
  return cacheInstance;
}
324
+
325
/**
 * Get cached result
 *
 * Looks the entry up by the same key scheme as setCached, removes it if it is
 * older than MAX_CACHE_AGE, and otherwise bumps its LRU access time.
 *
 * BUG FIX: expiration is now checked BEFORE `_lastAccessed` is bumped. The
 * old code set `_lastAccessed = Date.now()` first and then used it as the
 * fallback creation time, so entries missing `_originalTimestamp` computed an
 * age of 0 and could never expire.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
 * @returns {import('./index.mjs').ValidationResult | null} Cached result or null if not found
 */
export function getCached(imagePath, prompt, context = {}) {
  const cache = getCache();
  const key = generateCacheKey(imagePath, prompt, context);
  const cached = cache.get(key);

  if (!cached) {
    return null;
  }

  const now = Date.now();

  // Expiration uses the creation timestamp; fall back to the entry's PREVIOUS
  // access time (not the one we are about to set) when creation time is missing
  const originalTimestamp = cached._originalTimestamp ?? cached._lastAccessed ?? now;
  if (now - originalTimestamp > MAX_CACHE_AGE) {
    cache.delete(key); // Remove expired entry
    return null;
  }

  // Touch for LRU eviction — separate from the creation timestamp above
  cached._lastAccessed = now;
  return cached;
}
355
+
356
/**
 * Set cached result
 *
 * Stores the result under a key derived from (imagePath, prompt, context),
 * stamping LRU/expiration metadata, then persists asynchronously.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} context - Validation context
 * @param {import('./index.mjs').ValidationResult} result - Validation result to cache
 * @returns {void}
 */
export function setCached(imagePath, prompt, context, result) {
  const store = getCache();
  const key = generateCacheKey(imagePath, prompt, context);
  const now = Date.now();

  // Keep the creation timestamp of an existing entry (drives 7-day expiration);
  // a brand-new entry is created "now". _lastAccessed always moves forward (LRU).
  const creationTime = store.get(key)?._originalTimestamp || now;

  store.set(key, {
    ...result,
    _lastAccessed: now, // Update access time
    _originalTimestamp: creationTime // Preserve creation time
  });

  // Persist asynchronously (saveCache enforces size limits + LRU eviction);
  // errors are logged and never affect the in-memory cache
  saveCache(store).catch((error) => {
    warn(`[VLLM Cache] Failed to save cache (non-blocking): ${error.message}`);
  });
}
392
+
393
/**
 * Clear cache
 *
 * Empties the in-memory cache and persists the empty state to disk.
 * The save is fire-and-forget: failures are logged, never thrown.
 *
 * @returns {void}
 */
export function clearCache() {
  const store = getCache();
  store.clear();

  const onSaveError = (error) => {
    warn(`[VLLM Cache] Failed to save cache after clear (non-blocking): ${error.message}`);
  };
  // Fire-and-forget persistence of the now-empty cache
  saveCache(store).catch(onSaveError);
}
406
+
407
/**
 * Get cache statistics
 *
 * Includes atomic-write counters (maintained by saveCache) so the
 * "atomic writes prevent corruption" claim is verifiable at runtime.
 *
 * @returns {import('./index.mjs').CacheStats} Cache statistics
 */
export function getCacheStats() {
  const { atomicWrites, atomicWriteFailures, tempFileCleanups } = cacheMetrics;
  const attempted = atomicWrites + atomicWriteFailures;

  return {
    size: getCache().size,
    maxAge: MAX_CACHE_AGE,
    cacheFile: CACHE_FILE,
    atomicWrites,
    atomicWriteFailures,
    tempFileCleanups,
    // Percentage of save attempts that completed atomically (100 when none attempted)
    atomicWriteSuccessRate: attempted > 0 ? (atomicWrites / attempted) * 100 : 100
  };
}
433
+
package/src/config.mjs ADDED
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Configuration System
3
+ *
4
+ * Handles provider selection, API keys, and settings.
5
+ * Designed to be flexible and extensible.
6
+ */
7
+
8
+ import { ConfigError } from './errors.mjs';
9
+ import { loadEnv } from './load-env.mjs';
10
+ import { API_CONSTANTS } from './constants.mjs';
11
+
12
+ // Load .env file automatically on module load
13
+ loadEnv();
14
+
15
+ /**
16
+ * Model tiers for each provider
17
+ * Updated January 2025: Latest models - Gemini 2.5 Pro, GPT-5, Claude 4.5 Sonnet
18
+ *
19
+ * GROQ INTEGRATION (2025):
20
+ * - Groq added for high-frequency decisions (10-60Hz temporal decisions)
21
+ * - ~0.22s latency (vs 1-3s for other providers)
22
+ * - 185-276 tokens/sec throughput
23
+ * - OpenAI-compatible API
24
+ * - Cost-competitive, free tier available
25
+ * - Best for: Fast tier decisions, high-Hz temporal decisions, real-time applications
26
+ */
27
+ const MODEL_TIERS = {
28
+ gemini: {
29
+ fast: 'gemini-2.0-flash-exp', // Fast, outperforms 1.5 Pro (2x speed)
30
+ balanced: 'gemini-2.5-pro', // Best balance (2025 leader, released June 2025)
31
+ best: 'gemini-2.5-pro' // Best quality (top vision-language model, 1M+ context)
32
+ },
33
+ openai: {
34
+ fast: 'gpt-4o-mini', // Fast, cheaper
35
+ balanced: 'gpt-5', // Best balance (released August 2025, unified reasoning)
36
+ best: 'gpt-5' // Best quality (state-of-the-art multimodal, August 2025)
37
+ },
38
+ claude: {
39
+ fast: 'claude-3-5-haiku-20241022', // Fast, cheaper
40
+ balanced: 'claude-sonnet-4-5', // Best balance (released September 2025, enhanced vision)
41
+ best: 'claude-sonnet-4-5' // Best quality (latest flagship, September 2025)
42
+ },
43
+ groq: {
44
+ // NOTE: Groq vision support requires different model
45
+ // For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
46
+ // For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
47
+ fast: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, fastest Groq option
48
+ balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
49
+ best: 'meta-llama/llama-4-scout-17b-16e-instruct' // Vision-capable, best quality (preview)
50
+ // WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
51
+ }
52
+ };
53
+
54
/**
 * Default provider configurations.
 *
 * Fields:
 *  - apiUrl: REST base URL for the provider
 *  - model: default model (overridable via createConfig options / env vars)
 *  - freeTier: whether a free tier exists (per the author's notes)
 *  - pricing: { input, output } — presumably USD per 1M tokens (stated for
 *    groq; assumed for the others) — TODO confirm per provider
 *  - priority: auto-detect preference; LOWER number wins (see detectProvider's
 *    ascending sort), so groq (0) is preferred when its key is available
 */
const PROVIDER_CONFIGS = {
  gemini: {
    name: 'gemini',
    apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
    model: 'gemini-2.5-pro', // Latest: Released June 2025, top vision-language model, 1M+ context
    freeTier: true,
    pricing: { input: 1.25, output: 5.00 }, // Updated pricing for 2.5 Pro
    priority: 1 // NOTE: LOWER number = preferred (the old "higher = preferred" comment was wrong)
  },
  openai: {
    name: 'openai',
    apiUrl: 'https://api.openai.com/v1',
    model: 'gpt-5', // Latest: Released August 2025, state-of-the-art multimodal
    freeTier: false,
    pricing: { input: 5.00, output: 15.00 }, // Updated pricing for gpt-5
    priority: 2
  },
  claude: {
    name: 'claude',
    apiUrl: 'https://api.anthropic.com/v1',
    model: 'claude-sonnet-4-5', // Latest: Released September 2025, enhanced vision capabilities
    freeTier: false,
    pricing: { input: 3.00, output: 15.00 }, // Updated pricing for 4.5
    priority: 3
  },
  groq: {
    name: 'groq',
    apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
    model: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable (preview), ~0.22s latency
    freeTier: true, // Free tier available
    pricing: { input: 0.59, output: 0.79 }, // 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
    priority: 0, // Most preferred (lowest number) — targets high-frequency decisions
    latency: 220, // ~0.22s latency in ms
    throughput: 200, // ~200 tokens/sec average
    visionSupported: true // llama-4-scout-17b-16e-instruct supports vision (preview)
    // Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
  }
};
101
+
102
/**
 * Create configuration from environment or options
 *
 * Model resolution precedence (first match wins):
 *   1. options.model (explicit override)
 *   2. options.modelTier ('fast' | 'balanced' | 'best')
 *   3. env.VLM_MODEL_TIER (same tier names)
 *   4. env.VLM_MODEL (explicit model name)
 *   5. the provider's default model
 *
 * API-key handling: passing `apiKey: null`/`undefined` EXPLICITLY disables
 * the env lookup (the returned config has enabled=false); omitting apiKey
 * falls back to the provider-specific environment key.
 *
 * @param {import('./index.mjs').ConfigOptions} [options={}] - Configuration options
 * @returns {import('./index.mjs').Config} Configuration object
 */
export function createConfig(options = {}) {
  const {
    provider = null,
    apiKey = null,
    env = process.env,
    cacheDir = null,
    cacheEnabled = true,
    maxConcurrency = API_CONSTANTS.DEFAULT_MAX_CONCURRENCY,
    timeout = API_CONSTANTS.DEFAULT_TIMEOUT_MS,
    verbose = false,
    modelTier = null, // 'fast', 'balanced', 'best', or null for default
    model = null // Explicit model override
  } = options;

  // Auto-detect provider from the environment when not specified
  let selectedProvider = provider;
  if (!selectedProvider) {
    selectedProvider = detectProvider(env);
  }

  // Distinguish "apiKey omitted" from "apiKey explicitly null/undefined":
  // the latter deliberately bypasses the env lookup (useful for testing)
  const apiKeyExplicitlyProvided = 'apiKey' in options;
  let selectedApiKey;
  if (apiKeyExplicitlyProvided && (apiKey === null || apiKey === undefined)) {
    // Explicitly null/undefined — don't check env, use null
    selectedApiKey = null;
  } else {
    // NOTE: a falsy-but-present key (e.g. '') also falls through to env lookup
    selectedApiKey = apiKey || getApiKey(selectedProvider, env);
  }

  // Shallow-copy the provider template so callers can't mutate module state
  let providerConfig = { ...PROVIDER_CONFIGS[selectedProvider] || PROVIDER_CONFIGS.gemini };

  // Apply the model precedence documented in the JSDoc above
  if (model) {
    providerConfig.model = model;
  } else if (modelTier && MODEL_TIERS[selectedProvider] && MODEL_TIERS[selectedProvider][modelTier]) {
    // Tier-based model selection from options
    providerConfig.model = MODEL_TIERS[selectedProvider][modelTier];
  } else if (env.VLM_MODEL_TIER && MODEL_TIERS[selectedProvider] && MODEL_TIERS[selectedProvider][env.VLM_MODEL_TIER]) {
    // Tier-based model selection from the environment
    providerConfig.model = MODEL_TIERS[selectedProvider][env.VLM_MODEL_TIER];
  } else if (env.VLM_MODEL) {
    // Explicit model override from the environment
    providerConfig.model = env.VLM_MODEL;
  }

  return {
    provider: selectedProvider,
    apiKey: selectedApiKey,
    providerConfig,
    enabled: !!selectedApiKey, // disabled when no key could be resolved
    cache: {
      enabled: cacheEnabled,
      dir: cacheDir
    },
    performance: {
      maxConcurrency,
      timeout
    },
    debug: {
      verbose
    }
  };
}
175
+
176
/**
 * Detect provider from environment variables.
 *
 * Resolution order:
 *   1. explicit VLM_PROVIDER (case-insensitive) naming a known provider
 *   2. auto-detect from configured API keys, preferring the lowest
 *      `priority` number (groq = 0 is most preferred)
 *   3. default to 'gemini' when no key is configured
 */
function detectProvider(env) {
  const explicitProvider = env.VLM_PROVIDER?.trim().toLowerCase();
  if (explicitProvider && PROVIDER_CONFIGS[explicitProvider]) {
    return explicitProvider;
  }

  // A provider counts as "available" when any of these keys is set
  const hasKeyFor = (config) => {
    if (env[`${config.name.toUpperCase()}_API_KEY`]) {
      return true;
    }
    // Anthropic publishes keys as ANTHROPIC_API_KEY, not CLAUDE_API_KEY
    if (config.name === 'claude' && env.ANTHROPIC_API_KEY) {
      return true;
    }
    // Generic shared key enables every provider
    return !!env.API_KEY;
  };

  const candidates = Object.values(PROVIDER_CONFIGS)
    .filter(hasKeyFor)
    .sort((a, b) => a.priority - b.priority); // lower number = more preferred

  return candidates[0]?.name ?? 'gemini'; // Default to gemini
}
208
+
209
/**
 * Get API key for provider
 *
 * Resolution order:
 *   1. `<PROVIDER>_API_KEY` (e.g. GEMINI_API_KEY, GROQ_API_KEY)
 *   2. ANTHROPIC_API_KEY for the 'claude' provider (Anthropic's conventional name)
 *   3. Generic API_KEY
 *
 * CLEANUP: the old `provider === 'groq'` special case was unreachable dead
 * code — GROQ_API_KEY is already matched by the generic check in step 1.
 *
 * @param {string} provider - Provider name (e.g. 'gemini', 'claude', 'groq')
 * @param {Record<string, string | undefined>} env - Environment variables
 * @returns {string | null} API key, or null if none configured
 */
function getApiKey(provider, env) {
  // Provider-specific key wins (this covers GROQ_API_KEY, GEMINI_API_KEY, …)
  const providerKey = env[`${provider.toUpperCase()}_API_KEY`];
  if (providerKey) {
    return providerKey;
  }

  // Special case: Anthropic uses ANTHROPIC_API_KEY (not CLAUDE_API_KEY)
  if (provider === 'claude' && env.ANTHROPIC_API_KEY) {
    return env.ANTHROPIC_API_KEY;
  }

  // Fallback to generic API_KEY
  return env.API_KEY || null;
}
232
+
233
// Lazily-created module-level configuration singleton
let configInstance = null;

/**
 * Get current configuration (singleton)
 *
 * Builds the default configuration on first call and reuses it afterwards;
 * setConfig() replaces it.
 *
 * @returns {import('./index.mjs').Config} Current configuration
 */
export function getConfig() {
  if (!configInstance) {
    configInstance = createConfig();
  }
  return configInstance;
}

/**
 * Set configuration (useful for testing)
 *
 * @param {import('./index.mjs').Config} config - Configuration to set
 * @returns {void}
 */
export function setConfig(config) {
  configInstance = config;
}
256
+
257
/**
 * Get provider configuration
 *
 * @param {string | null} [providerName=null] - Provider name, or null to use default
 * @returns {import('./index.mjs').Config['providerConfig']} Provider configuration
 */
export function getProvider(providerName = null) {
  // getConfig() is called unconditionally so first-use initialization still
  // happens even when an explicit providerName is passed
  const config = getConfig();
  const name = providerName || config.provider;
  return PROVIDER_CONFIGS[name] || PROVIDER_CONFIGS.gemini;
}