@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +127 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +902 -5
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +699 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +35 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Provider Data & Pricing Configuration
3
+ *
4
+ * Central source of truth for provider configuration, models, and pricing.
5
+ * Extracted from config.mjs to facilitate updates and prevent drift.
6
+ *
7
+ * ⚠️ IMPORTANT: Model names should be verified against current provider documentation.
8
+ * Some models may be preview-only, deprecated, or have different names in production.
9
+ */
10
+
11
+ /**
12
+ * Model tiers for each provider
13
+ *
14
+ * Models can be overridden via environment variables:
15
+ * - VLM_MODEL_TIER: 'fast' | 'balanced' | 'best'
16
+ * - VLM_MODEL: explicit model name override
17
+ *
18
+ * GROQ INTEGRATION:
19
+ * - Groq added for high-frequency decisions (10-60Hz temporal decisions)
20
+ * - ~0.22s latency (vs 1-3s for other providers)
21
+ * - 185-276 tokens/sec throughput
22
+ * - OpenAI-compatible API
23
+ * - Cost-competitive, free tier available
24
+ * - Useful for: Fast tier decisions, high-Hz temporal decisions, real-time applications
25
+ */
26
+ export const MODEL_TIERS = {
27
+ gemini: {
28
+ fast: 'gemini-2.5-flash', // Fast, cost-effective (stable)
29
+ balanced: 'gemini-2.5-flash', // Good balance (using Flash as default balanced too)
30
+ best: 'gemini-3-pro-preview' // High quality (preview)
31
+ },
32
+ openai: {
33
+ fast: 'gpt-4o-mini', // Fast, cheaper
34
+ balanced: 'gpt-4o', // Balanced (current production)
35
+ best: 'gpt-5' // High quality (late 2025, latest production)
36
+ },
37
+ claude: {
38
+ fast: 'claude-haiku-4-5', // Fast, cheaper (Haiku 4.5, Oct 2025)
39
+ balanced: 'claude-sonnet-4-5', // Balanced (Sept 2025)
40
+ best: 'claude-opus-4-6' // High quality (Opus 4.6, March 2026)
41
+ },
42
+ groq: {
43
+ // NOTE: Groq vision support requires different model
44
+ // For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
45
+ // For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
46
+ fast: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, fastest Groq option
47
+ balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
48
+ best: 'meta-llama/llama-4-scout-17b-16e-instruct' // Vision-capable, high quality (preview)
49
+ // WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
50
+ },
51
+ openrouter: {
52
+ // OpenRouter provides access to multiple models via unified API
53
+ fast: 'anthropic/claude-haiku-4-5', // Fast, cheaper via OpenRouter
54
+ balanced: 'anthropic/claude-sonnet-4-5', // Balanced via OpenRouter
55
+ best: 'anthropic/claude-opus-4-6' // High quality via OpenRouter
56
+ }
57
+ };
58
+
59
+ /**
60
+ * Default provider configurations
61
+ *
62
+ * GROQ INTEGRATION:
63
+ * - OpenAI-compatible API (easy migration)
64
+ * - ~0.22s latency (10x faster than typical providers)
65
+ * - Useful for high-frequency decisions (10-60Hz temporal decisions)
66
+ * - Free tier available for testing
67
+ */
68
+ export const PROVIDER_CONFIGS = {
69
+ gemini: {
70
+ name: 'gemini',
71
+ apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
72
+ model: 'gemini-2.5-flash', // Latest stable (June 2025)
73
+ freeTier: true,
74
+ pricing: { input: 0.10, output: 0.40 }, // 2.5 Flash is cheaper
75
+ priority: 1
76
+ },
77
+ openai: {
78
+ name: 'openai',
79
+ apiUrl: 'https://api.openai.com/v1',
80
+ model: 'gpt-4o', // Current production
81
+ freeTier: false,
82
+ pricing: { input: 5.00, output: 15.00 },
83
+ priority: 2
84
+ },
85
+ claude: {
86
+ name: 'claude',
87
+ apiUrl: 'https://api.anthropic.com/v1',
88
+ model: 'claude-sonnet-4-5', // Latest flagship (Sept 2025)
89
+ freeTier: false,
90
+ pricing: { input: 3.00, output: 15.00 },
91
+ priority: 3
92
+ },
93
+ groq: {
94
+ name: 'groq',
95
+ apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
96
+ model: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable (preview), ~0.22s latency
97
+ freeTier: true, // Free tier available
98
+ pricing: { input: 0.59, output: 0.79 }, // Actual 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
99
+ priority: 0, // Highest priority for high-frequency decisions
100
+ latency: 220, // ~0.22s latency in ms (10x faster than typical)
101
+ throughput: 200, // ~200 tokens/sec average
102
+ visionSupported: true // llama-4-scout-17b-16e-instruct supports vision (preview)
103
+ // Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
104
+ },
105
+ openrouter: {
106
+ name: 'openrouter',
107
+ apiUrl: 'https://openrouter.ai/api/v1', // OpenAI-compatible endpoint
108
+ model: 'anthropic/claude-sonnet-4', // Default to Claude Sonnet via OpenRouter
109
+ freeTier: false,
110
+ pricing: { input: 3.00, output: 15.00 }, // Varies by model
111
+ priority: 2,
112
+ visionSupported: true
113
+ }
114
+ };
115
+
@@ -253,6 +253,11 @@ export function calculateOptimalFPS(changeHistory, options = {}) {
253
253
  targetChangeInterval = 100 // Target: capture every 100ms of changes
254
254
  } = options;
255
255
 
256
+ // Handle null/undefined or non-array input
257
+ if (!changeHistory || !Array.isArray(changeHistory)) {
258
+ return minFPS;
259
+ }
260
+
256
261
  if (changeHistory.length < 2) {
257
262
  return minFPS;
258
263
  }
@@ -307,12 +307,14 @@ export async function validateWithExplicitRubric(imagePath, prompt, options = {}
307
307
  // Import rubric builder
308
308
  const { buildRubricPrompt, DEFAULT_RUBRIC } = await import('./rubrics.mjs');
309
309
 
310
- // Build prompt with explicit rubric
310
+ // Build prompt: user prompt + rubric evaluation framework.
311
+ // buildRubricPrompt(rubric, includeDimensions) returns the rubric text;
312
+ // we prepend the user's prompt so both are sent to the VLM.
313
+ const rubricToUse = rubric || (useDefaultRubric ? DEFAULT_RUBRIC : null);
311
314
  let enhancedPrompt = prompt;
312
- if (useDefaultRubric && !rubric) {
313
- enhancedPrompt = buildRubricPrompt(prompt, DEFAULT_RUBRIC);
314
- } else if (rubric) {
315
- enhancedPrompt = buildRubricPrompt(prompt, rubric);
315
+ if (rubricToUse) {
316
+ const rubricText = buildRubricPrompt(rubricToUse, true);
317
+ enhancedPrompt = `${prompt}\n\n${rubricText}`;
316
318
  }
317
319
 
318
320
  // Perform validation
package/src/retry.mjs CHANGED
@@ -7,6 +7,7 @@
7
7
 
8
8
  import { ProviderError, TimeoutError } from './errors.mjs';
9
9
  import { log, warn } from './logger.mjs';
10
+ import { RETRY_CONSTANTS } from './constants.mjs';
10
11
 
11
12
  /**
12
13
  * Check if an error is retryable
@@ -50,12 +51,12 @@ export function isRetryableError(error) {
50
51
  * @param {boolean} jitter - Add random jitter to prevent thundering herd
51
52
  * @returns {number} Delay in milliseconds
52
53
  */
53
- export function calculateBackoff(attempt, baseDelay = 1000, maxDelay = 30000, jitter = true) {
54
+ export function calculateBackoff(attempt, baseDelay = RETRY_CONSTANTS.DEFAULT_BASE_DELAY_MS, maxDelay = RETRY_CONSTANTS.DEFAULT_MAX_DELAY_MS, jitter = true) {
54
55
  const exponentialDelay = Math.min(baseDelay * Math.pow(2, attempt), maxDelay);
55
56
 
56
57
  if (jitter) {
57
- // Add ±25% random jitter
58
- const jitterAmount = exponentialDelay * 0.25;
58
+ // Add random jitter to prevent thundering herd
59
+ const jitterAmount = exponentialDelay * RETRY_CONSTANTS.JITTER_PERCENTAGE;
59
60
  const jitterValue = (Math.random() * 2 - 1) * jitterAmount;
60
61
  return Math.max(0, exponentialDelay + jitterValue);
61
62
  }
@@ -80,9 +81,9 @@ export function calculateBackoff(attempt, baseDelay = 1000, maxDelay = 30000, ji
80
81
  */
81
82
  export async function retryWithBackoff(fn, options = {}) {
82
83
  const {
83
- maxRetries = 3,
84
- baseDelay = 1000,
85
- maxDelay = 30000,
84
+ maxRetries = RETRY_CONSTANTS.DEFAULT_MAX_RETRIES,
85
+ baseDelay = RETRY_CONSTANTS.DEFAULT_BASE_DELAY_MS,
86
+ maxDelay = RETRY_CONSTANTS.DEFAULT_MAX_DELAY_MS,
86
87
  onRetry = null,
87
88
  retryable = isRetryableError
88
89
  } = options;
@@ -117,7 +118,20 @@ export async function retryWithBackoff(fn, options = {}) {
117
118
  }
118
119
  }
119
120
 
120
- // All retries exhausted
121
+ // All retries exhausted - enhance error message with retry context
122
+ const enhancedMessage = enhanceErrorMessage(
123
+ lastError,
124
+ maxRetries + 1,
125
+ 'retryWithBackoff'
126
+ );
127
+
128
+ // Preserve original error but enhance message
129
+ if (lastError instanceof Error) {
130
+ lastError.message = enhancedMessage;
131
+ } else {
132
+ lastError = new Error(enhancedMessage);
133
+ }
134
+
121
135
  throw lastError;
122
136
  }
123
137
 
package/src/rubrics.mjs CHANGED
@@ -162,6 +162,10 @@ Provide your evaluation as JSON:
162
162
  "visual": "<visual evidence from screenshot>",
163
163
  "functional": "<functional evidence>",
164
164
  "accessibility": "<accessibility evidence>"
165
+ },
166
+ "dimensionScores": {
167
+ "<dimension_name>": <0-10 integer>,
168
+ ...for each dimension in the rubric
165
169
  }
166
170
  }`;
167
171
 
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Performance Logger Service
3
+ *
4
+ * Handles optional performance logging without fire-and-forget race conditions.
5
+ * Provides a safe, non-blocking interface for metrics.
6
+ */
7
+
8
+ let loggerImplementation = null;
9
+ let isEnabled = false;
10
+
11
+ /**
12
+ * Initialize the logger (lazy load)
13
+ */
14
+ async function initLogger() {
15
+ if (loggerImplementation) return loggerImplementation;
16
+
17
+ try {
18
+ // Only load if actually needed/configured
19
+ const { logCacheOperation, logTemporalDecision } = await import('./utils/performance-logger.mjs');
20
+ loggerImplementation = { logCacheOperation, logTemporalDecision };
21
+ isEnabled = true;
22
+ } catch (error) {
23
+ // Graceful degradation - if logger fails to load, we just don't log
24
+ // This is better than crashing or hanging
25
+ isEnabled = false;
26
+ loggerImplementation = {
27
+ logCacheOperation: () => {},
28
+ logTemporalDecision: () => {}
29
+ };
30
+ }
31
+ return loggerImplementation;
32
+ }
33
+
34
+ /**
35
+ * Log a cache operation safely
36
+ */
37
+ export function safeLogCacheOperation(data) {
38
+ // Non-blocking check
39
+ if (!isEnabled && !loggerImplementation) {
40
+ // Start lazy init without awaiting it; this first entry is logged once init resolves (or dropped if the logger fails to load)
41
+ initLogger().then(logger => logger?.logCacheOperation(data)).catch(() => {});
42
+ return;
43
+ }
44
+
45
+ if (isEnabled && loggerImplementation) {
46
+ try {
47
+ loggerImplementation.logCacheOperation(data);
48
+ } catch {
49
+ // Swallow logging errors to prevent disrupting main flow
50
+ }
51
+ }
52
+ }
53
+
54
+ /**
55
+ * Log a temporal decision safely
56
+ */
57
+ export function safeLogTemporalDecision(data) {
58
+ if (!isEnabled && !loggerImplementation) {
59
+ initLogger().then(logger => logger?.logTemporalDecision(data)).catch(() => {});
60
+ return;
61
+ }
62
+
63
+ if (isEnabled && loggerImplementation) {
64
+ try {
65
+ loggerImplementation.logTemporalDecision(data);
66
+ } catch {
67
+ // Swallow logging errors
68
+ }
69
+ }
70
+ }
71
+
@@ -0,0 +1,320 @@
1
+ /**
2
+ * Session-Level Cost Tracker
3
+ *
4
+ * Tracks costs per test run/session with detailed breakdown and transparency.
5
+ * Provides "trap debug" hooks: per-session totals (cost, API calls, tokens, cache hits/misses) that expose total ML API usage for tracking.
6
+ *
7
+ * Usage:
8
+ * ```javascript
9
+ * import { startSession, endSession, getSessionCosts } from './session-cost-tracker.mjs';
10
+ *
11
+ * const sessionId = startSession('comprehensive-evaluation');
12
+ * // ... run tests ...
13
+ * const summary = endSession(sessionId);
14
+ * console.log(`Total cost: $${summary.costs.total.toFixed(4)}`);
15
+ * ```
16
+ */
17
+
18
+ import { getCostTracker, recordCost } from './cost-tracker.mjs';
19
+ import { getCacheStats } from './cache.mjs';
20
+ import { log, warn } from './logger.mjs';
21
+ import { writeFileSync, mkdirSync, existsSync } from 'fs';
22
+ import { join } from 'path';
23
+
24
+ /**
25
+ * Active sessions
26
+ */
27
+ const activeSessions = new Map();
28
+
29
+ /**
30
+ * Session cost data structure
31
+ */
32
+ class SessionCostData {
33
+ constructor(sessionId, name) {
34
+ this.sessionId = sessionId;
35
+ this.name = name;
36
+ this.startTime = Date.now();
37
+ this.endTime = null;
38
+ this.costs = {
39
+ total: 0,
40
+ byProvider: {},
41
+ byTest: {},
42
+ apiCalls: 0,
43
+ cacheHits: 0,
44
+ cacheMisses: 0,
45
+ tokens: {
46
+ input: 0,
47
+ output: 0,
48
+ total: 0
49
+ }
50
+ };
51
+ this.entries = [];
52
+ }
53
+
54
+ recordCostEntry(entry) {
55
+ this.costs.total += entry.cost || 0;
56
+ this.costs.apiCalls += 1;
57
+
58
+ // Track by provider
59
+ if (!this.costs.byProvider[entry.provider]) {
60
+ this.costs.byProvider[entry.provider] = { total: 0, calls: 0, tokens: { input: 0, output: 0 } };
61
+ }
62
+ this.costs.byProvider[entry.provider].total += entry.cost || 0;
63
+ this.costs.byProvider[entry.provider].calls += 1;
64
+ this.costs.byProvider[entry.provider].tokens.input += entry.inputTokens || 0;
65
+ this.costs.byProvider[entry.provider].tokens.output += entry.outputTokens || 0;
66
+
67
+ // Track by test
68
+ const testName = entry.testName || 'unknown';
69
+ if (!this.costs.byTest[testName]) {
70
+ this.costs.byTest[testName] = { total: 0, calls: 0 };
71
+ }
72
+ this.costs.byTest[testName].total += entry.cost || 0;
73
+ this.costs.byTest[testName].calls += 1;
74
+
75
+ // Track tokens
76
+ this.costs.tokens.input += entry.inputTokens || 0;
77
+ this.costs.tokens.output += entry.outputTokens || 0;
78
+ this.costs.tokens.total = this.costs.tokens.input + this.costs.tokens.output;
79
+
80
+ // Store entry
81
+ this.entries.push({
82
+ ...entry,
83
+ timestamp: entry.timestamp || Date.now()
84
+ });
85
+ }
86
+
87
+ recordCacheHit() {
88
+ this.costs.cacheHits += 1;
89
+ }
90
+
91
+ recordCacheMiss() {
92
+ this.costs.cacheMisses += 1;
93
+ }
94
+
95
+ getSummary() {
96
+ const duration = (this.endTime || Date.now()) - this.startTime;
97
+ const cacheHitRate = this.costs.cacheHits + this.costs.cacheMisses > 0
98
+ ? (this.costs.cacheHits / (this.costs.cacheHits + this.costs.cacheMisses) * 100).toFixed(1)
99
+ : 0;
100
+
101
+ return {
102
+ sessionId: this.sessionId,
103
+ name: this.name,
104
+ duration: duration,
105
+ durationSeconds: (duration / 1000).toFixed(2),
106
+ costs: {
107
+ ...this.costs,
108
+ cacheHitRate: `${cacheHitRate}%`,
109
+ averageCostPerCall: this.costs.apiCalls > 0
110
+ ? (this.costs.total / this.costs.apiCalls).toFixed(6)
111
+ : 0,
112
+ costPerSecond: duration > 0
113
+ ? ((this.costs.total / duration) * 1000).toFixed(6)
114
+ : 0
115
+ },
116
+ startTime: new Date(this.startTime).toISOString(),
117
+ endTime: this.endTime ? new Date(this.endTime).toISOString() : null
118
+ };
119
+ }
120
+ }
121
+
122
+ /**
123
+ * Start a new cost tracking session
124
+ *
125
+ * @param {string} name - Session name (e.g., 'comprehensive-evaluation')
126
+ * @param {object} [options] - Session options
127
+ * @returns {string} Session ID
128
+ */
129
+ export function startSession(name, options = {}) {
130
+ const sessionId = `session-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
131
+ const session = new SessionCostData(sessionId, name);
132
+ activeSessions.set(sessionId, session);
133
+
134
+ if (options.verbose !== false) {
135
+ log(`[CostTracker] Started session: ${name} (${sessionId})`);
136
+ }
137
+
138
+ return sessionId;
139
+ }
140
+
141
+ /**
142
+ * End a cost tracking session
143
+ *
144
+ * @param {string} sessionId - Session ID
145
+ * @param {object} [options] - Options
146
+ * @returns {object|null} Session summary, or null if no active session has this ID
147
+ */
148
+ export function endSession(sessionId, options = {}) {
149
+ const session = activeSessions.get(sessionId);
150
+ if (!session) {
151
+ warn(`[CostTracker] Session not found: ${sessionId}`);
152
+ return null;
153
+ }
154
+
155
+ session.endTime = Date.now();
156
+ const summary = session.getSummary();
157
+
158
+ // Get cache stats
159
+ try {
160
+ const cacheStats = getCacheStats();
161
+ summary.cacheStats = cacheStats;
162
+ } catch (error) {
163
+ // Silently fail if cache stats unavailable
164
+ }
165
+
166
+ // Save session report
167
+ if (options.saveReport !== false) {
168
+ const reportsDir = join(process.cwd(), 'evaluation', 'results', 'cost-reports');
169
+ if (!existsSync(reportsDir)) {
170
+ mkdirSync(reportsDir, { recursive: true });
171
+ }
172
+
173
+ const reportFile = join(reportsDir, `cost-report-${sessionId}-${Date.now()}.json`);
174
+ writeFileSync(reportFile, JSON.stringify({
175
+ summary,
176
+ entries: session.entries,
177
+ timestamp: new Date().toISOString()
178
+ }, null, 2));
179
+
180
+ if (options.verbose !== false) {
181
+ log(`[CostTracker] Session report saved: ${reportFile}`);
182
+ }
183
+ }
184
+
185
+ // Print summary if verbose
186
+ if (options.verbose !== false) {
187
+ printSessionSummary(summary);
188
+ }
189
+
190
+ activeSessions.delete(sessionId);
191
+ return summary;
192
+ }
193
+
194
+ /**
195
+ * Record a cost entry in the given session (if it is active) and always in the global cost tracker
196
+ *
197
+ * @param {string} sessionId - Session ID
198
+ * @param {object} costData - Cost data
199
+ */
200
+ export function recordSessionCost(sessionId, costData) {
201
+ const session = activeSessions.get(sessionId);
202
+ if (session) {
203
+ session.recordCostEntry(costData);
204
+ // Also record in global cost tracker
205
+ recordCost(costData);
206
+ } else {
207
+ // No active session, just record globally
208
+ recordCost(costData);
209
+ }
210
+ }
211
+
212
+ /**
213
+ * Record cache hit for current session
214
+ *
215
+ * @param {string} sessionId - Session ID
216
+ */
217
+ export function recordSessionCacheHit(sessionId) {
218
+ const session = activeSessions.get(sessionId);
219
+ if (session) {
220
+ session.recordCacheHit();
221
+ }
222
+ }
223
+
224
+ /**
225
+ * Record cache miss for current session
226
+ *
227
+ * @param {string} sessionId - Session ID
228
+ */
229
+ export function recordSessionCacheMiss(sessionId) {
230
+ const session = activeSessions.get(sessionId);
231
+ if (session) {
232
+ session.recordCacheMiss();
233
+ }
234
+ }
235
+
236
+ /**
237
+ * Get current session costs
238
+ *
239
+ * @param {string} sessionId - Session ID
240
+ * @returns {object|null} Current session summary, or null if no active session has this ID
241
+ */
242
+ export function getSessionCosts(sessionId) {
243
+ const session = activeSessions.get(sessionId);
244
+ if (!session) {
245
+ return null;
246
+ }
247
+ return session.getSummary();
248
+ }
249
+
250
+ /**
251
+ * Print session summary
252
+ */
253
+ function printSessionSummary(summary) {
254
+ console.log('\n' + '='.repeat(70));
255
+ console.log(`💰 Cost Report: ${summary.name}`);
256
+ console.log('='.repeat(70));
257
+ console.log(`Session ID: ${summary.sessionId}`);
258
+ console.log(`Duration: ${summary.durationSeconds}s`);
259
+ console.log(`\n📊 API Usage:`);
260
+ console.log(` Total Cost: $${summary.costs.total.toFixed(4)}`);
261
+ console.log(` API Calls: ${summary.costs.apiCalls}`);
262
+ console.log(` Average per Call: $${summary.costs.averageCostPerCall}`);
263
+ console.log(` Cost per Second: $${summary.costs.costPerSecond}/s`);
264
+
265
+ console.log(`\n💾 Cache Performance:`);
266
+ console.log(` Cache Hits: ${summary.costs.cacheHits}`);
267
+ console.log(` Cache Misses: ${summary.costs.cacheMisses}`);
268
+ console.log(` Hit Rate: ${summary.costs.cacheHitRate}`);
269
+ const cacheSavings = summary.costs.cacheHits * parseFloat(summary.costs.averageCostPerCall);
270
+ if (cacheSavings > 0) {
271
+ console.log(` Estimated Savings: $${cacheSavings.toFixed(4)} (from cache hits)`);
272
+ }
273
+
274
+ console.log(`\n🔢 Token Usage:`);
275
+ console.log(` Input Tokens: ${summary.costs.tokens.input.toLocaleString()}`);
276
+ console.log(` Output Tokens: ${summary.costs.tokens.output.toLocaleString()}`);
277
+ console.log(` Total Tokens: ${summary.costs.tokens.total.toLocaleString()}`);
278
+
279
+ if (Object.keys(summary.costs.byProvider).length > 0) {
280
+ console.log(`\n📦 By Provider:`);
281
+ for (const [provider, data] of Object.entries(summary.costs.byProvider)) {
282
+ console.log(` ${provider}:`);
283
+ console.log(` Cost: $${data.total.toFixed(4)}`);
284
+ console.log(` Calls: ${data.calls}`);
285
+ console.log(` Tokens: ${data.tokens.input.toLocaleString()} in, ${data.tokens.output.toLocaleString()} out`);
286
+ }
287
+ }
288
+
289
+ if (Object.keys(summary.costs.byTest).length > 0) {
290
+ console.log(`\n🧪 By Test (Top 10):`);
291
+ const sortedTests = Object.entries(summary.costs.byTest)
292
+ .sort((a, b) => b[1].total - a[1].total)
293
+ .slice(0, 10);
294
+ for (const [testName, data] of sortedTests) {
295
+ console.log(` ${testName}: $${data.total.toFixed(4)} (${data.calls} calls)`);
296
+ }
297
+ }
298
+
299
+ console.log('='.repeat(70) + '\n');
300
+ }
301
+
302
+ /**
303
+ * Get all active sessions
304
+ *
305
+ * @returns {Array} Active session IDs
306
+ */
307
+ export function getActiveSessions() {
308
+ return Array.from(activeSessions.keys());
309
+ }
310
+
311
+ /**
312
+ * Get global cost stats (across all sessions)
313
+ *
314
+ * @returns {object} Global cost statistics
315
+ */
316
+ export function getGlobalCostStats() {
317
+ const tracker = getCostTracker();
318
+ return tracker.getStats();
319
+ }
320
+
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Smart Validator Selector
3
3
  *
4
- * Automatically selects the best validator type based on available context.
4
+ * Selects validator type based on available context.
5
5
  * Guides users to the right tool for the job.
6
6
  *
7
7
  * Design Philosophy:
@@ -36,10 +36,10 @@ import { log, warn } from './logger.mjs';
36
36
  /**
37
37
  * Smart accessibility validation
38
38
  *
39
- * Automatically chooses the best validator based on available context:
39
+ * Chooses validator based on available context:
40
40
  * - Has page access → uses programmatic (fast, deterministic)
41
41
  * - Only has screenshot → uses VLLM (semantic evaluation)
42
- * - Has both and needs semantic context → uses hybrid (best of both)
42
+ * - Has both and needs semantic context → uses hybrid (combines both)
43
43
  *
44
44
  * @param {Object} options - Validation options
45
45
  * @param {any} [options.page] - Playwright page object (if available)
@@ -75,7 +75,7 @@ export async function validateAccessibilitySmart(options = {}) {
75
75
 
76
76
  // Decision tree:
77
77
  // 1. Has page access → use programmatic (fast, deterministic)
78
- // 2. Has both + need semantic → use hybrid (best of both)
78
+ // 2. Has both + need semantic → use hybrid (combines both)
79
79
  // 3. Only screenshot → use VLLM (semantic evaluation)
80
80
 
81
81
  if (page && !shouldUseHybrid) {
@@ -132,9 +132,9 @@ export async function validateAccessibilitySmart(options = {}) {
132
132
  /**
133
133
  * Smart state validation
134
134
  *
135
- * Automatically chooses the best validator based on available context:
135
+ * Chooses validator based on available context:
136
136
  * - Has page access + direct state → uses programmatic (fast, deterministic)
137
- * - Has page access + screenshot + need semantic → uses hybrid (best of both)
137
+ * - Has page access + screenshot + need semantic → uses hybrid (combines both)
138
138
  * - Only screenshot → uses VLLM (extracts state from screenshot)
139
139
  *
140
140
  * @param {Object} options - Validation options
@@ -236,7 +236,7 @@ export async function validateStateSmart(options = {}) {
236
236
  /**
237
237
  * Smart element validation
238
238
  *
239
- * Validates element visibility, position, contrast, etc. using the best available method.
239
+ * Validates element visibility, position, contrast, etc. using available methods.
240
240
  *
241
241
  * @param {Object} options - Validation options
242
242
  * @param {any} options.page - Playwright page object
@@ -317,7 +317,7 @@ export async function validateElementSmart(options = {}) {
317
317
  /**
318
318
  * Smart validation with automatic tool selection
319
319
  *
320
- * This is the main entry point that automatically selects the best validator
320
+ * Main entry point that selects validator
321
321
  * based on what you're trying to validate and what context you have.
322
322
  *
323
323
  * @param {Object} options - Validation options