@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +102 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +862 -3
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +697 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +27 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -0,0 +1,316 @@
1
+ /**
2
+ * Performance Logger
3
+ *
4
+ * Provides structured logging for critical performance metrics:
5
+ * - API call performance (latency, retries, errors, costs)
6
+ * - Cache effectiveness (hit rates, eviction patterns)
7
+ * - Temporal decision reasoning (why prompts triggered/skipped)
8
+ * - Batch optimizer metrics (queue depth, timeouts, rejections)
9
+ * - Error patterns (frequency, types, recovery)
10
+ *
11
+ * Weighted logging: More detail for critical paths (API calls, cache misses, errors)
12
+ */
13
+
14
+ import { log, warn, error, isDebugEnabled } from '../logger.mjs';
15
+
16
/**
 * Log API call performance.
 *
 * Routing:
 * - a failed call (`success === false`) is always reported through `error()`,
 *   whether or not an error object was supplied;
 * - a successful call that needed retries is reported through `warn()`;
 * - a detailed `log()` entry is emitted when debug logging is enabled, when
 *   latency exceeds 5000ms, or when at least one retry happened.
 *
 * @param {Object} params - Performance data
 * @param {string} params.provider - Provider name (gemini, openai, claude, groq)
 * @param {number} params.latency - Response time in ms
 * @param {number} [params.retries=0] - Number of retries
 * @param {number|null} [params.cost=null] - Estimated cost
 * @param {number} [params.inputTokens=0] - Input tokens
 * @param {number} [params.outputTokens=0] - Output tokens
 * @param {boolean} [params.success=true] - Whether call succeeded
 * @param {Error} [params.error] - Error if failed
 * @param {string} [params.testName='unknown'] - Test name for context
 * @returns {void}
 */
export function logAPICallPerformance(params) {
  const {
    provider,
    latency,
    retries = 0,
    cost = null,
    inputTokens = 0,
    outputTokens = 0,
    success = true,
    error: err = null,
    testName = 'unknown'
  } = params;

  // A failure must never be reported as a success just because the caller
  // did not attach an error object.
  if (!success) {
    const hasError = err !== null;
    error(`[API] ${provider} call failed`, {
      provider,
      latency,
      retries,
      error: hasError ? err.message : 'unknown error',
      testName,
      stack: hasError ? err.stack : undefined
    });
    return;
  }

  // Retries are worth a warning even when the call eventually succeeded.
  if (retries > 0) {
    warn(`[API] ${provider} call succeeded after ${retries} retries`, {
      provider,
      latency,
      retries,
      cost,
      testName
    });
  }

  // Detailed entry: always in debug mode, and unconditionally for slow or retried calls.
  if (isDebugEnabled() || latency > 5000 || retries > 0) {
    // A cost of 0 is a real value, so test for a finite number rather than truthiness.
    const formattedCost = typeof cost === 'number' && Number.isFinite(cost)
      ? `$${cost.toFixed(6)}`
      : null;

    let performance = 'slow';
    if (latency < 1000) {
      performance = 'fast';
    } else if (latency < 3000) {
      performance = 'normal';
    }

    log(`[API] ${provider} call`, {
      provider,
      latency: `${latency}ms`,
      retries,
      cost: formattedCost,
      tokens: `${inputTokens} in, ${outputTokens} out`,
      testName,
      performance
    });
  }
}
80
+
81
/**
 * Log a cache operation.
 *
 * Evictions and expirations are always reported through `warn()`. Misses and
 * hits are reported through `log()` only when debug logging is enabled. Any
 * other operation (for example `set`) produces no output.
 *
 * @param {Object} params - Cache operation data
 * @param {string} params.operation - Operation type (hit, miss, set, evict, expire)
 * @param {boolean} [params.hit] - Whether it was a hit (accepted for API
 *   compatibility; logging is driven by `operation`)
 * @param {number} [params.latency] - Lookup latency in ms
 * @param {number} [params.cacheSize] - Current cache size
 * @param {number} [params.maxSize] - Max cache size
 * @param {string} [params.reason] - Reason for eviction/expiration
 * @returns {void}
 */
export function logCacheOperation(params) {
  const {
    operation,
    latency = null,
    cacheSize = null,
    maxSize = null,
    reason = null
  } = params;

  if (operation === 'evict' || operation === 'expire') {
    // Only report utilization when both numbers are known; a missing
    // cacheSize must not show up as a misleading "0.0%".
    const canComputeUtilization =
      Number.isFinite(cacheSize) && Number.isFinite(maxSize) && maxSize > 0;

    warn(`[Cache] ${operation}`, {
      operation,
      cacheSize,
      maxSize,
      reason,
      utilization: canComputeUtilization
        ? `${((cacheSize / maxSize) * 100).toFixed(1)}%`
        : null
    });
    return;
  }

  if (operation !== 'miss' && operation !== 'hit') {
    return;
  }
  if (!isDebugEnabled()) {
    return;
  }

  // Sub-millisecond lookups legitimately measure as 0, so keep them.
  const formattedLatency = Number.isFinite(latency) ? `${latency}ms` : null;

  if (operation === 'miss') {
    log(`[Cache] miss`, {
      operation,
      latency: formattedLatency,
      cacheSize,
      maxSize
    });
    return;
  }

  log(`[Cache] hit`, {
    operation,
    latency: formattedLatency,
    cacheSize
  });
}
133
+
134
/**
 * Log the reasoning behind a temporal prompting decision.
 *
 * High-urgency decisions are always logged with the full set of fields.
 * Medium- and low-urgency decisions are logged only when debug logging is
 * enabled, with progressively fewer fields. Any other urgency value produces
 * no output.
 *
 * @param {Object} params - Decision data
 * @param {boolean} params.shouldPrompt - Whether to prompt
 * @param {string} params.reason - Reason for decision
 * @param {string} params.urgency - Urgency level (low, medium, high)
 * @param {number} [params.coherence] - Temporal coherence score
 * @param {number} [params.stateChange] - State change magnitude
 * @param {number} [params.noteCount] - Number of temporal notes
 * @param {boolean} [params.isDecisionPoint] - Whether this is a decision point
 * @param {boolean} [params.hasUserAction] - Whether user action occurred
 * @returns {void}
 */
export function logTemporalDecision(params) {
  const {
    shouldPrompt,
    reason,
    urgency,
    coherence = null,
    stateChange = null,
    noteCount = null,
    isDecisionPoint = false,
    hasUserAction = false
  } = params;

  // Built lazily so that nothing is formatted for urgencies that are not logged.
  const headline = () => `[Temporal] Decision: ${shouldPrompt ? 'PROMPT' : 'WAIT'} (${urgency})`;

  // Critical path: bypasses the debug gate entirely.
  if (urgency === 'high') {
    log(headline(), {
      shouldPrompt,
      reason,
      urgency,
      coherence,
      stateChange,
      noteCount,
      isDecisionPoint,
      hasUserAction
    });
    return;
  }

  const isMedium = urgency === 'medium';
  if (!isMedium && urgency !== 'low') {
    return;
  }
  if (!isDebugEnabled()) {
    return;
  }

  const details = { shouldPrompt, reason, urgency };
  if (isMedium) {
    // Medium urgency carries the temporal metrics; low urgency does not.
    details.coherence = coherence;
    details.stateChange = stateChange;
    details.noteCount = noteCount;
  }
  log(headline(), details);
}
195
+
196
/**
 * Log batch optimizer metrics.
 *
 * Rejections and timeouts are always reported through `warn()`. A `queue`
 * event is reported through `warn()` only when the queue is more than 80%
 * full. A `process` event is reported through `log()` only when debug logging
 * is enabled.
 *
 * @param {Object} params - Batch optimizer data
 * @param {string} params.event - Event type (queue, process, timeout, reject)
 * @param {number} [params.queueDepth] - Current queue depth
 * @param {number} [params.maxQueueSize] - Max queue size
 * @param {number} [params.activeRequests] - Active concurrent requests
 * @param {number} [params.maxConcurrency] - Max concurrency
 * @param {number} [params.waitTime] - Wait time in ms
 * @param {string} [params.reason] - Reason for timeout/rejection
 * @returns {void}
 */
export function logBatchOptimizer(params) {
  const {
    event,
    queueDepth = null,
    maxQueueSize = null,
    activeRequests = null,
    maxConcurrency = null,
    waitTime = null,
    reason = null
  } = params;

  // Utilization is only meaningful when both numbers are known; an unknown
  // queue depth must not be reported as "0.0%".
  const hasQueueNumbers =
    Number.isFinite(queueDepth) && Number.isFinite(maxQueueSize) && maxQueueSize > 0;
  const utilization = hasQueueNumbers
    ? `${((queueDepth / maxQueueSize) * 100).toFixed(1)}%`
    : null;

  // A wait of 0ms is a real measurement, so test for a finite number rather than truthiness.
  const formattedWait = Number.isFinite(waitTime) ? `${waitTime}ms` : null;

  if (event === 'reject' || event === 'timeout') {
    warn(`[BatchOptimizer] ${event}`, {
      event,
      queueDepth,
      maxQueueSize,
      activeRequests,
      maxConcurrency,
      waitTime: formattedWait,
      reason,
      utilization
    });
    return;
  }

  // Warn once the queue passes 80% of capacity.
  if (event === 'queue' && hasQueueNumbers && queueDepth > maxQueueSize * 0.8) {
    warn(`[BatchOptimizer] High queue depth`, {
      event,
      queueDepth,
      maxQueueSize,
      utilization
    });
  }

  if (event === 'process' && isDebugEnabled()) {
    log(`[BatchOptimizer] ${event}`, {
      event,
      queueDepth,
      activeRequests,
      maxConcurrency,
      waitTime: formattedWait
    });
  }
}
255
+
256
/**
 * Log an error pattern.
 *
 * Always reports through `error()`. The logger itself never throws because of
 * the shape of the reported value: a missing error (null/undefined) or a
 * value without a constructor is described with placeholder text instead.
 *
 * @param {Object} params - Error data
 * @param {Error} params.error - Error object
 * @param {string} params.context - Context where error occurred
 * @param {string} [params.recovery] - Recovery strategy attempted
 * @param {boolean} [params.recovered=false] - Whether recovery succeeded
 * @param {number} [params.retryCount=0] - Number of retries
 * @returns {void}
 */
export function logErrorPattern(params) {
  const {
    error: err,
    context,
    recovery = null,
    recovered = false,
    retryCount = 0
  } = params;

  let message = 'unknown error';
  let errorType = 'unknown';
  let stack;

  if (err !== null && err !== undefined) {
    if (err.message !== undefined) {
      message = err.message;
    } else if (typeof err === 'object') {
      // String() can throw on prototype-less objects; this form cannot.
      message = Object.prototype.toString.call(err);
    } else {
      message = String(err);
    }

    // Objects created with Object.create(null) have no constructor at all.
    const ctor = err.constructor;
    errorType = typeof ctor === 'function' ? ctor.name : 'Object';
    stack = err.stack;
  }

  error(`[Error] ${context}`, {
    context,
    error: message,
    errorType,
    recovery,
    recovered,
    retryCount,
    stack
  });
}
286
+
287
/**
 * Log a cache statistics summary.
 *
 * Always reports through `log()`. Values that are missing or not finite
 * numbers (for example a hit rate computed as 0/0 before any lookup) are
 * rendered as `'n/a'` instead of throwing or printing `NaN`.
 *
 * @param {Object} params - Cache statistics
 * @param {number} params.hits - Number of cache hits
 * @param {number} params.misses - Number of cache misses
 * @param {number} params.hitRate - Hit rate percentage
 * @param {number} params.avgLatency - Average lookup latency in ms
 * @param {number} [params.savings] - Estimated time/cost savings
 * @returns {void}
 */
export function logCacheStats(params) {
  const {
    hits,
    misses,
    hitRate,
    avgLatency,
    savings = null
  } = params;

  const hasHitRate = Number.isFinite(hitRate);

  // Thresholds: above 50% is good, above 30% is moderate, anything else is low.
  let effectiveness = 'unknown';
  if (hasHitRate) {
    if (hitRate > 50) {
      effectiveness = 'good';
    } else if (hitRate > 30) {
      effectiveness = 'moderate';
    } else {
      effectiveness = 'low';
    }
  }

  log(`[Cache] Statistics`, {
    hits,
    misses,
    hitRate: hasHitRate ? `${hitRate.toFixed(1)}%` : 'n/a',
    avgLatency: Number.isFinite(avgLatency) ? `${avgLatency.toFixed(2)}ms` : 'n/a',
    // A savings value of 0 is a real measurement, so test for a finite number.
    savings: Number.isFinite(savings) ? `${savings}ms saved` : null,
    effectiveness
  });
}
316
+
@@ -0,0 +1,280 @@
1
+ /**
2
+ * Performance Measurement Utilities
3
+ *
4
+ * Provides utilities for measuring and tracking performance metrics
5
+ * across the system. Useful for identifying bottlenecks and optimizing.
6
+ */
7
+
8
+ import { log, warn } from '../logger.mjs';
9
+
10
// Clock source chosen once at module load: performance.now() when a global
// `performance` object exposes it, otherwise Date.now().
const getHighResTime = (() => {
  if (typeof performance === 'undefined' || !performance.now) {
    return () => Date.now();
  }
  return () => performance.now();
})();
14
+
15
/**
 * Performance measurement class.
 *
 * Times a single named operation: call `start()`, optionally `mark()`
 * checkpoints, then `end()` to obtain the result. Timestamps come from the
 * module's `getHighResTime()` clock.
 */
export class PerformanceMeasurement {
  /**
   * @param {string} name - Measurement name, used in results and log lines
   * @param {Object} [options={}] - Measurement options
   * @param {Object} [options.metadata] - Metadata merged into every result
   * @param {boolean} [options.autoLog=true] - Log a summary when `end()` is called
   */
  constructor(name, options = {}) {
    this.name = name;
    this.startTime = null;
    this.endTime = null;
    this.marks = [];
    this.metadata = options.metadata || {};
    this.autoLog = options.autoLog !== false; // Default true
  }

  /**
   * Start (or restart) the measurement. Clears checkpoints and any end time
   * left over from a previous run.
   *
   * @returns {PerformanceMeasurement} This instance, for chaining
   */
  start() {
    this.startTime = getHighResTime();
    this.endTime = null;
    this.marks = [];
    return this;
  }

  /**
   * Mark a checkpoint.
   *
   * @param {string} label - Checkpoint label
   * @param {Object} [metadata={}] - Additional metadata
   * @returns {PerformanceMeasurement} This instance, for chaining
   */
  mark(label, metadata = {}) {
    const now = getHighResTime();
    // Compare against null explicitly: a start timestamp of 0 is valid.
    const elapsed = this.startTime !== null ? now - this.startTime : 0;

    this.marks.push({
      label,
      timestamp: now,
      elapsed,
      metadata
    });

    return this;
  }

  /**
   * End measurement.
   *
   * @param {Object} [metadata={}] - Final metadata, merged over the constructor metadata
   * @returns {Object} Measurement result: `name`, numeric `duration`,
   *   `durationMs` (duration as a two-decimal string), `marks` and `metadata`.
   *   The duration is 0 when `start()` was never called.
   */
  end(metadata = {}) {
    this.endTime = getHighResTime();
    const duration = this.startTime !== null ? this.endTime - this.startTime : 0;

    const result = {
      name: this.name,
      duration,
      durationMs: duration.toFixed(2),
      marks: this.marks,
      metadata: { ...this.metadata, ...metadata }
    };

    if (this.autoLog) {
      log(`[Performance] ${this.name}: ${duration.toFixed(2)}ms`);
      for (const mark of this.marks) {
        log(` - ${mark.label}: ${mark.elapsed.toFixed(2)}ms`);
      }
    }

    return result;
  }

  /**
   * Get current elapsed time without ending.
   *
   * @returns {number} Time since `start()`, or 0 when not started
   */
  getElapsed() {
    if (this.startTime === null) return 0;
    return getHighResTime() - this.startTime;
  }
}
95
+
96
/**
 * Measure async function execution.
 *
 * On failure the measurement is ended with `success: false` and the original
 * thrown value is rethrown unchanged.
 *
 * @param {string} name - Measurement name
 * @param {Function} fn - Async function to measure
 * @param {Object} [options={}] - Measurement options
 * @returns {Promise<{result: *, measurement: Object}>} The function's result
 *   together with the measurement result
 */
export async function measureAsync(name, fn, options = {}) {
  const measurement = new PerformanceMeasurement(name, options);
  measurement.start();

  try {
    const result = await fn();
    const measurementResult = measurement.end({ success: true });
    return { result, measurement: measurementResult };
  } catch (error) {
    // A thrown null/undefined has no `.message`; reading it would raise a
    // TypeError that masks the original failure.
    const isNullish = error === null || error === undefined;
    measurement.end({
      success: false,
      error: isNullish ? String(error) : error.message
    });
    throw error;
  }
}
117
+
118
/**
 * Measure sync function execution.
 *
 * On failure the measurement is ended with `success: false` and the original
 * thrown value is rethrown unchanged.
 *
 * @param {string} name - Measurement name
 * @param {Function} fn - Sync function to measure
 * @param {Object} [options={}] - Measurement options
 * @returns {{result: *, measurement: Object}} The function's result together
 *   with the measurement result
 */
export function measureSync(name, fn, options = {}) {
  const measurement = new PerformanceMeasurement(name, options);
  measurement.start();

  try {
    const result = fn();
    const measurementResult = measurement.end({ success: true });
    return { result, measurement: measurementResult };
  } catch (error) {
    // A thrown null/undefined has no `.message`; reading it would raise a
    // TypeError that masks the original failure.
    const isNullish = error === null || error === undefined;
    measurement.end({
      success: false,
      error: isNullish ? String(error) : error.message
    });
    throw error;
  }
}
139
+
140
/**
 * Performance profiler for tracking multiple operations.
 *
 * Each `start()` creates a silent `PerformanceMeasurement` tracked under a
 * generated ID; `end()` finishes it and stores the result. Stored results
 * accumulate until `reset()`/`clear()` is called.
 */
export class PerformanceProfiler {
  constructor() {
    this.measurements = [];
    this.active = new Map();
  }

  /**
   * Start profiling an operation.
   *
   * @param {string} name - Operation name
   * @param {Object} [metadata={}] - Metadata
   * @returns {string} Profile ID to pass to `end()`
   */
  start(name, metadata = {}) {
    // The random suffix keeps IDs distinct when the same name starts twice within one millisecond.
    const suffix = Math.random().toString(36).slice(2, 11);
    const id = `${name}-${Date.now()}-${suffix}`;
    const measurement = new PerformanceMeasurement(name, { metadata, autoLog: false });
    measurement.start();
    this.active.set(id, measurement);
    return id;
  }

  /**
   * End profiling an operation.
   *
   * @param {string} id - Profile ID
   * @param {Object} [metadata={}] - Final metadata
   * @returns {Object|null} Measurement result, or null (with a warning) when
   *   the ID is not active
   */
  end(id, metadata = {}) {
    const measurement = this.active.get(id);
    if (!measurement) {
      warn(`[PerformanceProfiler] No active measurement found for ID: ${id}`);
      return null;
    }

    this.active.delete(id);
    const result = measurement.end(metadata);
    this.measurements.push(result);
    return result;
  }

  /**
   * Get summary statistics.
   *
   * Percentiles use the nearest-rank method: the p-th percentile is the
   * smallest recorded duration such that at least p% of the durations are
   * less than or equal to it.
   *
   * @returns {Object} `{ count: 0 }` when nothing was recorded; otherwise
   *   `count`, two-decimal strings `total`, `average`, `min`, `max`, `p50`,
   *   `p95`, `p99`, and `byName` mapping each operation name to numeric
   *   `count`, `total`, `average`, `min`, `max`
   */
  getSummary() {
    const count = this.measurements.length;
    if (count === 0) {
      return { count: 0 };
    }

    let total = 0;
    let min = Infinity;
    let max = -Infinity;
    // A Map avoids collisions with inherited keys such as "constructor".
    const groups = new Map();

    // Single pass without argument spreading, so large histories cannot
    // exceed the engine's argument-count limit.
    for (const { name, duration } of this.measurements) {
      total += duration;
      min = Math.min(min, duration);
      max = Math.max(max, duration);

      const key = String(name);
      let group = groups.get(key);
      if (!group) {
        group = { count: 0, total: 0, average: 0, min: Infinity, max: -Infinity };
        groups.set(key, group);
      }
      group.count += 1;
      group.total += duration;
      group.min = Math.min(group.min, duration);
      group.max = Math.max(group.max, duration);
    }

    for (const group of groups.values()) {
      group.average = group.total / group.count;
    }

    const sorted = this.measurements.map((m) => m.duration).sort((a, b) => a - b);
    // Integer percent keeps the rank computation free of floating-point drift.
    const percentile = (percent) => {
      const rank = Math.ceil((percent * sorted.length) / 100);
      return sorted[Math.max(0, rank - 1)];
    };

    return {
      count,
      total: total.toFixed(2),
      average: (total / count).toFixed(2),
      min: min.toFixed(2),
      max: max.toFixed(2),
      p50: percentile(50).toFixed(2),
      p95: percentile(95).toFixed(2),
      p99: percentile(99).toFixed(2),
      byName: Object.fromEntries(groups)
    };
  }

  /**
   * Reset profiler: drops stored results and forgets active measurements.
   */
  reset() {
    this.measurements = [];
    this.active.clear();
  }

  /**
   * Clear measurements (alias for reset).
   */
  clear() {
    this.reset();
  }

  /**
   * Export measurements.
   *
   * @returns {Array} Shallow copy of all stored measurement results
   */
  export() {
    return [...this.measurements];
  }
}
263
+
264
/**
 * Module-wide profiler instance; stays null until getProfiler() is first called.
 */
let globalProfiler = null;

/**
 * Return the shared profiler, creating it on first use.
 *
 * @returns {PerformanceProfiler} Profiler instance
 */
export function getProfiler() {
  if (globalProfiler) {
    return globalProfiler;
  }
  globalProfiler = new PerformanceProfiler();
  return globalProfiler;
}
280
+