@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +127 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +902 -5
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +699 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +35 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Intent Recognition for Browser Automation
3
+ *
4
+ * Parses natural language tasks into structured intents.
5
+ * Simple keyword-based recognition - fast and sufficient for most cases.
6
+ *
7
+ * Research Context:
8
+ * - Intent recognition accuracy >85% is often cited as critical for browser automation agents
9
+ * - Ambiguous tasks require disambiguation (e.g., "Buy this product" = add to cart + checkout)
10
+ * - Multi-step tasks need workflow decomposition
11
+ *
12
+ * Implementation:
13
+ * - We use simple keyword-based recognition (fast, <1ms)
14
+ * - LLM-based recognition was considered but adds latency (>1s) and cost without clear benefit for current use cases.
15
+ * - Complex disambiguation happens during action execution, not intent parsing
16
+ *
17
+ * See docs/research/IMPLEMENTATION_VS_RESEARCH.md for detailed research context.
18
+ *
19
+ * @module intent-recognizer
20
+ */
21
+
22
+
/**
 * Recognized intent types.
 *
 * Maps symbolic names to the string identifiers placed in the `intent` field
 * of recognition results. The object is frozen so that importers of this
 * shared export cannot add, remove, or reassign entries at runtime.
 *
 * @readonly
 * @enum {string}
 */
export const INTENT_TYPES = Object.freeze({
  NAVIGATE: 'navigate',
  FILL_FORM: 'fill_form',
  VALIDATE: 'validate',
  EXPLORE: 'explore',
  PLAY_GAME: 'play_game',
  CLICK: 'click',
  WAIT: 'wait',
  EXTRACT: 'extract',
  UNKNOWN: 'unknown'
});
37
+
/**
 * Classify a natural-language task into a structured intent.
 *
 * Delegates to the synchronous keyword matcher. The function is declared
 * `async`, so callers always receive a promise and any error thrown by the
 * matcher surfaces as a rejection.
 *
 * @param {string} task - Natural language task description
 * @returns {Promise<Object>} Recognized intent with confidence
 */
export async function recognizeIntent(task) {
  // Keyword matching only: LLM-based recognition would add latency and cost
  // without a clear benefit.
  const recognized = recognizeIntentKeyword(task);
  return recognized;
}
49
+
50
+
/**
 * Keyword-based intent recognition.
 *
 * Lower-cases the task and tests it against a fixed, ordered list of keyword
 * patterns; the first matching pattern decides the intent. Every match is
 * reported with confidence 0.8, and a task that matches nothing is reported
 * as UNKNOWN with confidence 0.5.
 *
 * Order of checks: navigate, fill form, validate, explore (or extract, see
 * below), play game, click, wait, extract.
 *
 * @param {string} task - Natural language task description
 * @returns {{intent: string, confidence: number, subIntents: Array, parameters: Object, reasoning: string}}
 *   Recognition result; `parameters.target` is set for navigate/click intents
 *   when a target can be extracted from the original (non-lowercased) text.
 * @throws {TypeError} If `task` is not a string
 */
function recognizeIntentKeyword(task) {
  // Fail with an explicit message instead of an opaque
  // "task.toLowerCase is not a function" TypeError.
  if (typeof task !== 'string') {
    throw new TypeError(
      `task must be a string, received ${task === null ? 'null' : typeof task}`
    );
  }

  const lower = task.toLowerCase();

  // Extraction requires an extraction verb followed by a data noun.
  const extractPattern =
    /\b(extract|get|read|find|identify)\b.*\b(information|data|value|text)\b/;

  // Builds the result object shared by all recognized intents.
  const detected = (intent, label, parameters = {}) => ({
    intent,
    confidence: 0.8,
    subIntents: [],
    parameters,
    reasoning: `Keyword-based recognition: ${label} intent detected`
  });

  // Target extraction uses the original task text so casing is preserved.
  const targetParameters = () => {
    const target = extractTarget(task);
    return target ? { target } : {};
  };

  // Navigate
  if (/\b(navigate|go to|visit|open|browse to|take me to)\b/.test(lower)) {
    return detected(INTENT_TYPES.NAVIGATE, 'navigation', targetParameters());
  }

  // Fill form
  if (/\b(fill|complete|submit|enter|type)\b.*\b(form|field|input)\b/.test(lower)) {
    return detected(INTENT_TYPES.FILL_FORM, 'form filling');
  }

  // Validate
  if (/\b(check|validate|verify|test|ensure|confirm)\b/.test(lower)) {
    return detected(INTENT_TYPES.VALIDATE, 'validation');
  }

  // Explore
  if (/\b(explore|try|find|search|look for|discover)\b/.test(lower)) {
    // "find" is listed as both an exploration keyword and an extraction verb.
    // Because this branch runs first, "find ... <data noun>" could never be
    // classified as extraction; prefer the more specific extraction pattern
    // when it also matches.
    if (extractPattern.test(lower)) {
      return detected(INTENT_TYPES.EXTRACT, 'extraction');
    }
    return detected(INTENT_TYPES.EXPLORE, 'exploration');
  }

  // Play game
  if (/\b(play|game|score|level)\b/.test(lower)) {
    return detected(INTENT_TYPES.PLAY_GAME, 'game playing');
  }

  // Click
  if (/\b(click|press|tap|select)\b/.test(lower)) {
    return detected(INTENT_TYPES.CLICK, 'click', targetParameters());
  }

  // Wait
  if (/\b(wait|pause|delay)\b/.test(lower)) {
    return detected(INTENT_TYPES.WAIT, 'wait');
  }

  // Extract
  if (extractPattern.test(lower)) {
    return detected(INTENT_TYPES.EXTRACT, 'extraction');
  }

  // Unknown
  return {
    intent: INTENT_TYPES.UNKNOWN,
    confidence: 0.5,
    subIntents: [],
    parameters: {},
    reasoning: 'Keyword-based recognition: intent unclear'
  };
}
156
+
/**
 * Extract the target of an action from task text.
 *
 * Resolution order:
 * 1. The first double-quoted substring, otherwise the first single-quoted one.
 * 2. The text following the first "to" or "for" that is followed by a target.
 *    The capture accepts letters, digits, whitespace and common URL/identifier
 *    punctuation, so targets such as "https://example.com/login" or "page 2"
 *    are returned whole instead of being cut at the first non-letter.
 *    Trailing whitespace and sentence punctuation (`.`, `?`, `:`) are removed.
 *
 * @param {string} text - Original (non-lowercased) task text
 * @returns {string|null} Extracted target, or null when none is found
 */
function extractTarget(text) {
  // Quoted strings are the most explicit way to name a target.
  const quoted = text.match(/"([^"]+)"/) || text.match(/'([^']+)'/);
  if (quoted) return quoted[1];

  // The first captured character must be non-whitespace so the capture can
  // never consist of whitespace only.
  const afterPreposition = text.match(
    /\b(?:to|for)\s+([\w.\/:#?&=%~+@-][\w\s.\/:#?&=%~+@-]*)/i
  );
  if (!afterPreposition) return null;

  const target = afterPreposition[1].replace(/[\s.?:]+$/, '');
  return target || null;
}
171
+
/**
 * Recognize intents for a batch of tasks and summarize the outcome.
 *
 * Every task is passed to `recognizeIntent`. A task counts as "recognized"
 * when its intent is anything other than UNKNOWN, and `accuracy` is the
 * recognized fraction. For an empty batch `accuracy` is 0 rather than the
 * NaN that 0/0 would produce.
 *
 * @param {string[]} tasks - Natural language task descriptions
 * @param {string[]} [_screenshotPaths=[]] - Accepted but not used by this implementation
 * @param {Object} [_options={}] - Accepted but not used by this implementation
 * @returns {Promise<{total: number, recognized: number, accuracy: number, intentDistribution: Object, results: Object[], recommendation: string}>}
 *   Summary plus the individual recognition results, in task order
 */
export async function batchRecognizeIntents(tasks, _screenshotPaths = [], _options = {}) {
  const results = await Promise.all(tasks.map((task) => recognizeIntent(task)));

  const total = results.length;
  const recognized = results.filter((r) => r.intent !== INTENT_TYPES.UNKNOWN).length;
  // Guard the empty batch: 0 / 0 is NaN, which is not a usable accuracy value.
  const accuracy = total === 0 ? 0 : recognized / total;

  // Count how many tasks were assigned to each intent.
  const intentDistribution = results.reduce((acc, r) => {
    acc[r.intent] = (acc[r.intent] || 0) + 1;
    return acc;
  }, {});

  return {
    total,
    recognized,
    accuracy,
    intentDistribution,
    results,
    recommendation: accuracy >= 0.85
      ? 'Intent recognition accuracy meets target (>85%)'
      : 'Intent recognition accuracy below target. Consider improving prompts or adding more training examples.'
  };
}
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Log Sanitization Utilities
3
+ *
4
+ * Provides log sanitization to prevent information disclosure in production logs.
5
+ * Removes sensitive data like API keys, full paths, and long prompts.
6
+ */
7
+
8
+ import { basename } from 'path';
9
+
/**
 * Fields that should never be logged (sensitive data).
 *
 * Entries are matched as case-insensitive substrings of a field name.
 * API-key spellings are listed in camelCase, snake_case and kebab-case so
 * that names such as `apiKey`, `api_key`, `OPENAI_API_KEY` and `api-key`
 * are all caught; `cookie` covers `cookie` / `set-cookie` headers.
 */
const SENSITIVE_FIELDS = [
  'apiKey',
  'api_key',
  'api-key',
  'token',
  'password',
  'secret',
  'credential',
  'authorization',
  'cookie',
  'x-api-key',
  'x-goog-api-key',
  'bearer'
];

/**
 * Maximum length for logged strings (prevents log flooding)
 */
const MAX_LOG_LENGTH = 200;
29
+
/**
 * Sanitize data for logging
 *
 * Removes sensitive fields, truncates long strings, and sanitizes paths.
 * Arrays and nested objects are processed recursively. A value that refers
 * back to one of its own ancestors is replaced by the string '[Circular]'
 * instead of recursing until the stack overflows.
 *
 * @param {unknown} data - Data to sanitize
 * @param {Object} [options={}] - Sanitization options
 * @param {boolean} [options.removeSensitive=true] - Remove sensitive fields
 * @param {number} [options.maxLength=200] - Maximum string length
 * @param {boolean} [options.sanitizePaths=true] - Sanitize file paths
 * @returns {unknown} Sanitized data
 */
export function sanitizeForLogging(data, options = {}) {
  return sanitizeValueForLogging(data, options, new WeakSet());
}

/**
 * Recursive worker behind sanitizeForLogging.
 *
 * @param {unknown} data - Value to sanitize
 * @param {Object} options - Options as passed to sanitizeForLogging
 * @param {WeakSet<object>} ancestors - Objects currently being traversed above `data`
 * @returns {unknown} Sanitized value
 */
function sanitizeValueForLogging(data, options, ancestors) {
  const {
    removeSensitive = true,
    maxLength = MAX_LOG_LENGTH,
    sanitizePaths = true
  } = options;

  // Handle null/undefined
  if (data === null || data === undefined) {
    return data;
  }

  // Handle primitives
  if (typeof data !== 'object') {
    return sanitizePrimitive(data, maxLength);
  }

  // Only ancestors are tracked (not every visited object), so an object that
  // is merely referenced twice is still sanitized normally both times.
  if (ancestors.has(data)) {
    return '[Circular]';
  }
  ancestors.add(data);

  try {
    // Handle arrays
    if (Array.isArray(data)) {
      return data.map((item) => sanitizeValueForLogging(item, options, ancestors));
    }

    // Handle objects
    const sanitized = {};
    for (const [key, value] of Object.entries(data)) {
      // Remove sensitive fields
      if (removeSensitive && isSensitiveField(key)) {
        sanitized[key] = '[REDACTED]';
        continue;
      }

      // Sanitize paths
      if (sanitizePaths && isPathField(key)) {
        sanitized[key] = sanitizePathValue(value, options, ancestors);
        continue;
      }

      // Nested objects/arrays recurse; primitives are truncated if needed
      sanitized[key] = sanitizeValueForLogging(value, options, ancestors);
    }

    return sanitized;
  } finally {
    ancestors.delete(data);
  }
}

/**
 * Sanitize the value of a path-like field.
 *
 * Strings are reduced to their basename. Arrays have each string element
 * reduced to its basename. Any other value is sanitized like regular data,
 * so objects stored under a path-like key (e.g. `files`, `fileInfo`) cannot
 * carry secrets or full paths into the log unprocessed.
 *
 * @param {unknown} value - Field value
 * @param {Object} options - Options as passed to sanitizeForLogging
 * @param {WeakSet<object>} ancestors - Objects currently being traversed
 * @returns {unknown} Sanitized value
 */
function sanitizePathValue(value, options, ancestors) {
  if (typeof value === 'string') {
    return basename(value);
  }
  if (Array.isArray(value)) {
    return value.map((item) =>
      typeof item === 'string'
        ? basename(item)
        : sanitizeValueForLogging(item, options, ancestors)
    );
  }
  return sanitizeValueForLogging(value, options, ancestors);
}
91
+
/**
 * Decide whether a field name denotes sensitive data.
 *
 * The comparison is a case-insensitive substring test against every entry
 * of SENSITIVE_FIELDS.
 *
 * @param {string} fieldName - Field name to check
 * @returns {boolean} True if field is sensitive
 */
function isSensitiveField(fieldName) {
  const candidate = fieldName.toLowerCase();
  for (const marker of SENSITIVE_FIELDS) {
    if (candidate.includes(marker.toLowerCase())) {
      return true;
    }
  }
  return false;
}
102
+
/**
 * Decide whether a field name denotes a file path.
 *
 * A name qualifies when, compared case-insensitively, it contains any of
 * the fragments "path", "file" or "dir".
 *
 * @param {string} fieldName - Field name to check
 * @returns {boolean} True if field is a path
 */
function isPathField(fieldName) {
  const candidate = fieldName.toLowerCase();
  return ['path', 'file', 'dir'].some((fragment) => candidate.includes(fragment));
}
113
+
/**
 * Sanitize a primitive value.
 *
 * Strings longer than `maxLength` are cut to `maxLength` characters and
 * suffixed with "..."; every other value is returned unchanged.
 *
 * @param {unknown} value - Value to sanitize
 * @param {number} maxLength - Maximum length
 * @returns {unknown} Sanitized value
 */
function sanitizePrimitive(value, maxLength) {
  const isOverlongString = typeof value === 'string' && value.length > maxLength;
  return isOverlongString ? `${value.substring(0, maxLength)}...` : value;
}
130
+
/**
 * Produce a log-safe summary of an error object.
 *
 * The summary always carries `name` and a length-limited `message`. The
 * stack is added only when `options.includeStack` is truthy, `details` only
 * when it is an object (sanitized with the same options), and `code` only
 * when it is truthy. Anything that is not a non-null object is returned
 * unchanged.
 *
 * @param {Error} error - Error object
 * @param {Object} [options={}] - Sanitization options
 * @returns {Object} Sanitized error
 */
export function sanitizeErrorForLogging(error, options = {}) {
  const isObjectLike = Boolean(error) && typeof error === 'object';
  if (!isObjectLike) {
    return error;
  }

  const messageLimit = options.maxLength || MAX_LOG_LENGTH;
  const summary = {
    name: error.name,
    message: sanitizePrimitive(error.message, messageLimit)
  };

  // The stack trace is opt-in (intended for debug mode)
  if (options.includeStack && error.stack) {
    const stackLimit = options.maxStackLength || 500;
    summary.stack = sanitizePrimitive(error.stack, stackLimit);
  }

  // Structured details go through the regular data sanitizer
  if (error.details && typeof error.details === 'object') {
    summary.details = sanitizeForLogging(error.details, options);
  }

  // The error code is kept as-is because it is useful for debugging
  if (error.code) {
    summary.code = error.code;
  }

  return summary;
}
165
+
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Path Validation Utilities
3
+ *
4
+ * Provides secure path validation to prevent path traversal attacks.
5
+ *
6
+ * NOTE: The primary validateFilePath() function is in src/validation.mjs.
7
+ * This module provides additional utilities for path validation.
8
+ *
9
+ * @deprecated validateFilePath() - Use validateFilePath() from '../validation.mjs' instead
10
+ * This function is kept for backward compatibility but delegates to the main implementation.
11
+ */
12
+
13
+ import { resolve, normalize, basename } from 'path';
14
+ import { existsSync } from 'fs';
15
+ import { ValidationError } from '../errors.mjs';
16
+ import { validateFilePath as validateFilePathMain } from '../validation.mjs';
17
+
/**
 * Validate and normalize a file path to guard against path traversal.
 *
 * Rejects anything that is not a non-blank string, then hands the path to
 * the main implementation in '../validation.mjs' with `baseDir` as option.
 *
 * @deprecated Use validateFilePath() from '../validation.mjs' instead
 * @param {string} userPath - User-provided file path
 * @param {string} baseDir - Base directory (optional, defaults to process.cwd())
 * @returns {string} - Resolved, normalized path
 * @throws {ValidationError} - If path is invalid or outside base directory
 */
export function validateFilePath(userPath, baseDir = process.cwd()) {
  // Checked here as well as in the main implementation so that this wrapper
  // always reports the same error for empty or non-string input.
  const isNonBlankString = typeof userPath === 'string' && userPath.trim().length > 0;
  if (!isNonBlankString) {
    const details = { received: typeof userPath };
    throw new ValidationError('File path must be a non-empty string', null, details);
  }

  // Everything else is delegated for consistency
  return validateFilePathMain(userPath, { baseDir });
}
38
+
/**
 * Sanitize file path for error messages (prevents information disclosure)
 *
 * Splits the path on both `/` and `\` so that Windows-style paths are
 * shortened as well, and keeps only the last `maxDepth` components. A path
 * that has no more than `maxDepth` components is returned unchanged.
 * Shortened paths are always joined with `/` and prefixed with `.../`.
 * A `maxDepth` of 0 (or any non-positive / non-numeric value) hides every
 * component and yields `...`.
 *
 * @param {string} fullPath - Full file path
 * @param {number} maxDepth - Maximum directory depth to show (default: 2)
 * @returns {string} - Sanitized path showing only last N components
 */
export function sanitizePathForError(fullPath, maxDepth = 2) {
  if (typeof fullPath !== 'string') {
    return '[invalid path]';
  }

  // Normalize the depth: slice(-0) would return every component, and a
  // negative or NaN depth has no sensible meaning.
  const requestedDepth = Math.floor(Number(maxDepth));
  const depth = requestedDepth > 0 ? requestedDepth : 0;

  const parts = fullPath.split(/[\\/]+/).filter((part) => part);
  if (parts.length <= depth) {
    return fullPath;
  }

  if (depth === 0) {
    return '...';
  }

  // Show only last `depth` parts
  return `.../${parts.slice(-depth).join('/')}`;
}
59
+
/**
 * Validate that a path lies inside one of several allowed directories.
 *
 * The directories are tried in order with validateFilePath(); the result
 * for the first directory that accepts the path is returned. Failures for
 * individual directories are ignored so that the remaining ones can be
 * tried.
 *
 * @param {string} userPath - User-provided path
 * @param {string[]} allowedDirs - Array of allowed base directories
 * @returns {string} - Resolved path if valid
 * @throws {ValidationError} - If path is outside all allowed directories
 */
export function validatePathInAllowedDirs(userPath, allowedDirs) {
  const hasCandidates = Array.isArray(allowedDirs) && allowedDirs.length > 0;
  if (!hasCandidates) {
    throw new ValidationError('allowedDirs must be a non-empty array');
  }

  for (let index = 0; index < allowedDirs.length; index += 1) {
    try {
      return validateFilePath(userPath, allowedDirs[index]);
    } catch {
      // Rejected for this directory; fall through to the next candidate
    }
  }

  // No candidate directory accepted the path
  const details = { allowedDirs };
  throw new ValidationError('File path is outside allowed directories', userPath, details);
}
88
+