@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +102 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +862 -3
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +697 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +27 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Capability Stratifier
3
+ *
4
+ * Tests VLLM capabilities at different levels (low/mid/high)
5
+ *
6
+ * Research context:
7
+ * - VLMs exhibit widespread deficits in low- and mid-level visual abilities
8
+ * - High-level object recognition performance cannot predict low-level capabilities
9
+ * - Need stratified testing to identify capability gaps
10
+ */
11
+
12
+ import { validateScreenshot } from '../judge.mjs';
13
+
14
/**
 * Test capability at a specific level.
 *
 * Every test case is sent to `validateScreenshot` (all cases are started in
 * parallel via `Promise.all`) and the value extracted from the result is
 * compared with the case's expected value. Two values match when they are
 * strictly equal, or when both are numbers less than 0.1 apart.
 *
 * @param {string} level - 'low', 'mid', or 'high'
 * @param {Array<{imagePath: string, prompt: string, expected: any}>} testCases
 * @param {Object} options - Test options; spread after `testType`, so an
 *   explicit `options.testType` overrides the default `capability-<level>`
 * @returns {Promise<Object>} `{ level, accuracy, total, correct, results, recommendation }`
 *   where `accuracy` is `correct / total` (0 when there are no test cases)
 */
export async function testCapabilityLevel(level, testCases, options = {}) {
  const NUMERIC_TOLERANCE = 0.1;
  const ADEQUATE_ACCURACY = 0.7;

  // Strict equality, with a small tolerance when both sides are numeric.
  const matchesExpected = (actual, expected) =>
    actual === expected ||
    (typeof actual === 'number' && typeof expected === 'number' &&
      Math.abs(actual - expected) < NUMERIC_TOLERANCE);

  const results = await Promise.all(
    testCases.map(async (tc) => {
      const result = await validateScreenshot(tc.imagePath, tc.prompt, {
        testType: `capability-${level}`,
        ...options
      });

      // `??` instead of `||`: an extracted 0, false or '' is a real answer and
      // must not be replaced by the score.
      const extractedValue = result.extractedValue ?? result.score;

      return {
        testCase: tc,
        result,
        correct: matchesExpected(extractedValue, tc.expected),
        extractedValue,
        expected: tc.expected
      };
    })
  );

  const total = results.length;
  const correct = results.filter(r => r.correct).length;
  // Guard the division: an empty suite would otherwise yield NaN.
  const accuracy = total > 0 ? correct / total : 0;

  let recommendation;
  if (total === 0) {
    recommendation = `No ${level}-level test cases provided; capability not assessed.`;
  } else if (accuracy < ADEQUATE_ACCURACY) {
    recommendation = `Low ${level}-level capability accuracy. VLLM may struggle with ${level}-level visual tasks.`;
  } else {
    recommendation = `${level}-level capability appears adequate.`;
  }

  return {
    level,
    accuracy,
    total,
    correct,
    results,
    recommendation
  };
}
58
+
59
/**
 * Stratified capability testing (all levels).
 *
 * Runs `testCapabilityLevel` for each of 'low', 'mid' and 'high' that has a
 * non-empty suite, one level after another, then flags a gap whenever the
 * high-level accuracy is above 0.9 while the low- or mid-level accuracy is
 * below 0.7.
 *
 * @param {Object} testSuites - {low: [...], mid: [...], high: [...]}; missing
 *   or empty levels are skipped, and a null/undefined value is treated as `{}`
 * @param {Object} options - Test options, forwarded to `testCapabilityLevel`
 * @returns {Promise<Object>} `{ results, gaps, overallRecommendation }` where
 *   `results` is keyed by level and `gaps` lists the detected high-vs-lower gaps
 */
export async function testStratifiedCapabilities(testSuites, options = {}) {
  const HIGH_ACCURACY_FLOOR = 0.9;
  const LOWER_ACCURACY_CEILING = 0.7;

  // Tolerate a missing suites object instead of throwing on property access.
  const suites = testSuites ?? {};
  const results = {};

  for (const level of ['low', 'mid', 'high']) {
    const suite = suites[level];
    if (suite && suite.length > 0) {
      results[level] = await testCapabilityLevel(level, suite, options);
    }
  }

  // Detect gaps (high-level >0.9 but lower level <0.7); 'low' is checked
  // before 'mid' so the order of `gaps` is stable.
  const gaps = [];
  const high = results.high;
  for (const lower of ['low', 'mid']) {
    const lowerResult = results[lower];
    if (!high || !lowerResult) {
      continue;
    }
    if (high.accuracy > HIGH_ACCURACY_FLOOR && lowerResult.accuracy < LOWER_ACCURACY_CEILING) {
      gaps.push({
        type: `high-${lower}-gap`,
        highAccuracy: high.accuracy,
        [`${lower}Accuracy`]: lowerResult.accuracy,
        recommendation: `High-level performance does not predict ${lower}-level capabilities. Validate ${lower}-level tasks separately.`
      });
    }
  }

  return {
    results,
    gaps,
    overallRecommendation: gaps.length > 0
      ? 'Capability gaps detected. High-level performance cannot predict low/mid-level capabilities.'
      : 'Capability levels appear consistent.'
  };
}
108
+
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Counterfactual Tester
3
+ *
4
+ * Tests whether VLLM uses visual analysis vs. memorized knowledge
5
+ *
6
+ * Research context:
7
+ * - VLMs achieve only 58.57% accuracy on basic visual tasks
8
+ * - When counterfactual images contradict training data, accuracy drops to 17.05%
9
+ * - 75.70% of errors are bias-aligned rather than random
10
+ *
11
+ * This utility helps detect when VLLM is using memorization vs. visual analysis
12
+ */
13
+
14
+ import { validateScreenshot } from '../judge.mjs';
15
+
16
/**
 * Test a counterfactual scenario.
 *
 * Sends the image and prompt to `validateScreenshot` and checks whether the
 * extracted value agrees with the visually correct answer, with the answer
 * memorized knowledge would predict, or with neither/both. Two values agree
 * when they are strictly equal, or when both are numbers less than 0.1 apart.
 *
 * @param {string} imagePath - Path to counterfactual image
 * @param {string} prompt - Question about the image
 * @param {any} expectedMemorized - What memorized knowledge would predict
 * @param {any} expectedVisual - What visual analysis should find
 * @param {Object} options - Test options; spread after `testType`, so an
 *   explicit `options.testType` overrides the default 'counterfactual'
 * @returns {Promise<Object>} `{ extractedValue, expectedMemorized, expectedVisual,
 *   usesVisual, usesMemorization, biasAligned, result, recommendation }`;
 *   `biasAligned` is true only when the answer matches the memorized value and
 *   not the visual one
 */
export async function testCounterfactual(imagePath, prompt, expectedMemorized, expectedVisual, options = {}) {
  const NUMERIC_TOLERANCE = 0.1;

  // Strict equality, with a small tolerance when both sides are numeric.
  const agreesWith = (actual, expected) =>
    actual === expected ||
    (typeof actual === 'number' && typeof expected === 'number' &&
      Math.abs(actual - expected) < NUMERIC_TOLERANCE);

  const result = await validateScreenshot(imagePath, prompt, {
    testType: 'counterfactual',
    ...options
  });

  // `??` instead of `||`: an extracted 0, false or '' is a real answer. Falling
  // back to the score in that case could turn a correct visual answer into a
  // spurious "memorized" match.
  const extractedValue = result.extractedValue ?? result.score;
  const usesVisual = agreesWith(extractedValue, expectedVisual);
  const usesMemorization = agreesWith(extractedValue, expectedMemorized);

  return {
    extractedValue,
    expectedMemorized,
    expectedVisual,
    usesVisual,
    usesMemorization,
    biasAligned: usesMemorization && !usesVisual,
    result,
    recommendation: usesMemorization
      ? 'VLLM appears to use memorized knowledge. Consider visual analysis validation.'
      : 'VLLM appears to use visual analysis.'
  };
}
53
+
54
/**
 * Batch test counterfactual scenarios.
 *
 * Runs `testCounterfactual` for every case (all started in parallel via
 * `Promise.all`) and aggregates how often the answer matched the visual
 * expectation, the memorized expectation, or was bias-aligned.
 *
 * @param {Array<{imagePath: string, prompt: string, expectedMemorized: any, expectedVisual: any}>} testCases
 * @param {Object} options - Test options, forwarded to every `testCounterfactual` call
 * @returns {Promise<Object>} `{ total, visualAccuracy, memorizationRate,
 *   biasAlignedRate, results, recommendation }`; all rates are fractions of
 *   `total` and are 0 for an empty batch
 */
export async function batchTestCounterfactual(testCases, options = {}) {
  const results = await Promise.all(
    testCases.map(tc =>
      testCounterfactual(tc.imagePath, tc.prompt, tc.expectedMemorized, tc.expectedVisual, options)
    )
  );

  const total = results.length;
  const countWhere = (flag) => results.filter(r => r[flag]).length;
  // Guard the division: an empty batch would otherwise yield NaN rates.
  const rateOf = (count) => (total > 0 ? count / total : 0);

  const visualCount = countWhere('usesVisual');
  const memorizationCount = countWhere('usesMemorization');
  const biasAlignedCount = countWhere('biasAligned');

  return {
    total,
    visualAccuracy: rateOf(visualCount),
    memorizationRate: rateOf(memorizationCount),
    biasAlignedRate: rateOf(biasAlignedCount),
    results,
    recommendation: biasAlignedCount > total * 0.5
      ? 'High bias-aligned error rate. VLLM may be relying on memorized knowledge.'
      : 'VLLM appears to use visual analysis appropriately.'
  };
}
83
+
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Error Recovery for Browser Automation
3
+ *
4
+ * Simple retry logic: wait and retry, or try alternative action.
5
+ *
6
+ * Research Context:
7
+ * - Error recovery success rate >70% is often cited as critical for browser automation agents
8
+ * - Agents should gracefully handle failures and try alternatives
9
+ * - Need to avoid infinite retry loops
10
+ *
11
+ * Implementation:
12
+ * - Most errors are timeouts or element not found - simple wait + retry handles these
13
+ * - Complex error classification adds complexity without clear benefit
14
+ * - The VLLM can handle complex error recovery during action execution
15
+ *
16
+ * See docs/research/IMPLEMENTATION_VS_RESEARCH.md for detailed research context.
17
+ *
18
+ * @module error-recovery
19
+ */
20
+
21
/**
 * Extract a printable message from whatever was thrown.
 * Handles Error instances, plain strings and null/undefined.
 */
function errorMessageOf(error) {
  if (error == null) {
    return '';
  }
  return String(error.message ?? error);
}

/**
 * Error recovery strategy.
 *
 * Hands out "wait and retry" recovery actions until `maxRetries` recoveries
 * have been recorded, and keeps a history of every recovery it handed out.
 * Call `reset()` to clear the history and start counting again.
 */
export class ErrorRecoveryStrategy {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxRetries=3] - Maximum number of recoveries to
   *   hand out before `attemptRecovery` returns null; 0 disables recovery
   * @param {number} [options.retryDelay=1000] - Base wait duration; it is
   *   copied into the recovery action's `duration` (doubled for
   *   timeout/network errors)
   */
  constructor(options = {}) {
    // `??` instead of `||` so an explicit 0 is honoured rather than replaced
    // by the default.
    this.maxRetries = options.maxRetries ?? 3;
    this.retryDelay = options.retryDelay ?? 1000;
    this.recoveryHistory = [];
  }

  /**
   * Attempt to recover from error
   *
   * @param {Error} error - The error that occurred (a plain string is also accepted)
   * @param {Object} action - The action that failed
   * @param {Object} context - Current context (page, state, etc.)
   * @returns {Promise<Object|null>} Recovery action or null if no recovery possible
   */
  async attemptRecovery(error, action, context = {}) {
    if (this.recoveryHistory.length >= this.maxRetries) {
      return null; // Max retries reached
    }

    const recovery = this.generateRecoveryAction(error, action, context);

    if (!recovery) {
      return null; // No recovery strategy available
    }

    this.recoveryHistory.push({
      error: errorMessageOf(error),
      action,
      recovery,
      timestamp: Date.now()
    });

    return recovery;
  }

  /**
   * Generate recovery action based on error type
   *
   * Simple strategy: wait longer for timeouts/network, wait and retry for others.
   *
   * @param {Error} error - The error that occurred (a plain string is also accepted)
   * @param {Object} action - The action that failed; echoed back as `originalAction`
   * @param {Object} context - Not used by this implementation
   * @returns {Object} `{ type: 'wait', duration, reason, originalAction }`
   */
  generateRecoveryAction(error, action, context) {
    const errorMessage = errorMessageOf(error).toLowerCase();

    // Timeout or network errors: wait longer
    if (errorMessage.includes('timeout') || errorMessage.includes('network')) {
      return {
        type: 'wait',
        duration: this.retryDelay * 2,
        reason: 'Timeout/network error, waiting longer',
        originalAction: action
      };
    }

    // Everything else: wait and retry
    return {
      type: 'wait',
      duration: this.retryDelay,
      reason: 'Error occurred, waiting and retrying',
      originalAction: action
    };
  }

  /**
   * Reset recovery state
   */
  reset() {
    this.recoveryHistory = [];
  }

  /**
   * Get recovery statistics
   *
   * NOTE(review): nothing in this class sets `success` on a history entry, so
   * `successfulRecoveries` and `successRate` stay 0 unless the caller marks
   * entries (e.g. via the `recoveries` array returned here) - confirm against
   * callers.
   *
   * @returns {{totalRecoveries: number, successfulRecoveries: number, successRate: number, recoveries: Array<Object>}}
   */
  getStats() {
    const successful = this.recoveryHistory.filter(r => r.success).length;
    const total = this.recoveryHistory.length;
    const successRate = total > 0 ? successful / total : 0;

    return {
      totalRecoveries: total,
      successfulRecoveries: successful,
      successRate,
      recoveries: this.recoveryHistory
    };
  }
}
110
+
111
/**
 * Factory helper: build an ErrorRecoveryStrategy.
 *
 * @param {Object} [options] - Passed unchanged to the ErrorRecoveryStrategy constructor
 * @returns {ErrorRecoveryStrategy} A new strategy instance
 */
export function createErrorRecoveryStrategy(options = {}) {
  const strategy = new ErrorRecoveryStrategy(options);
  return strategy;
}
117
+
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Explainability Scoring
3
+ *
4
+ * Simple heuristic: checks if reasoning exists, mentions the action, and isn't too technical.
5
+ *
6
+ * Research Context:
7
+ * - Explainability score >80% is often cited as critical for browser automation agents
8
+ * - Users need to understand agent reasoning for trust and debugging
9
+ * - Transparency scores measure communication quality
10
+ *
11
+ * Implementation:
12
+ * - Simple checks (has action, has target, not too technical, reasonable length) are sufficient
13
+ * - Complex scoring adds computation without clear benefit
14
+ * - The VLLM's reasoning is already human-readable
15
+ *
16
+ * See docs/research/IMPLEMENTATION_VS_RESEARCH.md for detailed research context.
17
+ *
18
+ * @module explainability-scorer
19
+ */
20
+
21
/**
 * Score explainability of action reasoning.
 *
 * Heuristic with four weighted checks: the reasoning mentions the action type
 * (0.4), mentions the action target (0.3), avoids a short list of technical
 * jargon words (0.2), and is between 21 and 499 characters long (0.1).
 *
 * @param {string} reasoning - Agent's reasoning for an action; empty,
 *   whitespace-only or non-string values score 0
 * @param {Object} action - The action taken; `type` plus one of `selector`,
 *   `key` or `url` are read. A missing action is treated as `{}`
 * @param {Object} [options] - Scoring options (currently unused)
 * @returns {Object} `{ score, clarity, completeness, relevance, issues, recommendation }`
 */
export function scoreExplainability(reasoning, action, options = {}) {
  if (typeof reasoning !== 'string' || reasoning.trim().length === 0) {
    return {
      score: 0,
      clarity: 0,
      completeness: 0,
      relevance: 0,
      issues: ['No reasoning provided'],
      recommendation: 'Add reasoning to explain why this action was taken'
    };
  }

  // Tolerate a missing action instead of throwing on property access.
  const act = action ?? {};
  const reasoningLower = reasoning.toLowerCase();

  const hasAction = Boolean(act.type) &&
    reasoningLower.includes(String(act.type).toLowerCase());

  // Check for target: selector, key, or URL - also check for semantic mentions
  // (e.g., "submit button" for selector "#submit")
  let hasTarget = false;
  if (act.selector) {
    const bareSelector = act.selector.toLowerCase().replace(/[#.]/g, '');
    hasTarget = reasoning.includes(act.selector) ||
      (bareSelector !== '' && reasoningLower.includes(bareSelector));
  } else if (act.key) {
    hasTarget = reasoning.includes(act.key);
  } else if (act.url) {
    hasTarget = reasoning.includes(act.url);
  }

  const notTooTechnical = !/\b(algorithm|implementation|optimization|paradigm)\b/i.test(reasoning);
  const reasonableLength = reasoning.length > 20 && reasoning.length < 500;

  // Completeness: considers both action and target, plus reasoning depth
  const hasDepth = reasoning.split(/[.!?]/).length > 2; // Multiple sentences indicate depth
  let completeness = 0.4;
  if (hasAction && hasTarget) {
    completeness = hasDepth ? 0.9 : 0.8;
  } else if (hasAction || hasTarget) {
    completeness = 0.6;
  }

  // Accumulate in integer tenths and divide once. Summing 0.4 + 0.3 + 0.1
  // directly yields 0.7999999999999999, which would fail a `>= 0.8` target,
  // and a perfect score would come out as 0.9999999999999999.
  const tenths = (hasAction ? 4 : 0) +
    (hasTarget ? 3 : 0) +
    (notTooTechnical ? 2 : 0) +
    (reasonableLength ? 1 : 0);
  const score = tenths / 10;

  const issues = [];
  if (!hasAction) issues.push('Reasoning does not mention action type');
  if (!hasTarget) issues.push('Reasoning does not mention action target');
  if (!notTooTechnical) issues.push('Reasoning uses technical jargon');
  if (!reasonableLength) issues.push('Reasoning is too short or too long');

  return {
    score,
    clarity: notTooTechnical && reasonableLength ? 0.8 : 0.5,
    completeness,
    relevance: hasAction ? 0.8 : 0.5,
    issues,
    recommendation: score >= 0.7
      ? 'Reasoning is clear and relevant'
      : 'Add more context about the action and its target'
  };
}
89
+
90
+
91
/**
 * Batch score explainability.
 *
 * Scores each reasoning with `scoreExplainability`, pairing it with the action
 * at the same index (or `{}` when there is none), and averages the four
 * metrics. The batch meets the target when the average score is at least 0.8.
 *
 * @param {Array<string>} reasonings - One reasoning text per action
 * @param {Array<Object>} actions - Actions aligned by index with `reasonings`;
 *   may be shorter than `reasonings` or omitted
 * @param {Object} [options] - Forwarded to every `scoreExplainability` call
 * @returns {Object} `{ total, averageScore, averageClarity, averageCompleteness,
 *   averageRelevance, meetsTarget, scores, recommendation }`; all averages are
 *   0 for an empty batch
 */
export function batchScoreExplainability(reasonings, actions, options = {}) {
  const TARGET_SCORE = 0.8;
  const actionList = actions ?? [];

  const scores = reasonings.map((reasoning, i) =>
    scoreExplainability(reasoning, actionList[i] || {}, options)
  );

  // Guard the division: an empty batch would otherwise yield NaN averages.
  const averageOf = (metric) =>
    scores.length > 0
      ? scores.reduce((sum, s) => sum + s[metric], 0) / scores.length
      : 0;

  const avgScore = averageOf('score');
  const meetsTarget = avgScore >= TARGET_SCORE;

  return {
    total: scores.length,
    averageScore: avgScore,
    averageClarity: averageOf('clarity'),
    averageCompleteness: averageOf('completeness'),
    averageRelevance: averageOf('relevance'),
    meetsTarget,
    scores,
    recommendation: meetsTarget
      ? 'Explainability meets target (>80%)'
      : `Explainability ${(avgScore * 100).toFixed(1)}% below target. Improve reasoning clarity, completeness, or relevance.`
  };
}
119
+
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Exploratory Automation
3
+ *
4
+ * Tries alternative approaches when actions fail.
5
+ * Simple strategy: wait, try different action type, or give up after max attempts.
6
+ *
7
+ * Research Context:
8
+ * - Exploratory success rate >60% is often cited as critical for browser automation agents
9
+ * - Agents should try alternative approaches when initial attempts fail
10
+ * - Need to track exploration attempts and avoid infinite loops
11
+ *
12
+ * Implementation:
13
+ * - Simple wait + alternative action type is sufficient for most failures
14
+ * - Complex exploration strategies add complexity without clear benefit
15
+ * - The VLLM can handle complex decision-making during action execution
16
+ *
17
+ * See docs/research/IMPLEMENTATION_VS_RESEARCH.md for detailed research context.
18
+ *
19
+ * @module exploratory-automation
20
+ */
21
+
22
+ /**
23
+ * Exploration strategy
24
+ */
25
+ export class ExploratoryStrategy {
26
+ constructor(options = {}) {
27
+ this.maxAttempts = options.maxAttempts || 5;
28
+ this.attemptHistory = [];
29
+ this.alternativeActions = [];
30
+ }
31
+
32
+ /**
33
+ * Get next exploration action
34
+ *
35
+ * @param {Object} currentState - Current browser state
36
+ * @param {Array} failedActions - Actions that have failed
37
+ * @param {string} goal - Current goal
38
+ * @returns {Object|null} Next action to try, or null if no more alternatives
39
+ */
40
+ getNextAction(currentState, failedActions = [], goal = '') {
41
+ if (this.attemptHistory.length >= this.maxAttempts) {
42
+ return null; // Max attempts reached
43
+ }
44
+
45
+ // Generate alternative actions based on goal and failed actions
46
+ const alternatives = this.generateAlternatives(currentState, failedActions, goal);
47
+
48
+ // Filter out already attempted actions
49
+ const untried = alternatives.filter(alt =>
50
+ !this.attemptHistory.some(attempt =>
51
+ JSON.stringify(attempt.action) === JSON.stringify(alt)
52
+ )
53
+ );
54
+
55
+ if (untried.length === 0) {
56
+ return null; // No more alternatives
57
+ }
58
+
59
+ // Select next action (prefer actions that haven't been tried)
60
+ const nextAction = untried[0];
61
+ this.attemptHistory.push({
62
+ action: nextAction,
63
+ timestamp: Date.now(),
64
+ state: currentState
65
+ });
66
+
67
+ return nextAction;
68
+ }
69
+
70
+ /**
71
+ * Generate alternative actions
72
+ *
73
+ * Simple strategy: wait, then try a different action type if available.
74
+ */
75
+ generateAlternatives(currentState, failedActions, goal) {
76
+ const alternatives = [];
77
+ const lastFailed = failedActions[failedActions.length - 1];
78
+
79
+ if (!lastFailed) {
80
+ return alternatives;
81
+ }
82
+
83
+ // If click failed, try wait then retry
84
+ if (lastFailed.type === 'click') {
85
+ alternatives.push(
86
+ { type: 'wait', duration: 1000 },
87
+ { type: 'keyboard', key: 'Tab' } // Try keyboard navigation
88
+ );
89
+ }
90
+
91
+ // If keyboard failed, try wait
92
+ if (lastFailed.type === 'keyboard') {
93
+ alternatives.push({ type: 'wait', duration: 1000 });
94
+ }
95
+
96
+ // Always have wait as fallback
97
+ if (alternatives.length === 0) {
98
+ alternatives.push({ type: 'wait', duration: 1000 });
99
+ }
100
+
101
+ return alternatives;
102
+ }
103
+
104
+ /**
105
+ * Reset exploration state
106
+ */
107
+ reset() {
108
+ this.attemptHistory = [];
109
+ this.alternativeActions = [];
110
+ }
111
+
112
+ /**
113
+ * Get exploration statistics
114
+ */
115
+ getStats() {
116
+ return {
117
+ totalAttempts: this.attemptHistory.length,
118
+ maxAttempts: this.maxAttempts,
119
+ remainingAttempts: this.maxAttempts - this.attemptHistory.length,
120
+ attempts: this.attemptHistory
121
+ };
122
+ }
123
+ }
124
+
125
/**
 * Factory helper: build an ExploratoryStrategy.
 *
 * @param {Object} [options] - Passed unchanged to the ExploratoryStrategy constructor
 * @returns {ExploratoryStrategy} A new strategy instance
 */
export function createExploratoryStrategy(options = {}) {
  const strategy = new ExploratoryStrategy(options);
  return strategy;
}
131
+
@@ -173,3 +173,13 @@ export {
173
173
  initHumanValidation
174
174
  } from '../human-validation-manager.mjs';
175
175
 
176
+ // Browser automation utilities
177
+ export * from './counterfactual-tester.mjs';
178
+ export * from './capability-stratifier.mjs';
179
+ export * from './baseline-validator.mjs';
180
+ export * from './intent-recognizer.mjs';
181
+ export * from './action-hallucination-detector.mjs';
182
+ export * from './exploratory-automation.mjs';
183
+ export * from './error-recovery.mjs';
184
+ export * from './explainability-scorer.mjs';
185
+