@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared Constants
3
+ *
4
+ * Centralized constants for magic numbers used throughout the codebase.
5
+ * All values are documented with their purpose and rationale.
6
+ */
7
+
8
/**
 * Cache Configuration
 *
 * Frozen so that no importer can accidentally change limits that every other
 * importer of this module shares.
 */
export const CACHE_CONSTANTS = Object.freeze({
  /** Maximum age of cache entries in milliseconds (7 days) */
  MAX_CACHE_AGE_MS: 7 * 24 * 60 * 60 * 1000,

  /** Maximum number of cache entries before LRU eviction */
  MAX_CACHE_SIZE: 1000,

  /** Maximum cache file size in bytes (100 * 1024 * 1024, i.e. 100MB) */
  MAX_CACHE_SIZE_BYTES: 100 * 1024 * 1024
});
21
+
22
/**
 * Temporal Aggregation Configuration
 *
 * Frozen so that no importer can accidentally change defaults that every other
 * importer of this module shares.
 */
export const TEMPORAL_CONSTANTS = Object.freeze({
  /** Default window size for temporal aggregation in milliseconds (10 seconds) */
  DEFAULT_WINDOW_SIZE_MS: 10000,

  /** Default exponential decay factor for older notes (0.9 = 10% decay per window) */
  DEFAULT_DECAY_FACTOR: 0.9,

  /** Default coherence threshold for temporal consistency checks (0.7 = 70% coherence required) */
  DEFAULT_COHERENCE_THRESHOLD: 0.7
});
35
+
36
/**
 * API Configuration
 *
 * Frozen so that no importer can accidentally change defaults that every other
 * importer of this module shares.
 */
export const API_CONSTANTS = Object.freeze({
  /** Default timeout for API calls in milliseconds (30 seconds) */
  DEFAULT_TIMEOUT_MS: 30000,

  /** Default maximum concurrency for API calls */
  DEFAULT_MAX_CONCURRENCY: 5
});
46
+
47
/**
 * Batch Optimizer Configuration
 *
 * Frozen so that no importer can accidentally change limits that every other
 * importer of this module shares.
 */
export const BATCH_OPTIMIZER_CONSTANTS = Object.freeze({
  /** Maximum queue size before rejecting new requests (prevents memory leaks) */
  MAX_QUEUE_SIZE: 1000,

  /** Request timeout in milliseconds (30 seconds) */
  REQUEST_TIMEOUT_MS: 30000
});
57
+
58
/**
 * Uncertainty Reduction Configuration
 *
 * Frozen so that no importer can accidentally change thresholds that every
 * other importer of this module shares.
 */
export const UNCERTAINTY_CONSTANTS = Object.freeze({
  /** Low score threshold for edge case detection (bottom 30% of 0-10 scale) */
  LOW_SCORE_THRESHOLD: 3,

  /** High score threshold for edge case detection (top 10% of 0-10 scale) */
  HIGH_SCORE_THRESHOLD: 9,

  /** High uncertainty threshold for triggering self-consistency (0.3 = 30% uncertainty) */
  HIGH_UNCERTAINTY_THRESHOLD: 0.3,

  /** Issue count threshold for over-detection risk (5+ issues might indicate hallucination) */
  OVER_DETECTION_ISSUE_COUNT: 5,

  /** Self-consistency N for Tier 1 scenarios (expert, medical, blocking issues) */
  TIER1_SELF_CONSISTENCY_N: 5,

  /** Self-consistency N for edge cases (Tier 2) */
  EDGE_CASE_SELF_CONSISTENCY_N: 3
});
80
+
@@ -0,0 +1,350 @@
1
+ /**
2
+ * Context Compressor
3
+ *
4
+ * Compresses historical context to reduce token usage while maintaining accuracy.
5
+ *
6
+ * General-purpose utility - no domain-specific logic.
7
+ */
8
+
9
/**
 * Compress context by aggregating notes and extracting key insights
 *
 * Notes are ordered newest-first (missing timestamps sort as 0), "key events"
 * are picked out, and at most `maxNotes` notes are selected according to
 * `aggregationStrategy`. A text summary and a rough token estimate accompany
 * the selection.
 *
 * A note counts as a key event when its `step` contains "bug", "error" or
 * "critical", when its `severity` is 'CRITICAL', or when it carries a
 * `reflection.score`.
 *
 * @param {import('./index.mjs').TemporalNote[]} notes - Array of temporal notes to compress
 * @param {{
 *   maxTokens?: number;
 *   maxNotes?: number;
 *   includeRecent?: boolean;
 *   includeKeyEvents?: boolean;
 *   aggregationStrategy?: 'temporal' | 'semantic' | 'importance';
 * }} [options={}] - Compression options.
 *   `maxNotes` (default 10) caps the selection. `includeRecent` and
 *   `includeKeyEvents` (both default true) only affect the 'temporal' strategy.
 *   `maxTokens` is accepted for compatibility but is NOT enforced by this
 *   function; inspect `tokenEstimate` on the result instead.
 *   An unrecognised `aggregationStrategy` falls back to 'temporal'.
 * @returns {{
 *   compressed: import('./index.mjs').TemporalNote[];
 *   summary: string;
 *   tokenEstimate: number;
 *   compressionRatio: number;
 *   originalCount: number;
 *   compressedCount: number;
 * }} Selected notes plus summary and size statistics. The same shape is
 *   returned for empty input (with zero counts).
 */
export function compressContext(notes, options = {}) {
  const {
    maxNotes = 10, // Maximum notes to include
    includeRecent = true, // Always include most recent notes
    includeKeyEvents = true, // Always include key events (bugs, state changes)
    aggregationStrategy = 'temporal' // 'temporal', 'semantic', 'importance'
  } = options;

  if (!notes || notes.length === 0) {
    // Same keys as the non-empty result so callers can read counts unconditionally.
    return {
      compressed: [],
      summary: 'No notes available',
      tokenEstimate: 0,
      compressionRatio: 1.0,
      originalCount: 0,
      compressedCount: 0
    };
  }

  // Sort a copy (most recent first) so the caller's array is left untouched.
  const sortedNotes = [...notes].sort((a, b) => (b.timestamp || 0) - (a.timestamp || 0));

  // Guarded lookup: a `step` without an `includes` method (e.g. a number) is simply not a match.
  const stepMentions = (note, word) =>
    typeof note.step?.includes === 'function' && note.step.includes(word);

  // Extract key events (bugs, errors, critical observations, scored reflections)
  const keyEvents = sortedNotes.filter(note =>
    stepMentions(note, 'bug') ||
    stepMentions(note, 'error') ||
    stepMentions(note, 'critical') ||
    note.severity === 'CRITICAL' ||
    note.reflection?.score !== undefined
  );

  let selectedNotes;

  if (aggregationStrategy === 'semantic') {
    // Semantic: Group by similarity and select representatives
    selectedNotes = selectSemanticRepresentatives(sortedNotes, maxNotes, keyEvents);
  } else if (aggregationStrategy === 'importance') {
    // Importance: Score notes by importance and select top
    selectedNotes = selectByImportance(sortedNotes, maxNotes, keyEvents);
  } else {
    // Temporal (also the fallback for unknown strategies, so a typo in the
    // option does not silently discard every note): most recent + key events.
    // Roughly 70% of the budget goes to recent notes and 30% to key events;
    // a positive budget always leaves room for at least one recent note.
    const recentQuota = maxNotes > 0 ? Math.max(1, Math.floor(maxNotes * 0.7)) : 0;
    const keyEventQuota = Math.floor(maxNotes * 0.3);
    const recentNotes = includeRecent ? sortedNotes.slice(0, recentQuota) : [];
    const keyEventNotes = includeKeyEvents ? keyEvents.slice(0, keyEventQuota) : [];

    // Combine and deduplicate. The separator keeps ('a1', 2) and ('a', 12)
    // from collapsing onto the same key.
    const seen = new Set();
    selectedNotes = [...recentNotes, ...keyEventNotes].filter(note => {
      const id = `${note.step}|${note.timestamp || 0}`;
      if (seen.has(id)) return false;
      seen.add(id);
      return true;
    }).slice(0, maxNotes);
  }

  // Generate summary from selected notes
  const summary = generateSummary(selectedNotes, sortedNotes);

  // Estimate token count of the selection (including summary) against the full set
  const tokenEstimate = estimateTokens(selectedNotes, summary);
  const originalTokenEstimate = estimateTokens(sortedNotes);
  const compressionRatio = originalTokenEstimate > 0 ? tokenEstimate / originalTokenEstimate : 1.0;

  return {
    compressed: selectedNotes,
    summary,
    tokenEstimate,
    compressionRatio,
    originalCount: notes.length,
    compressedCount: selectedNotes.length
  };
}
94
+
95
/**
 * Select semantic representatives (group similar notes, pick one from each group)
 *
 * Notes are grouped by the part of `step` before the first underscore (notes
 * whose `step` is missing, empty or not a string go into the 'other' group).
 * The most recent note of each group is kept, in order of first group
 * appearance. Key events are then appended, duplicates (same step and
 * timestamp) are dropped, and the list is cut to `maxNotes`.
 *
 * Note that key events come after the group representatives, so the
 * `maxNotes` cap can truncate them.
 *
 * @param {Array<Object>} notes - Notes to group
 * @param {number} maxNotes - Maximum number of notes to return
 * @param {Array<Object>} [keyEvents=[]] - Notes to append after the representatives
 * @returns {Array<Object>} At most `maxNotes` notes
 */
function selectSemanticRepresentatives(notes, maxNotes, keyEvents = []) {
  // Track the freshest note per group; Map iteration keeps first-seen group order.
  const newestByGroup = new Map();

  for (const note of notes) {
    const prefix = typeof note.step === 'string' ? note.step.split('_')[0] : '';
    const groupKey = prefix || 'other';
    const current = newestByGroup.get(groupKey);
    // Strictly newer wins, so on equal timestamps the earliest-listed note is kept.
    if (current === undefined || (note.timestamp || 0) - (current.timestamp || 0) > 0) {
      newestByGroup.set(groupKey, note);
    }
  }

  // Representatives first, then key events; dedupe on (step, timestamp).
  // The separator keeps ('a1', 2) and ('a', 12) from sharing a key.
  const seen = new Set();
  const result = [];
  for (const note of [...newestByGroup.values(), ...keyEvents]) {
    const id = `${note.step}|${note.timestamp || 0}`;
    if (seen.has(id)) continue;
    seen.add(id);
    result.push(note);
  }

  return result.slice(0, maxNotes);
}
127
+
128
/**
 * Select notes by importance score
 *
 * Each note's score is the sum of:
 *  - 10 if the note is one of `keyEvents` (matched by identity),
 *  - a recency bonus of `10 - ageInSeconds`, clamped to the range 0..10
 *    (notes without a timestamp are treated as timestamp 0),
 *  - 5 for severity 'CRITICAL', 3 for severity 'HIGH',
 *  - 2 if the note has a `reflection`,
 *  - 1 if the note has `gameState` or `state`.
 *
 * The recency bonus is clamped at 10 so that a timestamp ahead of the local
 * clock cannot outweigh every other signal.
 *
 * @param {Array<Object>} notes - Notes to rank
 * @param {number} maxNotes - Maximum number of notes to return
 * @param {Array<Object>} [keyEvents=[]] - Notes that receive the key-event bonus
 * @returns {Array<Object>} Up to `maxNotes` notes, highest score first
 */
function selectByImportance(notes, maxNotes, keyEvents = []) {
  // Read the clock once so every note is aged against the same instant.
  const now = Date.now();
  // Set lookup instead of Array#includes inside the loop.
  const keyEventSet = new Set(keyEvents);

  const scored = notes.map(note => {
    let score = 0;

    // Key events get high score
    if (keyEventSet.has(note)) score += 10;

    // Recent notes get higher score (linear decay over 10 seconds)
    const ageSeconds = (now - (note.timestamp || 0)) / 1000;
    const recency = Math.min(10, Math.max(0, 10 - ageSeconds));
    // A non-numeric timestamp yields NaN, which would corrupt the sort below.
    if (Number.isFinite(recency)) score += recency;

    // Severity
    if (note.severity === 'CRITICAL') score += 5;
    if (note.severity === 'HIGH') score += 3;

    // Reflections get higher score
    if (note.reflection) score += 2;

    // State changes get higher score
    if (note.gameState || note.state) score += 1;

    return { note, score };
  });

  // Sort by score (descending) and select top
  return scored
    .sort((a, b) => b.score - a.score)
    .slice(0, maxNotes)
    .map(item => item.note);
}
161
+
162
/**
 * Generate summary from selected notes
 *
 * Produces a comma-joined description: the selection size versus the total,
 * followed (when non-zero) by the number of bug notes (`step` contains 'bug'),
 * notes with a `reflection`, and notes with severity 'CRITICAL', and finally
 * the time span covered by the selection in whole seconds.
 *
 * The time span is the difference between the latest and earliest timestamp
 * found in the selection, independent of the order of `selectedNotes`. Notes
 * without a usable timestamp are ignored for this purpose.
 *
 * @param {Array<Object>} selectedNotes - Notes that were kept
 * @param {{ length: number }} allNotes - Full set of notes (only its length is read)
 * @returns {string} Summary text, or 'No notes available' for an empty selection
 */
function generateSummary(selectedNotes, allNotes) {
  if (selectedNotes.length === 0) {
    return 'No notes available';
  }

  const parts = [
    `Summary: ${selectedNotes.length} key observations from ${allNotes.length} total notes.`
  ];

  // Single pass: gather the statistics and the timestamp range together.
  let bugs = 0;
  let reflections = 0;
  let critical = 0;
  let timedNotes = 0;
  let earliest = Infinity;
  let latest = -Infinity;

  for (const note of selectedNotes) {
    if (typeof note.step?.includes === 'function' && note.step.includes('bug')) bugs += 1;
    if (note.reflection) reflections += 1;
    if (note.severity === 'CRITICAL') critical += 1;

    if (note.timestamp != null) {
      const stamp = Number(note.timestamp);
      if (Number.isFinite(stamp)) {
        timedNotes += 1;
        if (stamp < earliest) earliest = stamp;
        if (stamp > latest) latest = stamp;
      }
    }
  }

  if (bugs > 0) parts.push(`${bugs} bug detection(s)`);
  if (reflections > 0) parts.push(`${reflections} reflection(s)`);
  if (critical > 0) parts.push(`${critical} critical issue(s)`);

  // Time span (needs at least two notes that actually carry a timestamp)
  if (timedNotes > 1) {
    const span = Math.round((latest - earliest) / 1000);
    if (span > 0) parts.push(`Time span: ${span}s`);
  }

  return parts.join(', ');
}
200
+
201
/**
 * Estimate token count for notes
 *
 * Each note is rendered as "<step> <observation> <JSON of gameState or state>"
 * (missing parts become an empty string / empty object), the renderings are
 * joined with spaces, the summary is appended, and the character count is
 * divided by four and rounded up.
 *
 * @param {Array<Object>} notes - Notes to measure
 * @param {string} [summary=''] - Optional summary text counted alongside the notes
 * @returns {number} Rough token estimate (1 token ≈ 4 characters)
 */
function estimateTokens(notes, summary = '') {
  const CHARS_PER_TOKEN = 4;

  const renderNote = (entry) => {
    const snapshot = entry.gameState || entry.state || {};
    return `${entry.step || ''} ${entry.observation || ''} ${JSON.stringify(snapshot)}`;
  };

  const rendered = notes.map(renderNote);
  const combined = rendered.join(' ') + ' ' + summary;

  return Math.ceil(combined.length / CHARS_PER_TOKEN);
}
212
+
213
/**
 * Compress state history by keeping important transitions
 *
 * Candidate states are gathered in priority order (first state, last state,
 * then key transitions), de-duplicated by their JSON form, cut to the
 * configured maximum, and finally returned in their original chronological
 * order so that the last element of `compressed` is the latest state kept.
 *
 * @param {Array<Record<string, unknown>>} stateHistory - Array of state objects
 *   (a single non-array state is treated as a one-element history)
 * @param {{
 *   maxStates?: number;
 *   includeFirst?: boolean;
 *   includeLast?: boolean;
 *   includeKeyTransitions?: boolean;
 *   maxLength?: number;
 *   preserveImportant?: boolean;
 * }} [options={}] - Compression options.
 *   `maxStates` (default 3) caps the number of states kept; `includeFirst`,
 *   `includeLast` and `includeKeyTransitions` (all default true) choose the
 *   candidates. `maxLength` and `preserveImportant` are the option names this
 *   function previously documented without reading them; they are honoured as
 *   fallbacks for `maxStates` and `includeKeyTransitions` respectively.
 * @returns {{
 *   compressed: Array<Record<string, unknown>>;
 *   summary: string;
 *   tokenEstimate: number;
 *   compressionRatio: number;
 *   originalCount: number;
 *   compressedCount: number;
 *   originalTokenEstimate: number;
 * }} Kept states plus summary and size statistics. The same shape is returned
 *   for empty input (with zero counts).
 */
export function compressStateHistory(stateHistory, options = {}) {
  const {
    maxStates = options.maxLength ?? 3, // Maximum states to include
    includeFirst = true, // Always include first state
    includeLast = true, // Always include last state
    includeKeyTransitions = options.preserveImportant ?? true // Include states with significant changes
  } = options;

  if (!stateHistory || stateHistory.length === 0) {
    // Same keys as the non-empty result so callers can read them unconditionally.
    return {
      compressed: [],
      summary: 'No state history',
      tokenEstimate: 0,
      compressionRatio: 1.0,
      originalCount: 0,
      compressedCount: 0,
      originalTokenEstimate: 0
    };
  }

  const states = Array.isArray(stateHistory) ? stateHistory : [stateHistory];
  const lastIndex = states.length - 1;

  // Candidates carry their position so chronological order can be restored
  // after the priority-ordered cap below.
  const candidates = [];

  if (includeFirst) {
    candidates.push({ state: states[0], index: 0 });
  }

  if (includeLast && lastIndex > 0 && states[lastIndex] !== states[0]) {
    candidates.push({ state: states[lastIndex], index: lastIndex });
  }

  // Find key transitions (significant changes)
  if (includeKeyTransitions && states.length > 2) {
    const firstIndexOf = new Map();
    states.forEach((state, index) => {
      if (!firstIndexOf.has(state)) firstIndexOf.set(state, index);
    });
    // Loop instead of push(...spread): a very long history must not overflow the call stack.
    for (const state of findKeyTransitions(states)) {
      candidates.push({ state, index: firstIndexOf.get(state) ?? lastIndex });
    }
  }

  // Deduplicate by value, apply the cap in priority order, then restore chronology.
  const seen = new Set();
  const unique = candidates
    .filter(({ state }) => {
      const id = JSON.stringify(state);
      if (seen.has(id)) return false;
      seen.add(id);
      return true;
    })
    .slice(0, maxStates)
    .sort((a, b) => a.index - b.index)
    .map(({ state }) => state);

  // Generate summary
  const summary = generateStateSummary(unique, states);

  // Estimate tokens
  const tokenEstimate = estimateStateTokens(unique, summary);
  const originalTokenEstimate = estimateStateTokens(states);
  const compressionRatio = originalTokenEstimate > 0 ? tokenEstimate / originalTokenEstimate : 1.0;

  return {
    compressed: unique,
    summary,
    tokenEstimate,
    compressionRatio,
    originalCount: states.length,
    compressedCount: unique.length,
    originalTokenEstimate
  };
}
285
+
286
/**
 * Find key transitions (states with significant changes)
 *
 * A state is a key transition when, compared with the state immediately before
 * it, at least one property changed significantly:
 *  - two numbers differ by more than `numericThreshold`;
 *  - two objects/arrays serialise to different JSON (so a fresh but identical
 *    snapshot object is not a change);
 *  - anything else is not strictly equal.
 * Properties present in either state are examined, so a removed property
 * counts as a change. If either state is not an object, the pair is compared
 * with strict equality.
 *
 * @param {Array<Record<string, unknown>>} states - State history, oldest first
 * @param {number} [numericThreshold=10] - Minimum absolute numeric difference
 *   that counts as significant
 * @returns {Array<Record<string, unknown>>} The states (from index 1 onwards)
 *   that differ significantly from their predecessor, in original order
 */
function findKeyTransitions(states, numericThreshold = 10) {
  const isObjectLike = value => value !== null && typeof value === 'object';

  const valueChanged = (prevVal, currVal) => {
    // Numeric changes
    if (typeof prevVal === 'number' && typeof currVal === 'number') {
      return Math.abs(currVal - prevVal) > numericThreshold;
    }

    // Nested structures: compare by content rather than by reference
    if (isObjectLike(prevVal) && isObjectLike(currVal)) {
      if (prevVal === currVal) return false;
      try {
        return JSON.stringify(prevVal) !== JSON.stringify(currVal);
      } catch {
        // Not serialisable (e.g. circular): fall back to "different references".
        return true;
      }
    }

    // String/boolean changes, type changes, added or removed values
    return prevVal !== currVal;
  };

  const transitions = [];

  for (let i = 1; i < states.length; i++) {
    const prev = states[i - 1];
    const curr = states[i];

    let hasSignificantChange;
    if (!isObjectLike(prev) || !isObjectLike(curr)) {
      hasSignificantChange = prev !== curr;
    } else {
      // Union of keys so that properties dropped from `curr` are noticed too.
      const keys = new Set([...Object.keys(prev), ...Object.keys(curr)]);
      hasSignificantChange = [...keys].some(key => valueChanged(prev[key], curr[key]));
    }

    if (hasSignificantChange) {
      transitions.push(curr);
    }
  }

  return transitions;
}
317
+
318
/**
 * Generate summary for state history
 *
 * Reports how many states were kept out of the total and, when more than one
 * state was kept, whether the first and last kept states differ. Properties of
 * both states are compared with strict equality, so a property that exists
 * only in the first state counts as a change. If either state is not an
 * object, the two are compared directly.
 *
 * @param {Array<Record<string, unknown>>} selectedStates - States that were kept
 * @param {{ length: number }} allStates - Full history (only its length is read)
 * @returns {string} Summary text, or 'No state history' for an empty selection
 */
function generateStateSummary(selectedStates, allStates) {
  if (selectedStates.length === 0) {
    return 'No state history';
  }

  const parts = [`${selectedStates.length} key states from ${allStates.length} total`];

  if (selectedStates.length > 1) {
    const first = selectedStates[0];
    const last = selectedStates[selectedStates.length - 1];
    const isObjectLike = value => value !== null && typeof value === 'object';

    let hasChanges;
    if (isObjectLike(first) && isObjectLike(last)) {
      // Union of keys: looking only at `last` would miss properties that disappeared.
      const keys = new Set([...Object.keys(first), ...Object.keys(last)]);
      hasChanges = [...keys].some(key => first[key] !== last[key]);
    } else {
      hasChanges = first !== last;
    }

    if (hasChanges) parts.push('state changes detected');
  }

  return parts.join(', ');
}
341
+
342
/**
 * Estimate tokens for state history
 *
 * Serialises every state to JSON, joins the results with spaces, appends the
 * summary, and divides the character count by four (rounded up).
 *
 * @param {Array<unknown>} states - States to measure
 * @param {string} [summary=''] - Optional summary text counted alongside the states
 * @returns {number} Rough token estimate (1 token ≈ 4 characters)
 */
function estimateStateTokens(states, summary = '') {
  const CHARS_PER_TOKEN = 4;

  const serialized = states.map((snapshot) => JSON.stringify(snapshot));
  const combined = serialized.join(' ') + ' ' + summary;

  return Math.ceil(combined.length / CHARS_PER_TOKEN);
}
350
+