@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
package/SECURITY.md ADDED
@@ -0,0 +1,108 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ | ------- | ------------------ |
7
+ | 0.1.x | :white_check_mark: |
8
+
9
+ ## Reporting a Vulnerability
10
+
11
+ If you discover a security vulnerability, please report it responsibly:
12
+
13
+ 1. **Email:** security@henrywallace.io
14
+ 2. **Include:**
15
+ - Description of the vulnerability
16
+ - Steps to reproduce
17
+ - Potential impact
18
+ - Suggested fix (if any)
19
+
20
+ **Please do not** open a public GitHub issue for security vulnerabilities.
21
+
22
+ We will acknowledge receipt within 48 hours and provide an update on the status of the vulnerability within 7 days.
23
+
24
+ ## Security Best Practices
25
+
26
+ ### For Package Users
27
+
28
+ 1. **Always use environment variables** for API keys
29
+ - Never hardcode secrets in your code
30
+ - Use `.env` files (not committed to git)
31
+ - Rotate keys regularly
32
+
33
+ 2. **Enable secret detection**
34
+ - Use the provided pre-commit hook
35
+ - Review `.secretsignore.example` for configuration
36
+ - Run `node scripts/detect-secrets.mjs --scan-history` periodically
37
+
38
+ 3. **Validate inputs**
39
+ - Validate file paths before passing to functions
40
+ - Sanitize user-provided prompts
41
+ - Set reasonable size limits on inputs
42
+
43
+ 4. **Monitor API usage**
44
+ - Set up rate limiting if using the API
45
+ - Monitor for unusual patterns
46
+ - Review error logs regularly
47
+
48
+ 5. **Keep dependencies updated**
49
+ - Regularly update `@playwright/test` peer dependency
50
+ - Run `npm audit` regularly
51
+ - Review security advisories
52
+
53
+ ### For Contributors
54
+
55
+ 1. **Follow secure coding practices**
56
+ - Never commit secrets
57
+ - Use the pre-commit hook
58
+ - Review code for security issues
59
+
60
+ 2. **Test security features**
61
+ - Add security-focused tests
62
+ - Test input validation
63
+ - Test error handling
64
+
65
+ 3. **Document security considerations**
66
+ - Document any security assumptions
67
+ - Note any known limitations
68
+ - Update this file for new vulnerabilities
69
+
70
+ ## Known Security Considerations
71
+
72
+ ### API Endpoint (`/api/validate`)
73
+
74
+ - **Authentication** - Optional API key authentication via `API_KEY` or `VLLM_API_KEY` environment variable
75
+ - Authentication is enforced by default whenever an API key is configured; set `REQUIRE_AUTH=false` to disable it explicitly
76
+ - API key can be provided via `X-API-Key` header or `Authorization: Bearer <key>` header
77
+ - **Rate Limiting** - Built-in rate limiting (10 requests/minute by default, configurable via `RATE_LIMIT_MAX_REQUESTS`)
78
+ - Rate limit headers: `X-RateLimit-Limit`, `X-RateLimit-Remaining`, `X-RateLimit-Reset`
79
+ - Returns 429 status when limit exceeded
80
+ - Uses in-memory store (use Redis for production multi-instance deployments)
81
+ - **Error messages** - Sanitized to prevent information leakage
82
+
83
+ ### File Operations
84
+
85
+ - **Path validation** - Always validate file paths before use
86
+ - **Temporary files** - Cleaned up automatically, but ensure proper error handling
87
+
88
+ ### Environment Variables
89
+
90
+ - **No validation** - Environment variables are not validated automatically; check required variables at startup
91
+ - **No encryption** - Environment variable values are not encrypted; store sensitive values securely
92
+
93
+ ## Security Features
94
+
95
+ - ✅ Pre-commit secret detection (enhanced with red team recommendations)
96
+ - ✅ Git history scanning option
97
+ - ✅ Zero runtime dependencies
98
+ - ✅ Input validation
99
+ - ✅ Error handling with sanitization
100
+ - ✅ Rate limiting (configurable; in-memory store — use an external store such as Redis for multi-instance deployments)
101
+ - ✅ Authentication (optional API key)
102
+ - ✅ Path traversal protection
103
+ - ✅ Size limits on all inputs
104
+
105
+ ## Changelog
106
+
107
+ See [CHANGELOG.md](./CHANGELOG.md) for security-related updates.
108
+
package/api/health.js ADDED
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Health check endpoint
3
+ *
4
+ * GET /api/health
5
+ */
6
+
7
+ import { createConfig } from '../src/index.mjs';
8
+
9
/**
 * Health check handler for `GET /api/health`.
 *
 * Responds with 405 (and an `Allow` header) for any method other than GET.
 * On success it reports the `enabled` and `provider` values returned by
 * `createConfig()` together with a version string and the current time.
 * If building the config throws, the error is logged server-side and a
 * generic 500 payload is returned.
 *
 * @param {object} req - Incoming request; only `req.method` is read.
 * @param {object} res - Response object exposing `setHeader`, `status` and `json`.
 * @returns {Promise<*>} Whatever `res.status(...).json(...)` returns.
 */
export default async function handler(req, res) {
  if (req.method !== 'GET') {
    // A 405 response must advertise which methods the resource supports.
    res.setHeader('Allow', 'GET');
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    const { enabled, provider } = createConfig();

    return res.status(200).json({
      status: 'ok',
      enabled,
      provider,
      // NOTE(review): hard-coded and out of step with the published package
      // version (0.5.1) — confirm whether this should track package.json.
      version: '0.1.0',
      timestamp: new Date().toISOString(),
    });
  } catch (error) {
    // SECURITY: keep internal error details server-side; the client only
    // ever sees a generic message.
    console.error('[Health] Error:', error);
    return res.status(500).json({
      status: 'error',
      error: 'Health check failed',
    });
  }
}
34
+
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Vercel Serverless Function for VLLM Screenshot Validation
3
+ *
4
+ * POST /api/validate
5
+ *
6
+ * Body:
7
+ * {
8
+ * "image": "data:image/<png|jpeg|jpg|gif|webp>;base64,<base64-encoded-image>",
9
+ * "prompt": "Evaluation prompt",
10
+ * "context": { ... }
11
+ * }
12
+ *
13
+ * Returns:
14
+ * {
15
+ * "enabled": boolean,
16
+ * "provider": string,
17
+ * "score": number|null,
18
+ * "issues": string[],
19
+ * "assessment": string|null,
20
+ * "reasoning": string,
21
+ * "estimatedCost": object|null,
22
+ * "responseTime": number
23
+ * }
24
+ */
25
+
26
+ import { validateScreenshot, createConfig, normalizeValidationResult } from '../src/index.mjs';
27
+ import { writeFileSync, unlinkSync } from 'fs';
28
+ import { join } from 'path';
29
+ import { tmpdir } from 'os';
30
+ import { randomBytes, timingSafeEqual } from 'crypto';
31
+
32
// Security limits applied to the request body.
const MAX_IMAGE_SIZE = 10 * 1024 * 1024; // 10MB, measured on the encoded string
const MAX_PROMPT_LENGTH = 5000;
const MAX_CONTEXT_SIZE = 10000; // length of the JSON-serialized context

// Rate limiting configuration
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
const DEFAULT_RATE_LIMIT_MAX_REQUESTS = 10;

/**
 * Parse a strictly positive integer from an environment-style string.
 *
 * A missing, non-numeric, zero or negative value yields `fallback`, so a
 * misconfigured variable can never produce `NaN` (which would make every
 * `count >= limit` comparison false and silently disable rate limiting) or
 * a limit of zero.
 *
 * @param {string|undefined} raw - Raw value, typically from `process.env`.
 * @param {number} fallback - Value used when `raw` is not a positive integer.
 * @returns {number} The parsed integer or `fallback`.
 */
function parsePositiveInt(raw, fallback) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

const RATE_LIMIT_MAX_REQUESTS = parsePositiveInt(
  process.env.RATE_LIMIT_MAX_REQUESTS,
  DEFAULT_RATE_LIMIT_MAX_REQUESTS,
);
const rateLimitStore = new Map(); // In-memory store (use Redis in production)

// Authentication configuration
const API_KEY = process.env.API_KEY || process.env.VLLM_API_KEY || null;
// Default to requiring auth if an API key is set (more secure).
// Set REQUIRE_AUTH=false explicitly to disable.
const REQUIRE_AUTH = process.env.REQUIRE_AUTH !== 'false' && API_KEY !== null;
47
+
48
/**
 * Sliding-window rate limiter backed by the module-level `rateLimitStore`
 * (in-memory; for production use Redis or a dedicated rate limiting service).
 *
 * Each call first prunes every stored identifier down to the timestamps that
 * still fall inside the current window, dropping identifiers that have none.
 * If the caller is under `RATE_LIMIT_MAX_REQUESTS`, the current request is
 * recorded.
 *
 * @param {string} identifier - Key identifying the client.
 * @returns {{allowed: boolean, remaining: number, resetAt: number}}
 *   `resetAt` is the epoch-millisecond time at which the oldest request
 *   counted against this client leaves the window, for both allowed and
 *   blocked requests.
 */
function checkRateLimit(identifier) {
  const now = Date.now();
  const windowStart = now - RATE_LIMIT_WINDOW;

  // Prune expired timestamps for every client so the store cannot grow
  // without bound. Deleting entries while iterating a Map is safe.
  for (const [key, timestamps] of rateLimitStore) {
    const live = timestamps.filter((ts) => ts > windowStart);
    if (live.length === 0) {
      rateLimitStore.delete(key);
    } else {
      rateLimitStore.set(key, live);
    }
  }

  // After pruning, whatever is stored for this identifier is already in-window.
  const recent = rateLimitStore.get(identifier) ?? [];

  // Oldest request that counts against the client (including the current
  // one). Seeding the fold with `now` keeps the result finite when `recent`
  // is empty, and avoids spreading an arbitrarily long array into Math.min.
  const oldest = recent.reduce((min, ts) => Math.min(min, ts), now);
  const resetAt = oldest + RATE_LIMIT_WINDOW;

  if (recent.length >= RATE_LIMIT_MAX_REQUESTS) {
    return { allowed: false, remaining: 0, resetAt };
  }

  // Record the current request.
  const updated = [...recent, now];
  rateLimitStore.set(identifier, updated);

  return {
    allowed: true,
    remaining: RATE_LIMIT_MAX_REQUESTS - updated.length,
    resetAt,
  };
}
88
+
89
/**
 * Derive the key used to bucket a request for rate limiting.
 *
 * The client IP is taken from the first `X-Forwarded-For` entry, then
 * `X-Real-IP`, then the socket address, falling back to `'unknown'`.
 *
 * A caller-supplied API key is used as the identifier only while
 * authentication is enforced (`REQUIRE_AUTH`). When it is not enforced the
 * key is unverified, so honouring it would let a caller pick a fresh
 * rate-limit bucket on every request simply by changing the header.
 *
 * @param {object} req - Incoming request; reads `req.headers` and `req.socket`.
 * @returns {string} API key (when auth is enforced and one was sent) or client IP.
 */
function getClientIdentifier(req) {
  // Try to get the IP from proxy headers (Vercel, Cloudflare, etc.).
  const forwarded = req.headers['x-forwarded-for'];
  const realIp = req.headers['x-real-ip'];
  const forwardedValue = Array.isArray(forwarded) ? forwarded[0] : forwarded;
  // Entries are comma-separated and usually padded with spaces.
  const firstHop = typeof forwardedValue === 'string'
    ? forwardedValue.split(',')[0].trim()
    : '';
  const ip = firstHop || realIp || req.socket?.remoteAddress || 'unknown';

  if (!REQUIRE_AUTH) {
    return ip;
  }

  // NOTE(review): with a single configured key, every authenticated caller
  // shares one bucket — confirm that is the intended behaviour.
  const apiKey = req.headers['x-api-key'] ||
    req.headers['authorization']?.replace(/^Bearer\s+/i, '');
  return apiKey || ip;
}
102
+
103
/**
 * Check whether a request carries the configured API key.
 *
 * Authentication is skipped (always succeeds) when `REQUIRE_AUTH` is false
 * or no `API_KEY` is configured. Otherwise the key is read from the
 * `X-API-Key` header or an `Authorization: Bearer <key>` header.
 *
 * @param {object} req - Incoming request; only `req.headers` is read.
 * @returns {{authenticated: boolean, error?: string}} `error` is set only
 *   when `authenticated` is false.
 */
function checkAuth(req) {
  if (!REQUIRE_AUTH || !API_KEY) {
    return { authenticated: true };
  }

  // SECURITY: Only accept the API key from headers, not the request body.
  // Keys in request bodies are logged, visible in dev tools, and stored in history.
  // The auth scheme name is case-insensitive, so match it that way and only
  // at the start of the header value.
  const providedKey = req.headers['x-api-key'] ||
    req.headers['authorization']?.replace(/^Bearer\s+/i, '');

  if (!providedKey) {
    return { authenticated: false, error: 'Authentication required. Provide API key via X-API-Key header or Authorization: Bearer <key>' };
  }

  // SECURITY: compare in constant time so response timing does not reveal
  // how many leading characters of the key were correct. timingSafeEqual
  // throws on length mismatch, hence the explicit length check first.
  const provided = Buffer.from(String(providedKey), 'utf8');
  const expected = Buffer.from(API_KEY, 'utf8');
  const keysMatch = provided.length === expected.length &&
    timingSafeEqual(provided, expected);

  if (!keysMatch) {
    return { authenticated: false, error: 'Invalid API key' };
  }

  return { authenticated: true };
}
126
+
127
/**
 * Vercel serverless handler for `POST /api/validate`.
 *
 * Flow: method check -> authentication -> rate limiting -> input validation
 * -> decode the data-URL image into a temporary file -> `validateScreenshot`
 * -> `normalizeValidationResult` -> JSON response. The temporary file is
 * always removed, whether validation succeeds or throws.
 *
 * Status codes: 405 (non-POST), 401 (auth failure), 429 (rate limited),
 * 400 (invalid input), 500 (validation threw), 200 (success).
 *
 * @param {object} req - Incoming request; reads `method`, `headers`, `body`.
 * @param {object} res - Response object exposing `setHeader`, `status`, `json`.
 * @returns {Promise<*>} Whatever `res.status(...).json(...)` returns.
 */
export default async function handler(req, res) {
  // Only allow POST
  if (req.method !== 'POST') {
    // A 405 response must advertise which methods the resource supports.
    res.setHeader('Allow', 'POST');
    return res.status(405).json({ error: 'Method not allowed' });
  }

  // Check authentication
  const authResult = checkAuth(req);
  if (!authResult.authenticated) {
    return res.status(401).json({ error: authResult.error });
  }

  // Check rate limit
  const clientId = getClientIdentifier(req);
  const rateLimit = checkRateLimit(clientId);
  // Guard against a non-finite reset time: `new Date(Infinity).toISOString()`
  // throws, and this code runs outside the try/catch below.
  const resetAt = Number.isFinite(rateLimit.resetAt)
    ? rateLimit.resetAt
    : Date.now() + RATE_LIMIT_WINDOW;

  res.setHeader('X-RateLimit-Limit', RATE_LIMIT_MAX_REQUESTS);
  res.setHeader('X-RateLimit-Remaining', rateLimit.allowed ? rateLimit.remaining : 0);
  res.setHeader('X-RateLimit-Reset', new Date(resetAt).toISOString());

  if (!rateLimit.allowed) {
    const retryAfter = Math.max(0, Math.ceil((resetAt - Date.now()) / 1000));
    // Standard header so generic HTTP clients know when to retry.
    res.setHeader('Retry-After', retryAfter);
    return res.status(429).json({
      error: 'Rate limit exceeded',
      retryAfter,
    });
  }

  // Set only once the temp file is about to be written; drives cleanup below.
  let tempPath = null;

  try {
    // A missing or unparsed body is a client error, not a server error.
    const body = req.body !== null && typeof req.body === 'object' ? req.body : {};
    const { image, prompt } = body;
    const context = body.context ?? {};

    // Validate input presence
    if (!image) {
      return res.status(400).json({ error: 'Missing image (base64 encoded)' });
    }
    if (!prompt) {
      return res.status(400).json({ error: 'Missing prompt' });
    }

    // Validate input type and size
    if (typeof image !== 'string' || image.length > MAX_IMAGE_SIZE) {
      return res.status(400).json({ error: 'Image too large or invalid format' });
    }
    if (typeof prompt !== 'string' || prompt.length > MAX_PROMPT_LENGTH) {
      return res.status(400).json({ error: 'Prompt too long' });
    }
    // Reject non-object context outright; otherwise a large string or other
    // primitive would skip the size limit entirely.
    if (typeof context !== 'object') {
      return res.status(400).json({ error: 'Context must be an object' });
    }
    if (JSON.stringify(context).length > MAX_CONTEXT_SIZE) {
      return res.status(400).json({ error: 'Context too large' });
    }

    // SECURITY: Whitelist specific MIME types to prevent unexpected formats
    const dataUrlPrefix = /^data:image\/(?:png|jpeg|jpg|gif|webp);base64,/;
    if (!dataUrlPrefix.test(image)) {
      return res.status(400).json({ error: 'Invalid image MIME type. Supported: image/png, image/jpeg, image/jpg, image/gif, image/webp' });
    }

    // Buffer.from(..., 'base64') never throws; it silently skips characters
    // it cannot decode. An empty result is therefore the only reliable sign
    // here that the payload carried no usable image data.
    const imageBuffer = Buffer.from(image.replace(dataUrlPrefix, ''), 'base64');
    if (imageBuffer.length === 0) {
      return res.status(400).json({ error: 'Invalid base64 image' });
    }

    // Base64 encoding increases size by ~33%, so decoded should be smaller
    const expectedMaxDecoded = Math.floor(MAX_IMAGE_SIZE * 0.75);
    if (imageBuffer.length > expectedMaxDecoded) {
      return res.status(400).json({ error: 'Decoded image exceeds maximum size' });
    }

    // SECURITY: cryptographically random file name prevents collisions and
    // predictable paths.
    // NOTE(review): the file is always named `.png` even for jpeg/gif/webp
    // input — confirm validateScreenshot does not infer the type from the name.
    const randomSuffix = randomBytes(16).toString('hex');
    tempPath = join(tmpdir(), `vllm-validate-${randomSuffix}.png`);

    // RESOURCE PROTECTION: this write is bounded by the rate limit above,
    // by MAX_IMAGE_SIZE, and by the serverless timeout. Owner-only
    // permissions keep the upload private on shared hosts.
    writeFileSync(tempPath, imageBuffer, { mode: 0o600 });

    const result = await validateScreenshot(tempPath, prompt, context);

    // Normalize result structure before returning (ensures consistent API response)
    const normalizedResult = normalizeValidationResult(result, 'api/validate');
    return res.status(200).json(normalizedResult);
  } catch (error) {
    // Log full error for debugging (server-side only)
    console.error('[VLLM API] Error:', error);

    // Return sanitized error to client (don't leak internal details)
    // Never expose: file paths, API keys, internal structure, stack traces
    const sanitizedError = error instanceof Error
      ? 'Validation failed. Please check your input and try again.'
      : 'Validation failed';

    return res.status(500).json({
      error: sanitizedError,
    });
  } finally {
    // Best-effort cleanup on every exit path. A cleanup failure must not
    // change the response that was already chosen, so it is only logged.
    if (tempPath !== null) {
      try {
        unlinkSync(tempPath);
      } catch (cleanupError) {
        if (cleanupError?.code !== 'ENOENT') {
          console.warn('[VLLM API] Failed to remove temp file:', cleanupError);
        }
      }
    }
  }
}
252
+