@browsercash/chase 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.js ADDED
@@ -0,0 +1,112 @@
1
/**
 * Infer validation threshold overrides from a free-text task description.
 *
 * Only fields the user explicitly asked for are validated: when the task does
 * not mention ratings (or prices), the corresponding requirement is switched
 * off and its minimum rate is set to 0. Tasks containing the word "all" raise
 * the minimum item count to 20, and tasks naming a count ("top 100 products")
 * require at least 70% of that count.
 *
 * @param {string} taskDescription - Natural-language description of the task.
 * @returns {object} Partial validation settings; only the keys that should
 *   override the defaults are present.
 */
export function inferValidationFromTask(taskDescription) {
    const overrides = {};
    // Accept singular and plural forms: with a trailing \b, "rating" alone
    // would not match "ratings", silently disabling a field the user asked for.
    const mentionsRating = /\b(?:ratings?|stars?|reviews?|scores?)\b/i.test(taskDescription);
    // "$" is tested outside the \b-delimited group: there is no word boundary
    // between whitespace and "$", so "\b\$\b" can never match "under $50".
    const mentionsPrice = /\b(?:prices?|costs?|dollars?|amounts?)\b|\$/i.test(taskDescription);
    // Ratings are not required unless explicitly mentioned.
    if (!mentionsRating) {
        overrides.requireRatings = false;
        overrides.minRatingRate = 0;
    }
    // Prices are not required unless explicitly mentioned.
    if (!mentionsPrice) {
        overrides.requirePrices = false;
        overrides.minPriceRate = 0;
    }
    // "All items" tasks need a higher minimum count.
    if (/\ball\b/i.test(taskDescription)) {
        overrides.minItemCount = 20;
    }
    // Tasks with a specific count (e.g. "top 100 products").
    const countMatch = taskDescription.match(/(?:top|first|get|extract)\s+(\d+)/i);
    if (countMatch) {
        const requested = Number.parseInt(countMatch[1], 10);
        // Require at least 70% of the requested count, never lowering a
        // minimum already set above and never dropping below 1.
        overrides.minItemCount = Math.max(overrides.minItemCount || 1, Math.floor(requested * 0.7));
    }
    return overrides;
}
34
/**
 * Build the baseline validation thresholds used when nothing overrides them.
 *
 * @returns {object} A fresh settings object on every call, so callers may
 *   mutate or spread it without affecting later calls.
 */
function getDefaultValidation() {
    const defaults = {};
    defaults.minPriceRate = 0.9;
    defaults.minRatingRate = 0.8;
    defaults.minItemCount = 1;
    defaults.requirePrices = true;
    defaults.requireRatings = true;
    return defaults;
}
46
/**
 * Load validation threshold overrides from environment variables.
 *
 * Reads MIN_PRICE_RATE, MIN_RATING_RATE (floats), MIN_ITEM_COUNT (integer),
 * REQUIRE_PRICES and REQUIRE_RATINGS (booleans; only "true", case-insensitive,
 * enables them). Numeric variables that are set but do not parse to a finite
 * number are ignored, so a typo or an empty value cannot replace a valid
 * default with NaN.
 *
 * @returns {object} Partial validation settings containing only the keys that
 *   were supplied (and valid) in the environment.
 */
function loadValidationFromEnv() {
    const overrides = {};
    const numericVars = [
        ['minPriceRate', 'MIN_PRICE_RATE', (raw) => Number.parseFloat(raw)],
        ['minRatingRate', 'MIN_RATING_RATE', (raw) => Number.parseFloat(raw)],
        ['minItemCount', 'MIN_ITEM_COUNT', (raw) => Number.parseInt(raw, 10)],
    ];
    for (const [key, envName, parse] of numericVars) {
        const raw = process.env[envName];
        if (raw === undefined) {
            continue;
        }
        const value = parse(raw);
        // Skip NaN/Infinity rather than letting them override the defaults.
        if (Number.isFinite(value)) {
            overrides[key] = value;
        }
    }
    const booleanVars = [
        ['requirePrices', 'REQUIRE_PRICES'],
        ['requireRatings', 'REQUIRE_RATINGS'],
    ];
    for (const [key, envName] of booleanVars) {
        const raw = process.env[envName];
        if (raw !== undefined) {
            // Trim so values such as "true " (trailing whitespace) still count.
            overrides[key] = raw.trim().toLowerCase() === 'true';
        }
    }
    return overrides;
}
68
/**
 * Load configuration from environment variables with optional overrides.
 *
 * Validation thresholds are layered as defaults → environment overrides →
 * task inference, so values inferred from the task description take
 * precedence over the environment. Integer settings (MAX_TURNS,
 * MAX_FIX_ITERATIONS, FIX_TIMEOUT, FIX_REQUEST_TIMEOUT) fall back to their
 * defaults when the variable is unset, empty, or not a number.
 *
 * @param {string|object} [taskDescriptionOrOptions] - Either a task description
 *   string (for backwards compatibility) or an options object with `cdpUrl`
 *   and `taskDescription`.
 * @returns {object} The resolved configuration, including `validation`.
 * @throws {Error} When no CDP URL is supplied via options or the CDP_URL env var.
 */
export function loadConfig(taskDescriptionOrOptions) {
    // Backwards compatibility: a bare string argument is the task description.
    let cdpUrlOverride;
    let taskDescription;
    if (typeof taskDescriptionOrOptions === 'string') {
        taskDescription = taskDescriptionOrOptions;
    }
    else if (taskDescriptionOrOptions) {
        cdpUrlOverride = taskDescriptionOrOptions.cdpUrl;
        taskDescription = taskDescriptionOrOptions.taskDescription;
    }
    // CDP URL: prefer the explicit override, then the environment.
    const cdpUrl = cdpUrlOverride || process.env.CDP_URL;
    if (!cdpUrl) {
        throw new Error('CDP_URL is required (pass via options.cdpUrl or set CDP_URL env var)');
    }
    // Later spreads win: task inference overrides env, env overrides defaults.
    const validation = {
        ...getDefaultValidation(),
        ...loadValidationFromEnv(),
        ...(taskDescription ? inferValidationFromTask(taskDescription) : {}),
    };
    return {
        cdpUrl,
        outputDir: process.env.OUTPUT_DIR || './generated',
        sessionsDir: process.env.SESSIONS_DIR || './sessions',
        maxTurns: readIntEnvOrDefault('MAX_TURNS', 15),
        model: process.env.MODEL || 'claude-opus-4-5-20251101',
        maxFixIterations: readIntEnvOrDefault('MAX_FIX_ITERATIONS', 5),
        // Script execution timeout (default 5 minutes for complex multi-page tasks)
        fixTimeout: readIntEnvOrDefault('FIX_TIMEOUT', 300000),
        // Claude fix request timeout (default 5 minutes for generating fixes)
        fixRequestTimeout: readIntEnvOrDefault('FIX_REQUEST_TIMEOUT', 300000),
        validation,
    };
}
/**
 * Read a base-10 integer from an environment variable, returning `fallback`
 * when the variable is unset, empty, or does not start with a number.
 */
function readIntEnvOrDefault(envName, fallback) {
    const parsed = Number.parseInt(process.env[envName] || '', 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}
@@ -0,0 +1,351 @@
1
+ /**
2
+ * Structured error classification for browser automation scripts.
3
+ * Replaces heuristic string patterns with categorized, confidence-scored errors.
4
+ */
5
/**
 * String-valued categories a script failure can be classified into.
 * Each member's value is identical to its name.
 */
export const ErrorCategory = {
    CDP_CONNECTION: 'CDP_CONNECTION',
    NAVIGATION: 'NAVIGATION',
    SELECTOR_EMPTY: 'SELECTOR_EMPTY',
    SELECTOR_WRONG: 'SELECTOR_WRONG',
    DATA_QUALITY: 'DATA_QUALITY',
    EXTRACTION_INCOMPLETE: 'EXTRACTION_INCOMPLETE',
    JSON_PARSING: 'JSON_PARSING',
    JAVASCRIPT_ERROR: 'JAVASCRIPT_ERROR',
    BASH_ERROR: 'BASH_ERROR',
    TIMEOUT: 'TIMEOUT',
    ACCESS_DENIED: 'ACCESS_DENIED',
    UNKNOWN: 'UNKNOWN',
};
20
/**
 * Ordered classification rules. Each rule has:
 *  - category:        the ErrorCategory assigned when the rule fires
 *  - patterns:        regexes tried in order against the combined output;
 *                     the first one that matches fires the rule
 *  - confidence:      score attached to the resulting classification
 *  - getMessage:      (match, stdout, stderr) => human-readable summary
 *  - getSuggestedFix: () => remediation hint
 *  - extractDetails:  optional (match, stdout, stderr) => structured details
 */
const CLASSIFICATION_RULES = [
    // CDP Connection errors - highest priority
    {
        category: ErrorCategory.CDP_CONNECTION,
        patterns: [
            /Resource temporarily unavailable/i,
            /os error 35/i,
            /WebSocket.*(?:error|closed|failed)/i,
            /ECONNREFUSED/i,
            /connection closed/i,
            /CDP.*(?:stale|unavailable|disconnected)/i,
        ],
        confidence: 0.95,
        getMessage: () => 'Browser CDP connection lost or unavailable',
        getSuggestedFix: () => 'Restart the browser and get a fresh CDP_URL',
    },
    // Timeout errors
    {
        category: ErrorCategory.TIMEOUT,
        patterns: [
            /timed?\s*out/i,
            /timeout/i,
            /exceeded.*time/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Script execution timed out',
        getSuggestedFix: () => 'Increase timeout or optimize script. Check if page is loading slowly.',
    },
    // JSON parsing errors (double-encoded JSON)
    {
        category: ErrorCategory.JSON_PARSING,
        patterns: [
            /jq:\s*error/i,
            /cannot be added/i,
            /cannot be subtracted/i,
            /Cannot iterate over string/i,
            /parse error.*Invalid/i,
        ],
        confidence: 0.95,
        getMessage: () => 'JSON parsing error - agent-browser eval returns double-encoded JSON',
        getSuggestedFix: () => 'Add unwrap_json() helper and use: DATA=$(unwrap_json "$RAW_OUTPUT")',
    },
    // JavaScript errors
    {
        category: ErrorCategory.JAVASCRIPT_ERROR,
        patterns: [
            /SyntaxError:\s*(.+)/i,
            /TypeError:\s*(.+)/i,
            /ReferenceError:\s*(.+)/i,
            /EvalError:\s*(.+)/i,
        ],
        confidence: 0.95,
        // Group 1 is the text following the error name.
        getMessage: (match) => `JavaScript error: ${match?.[1] || 'unknown'}`,
        getSuggestedFix: () => 'Check JavaScript syntax. Use single quotes around JS in bash to avoid escaping issues.',
    },
    // Bash errors
    {
        category: ErrorCategory.BASH_ERROR,
        patterns: [
            /integer expression expected/i,
            /syntax error: operand expected/i,
            /unbound variable/i,
            /bad substitution/i,
            /command not found/i,
        ],
        confidence: 0.9,
        getMessage: (match) => `Bash error: ${match?.[0] || 'unknown'}`,
        getSuggestedFix: () => 'Check bash syntax. Ensure variables are properly quoted.',
    },
    // Access denied
    {
        category: ErrorCategory.ACCESS_DENIED,
        patterns: [
            /Access Denied/i,
            /403 Forbidden/i,
            /401 Unauthorized/i,
            /blocked by.*(?:captcha|cloudflare|bot)/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Access denied - page blocked access',
        getSuggestedFix: () => 'The site may be blocking automated access. Try a different approach or use the existing browser session.',
    },
    // Navigation errors
    {
        category: ErrorCategory.NAVIGATION,
        patterns: [
            /net::ERR_/i,
            /Navigation failed/i,
            /page[\s-]?not[\s-]?found/i,
            /error[\s:_-]*404/i,
            /404[\s:_-]*(?:not[\s-]?found|error)/i,
        ],
        confidence: 0.85,
        getMessage: () => 'Navigation error - page not found or failed to load',
        getSuggestedFix: () => 'Check the URL is correct. Navigate via site menu instead of direct URL.',
    },
    // Selector returning zero items
    {
        category: ErrorCategory.SELECTOR_EMPTY,
        patterns: [
            /extracted\s*0/i,
            /"totalExtracted":\s*0/i,
            /No items extracted/i,
            /returned empty results/i,
            // A line consisting solely of an empty JSON array.
            /^\s*\[\s*\]\s*$/m,
        ],
        confidence: 0.9,
        getMessage: () => 'Selector returned zero items',
        getSuggestedFix: () => 'The container selector matches nothing. Test selectors first with: agent-browser eval \'document.querySelectorAll("SELECTOR").length\'',
    },
    // Wrong selector (targeting ads/carousel)
    {
        category: ErrorCategory.SELECTOR_WRONG,
        patterns: [
            /WRONG_SELECTOR/i,
            /targeting.*(?:carousel|ads|sticky)/i,
            /same item.*(?:appears|multiple pages)/i,
            /items didn't change between pages/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Selector targeting ads/carousel instead of main product grid',
        getSuggestedFix: () => 'Find a selector that returns 20-50 items per page. Use findProductGrid() to discover the main container.',
    },
    // Incomplete extraction
    {
        category: ErrorCategory.EXTRACTION_INCOMPLETE,
        patterns: [
            /INCOMPLETE/i,
            /Only\s+(\d+)\s+items.*(?:need|expected|incomplete)/i,
            /Task requested\s+(\d+).*but only extracted\s+(\d+)/i,
        ],
        confidence: 0.85,
        getMessage: (match, stdout, stderr) => {
            // Prefer the count captured by the pattern that fired: group 2 is
            // the extracted count of the "Task requested N ... extracted M"
            // pattern, group 1 the count of the "Only N items" pattern.
            const captured = match?.[2] ?? match?.[1];
            // Otherwise look for a count anywhere in the output, including
            // stderr, since the rule itself is matched against both streams.
            const combined = `${stdout}\n${stderr}`;
            const countMatch = combined.match(/extracted\s+(\d+)/i) || combined.match(/"totalExtracted":\s*(\d+)/);
            const count = captured ?? (countMatch ? countMatch[1] : 'few');
            return `Incomplete extraction - only ${count} items found`;
        },
        getSuggestedFix: () => 'Use scroll-and-accumulate pattern. Handle pagination. Ensure selector targets main grid.',
    },
    // Data quality issues
    {
        category: ErrorCategory.DATA_QUALITY,
        patterns: [
            /valid prices.*need/i,
            /valid ratings.*need/i,
            /DATA QUALITY ISSUES/i,
            /N\/A.*(?:prices?|ratings?)/i,
        ],
        confidence: 0.85,
        getMessage: (match, stdout, stderr) => {
            // Decide from the text that actually triggered the rule first, so
            // an unrelated mention of "prices" elsewhere in the output does
            // not mislabel a rating problem.
            const matched = match?.[0] ?? '';
            if (/prices?/i.test(matched))
                return 'Low price extraction rate';
            if (/ratings?/i.test(matched))
                return 'Low rating extraction rate';
            // Generic trigger (e.g. "DATA QUALITY ISSUES"): fall back to
            // scanning both output streams.
            const combined = `${stdout}\n${stderr}`;
            if (/prices/i.test(combined))
                return 'Low price extraction rate';
            if (/ratings/i.test(combined))
                return 'Low rating extraction rate';
            return 'Data quality issues detected';
        },
        getSuggestedFix: () => 'Use universal helper functions (getPrice, getRating) that try multiple discovery methods.',
        extractDetails: (_match, stdout, stderr) => {
            // Percentages may be reported on either stream.
            const combined = `${stdout}\n${stderr}`;
            const priceMatch = combined.match(/(\d+)%.*valid prices/);
            const ratingMatch = combined.match(/(\d+)%.*valid ratings/);
            return {
                priceRate: priceMatch ? Number.parseInt(priceMatch[1], 10) : undefined,
                ratingRate: ratingMatch ? Number.parseInt(ratingMatch[1], 10) : undefined,
            };
        },
    },
];
187
/**
 * Classify error(s) from script output.
 *
 * Every rule in CLASSIFICATION_RULES is tested against stdout and stderr
 * joined by a newline; a rule contributes at most one classification (its
 * first matching pattern). A `timedOut` run always yields a TIMEOUT entry with
 * confidence 1.0. If nothing matched and the script exited with a known,
 * non-zero code, a single UNKNOWN entry is returned.
 *
 * @param {string} [stdout] - Captured standard output; null/undefined is treated as empty.
 * @param {string} [stderr] - Captured standard error; null/undefined is treated as empty.
 * @param {number|null} [exitCode] - Process exit code; null/undefined means "not available".
 * @param {boolean} [timedOut] - Whether the script was stopped by a timeout.
 * @returns {object[]} Classifications sorted by confidence (highest first),
 *   at most one per category.
 */
export function classifyErrors(stdout, stderr, exitCode, timedOut) {
    // Normalise missing streams so they are not stringified as "undefined"
    // and so rule callbacks can safely call string methods on them.
    const out = String(stdout ?? '');
    const err = String(stderr ?? '');
    const combined = `${out}\n${err}`;
    const errors = [];
    // Handle timeout first
    if (timedOut) {
        errors.push({
            category: ErrorCategory.TIMEOUT,
            message: 'Script execution timed out',
            confidence: 1.0,
            suggestedFix: 'Increase timeout or optimize script',
        });
    }
    // Check each rule
    for (const rule of CLASSIFICATION_RULES) {
        for (const pattern of rule.patterns) {
            const match = combined.match(pattern);
            if (!match) {
                continue;
            }
            errors.push({
                category: rule.category,
                message: rule.getMessage(match, out, err),
                confidence: rule.confidence,
                suggestedFix: rule.getSuggestedFix?.(),
                details: rule.extractDetails?.(match, out, err),
            });
            break; // Only one match per rule
        }
    }
    // No specific errors but a known non-zero exit code. `!= null` also
    // excludes undefined, which would otherwise be reported as a failure.
    if (errors.length === 0 && exitCode != null && exitCode !== 0) {
        errors.push({
            category: ErrorCategory.UNKNOWN,
            message: `Script exited with code ${exitCode}`,
            confidence: 0.5,
        });
    }
    // Sort by confidence (highest first)
    errors.sort((a, b) => b.confidence - a.confidence);
    // Deduplicate by category; after sorting, the first entry seen for a
    // category is its highest-confidence one.
    const seen = new Set();
    return errors.filter((entry) => {
        if (seen.has(entry.category)) {
            return false;
        }
        seen.add(entry.category);
        return true;
    });
}
238
/**
 * Return the top-ranked classification for a script run, or null when
 * classifyErrors() produces no classifications.
 */
export function getPrimaryError(stdout, stderr, exitCode, timedOut) {
    const ranked = classifyErrors(stdout, stderr, exitCode, timedOut);
    if (ranked.length === 0) {
        return null;
    }
    return ranked[0];
}
245
/**
 * Render classified errors as a Markdown section for inclusion in prompts.
 *
 * @param {object[]} errors - Classifications (category, message, confidence,
 *   optional suggestedFix and details).
 * @returns {string} Markdown text, or an empty string when there are no errors.
 */
export function formatClassifiedErrors(errors) {
    if (errors.length === 0) {
        return '';
    }
    // Collect output line by line; the empty entries produce the blank lines
    // after the heading and after each error.
    const lines = ['## Classified Errors', ''];
    for (const { category, message, suggestedFix, details, confidence } of errors) {
        lines.push(`### ${category}`, `**Issue:** ${message}`);
        if (suggestedFix) {
            lines.push(`**Fix:** ${suggestedFix}`);
        }
        if (details) {
            lines.push(`**Details:** ${JSON.stringify(details)}`);
        }
        lines.push(`**Confidence:** ${Math.round(confidence * 100)}%`, '');
    }
    return `${lines.join('\n')}\n`;
}
265
/**
 * Produce targeted Markdown guidance for a classified error.
 *
 * Categories with dedicated guidance return a fixed block of text; any other
 * category falls back to the error's own suggested fix, or an empty string
 * when it has none.
 *
 * @param {object} error - A classification with `category` and optional `suggestedFix`.
 * @returns {string} Guidance text (possibly empty).
 */
export function getGuidanceForError(error) {
    const guidanceByCategory = new Map([
        [ErrorCategory.CDP_CONNECTION, `
## CDP CONNECTION LOST
The browser CDP session is no longer available. This typically happens when:
- The browser was closed
- The CDP session timed out
- Network issues interrupted the connection

**Action Required:** Get a fresh CDP_URL and restart the script.
`],
        [ErrorCategory.JSON_PARSING, `
## JSON PARSING ERROR (Double-Encoded JSON)

agent-browser eval returns DOUBLE-ENCODED JSON. The output is a string containing JSON, not raw JSON.

**Required Fix:**
1. Add this helper at the TOP of your script:
\`\`\`bash
unwrap_json() {
echo "$1" | jq -r 'if type == "string" then fromjson else . end' 2>/dev/null || echo "$1"
}
\`\`\`

2. Use it after EVERY agent-browser eval that returns JSON:
\`\`\`bash
RAW_DATA=$(agent-browser --cdp "$CDP" eval '...JSON.stringify...')
DATA=$(unwrap_json "$RAW_DATA")
\`\`\`
`],
        [ErrorCategory.SELECTOR_EMPTY, `
## NO ITEMS EXTRACTED

The container selector doesn't match any elements. Common causes:
1. Wrong selector - test with: agent-browser eval 'document.querySelectorAll("SELECTOR").length'
2. Page not fully loaded - add more sleep time
3. JavaScript syntax error - check for escaping issues

**Fix:** Use single quotes around JavaScript to avoid bash escaping:
\`\`\`bash
agent-browser --cdp "$CDP" eval '(function() { ... })();'
\`\`\`
`],
        [ErrorCategory.SELECTOR_WRONG, `
## WRONG SELECTOR (Targeting Ads/Carousel)

Your selector finds items from sponsored/ads section, NOT the main product grid.

**How to find the MAIN grid:**
1. Use findProductGrid() to discover the container with most repeated children
2. Test candidate selectors and pick the one with 20-50 items per page
3. Verify items CHANGE after pagination - if same items appear, wrong selector
`],
        [ErrorCategory.DATA_QUALITY, `
## DATA QUALITY ISSUES

Price or rating extraction is failing for many items.

**Use universal helper functions that try multiple discovery methods:**
- getPrice(el) - tries schema.org, data attributes, ARIA, class patterns, text patterns
- getRating(el) - tries schema.org, data attributes, ARIA labels, text patterns

**NEVER return "N/A"** - return empty string if no data found.
`],
        [ErrorCategory.EXTRACTION_INCOMPLETE, `
## INCOMPLETE EXTRACTION

Only a portion of items were extracted. This often happens with:
1. Lazy-loaded content that requires scrolling
2. Pagination that wasn't fully handled
3. Selector targeting a subset (ads/carousel) instead of main grid

**Fix:** Use scroll-and-accumulate pattern and handle all pages.
`],
    ]);
    const guidance = guidanceByCategory.get(error.category);
    if (guidance !== undefined) {
        return guidance;
    }
    // No dedicated guidance for this category: surface the rule's own hint.
    if (error.suggestedFix) {
        return `\n**Suggested fix:** ${error.suggestedFix}\n`;
    }
    return '';
}
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * PostToolUse hook for capturing agent-browser commands
4
+ *
5
+ * This hook is invoked by Claude Code after each Bash tool use.
6
+ * It filters for agent-browser commands and logs them to a session file.
7
+ *
8
+ * Input (stdin): JSON with tool_name, tool_input, tool_output, session_id
9
+ * Output (stdout): JSON with decision (allow/block) and optional reason
10
+ */
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
/**
 * Append one JSONL entry describing an agent-browser command to the session
 * log (`<sessionsDir>/<session_id>.jsonl`).
 *
 * @param {object} input - Parsed hook payload (session_id and tool_output are read).
 * @param {string} command - The Bash command that was executed.
 */
function recordAgentBrowserCommand(input, command) {
    const sessionsDir = process.env.CLAUDE_GEN_SESSIONS_DIR || './sessions';
    // Keep only the final path segment of the session id so a value containing
    // directory separators or ".." cannot write outside sessionsDir.
    const sessionName = path.basename(String(input.session_id));
    const sessionFile = path.join(sessionsDir, `${sessionName}.jsonl`);
    // With `recursive`, mkdirSync is a no-op when the directory already exists,
    // so no separate existence check is needed.
    fs.mkdirSync(sessionsDir, { recursive: true });
    // NOTE(review): the payload is assumed to carry the tool result in
    // `tool_output`; a non-string value is serialised so it can still be
    // inspected and truncated instead of throwing.
    const rawOutput = input.tool_output || '';
    const output = typeof rawOutput === 'string' ? rawOutput : JSON.stringify(rawOutput);
    // Heuristic only: the exit code is not inspected, so success means
    // "no 'Error:'/'error:' marker appears in the output".
    const success = !output.includes('Error:') && !output.includes('error:');
    const entry = {
        timestamp: new Date().toISOString(),
        command,
        output: output.substring(0, 1000), // Truncate long outputs
        success,
    };
    fs.appendFileSync(sessionFile, JSON.stringify(entry) + '\n');
}
/**
 * Hook entry point: read the JSON payload from stdin, log agent-browser Bash
 * commands, and always print an "allow" decision — logging failures must
 * never block the tool call.
 */
async function main() {
    try {
        // Read input from stdin (inside the try so a read failure still allows).
        let inputData = '';
        for await (const chunk of process.stdin) {
            inputData += chunk;
        }
        const input = JSON.parse(inputData);
        // Only Bash tool calls are candidates for logging.
        if (input.tool_name === 'Bash') {
            const command = input.tool_input.command || '';
            // Only log agent-browser commands
            if (command.includes('agent-browser')) {
                recordAgentBrowserCommand(input, command);
            }
        }
    }
    catch (error) {
        // On error, report it on stderr but still allow the command.
        console.error('Hook error:', error);
    }
    // Always allow the command to proceed
    console.log(JSON.stringify({ decision: 'allow' }));
}
main().catch((error) => {
    // Last-resort handler so a failure while emitting the decision is not an
    // unhandled rejection.
    console.error('Hook error:', error);
});