npm - @browsercash/chase - Versions diffs - 1.0.0 - Mend

@browsercash/chase 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/.claude/settings.local.json +14 -0
package/.dockerignore +34 -0
package/README.md +256 -0
package/api-1 (3).json +831 -0
package/dist/browser-cash.js +128 -0
package/dist/claude-runner.js +285 -0
package/dist/cli-install.js +104 -0
package/dist/cli.js +503 -0
package/dist/codegen/bash-generator.js +104 -0
package/dist/config.js +112 -0
package/dist/errors/error-classifier.js +351 -0
package/dist/hooks/capture-hook.js +57 -0
package/dist/index.js +180 -0
package/dist/iterative-tester.js +407 -0
package/dist/logger/command-log.js +38 -0
package/dist/prompts/agentic-prompt.js +78 -0
package/dist/prompts/fix-prompt.js +477 -0
package/dist/prompts/helpers.js +214 -0
package/dist/prompts/system-prompt.js +282 -0
package/dist/script-runner.js +429 -0
package/dist/server.js +1934 -0
package/dist/types/iteration-history.js +139 -0
package/openapi.json +1131 -0
package/package.json +44 -0

package/dist/config.js ADDED Viewed

@@ -0,0 +1,112 @@
/**
 * Infer validation threshold overrides from a free-text task description.
 *
 * Only fields the task explicitly asks for are validated: when the description
 * does not mention ratings (or prices), the matching requirement is switched
 * off and its minimum rate is set to 0. Keys that are absent from the result
 * are left for the caller to fill in from its own defaults.
 *
 * @param {string} taskDescription - Natural-language description of the task.
 * @returns {object} Partial thresholds (any of requireRatings, minRatingRate,
 *   requirePrices, minPriceRate, minItemCount).
 */
export function inferValidationFromTask(taskDescription) {
    const text = String(taskDescription ?? '');
    const overrides = {};
    // Plurals are listed explicitly: with a trailing \b, "rating" alone does
    // not match "ratings", so "get prices and ratings" used to disable both.
    const mentionsRating = /\b(?:ratings?|stars?|reviews?|scores?)\b/i.test(text);
    // "$" is kept outside the \b...\b group. It is not a word character, so no
    // word boundary exists between whitespace and "$" (as in "under $50").
    const mentionsPrice = /\$|\b(?:prices?|costs?|dollars?|amounts?)\b/i.test(text);
    // Ratings are not required unless the task mentions them.
    if (!mentionsRating) {
        overrides.requireRatings = false;
        overrides.minRatingRate = 0;
    }
    // Prices are not required unless the task mentions them.
    if (!mentionsPrice) {
        overrides.requirePrices = false;
        overrides.minPriceRate = 0;
    }
    // "All items" tasks need a higher minimum count.
    if (/\ball\b/i.test(text)) {
        overrides.minItemCount = 20;
    }
    // Tasks with an explicit count (e.g. "top 100 products") must reach at
    // least 70% of it, without lowering a minimum that is already set.
    const countMatch = text.match(/(?:top|first|get|extract)\s+(\d+)/i);
    if (countMatch) {
        const requested = Number.parseInt(countMatch[1], 10);
        overrides.minItemCount = Math.max(overrides.minItemCount ?? 1, Math.floor(requested * 0.7));
    }
    return overrides;
}
/**
 * Baseline validation thresholds.
 *
 * @returns {object} A freshly built thresholds object on every call, so
 *   callers may modify the result without affecting later calls.
 */
function getDefaultValidation() {
    const rates = { minPriceRate: 0.9, minRatingRate: 0.8 };
    const requirements = { requirePrices: true, requireRatings: true };
    return { ...rates, minItemCount: 1, ...requirements };
}
/**
 * Read validation threshold overrides from environment variables.
 *
 * Recognized variables:
 * - MIN_PRICE_RATE, MIN_RATING_RATE: parsed with parseFloat
 * - MIN_ITEM_COUNT: parsed as a base-10 integer
 * - REQUIRE_PRICES, REQUIRE_RATINGS: true only for the string "true"
 *   (case-insensitive, surrounding whitespace ignored); anything else is false
 *
 * A numeric variable that does not parse to a finite number is ignored, so a
 * typo cannot replace a threshold with NaN.
 *
 * @returns {object} Overrides for the variables that are set and valid.
 */
function loadValidationFromEnv() {
    const env = process.env;
    const overrides = {};
    // Store a parsed number only when the variable is set and parses cleanly.
    const readNumber = (key, name, parse) => {
        const raw = env[name];
        if (raw === undefined) {
            return;
        }
        const value = parse(raw);
        if (Number.isFinite(value)) {
            overrides[key] = value;
        }
    };
    // Any set value is a decision: "true" enables, everything else disables.
    const readFlag = (key, name) => {
        const raw = env[name];
        if (raw !== undefined) {
            overrides[key] = raw.trim().toLowerCase() === 'true';
        }
    };
    readNumber('minPriceRate', 'MIN_PRICE_RATE', (raw) => Number.parseFloat(raw));
    readNumber('minRatingRate', 'MIN_RATING_RATE', (raw) => Number.parseFloat(raw));
    readNumber('minItemCount', 'MIN_ITEM_COUNT', (raw) => Number.parseInt(raw, 10));
    readFlag('requirePrices', 'REQUIRE_PRICES');
    readFlag('requireRatings', 'REQUIRE_RATINGS');
    return overrides;
}
/**
 * Load configuration from environment variables with optional overrides.
 *
 * Validation thresholds are layered in this order, later layers winning:
 * built-in defaults, then environment overrides, then overrides inferred from
 * the task description.
 *
 * Integer settings (MAX_TURNS, MAX_FIX_ITERATIONS, FIX_TIMEOUT,
 * FIX_REQUEST_TIMEOUT) fall back to their default when the variable is unset,
 * empty, or not a number, instead of becoming NaN.
 *
 * @param taskDescriptionOrOptions - Either a task description string (for backwards compatibility)
 *                                   or an options object with cdpUrl and taskDescription
 * @returns {object} Configuration with cdpUrl, outputDir, sessionsDir,
 *   maxTurns, model, maxFixIterations, fixTimeout, fixRequestTimeout and
 *   validation.
 * @throws {Error} When no CDP URL is given via options.cdpUrl or CDP_URL.
 */
export function loadConfig(taskDescriptionOrOptions) {
    // Backwards compatibility: a bare string argument is the task description.
    let cdpUrlOverride;
    let taskDescription;
    if (typeof taskDescriptionOrOptions === 'string') {
        taskDescription = taskDescriptionOrOptions;
    }
    else if (taskDescriptionOrOptions) {
        cdpUrlOverride = taskDescriptionOrOptions.cdpUrl;
        taskDescription = taskDescriptionOrOptions.taskDescription;
    }
    // CDP URL: prefer the explicit override, then the environment.
    const cdpUrl = cdpUrlOverride || process.env.CDP_URL;
    if (!cdpUrl) {
        throw new Error('CDP_URL is required (pass via options.cdpUrl or set CDP_URL env var)');
    }
    // parseInt yields NaN for non-numeric text; use the default in that case.
    const intFromEnv = (name, fallback) => {
        const parsed = Number.parseInt(process.env[name] || '', 10);
        return Number.isNaN(parsed) ? fallback : parsed;
    };
    // Build validation thresholds: defaults → env overrides → task inference
    const validation = {
        ...getDefaultValidation(),
        ...loadValidationFromEnv(),
        ...(taskDescription ? inferValidationFromTask(taskDescription) : {}),
    };
    return {
        cdpUrl,
        outputDir: process.env.OUTPUT_DIR || './generated',
        sessionsDir: process.env.SESSIONS_DIR || './sessions',
        maxTurns: intFromEnv('MAX_TURNS', 15),
        model: process.env.MODEL || 'claude-opus-4-5-20251101',
        maxFixIterations: intFromEnv('MAX_FIX_ITERATIONS', 5),
        // Script execution timeout (default 5 minutes for complex multi-page tasks)
        fixTimeout: intFromEnv('FIX_TIMEOUT', 300000),
        // Claude fix request timeout (default 5 minutes for generating fixes)
        fixRequestTimeout: intFromEnv('FIX_REQUEST_TIMEOUT', 300000),
        validation,
    };
}

package/dist/errors/error-classifier.js ADDED Viewed

@@ -0,0 +1,351 @@
/**
 * Structured error classification for browser automation scripts.
 * Replaces heuristic string patterns with categorized, confidence-scored errors.
 */
/**
 * Error categories. Every member's value is the same string as its key.
 */
export const ErrorCategory = Object.fromEntries([
    'CDP_CONNECTION',
    'NAVIGATION',
    'SELECTOR_EMPTY',
    'SELECTOR_WRONG',
    'DATA_QUALITY',
    'EXTRACTION_INCOMPLETE',
    'JSON_PARSING',
    'JAVASCRIPT_ERROR',
    'BASH_ERROR',
    'TIMEOUT',
    'ACCESS_DENIED',
    'UNKNOWN',
].map((name) => [name, name]));
/**
 * Ordered table of classification rules.
 *
 * Each rule has:
 * - category: the ErrorCategory it reports
 * - patterns: regular expressions tried in order against the script output
 * - confidence: score attached to the resulting classification
 * - getMessage(match, stdout, stderr): builds the human-readable message
 * - getSuggestedFix(): short remediation hint
 * - extractDetails(match, stdout, stderr): optional structured details
 */
const CLASSIFICATION_RULES = [
    // CDP Connection errors - highest priority
    {
        category: ErrorCategory.CDP_CONNECTION,
        patterns: [
            /Resource temporarily unavailable/i,
            /os error 35/i,
            /WebSocket.*(?:error|closed|failed)/i,
            /ECONNREFUSED/i,
            /connection closed/i,
            /CDP.*(?:stale|unavailable|disconnected)/i,
        ],
        confidence: 0.95,
        getMessage: () => 'Browser CDP connection lost or unavailable',
        getSuggestedFix: () => 'Restart the browser and get a fresh CDP_URL',
    },
    // Timeout errors
    {
        category: ErrorCategory.TIMEOUT,
        patterns: [
            /timed?\s*out/i,
            /timeout/i,
            /exceeded.*time/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Script execution timed out',
        getSuggestedFix: () => 'Increase timeout or optimize script. Check if page is loading slowly.',
    },
    // JSON parsing errors (double-encoded JSON)
    {
        category: ErrorCategory.JSON_PARSING,
        patterns: [
            /jq:\s*error/i,
            /cannot be added/i,
            /cannot be subtracted/i,
            /Cannot iterate over string/i,
            /parse error.*Invalid/i,
        ],
        confidence: 0.95,
        getMessage: () => 'JSON parsing error - agent-browser eval returns double-encoded JSON',
        getSuggestedFix: () => 'Add unwrap_json() helper and use: DATA=$(unwrap_json "$RAW_OUTPUT")',
    },
    // JavaScript errors
    {
        category: ErrorCategory.JAVASCRIPT_ERROR,
        patterns: [
            /SyntaxError:\s*(.+)/i,
            /TypeError:\s*(.+)/i,
            /ReferenceError:\s*(.+)/i,
            /EvalError:\s*(.+)/i,
        ],
        confidence: 0.95,
        // Group 1 is the text following the error name.
        getMessage: (match) => `JavaScript error: ${match?.[1] || 'unknown'}`,
        getSuggestedFix: () => 'Check JavaScript syntax. Use single quotes around JS in bash to avoid escaping issues.',
    },
    // Bash errors
    {
        category: ErrorCategory.BASH_ERROR,
        patterns: [
            /integer expression expected/i,
            /syntax error: operand expected/i,
            /unbound variable/i,
            /bad substitution/i,
            /command not found/i,
        ],
        confidence: 0.9,
        getMessage: (match) => `Bash error: ${match?.[0] || 'unknown'}`,
        getSuggestedFix: () => 'Check bash syntax. Ensure variables are properly quoted.',
    },
    // Access denied
    {
        category: ErrorCategory.ACCESS_DENIED,
        patterns: [
            /Access Denied/i,
            /403 Forbidden/i,
            /401 Unauthorized/i,
            /blocked by.*(?:captcha|cloudflare|bot)/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Access denied - page blocked access',
        getSuggestedFix: () => 'The site may be blocking automated access. Try a different approach or use the existing browser session.',
    },
    // Navigation errors
    {
        category: ErrorCategory.NAVIGATION,
        patterns: [
            /net::ERR_/i,
            /Navigation failed/i,
            /page[\s-]?not[\s-]?found/i,
            /error[\s:_-]*404/i,
            /404[\s:_-]*(?:not[\s-]?found|error)/i,
        ],
        confidence: 0.85,
        getMessage: () => 'Navigation error - page not found or failed to load',
        getSuggestedFix: () => 'Check the URL is correct. Navigate via site menu instead of direct URL.',
    },
    // Selector returning zero items
    {
        category: ErrorCategory.SELECTOR_EMPTY,
        patterns: [
            /extracted\s*0/i,
            /"totalExtracted":\s*0/i,
            /No items extracted/i,
            /returned empty results/i,
            /^\s*\[\s*\]\s*$/m,
        ],
        confidence: 0.9,
        getMessage: () => 'Selector returned zero items',
        getSuggestedFix: () => 'The container selector matches nothing. Test selectors first with: agent-browser eval \'document.querySelectorAll("SELECTOR").length\'',
    },
    // Wrong selector (targeting ads/carousel)
    {
        category: ErrorCategory.SELECTOR_WRONG,
        patterns: [
            /WRONG_SELECTOR/i,
            /targeting.*(?:carousel|ads|sticky)/i,
            /same item.*(?:appears|multiple pages)/i,
            /items didn't change between pages/i,
        ],
        confidence: 0.9,
        getMessage: () => 'Selector targeting ads/carousel instead of main product grid',
        getSuggestedFix: () => 'Find a selector that returns 20-50 items per page. Use findProductGrid() to discover the main container.',
    },
    // Incomplete extraction
    {
        category: ErrorCategory.EXTRACTION_INCOMPLETE,
        patterns: [
            /INCOMPLETE/i,
            /Only\s+(\d+)\s+items.*(?:need|expected|incomplete)/i,
            /Task requested\s+(\d+).*but only extracted\s+(\d+)/i,
        ],
        confidence: 0.85,
        getMessage: (match, stdout, stderr) => {
            // Prefer the count captured by the pattern that fired: group 2 is
            // the extracted count of "Task requested N ... but only extracted
            // M"; group 1 is the count of "Only N items ...".
            const captured = match?.[2] ?? match?.[1];
            // Otherwise look for a reported count, stdout first, then stderr.
            const findCount = (text) => text.match(/extracted\s+(\d+)/i) || text.match(/"totalExtracted":\s*(\d+)/);
            const countMatch = findCount(stdout ?? '') || findCount(stderr ?? '');
            const count = captured ?? (countMatch ? countMatch[1] : 'few');
            return `Incomplete extraction - only ${count} items found`;
        },
        getSuggestedFix: () => 'Use scroll-and-accumulate pattern. Handle pagination. Ensure selector targets main grid.',
    },
    // Data quality issues
    {
        category: ErrorCategory.DATA_QUALITY,
        patterns: [
            /valid prices.*need/i,
            /valid ratings.*need/i,
            /DATA QUALITY ISSUES/i,
            /N\/A.*(?:prices?|ratings?)/i,
        ],
        confidence: 0.85,
        getMessage: (match, stdout) => {
            // Decide from the text that triggered the rule. Scanning all of
            // stdout for the bare word "prices" would report a rating problem
            // as a price problem whenever prices are mentioned anywhere.
            const trigger = match?.[0] ?? '';
            if (/price/i.test(trigger)) {
                return 'Low price extraction rate';
            }
            if (/rating/i.test(trigger)) {
                return 'Low rating extraction rate';
            }
            // Generic trigger: fall back to a coarse scan of stdout.
            const output = stdout ?? '';
            if (/prices/i.test(output)) {
                return 'Low price extraction rate';
            }
            if (/ratings/i.test(output)) {
                return 'Low rating extraction rate';
            }
            return 'Data quality issues detected';
        },
        getSuggestedFix: () => 'Use universal helper functions (getPrice, getRating) that try multiple discovery methods.',
        // Pulls "<N>% ... valid prices" / "<N>% ... valid ratings" percentages
        // out of stdout; a field is undefined when no percentage is found.
        extractDetails: (_match, stdout) => {
            const output = stdout ?? '';
            const priceMatch = output.match(/(\d+)%.*valid prices/);
            const ratingMatch = output.match(/(\d+)%.*valid ratings/);
            return {
                priceRate: priceMatch ? Number.parseInt(priceMatch[1], 10) : undefined,
                ratingRate: ratingMatch ? Number.parseInt(ratingMatch[1], 10) : undefined,
            };
        },
    },
];
/**
 * Classify error(s) from script output.
 *
 * Every rule in CLASSIFICATION_RULES is tested against stdout and stderr
 * joined by a newline; the first matching pattern of a rule produces one
 * classification. A timed-out run always adds a TIMEOUT entry with confidence
 * 1.0. When nothing matches and the exit code is a non-zero number, a single
 * UNKNOWN entry is returned.
 *
 * @param {string} [stdout] - Captured standard output; missing is treated as empty.
 * @param {string} [stderr] - Captured standard error; missing is treated as empty.
 * @param {number|null} [exitCode] - Process exit code, or null/undefined if unknown.
 * @param {boolean} [timedOut] - Whether the run was stopped by a timeout.
 * @returns {object[]} Classifications sorted by confidence (highest first),
 *   at most one per category.
 */
export function classifyErrors(stdout, stderr, exitCode, timedOut) {
    // Missing streams become empty strings. Concatenating them raw would put
    // the literal text "undefined" into the haystack, and rule handlers that
    // call string methods on stdout would throw.
    const out = String(stdout ?? '');
    const err = String(stderr ?? '');
    const combined = `${out}\n${err}`;
    const errors = [];
    // Handle timeout first
    if (timedOut) {
        errors.push({
            category: ErrorCategory.TIMEOUT,
            message: 'Script execution timed out',
            confidence: 1.0,
            suggestedFix: 'Increase timeout or optimize script',
        });
    }
    // Check each rule
    for (const rule of CLASSIFICATION_RULES) {
        for (const pattern of rule.patterns) {
            const match = combined.match(pattern);
            if (match) {
                errors.push({
                    category: rule.category,
                    message: rule.getMessage(match, out, err),
                    confidence: rule.confidence,
                    suggestedFix: rule.getSuggestedFix?.(),
                    details: rule.extractDetails?.(match, out, err),
                });
                break; // Only one match per rule
            }
        }
    }
    // No specific error, but the process reported failure. `!= null` skips
    // both null and undefined, so an unknown exit code is not a failure.
    if (errors.length === 0 && exitCode != null && exitCode !== 0) {
        errors.push({
            category: ErrorCategory.UNKNOWN,
            message: `Script exited with code ${exitCode}`,
            confidence: 0.5,
        });
    }
    // Sort by confidence (highest first)
    errors.sort((a, b) => b.confidence - a.confidence);
    // Deduplicate by category (keep highest confidence)
    const seen = new Set();
    return errors.filter((e) => {
        if (seen.has(e.category)) {
            return false;
        }
        seen.add(e.category);
        return true;
    });
}
/**
 * Return the first classification produced by classifyErrors for the given
 * run, or null when there is none.
 */
export function getPrimaryError(stdout, stderr, exitCode, timedOut) {
    const [primary = null] = classifyErrors(stdout, stderr, exitCode, timedOut);
    return primary;
}
/**
 * Format classified errors as a Markdown section for display in prompts.
 *
 * Each error becomes a "### <category>" entry with its issue, optional fix,
 * optional details (JSON-encoded) and confidence as a whole percentage.
 * Details are omitted when they serialize to an empty object, which happens
 * when every detail value is undefined.
 *
 * @param {object[]} errors - Classifications with category, message,
 *   confidence and optional suggestedFix / details.
 * @returns {string} The formatted section, or '' for an empty or missing list.
 */
export function formatClassifiedErrors(errors) {
    if (!errors?.length) {
        return '';
    }
    const sections = errors.map((error) => {
        const lines = [
            `### ${error.category}`,
            `**Issue:** ${error.message}`,
        ];
        if (error.suggestedFix) {
            lines.push(`**Fix:** ${error.suggestedFix}`);
        }
        if (error.details) {
            // JSON.stringify drops undefined-valued keys, so an all-undefined
            // details object would print as a meaningless "{}".
            const serialized = JSON.stringify(error.details);
            if (serialized !== undefined && serialized !== '{}') {
                lines.push(`**Details:** ${serialized}`);
            }
        }
        lines.push(`**Confidence:** ${Math.round(error.confidence * 100)}%`);
        return `${lines.join('\n')}\n\n`;
    });
    return `## Classified Errors\n\n${sections.join('')}`;
}
/**
 * Return targeted Markdown guidance for a classified error.
 *
 * Six categories have a dedicated guidance section. Any other category yields
 * a one-line "Suggested fix" note when the error carries a suggestedFix, and
 * an empty string otherwise.
 */
export function getGuidanceForError(error) {
    const category = error.category;
    const guidanceByCategory = new Map([
        [ErrorCategory.CDP_CONNECTION, `
## CDP CONNECTION LOST
The browser CDP session is no longer available. This typically happens when:
- The browser was closed
- The CDP session timed out
- Network issues interrupted the connection
**Action Required:** Get a fresh CDP_URL and restart the script.
`],
        [ErrorCategory.JSON_PARSING, `
## JSON PARSING ERROR (Double-Encoded JSON)
agent-browser eval returns DOUBLE-ENCODED JSON. The output is a string containing JSON, not raw JSON.
**Required Fix:**
1. Add this helper at the TOP of your script:
\`\`\`bash
unwrap_json() {
  echo "$1" | jq -r 'if type == "string" then fromjson else . end' 2>/dev/null || echo "$1"
}
\`\`\`
2. Use it after EVERY agent-browser eval that returns JSON:
\`\`\`bash
RAW_DATA=$(agent-browser --cdp "$CDP" eval '...JSON.stringify...')
DATA=$(unwrap_json "$RAW_DATA")
\`\`\`
`],
        [ErrorCategory.SELECTOR_EMPTY, `
## NO ITEMS EXTRACTED
The container selector doesn't match any elements. Common causes:
1. Wrong selector - test with: agent-browser eval 'document.querySelectorAll("SELECTOR").length'
2. Page not fully loaded - add more sleep time
3. JavaScript syntax error - check for escaping issues
**Fix:** Use single quotes around JavaScript to avoid bash escaping:
\`\`\`bash
agent-browser --cdp "$CDP" eval '(function() { ... })();'
\`\`\`
`],
        [ErrorCategory.SELECTOR_WRONG, `
## WRONG SELECTOR (Targeting Ads/Carousel)
Your selector finds items from sponsored/ads section, NOT the main product grid.
**How to find the MAIN grid:**
1. Use findProductGrid() to discover the container with most repeated children
2. Test candidate selectors and pick the one with 20-50 items per page
3. Verify items CHANGE after pagination - if same items appear, wrong selector
`],
        [ErrorCategory.DATA_QUALITY, `
## DATA QUALITY ISSUES
Price or rating extraction is failing for many items.
**Use universal helper functions that try multiple discovery methods:**
- getPrice(el) - tries schema.org, data attributes, ARIA, class patterns, text patterns
- getRating(el) - tries schema.org, data attributes, ARIA labels, text patterns
**NEVER return "N/A"** - return empty string if no data found.
`],
        [ErrorCategory.EXTRACTION_INCOMPLETE, `
## INCOMPLETE EXTRACTION
Only a portion of items were extracted. This often happens with:
1. Lazy-loaded content that requires scrolling
2. Pagination that wasn't fully handled
3. Selector targeting a subset (ads/carousel) instead of main grid
**Fix:** Use scroll-and-accumulate pattern and handle all pages.
`],
    ]);
    const guidance = guidanceByCategory.get(category);
    if (guidance !== undefined) {
        return guidance;
    }
    // Categories without a dedicated section fall back to the error's own hint.
    if (error.suggestedFix) {
        return `\n**Suggested fix:** ${error.suggestedFix}\n`;
    }
    return '';
}

package/dist/hooks/capture-hook.js ADDED Viewed

@@ -0,0 +1,57 @@
+#!/usr/bin/env node
+/**
+ * PostToolUse hook for capturing agent-browser commands
+ *
+ * This hook is invoked by Claude Code after each Bash tool use.
+ * It filters for agent-browser commands and logs them to a session file.
+ *
+ * Input (stdin): JSON with tool_name, tool_input, tool_output, session_id
+ * Output (stdout): JSON with decision (allow/block) and optional reason
+ */
+import * as fs from 'fs';
+import * as path from 'path';
/**
 * Hook entry point.
 *
 * Reads one JSON payload from stdin. For Bash tool calls whose command
 * contains "agent-browser", appends a JSONL entry (timestamp, command,
 * truncated output, success flag) to `<sessionsDir>/<session_id>.jsonl`,
 * where sessionsDir is CLAUDE_GEN_SESSIONS_DIR or './sessions'.
 *
 * Always prints `{"decision":"allow"}` to stdout, including when reading,
 * parsing or logging fails; failures are reported on stderr only.
 */
async function main() {
    try {
        // Reading stdin sits inside the try block so that a stream error
        // still ends with an "allow" decision instead of an unhandled rejection.
        let inputData = '';
        for await (const chunk of process.stdin) {
            inputData += chunk;
        }
        const input = JSON.parse(inputData);
        // Only Bash tool calls are inspected; a payload without tool_input is
        // treated as having no command.
        const command = input.tool_name === 'Bash' ? (input.tool_input?.command || '') : '';
        // Only log agent-browser commands
        if (command.includes('agent-browser')) {
            const sessionsDir = process.env.CLAUDE_GEN_SESSIONS_DIR || './sessions';
            // basename() strips directory components so a crafted session id
            // cannot place the log file outside sessionsDir.
            const sessionFile = path.join(sessionsDir, `${path.basename(String(input.session_id))}.jsonl`);
            // recursive mkdir is a no-op when the directory already exists
            fs.mkdirSync(sessionsDir, { recursive: true });
            // NOTE(review): Claude Code's hooks reference appears to name this
            // field tool_response (an object) rather than tool_output; both
            // are accepted here - confirm against the hook payload in use.
            const rawOutput = input.tool_output ?? input.tool_response ?? '';
            const output = typeof rawOutput === 'string' ? rawOutput : (JSON.stringify(rawOutput) ?? '');
            // Heuristic only: the payload's exit status is not consulted; the
            // command counts as failed when its output contains "Error:" or "error:".
            const success = !output.includes('Error:') && !output.includes('error:');
            const entry = {
                timestamp: new Date().toISOString(),
                command,
                output: output.substring(0, 1000), // Truncate long outputs
                success,
            };
            // Append to session file
            fs.appendFileSync(sessionFile, JSON.stringify(entry) + '\n');
        }
        // Always allow the command to proceed
        console.log(JSON.stringify({ decision: 'allow' }));
    }
    catch (error) {
        // On error, still allow the command
        console.error('Hook error:', error);
        console.log(JSON.stringify({ decision: 'allow' }));
    }
}
main();