@browsercash/chase 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +14 -0
- package/.dockerignore +34 -0
- package/README.md +256 -0
- package/api-1 (3).json +831 -0
- package/dist/browser-cash.js +128 -0
- package/dist/claude-runner.js +285 -0
- package/dist/cli-install.js +104 -0
- package/dist/cli.js +503 -0
- package/dist/codegen/bash-generator.js +104 -0
- package/dist/config.js +112 -0
- package/dist/errors/error-classifier.js +351 -0
- package/dist/hooks/capture-hook.js +57 -0
- package/dist/index.js +180 -0
- package/dist/iterative-tester.js +407 -0
- package/dist/logger/command-log.js +38 -0
- package/dist/prompts/agentic-prompt.js +78 -0
- package/dist/prompts/fix-prompt.js +477 -0
- package/dist/prompts/helpers.js +214 -0
- package/dist/prompts/system-prompt.js +282 -0
- package/dist/script-runner.js +429 -0
- package/dist/server.js +1934 -0
- package/dist/types/iteration-history.js +139 -0
- package/openapi.json +1131 -0
- package/package.json +44 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
 * Build the system prompt for agentic mode, where Claude performs the browser
 * task directly and returns structured results instead of generating a
 * reusable script.
 *
 * @param {string} cdpUrl - CDP endpoint shown in the "Environment" section of
 *   the prompt. When it is null, undefined or empty, a "(not provided)"
 *   placeholder is rendered instead of the literal text "undefined"/"null".
 * @returns {string} The complete prompt text.
 */
export function getAgenticPrompt(cdpUrl) {
    // Never leak "undefined"/"null" into the prompt; the agent would treat it as a real URL.
    const cdpDisplay = cdpUrl == null || cdpUrl === '' ? '(not provided)' : String(cdpUrl);
    // NOTE: the eval command below is shown with single quotes so that it agrees with the
    // "JavaScript Execution" guideline and with the worked example at the end of the prompt.
    return `You are a browser automation agent. Perform the requested task directly and return the results.

## Environment
CDP_URL: ${cdpDisplay}

## Available Commands
- agent-browser --cdp "$CDP_URL" open "<url>" - Navigate to URL
- agent-browser --cdp "$CDP_URL" eval '<js>' - Run JavaScript and get result
- agent-browser --cdp "$CDP_URL" snapshot -i - Get page structure (for your understanding only)

## Workflow
1. Navigate to target website using the open command
2. Use snapshot to understand page structure if needed
3. Use eval to interact with the page and extract data
4. When scrolling is needed, use eval with window.scrollBy()
5. Return results as structured JSON

## Important Guidelines

**JavaScript Execution:**
- Use single quotes around JavaScript in eval commands
- Use double quotes for strings inside the JavaScript
- Avoid complex escaping - keep JavaScript simple

**Data Extraction:**
- Extract all requested data in a single pass when possible
- For large datasets, scroll and accumulate items
- Clean and validate data before returning

**Error Handling:**
- If a page fails to load, report the error
- If data cannot be found, explain what was attempted
- Always return a structured response

## Final Output Format

When you have completed the task, output your final results in this exact JSON format:

\`\`\`json
{
"success": true,
"data": {
// Your extracted data here - structure depends on the task
},
"summary": "Brief description of what was extracted"
}
\`\`\`

If the task fails, use this format:

\`\`\`json
{
"success": false,
"error": "Description of what went wrong",
"attempted": "Description of what was tried"
}
\`\`\`

## Example Task Flow

For a task like "Extract top 5 stories from Hacker News":

1. Navigate: \`agent-browser --cdp "$CDP_URL" open "https://news.ycombinator.com"\`
2. Wait for load: \`sleep 2\`
3. Extract data:
\`\`\`
agent-browser --cdp "$CDP_URL" eval 'JSON.stringify(Array.from(document.querySelectorAll(".athing")).slice(0, 5).map(function(el) { var titleEl = el.querySelector(".titleline > a"); var scoreEl = el.nextElementSibling?.querySelector(".score"); return { title: titleEl?.textContent || "", url: titleEl?.href || "", score: scoreEl?.textContent || "" }; }))'
\`\`\`

4. Output final JSON result

NOW: Perform the requested task and return the results.`;
}
|
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
import { getHelperReference } from './helpers.js';
|
|
2
|
+
import { formatIterationHistory } from '../types/iteration-history.js';
|
|
3
|
+
import { classifyErrors, getGuidanceForError, } from '../errors/error-classifier.js';
|
|
4
|
+
/**
 * Generate a prompt asking Claude to fix a failing browser automation script.
 *
 * The error output (and the script itself) are scanned with simple string and
 * regex heuristics; each heuristic that fires appends a targeted guidance
 * section to the prompt. Guidance for the first error returned by
 * classifyErrors() is included as well.
 *
 * @param {string} originalTask - Task description, echoed under "Original Task".
 * @param {string} scriptContent - The failing bash script. Null/undefined is treated as "".
 * @param {string} errorOutput - Output of the failed run. Null/undefined is treated as "".
 * @param {number} [failedLineNumber] - Approximate failing line; omitted from the prompt when falsy.
 * @param {string} [cdpUrl] - CDP endpoint; when truthy the prompt mentions live DOM inspection.
 * @param {*} [history] - When truthy, passed to formatIterationHistory() and inserted after the task.
 * @returns {string} The complete fix prompt.
 */
export function getFixPrompt(originalTask, scriptContent, errorOutput, failedLineNumber, cdpUrl, history) {
    // Normalise once so a missing script/error output cannot crash the detectors below.
    const errorText = String(errorOutput ?? '');
    const scriptText = String(scriptContent ?? '');
    const countMatches = (pattern) => (errorText.match(pattern) || []).length;
    const cdpInfo = cdpUrl
        ? `\nCDP_URL is available: ${cdpUrl}\nYou can run agent-browser commands to inspect the live DOM.\n`
        : '';
    const failedLineInfo = failedLineNumber
        ? `\nThe script failed at approximately line ${failedLineNumber}.`
        : '';
    // Detect common issues
    const hasZeroItems = errorText.includes('extracted 0') ||
        errorText.includes('"totalExtracted": 0') ||
        errorText.includes('No items extracted');
    // Detect wrong selector issues (targeting ads/carousel instead of main grid)
    const hasWrongSelector = errorText.includes('WRONG_SELECTOR') ||
        errorText.includes('same item') ||
        errorText.includes('targeting a carousel') ||
        errorText.includes('targeting a sticky element') ||
        (errorText.includes('Only') && errorText.includes('items') && errorText.includes('incomplete'));
    // Detect low item count per page (sign of wrong selector)
    const hasLowItemCounts = [...errorText.matchAll(/Found (\d+) items on page/g)]
        .some((found) => Number.parseInt(found[1], 10) < 10);
    // Detect jq JSON parsing errors (double-encoded JSON issue)
    const hasJqError = errorText.includes('jq: error') ||
        errorText.includes('cannot be added') ||
        errorText.includes('cannot be subtracted') ||
        errorText.includes('Cannot iterate over string');
    // Handle both regular and escaped JSON patterns
    const hasEmptyPrices = countMatches(/"price":\s*""/g) > 5 ||
        countMatches(/\\"price\\":\s*\\"\\"/g) > 5;
    // Detect N/A prices and ratings (stricter validation)
    // Check both regular JSON and escaped JSON patterns
    const hasInvalidPrices = countMatches(/"price":\s*"(?:N\/A|n\/a|TBD|)"/g) > 5 ||
        countMatches(/\\"price\\":\s*\\"(?:N\/A|n\/a|TBD|)\\"/g) > 5 ||
        errorText.includes('have valid prices (need 50%+)');
    const hasInvalidRatings = countMatches(/"rating":\s*"(?:N\/A|n\/a|TBD|)"/g) > 5 ||
        countMatches(/\\"rating\\":\s*\\"(?:N\/A|n\/a|TBD|)\\"/g) > 5 ||
        errorText.includes('have valid ratings (need 30%+)');
    // A bare substring test for "404" also fires on prices and counts ("$404.99", "1404"),
    // so only accept a 404 that is not embedded in a larger number or currency amount.
    const is404Error = errorText.toLowerCase().includes('page not found') ||
        /(?<![\d.,$\u00A3\u20AC])404(?!\d|[.,]\d)/.test(errorText);
    let guidance = '';
    if (hasJqError) {
        guidance += `
## CRITICAL: JSON PARSING ERROR (jq cannot process output)

The script is trying to combine incompatible JSON types. This happens because agent-browser eval returns DOUBLE-ENCODED JSON (a string containing JSON, not raw JSON).

**The Problem:**
When you do: DATA=$(agent-browser --cdp "$CDP" eval '...return JSON.stringify(items)...')
The output is a STRING like: "[{\\"name\\":\\"foo\\"}]"
NOT an array like: [{"name":"foo"}]

**The Fix - Add this unwrap_json helper at the TOP of your script:**

\`\`\`bash
# REQUIRED: Add after CDP= line
unwrap_json() {
echo "$1" | jq -r 'if type == "string" then fromjson else . end' 2>/dev/null || echo "$1"
}
\`\`\`

**Then use it after EVERY agent-browser eval that returns JSON:**

\`\`\`bash
# WRONG:
DATA=$(agent-browser --cdp "$CDP" eval '...JSON.stringify...')

# CORRECT:
RAW_DATA=$(agent-browser --cdp "$CDP" eval '...JSON.stringify...')
DATA=$(unwrap_json "$RAW_DATA")
\`\`\`

This MUST be fixed or jq operations will always fail.
`;
    }
    if (hasWrongSelector || hasLowItemCounts) {
        guidance += `
## CRITICAL: WRONG SELECTOR (Targeting Ads/Carousel Instead of Main Grid)

Your selector is finding items from a sponsored/ads carousel or sidebar, NOT the main product grid.
Evidence: Same item appearing on multiple pages, or very few items (< 10) per page.

**The Problem:**
E-commerce pages have multiple product containers:
1. Sponsored/ad carousels (small, sticky, don't change with pagination)
2. Main product grid (20-50 items per page, changes with pagination)

**How to Fix - Use Universal Discovery:**

1. **Use findProductGrid() to find the main container:**
\`\`\`javascript
function findProductGrid() {
var best = null;
var containers = document.querySelectorAll("main, section, [role=main], div");
for (var i = 0; i < containers.length; i++) {
var c = containers[i];
var children = c.children;
if (children.length < 15) continue;
var firstTag = children[0] ? children[0].tagName : null;
if (!firstTag) continue;
var sameCount = 0;
for (var j = 0; j < children.length; j++) {
if (children[j].tagName === firstTag) sameCount++;
}
if (sameCount >= 15 && (!best || sameCount > best.count)) {
best = { el: c, count: sameCount };
}
}
return best ? best.el : null;
}
\`\`\`

2. **Try universal semantic selectors (test each, pick the one with MOST items 20-50):**
- Schema.org: \`[itemtype*="Product"]\`
- ARIA: \`[role="listitem"]\`
- Data attributes: \`[data-testid*="product"]\`, \`[data-automation-id*="product"]\`
- Structural: \`[class*="product-card"]\`, \`[class*="search-result"]\`

3. **Verify it's not ads** - check if items have "sponsored" class or are position:fixed

4. **After pagination, verify items CHANGED** - if same items appear, wrong selector
`;
    }
    // Detect fragile site-specific selectors (auto-generated class names like .w_V_DM, .a2_x4)
    const hasFragileSelectors = /\.\w{1,3}_[A-Za-z0-9]{2,}/.test(scriptText) ||
        /\.a-offscreen/.test(scriptText) ||
        /span\.a-icon-alt/.test(scriptText);
    if (hasFragileSelectors) {
        guidance += `
## WARNING: FRAGILE SITE-SPECIFIC SELECTORS DETECTED

Your script uses auto-generated class names (like \`.w_V_DM\`, \`.a-offscreen\`) that are:
- Specific to one site and can change without notice
- Not portable to other e-commerce sites
- Prone to breaking when the site updates

**Replace with Universal Selectors:**

Instead of site-specific classes, use:
1. Schema.org: \`[itemprop="price"]\`, \`[itemprop="name"]\`, \`[itemprop="ratingValue"]\`
2. ARIA: \`[aria-label*="price"]\`, \`[aria-label*="rating"]\`
3. Data attributes: \`[data-price]\`, \`[data-rating]\`, \`[data-value]\`
4. Text patterns: Extract from innerText using regex for currency/rating patterns

**Use the universal helper functions (getPrice, getRating, getName) that try multiple discovery methods.**
`;
    }
    if (hasZeroItems) {
        guidance += `
## CRITICAL: NO ITEMS EXTRACTED

The script is extracting 0 items. Common causes:

1. **Wrong selector** - The container selector doesn't match any elements
Fix: Run this to find the right selector:
agent-browser --cdp "$CDP" eval 'document.querySelectorAll("[data-component-type]").length'

2. **JavaScript error** - Syntax error in the extraction code
Fix: Test the JavaScript in browser console first

3. **Timing issue** - Page not fully loaded
Fix: Add more sleep time or wait for elements

4. **Wrong quotes** - Using double quotes around JavaScript causes bash escaping issues
Fix: Use single quotes around the JavaScript in eval command

IMPORTANT: Use SINGLE QUOTES around JavaScript to avoid escaping issues:
agent-browser --cdp "$CDP" eval '(function() { ... })();'
`;
    }
    if (hasEmptyPrices) {
        guidance += `
## EMPTY PRICES DETECTED

Many items have empty prices. Use the universal getPrice() function that tries multiple discovery methods:

1. Schema.org: \`[itemprop="price"]\` with content attribute or textContent
2. Data attributes: \`[data-price]\`, \`[data-automation-id*="price"]\`
3. ARIA labels: \`[aria-label*="price"]\`
4. Class patterns: \`[class*="price"]\` (excluding crossed-out/original prices)
5. Text patterns: Currency symbols followed by numbers

Make sure you're using the universal getPrice() helper function in your extraction code.
`;
    }
    if (hasInvalidPrices) {
        guidance += `
## DATA QUALITY ISSUE: MANY PRICES ARE N/A OR MISSING

Your price selector isn't finding ALL prices. Use the universal getPrice() function that tries multiple discovery methods.

1. **First, inspect how prices appear in the DOM:**
agent-browser --cdp "$CDP" eval 'var el=document.querySelector("YOUR_CONTAINER_SELECTOR");var html=el?.innerHTML||"";JSON.stringify({hasItemprop:html.includes("itemprop"),hasDataPrice:html.includes("data-price"),hasPriceClass:html.includes("price"),sample:html.substring(0,1000)})'

2. **Use the universal getPrice function (tries multiple discovery methods):**
\`\`\`javascript
function getPrice(el) {
// 1. Schema.org markup
var schema = el.querySelector("[itemprop=price]");
if (schema) {
var val = schema.getAttribute("content") || schema.textContent;
if (val && /\\d/.test(val)) return val.trim();
}
// 2. Data attributes
var dataPrice = el.querySelector("[data-price], [data-automation-id*=price]");
if (dataPrice) {
var val2 = dataPrice.getAttribute("data-price") || dataPrice.textContent;
if (val2 && /\\d/.test(val2)) return val2.trim();
}
// 3. ARIA labels with price
var ariaPrice = el.querySelector("[aria-label*=price]");
if (ariaPrice) {
var label = ariaPrice.getAttribute("aria-label") || "";
var m = label.match(/[\\$\\u00A3\\u20AC]\\s*[\\d,.]+/);
if (m) return m[0].trim();
}
// 4. Common price class patterns
var priceEl = el.querySelector("[class*=price]:not([class*=crossed]):not([class*=was])");
if (priceEl && /[\\$\\u00A3\\u20AC]/.test(priceEl.textContent)) {
var m2 = priceEl.textContent.match(/[\\$\\u00A3\\u20AC]\\s*[\\d,.]+/);
if (m2) return m2[0].trim();
}
// 5. Text pattern fallback
var text = el.innerText || "";
var match = text.match(/(?:[\\$\\u00A3\\u20AC]|USD|CAD|EUR|GBP)\\s*[\\d,.]+/i);
return match ? match[0].trim() : "";
}
\`\`\`

3. **NEVER return "N/A"** - return empty string if no price found
`;
    }
    if (hasInvalidRatings) {
        guidance += `
## DATA QUALITY ISSUE: MANY RATINGS ARE N/A OR MISSING

Your rating selector isn't finding the actual ratings. Use the universal getRating() function that tries multiple discovery methods.

1. **First, inspect how ratings appear in the DOM:**
agent-browser --cdp "$CDP" eval 'var el=document.querySelector("YOUR_CONTAINER_SELECTOR");var html=el?.innerHTML||"";JSON.stringify({hasItemprop:html.includes("ratingValue"),hasDataRating:html.includes("data-rating")||html.includes("data-value"),hasAriaLabel:html.includes("aria-label"),sample:html.substring(0,1000)})'

2. **Use the universal getRating function (tries multiple discovery methods):**
\`\`\`javascript
function getRating(el) {
// 1. Schema.org markup
var schema = el.querySelector("[itemprop=ratingValue]");
if (schema) {
var val = schema.getAttribute("content") || schema.textContent;
if (val && /\\d/.test(val)) return val.trim();
}
// 2. Data attributes (data-rating, data-value)
var dataRating = el.querySelector("[data-rating], [data-value]");
if (dataRating) {
var val2 = dataRating.getAttribute("data-rating") || dataRating.getAttribute("data-value");
if (val2 && /^\\d+\\.?\\d*$/.test(val2)) return val2;
}
// 3. ARIA labels ("4.5 out of 5 stars", "4.5 stars")
var ariaEls = el.querySelectorAll("[aria-label]");
for (var i = 0; i < ariaEls.length; i++) {
var label = ariaEls[i].getAttribute("aria-label") || "";
var m = label.match(/(\\d+\\.?\\d*)\\s*(?:out of|stars?)/i);
if (m) return m[1];
}
// 4. Text pattern ("4.5 out of 5")
var text = el.innerText || "";
var m2 = text.match(/(\\d+\\.?\\d*)\\s*out\\s*of\\s*5/i);
if (m2) return m2[1];
return "";
}
\`\`\`

3. **NEVER return "N/A" as a fallback** - return empty string if no rating found
`;
    }
    if (is404Error) {
        guidance += `
## 404 ERROR DETECTED

The URL doesn't exist. Try:
1. Navigate via site menu instead of direct URL
2. Use a different URL pattern
3. Search from homepage
`;
    }
    // Use error classifier for structured error analysis
    const classifiedErrors = classifyErrors(errorText, '', null, false);
    // Add targeted guidance for the primary (first) classified error only
    const classifiedGuidance = classifiedErrors.length > 0
        ? getGuidanceForError(classifiedErrors[0])
        : '';
    // Format iteration history if available
    const historySection = history ? formatIterationHistory(history) : '';
    return `Fix this browser automation script.
${cdpInfo}
## Original Task
${originalTask}
${historySection}
## Script That Failed
\`\`\`bash
${scriptText}
\`\`\`

## Error Output
\`\`\`
${errorText}
\`\`\`
${failedLineInfo}
${classifiedGuidance}
${guidance}

## Key Fix Tips

1. **Use SINGLE quotes around JavaScript** (not double quotes):
Good: agent-browser --cdp "$CDP" eval '(function() { ... })();'
Bad: agent-browser --cdp "$CDP" eval "(function() { ... })();"

2. **Avoid dollar signs in regex** - use CSS selectors instead

3. **Test selectors first**:
agent-browser --cdp "$CDP" eval 'document.querySelectorAll("SELECTOR").length'

4. ${getHelperReference()}

5. **Avoid fragile site-specific selectors** like .a-offscreen, .w_V_DM - they break when sites update

IMPORTANT: You MUST output a complete, working bash script in a code block. Do not just explain - output the actual fixed script.

\`\`\`bash
#!/bin/bash
set -e
CDP="\${CDP_URL:?Required}"

# Your complete fixed script here - include ALL code, not just the changed parts
\`\`\`

After outputting the script, do not add any more text.`;
}
|
|
351
|
+
/**
 * Extract the fixed bash script from Claude's response.
 *
 * Strategy, in order:
 *   1. Fenced code blocks tagged bash/sh/shell (any letter case).
 *   2. Untagged fenced code blocks that look like a script.
 *      The best-scoring candidate from steps 1-2 wins.
 *   3. A response that itself starts with `#!/bin/bash`.
 *   4. A `#!/bin/bash` found anywhere in the response.
 *   5. Loose script lines reconstructed from an explanation.
 *
 * The fence's language tag is never treated as script content, and fences
 * tagged with another language (json, javascript, ...) are ignored.
 *
 * @param {string} response - Raw text returned by Claude.
 * @returns {string|null} The script text, or null when none can be found
 *   (including when `response` is not a string).
 */
export function parseFixedScript(response) {
    if (typeof response !== 'string') {
        return null;
    }
    const SCRIPT_HEADER = '#!/bin/bash\nset -e\n\nCDP="${CDP_URL:?Required}"\n\n';
    // Markers that suggest a candidate is the complete script, with their weights.
    const MARKER_WEIGHTS = [
        [['#!/bin/bash'], 100],
        [['CDP='], 50],
        [['agent-browser --cdp'], 30],
        [['open "http'], 20],
        [['eval '], 20],
        [['echo "'], 10],
        [['DATA=$(', 'RAW_DATA=$('], 20],
        [['totalExtracted'], 15],
        [['unwrap_json'], 25], // Bonus for using the helper
    ];
    // Helper to score a script candidate
    const scoreScript = (content) => {
        let score = 0;
        for (const [markers, weight] of MARKER_WEIGHTS) {
            if (markers.some((marker) => content.includes(marker))) {
                score += weight;
            }
        }
        // Longer scripts are usually more complete
        return score + Math.min(content.length / 100, 50);
    };
    // Helper to ensure shebang is present
    const ensureShebang = (content) => (content.startsWith('#!/') ? content : SCRIPT_HEADER + content);
    const candidates = [];
    const addCandidate = (content) => {
        candidates.push({ content, score: scoreScript(content) });
    };
    // Pattern 1: fences tagged as shell. The word boundary keeps "sh" from matching the
    // start of "shell"; the newline after the tag is optional (single-line fences).
    const shellFence = /```[ \t]*(?:bash|shell|sh)\b[ \t]*(?:\r?\n)?([\s\S]*?)```/gi;
    for (const match of response.matchAll(shellFence)) {
        const content = match[1].trim();
        if (content.includes('agent-browser')) {
            addCandidate(content);
        }
    }
    // Pattern 2: generic fences. Walk every fence pair and drop the ones that carry a
    // language tag, so the tag can never leak into the script as its first line.
    const anyFence = /```([\s\S]*?)```/g;
    const shellTagPrefix = /^[ \t]*(?:bash|shell|sh)\b/i;
    const languageTag = /^[\w+#.-]+$/;
    for (const match of response.matchAll(anyFence)) {
        const inner = match[1];
        if (shellTagPrefix.test(inner)) {
            continue; // already handled by pattern 1
        }
        const newlineAt = inner.indexOf('\n');
        const infoString = newlineAt === -1 ? '' : inner.slice(0, newlineAt).trim();
        if (infoString !== '' && languageTag.test(infoString)) {
            continue; // fenced block in some other language
        }
        const content = inner.trim();
        if (content.includes('agent-browser') && (content.includes('eval') || content.includes('#!/bin/bash'))) {
            addCandidate(content);
        }
    }
    // Sort by score and pick the best (sort is stable, so earlier candidates win ties)
    candidates.sort((a, b) => b.score - a.score);
    if (candidates.length > 0) {
        return ensureShebang(candidates[0].content);
    }
    // Fallback 1: If no code block, check if response starts with shebang
    const trimmedResponse = response.trim();
    if (trimmedResponse.startsWith('#!/bin/bash')) {
        // The script ends at the next code block marker or at the end of the content
        const endMarker = trimmedResponse.indexOf('\n```');
        const script = endMarker === -1 ? trimmedResponse : trimmedResponse.substring(0, endMarker);
        return script.trim();
    }
    // Fallback 2: Try to find script content starting from shebang anywhere in response
    const shebangIndex = response.indexOf('#!/bin/bash');
    if (shebangIndex !== -1) {
        const tail = response.substring(shebangIndex);
        // Cut at the earliest end marker, if any
        let endIndex = tail.length;
        for (const marker of ['\n```', '\n\n---', '\n## ']) {
            const idx = tail.indexOf(marker);
            if (idx !== -1 && idx < endIndex) {
                endIndex = idx;
            }
        }
        const script = tail.substring(0, endIndex);
        if (script.includes('agent-browser')) {
            return script.trim();
        }
    }
    // Fallback 3: Look for script fragments that can be reconstructed
    // Some responses have the script broken into explanation sections
    const fragments = response.match(/agent-browser\s+--cdp\s+[^\n]+/g);
    if (fragments && fragments.length >= 2) {
        // Multiple agent-browser commands: probably a script without a proper code block,
        // so collect lines from the first script-looking line onwards.
        const scriptLines = [];
        let inScript = false;
        for (const line of response.split('\n')) {
            if (line.includes('#!/bin/bash') || line.includes('set -e')) {
                inScript = true;
            }
            if (!inScript) {
                continue;
            }
            // Stop at common non-script indicators
            if (line.match(/^[A-Z][a-z].*:$/) || line.startsWith('Note:') || line.startsWith('This ')) {
                break;
            }
            scriptLines.push(line);
        }
        if (scriptLines.length > 5 && scriptLines.some((l) => l.includes('agent-browser'))) {
            return ensureShebang(scriptLines.join('\n').trim());
        }
    }
    return null;
}
|