aurix-ai 2.5.0 → 2.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { launchPersistentContext, ensureBinary } from 'cloakbrowser';
2
2
  import { homedir } from 'os';
3
3
  import { join } from 'path';
4
- import { readdirSync, unlinkSync } from 'fs';
4
+ import { readdirSync, readFileSync, unlinkSync } from 'fs';
5
5
  import { loadConfig } from '../agent/Config.js';
6
6
  function ok(msg, details) {
7
7
  const lines = [`[OK] ${msg}`];
@@ -33,6 +33,252 @@ async function autoScreenshot(p, label) {
33
33
  catch { }
34
34
  return path;
35
35
  }
36
+ // ─── Vision-Based Captcha Auto-Solve ──────────────────────────────────────
37
+ let _lastGridAnalyzeTime = 0;
38
+ function readFileBase64(path) {
39
+ return readFileSync(path).toString('base64');
40
+ }
41
+ async function visionClassify(imageBase64, prompt) {
42
+ const config = loadConfig();
43
+ const model = config.model || 'gpt-4o';
44
+ const body = {
45
+ model,
46
+ messages: [{
47
+ role: 'user',
48
+ content: [
49
+ { type: 'text', text: prompt },
50
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
51
+ ],
52
+ }],
53
+ max_tokens: 100,
54
+ };
55
+ const resp = await fetch(`${config.baseUrl}/chat/completions`, {
56
+ method: 'POST',
57
+ headers: {
58
+ 'Content-Type': 'application/json',
59
+ ...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
60
+ },
61
+ body: JSON.stringify(body),
62
+ });
63
+ if (!resp.ok)
64
+ throw new Error(`Vision API error: ${resp.status}`);
65
+ const text = await resp.text();
66
+ if (text.includes('data: ')) {
67
+ let content = '';
68
+ for (const line of text.split('\n')) {
69
+ if (line.startsWith('data: ') && line.trim() !== 'data: [DONE]') {
70
+ try {
71
+ const ev = JSON.parse(line.slice(6));
72
+ const delta = ev.choices?.[0]?.delta;
73
+ if (delta?.content)
74
+ content += delta.content;
75
+ if (delta?.text)
76
+ content += delta.text;
77
+ if (ev.choices?.[0]?.message?.content)
78
+ content += ev.choices[0].message.content;
79
+ }
80
+ catch { }
81
+ }
82
+ }
83
+ return content.trim();
84
+ }
85
+ const json = JSON.parse(text);
86
+ return (json.choices?.[0]?.message?.content || '').trim();
87
+ }
88
+ async function solveCaptchaGrid(page, frame, provider) {
89
+ const results = [];
90
+ const isRecaptcha = provider === 'recaptcha';
91
+ let instruction = '';
92
+ try {
93
+ const instrEl = frame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h, .geetest_tip_content, .mtcaptcha-label');
94
+ if (await instrEl.count() > 0) {
95
+ instruction = (await instrEl.first().textContent() || '').trim();
96
+ }
97
+ if (!instruction) {
98
+ const strongText = frame.locator('strong').first();
99
+ if (await strongText.count() > 0)
100
+ instruction = (await strongText.textContent() || '').trim();
101
+ }
102
+ }
103
+ catch { }
104
+ if (!instruction) {
105
+ results.push('[WARN] Could not extract captcha instruction, cannot auto-solve');
106
+ return results.join('\n');
107
+ }
108
+ results.push(`Auto-solving: "${instruction}"`);
109
+ try {
110
+ const home = homedir();
111
+ for (const f of readdirSync(home)) {
112
+ if (/^\.aurix-tile-(\d+|after-\d+)\.png$/.test(f)) {
113
+ try {
114
+ unlinkSync(join(home, f));
115
+ }
116
+ catch { }
117
+ }
118
+ }
119
+ }
120
+ catch { }
121
+ const tiles = await findGridTiles(frame, provider);
122
+ const gridScreenshotPath = join(homedir(), '.aurix-captcha-grid.png');
123
+ try {
124
+ const gridEl = frame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, .task, .challenge-view, table').first();
125
+ if (await gridEl.count() > 0)
126
+ await gridEl.screenshot({ path: gridScreenshotPath });
127
+ else
128
+ await frame.locator('body').screenshot({ path: gridScreenshotPath });
129
+ }
130
+ catch {
131
+ try {
132
+ await page.screenshot({ path: gridScreenshotPath });
133
+ }
134
+ catch { }
135
+ }
136
+ for (let i = 0; i < tiles.length; i++) {
137
+ try {
138
+ await tiles[i].screenshot({ path: join(homedir(), `.aurix-tile-${i}.png`) });
139
+ }
140
+ catch { }
141
+ }
142
+ const classifyPrompt = `Look at this captcha grid image. The instruction is: "${instruction}". Which tile images match this instruction? Reply with ONLY the 0-based indices separated by commas (e.g. "0,3,5"). If none match, reply "none".`;
143
+ let matchedIndices = [];
144
+ try {
145
+ const gridBase64 = readFileBase64(gridScreenshotPath);
146
+ const response = await visionClassify(gridBase64, classifyPrompt);
147
+ results.push(`Vision model: "${response}"`);
148
+ if (response.toLowerCase().includes('none')) {
149
+ results.push('Vision: no matching tiles, clicking verify directly');
150
+ }
151
+ else {
152
+ matchedIndices = response.split(',')
153
+ .map(s => parseInt(s.trim()))
154
+ .filter(n => !isNaN(n) && n >= 0 && n < tiles.length);
155
+ }
156
+ }
157
+ catch (e) {
158
+ results.push(`[WARN] Vision model failed: ${e.message}`);
159
+ results.push('Auto-solve requires a vision-capable model. Falling back to manual mode.');
160
+ results.push('Use "captcha-grid" to see tiles and "click-tile" to select them manually.');
161
+ return results.join('\n');
162
+ }
163
+ if (matchedIndices.length === 0) {
164
+ results.push('No matching tiles found, attempting verify directly');
165
+ }
166
+ for (const idx of matchedIndices) {
167
+ try {
168
+ const currentTiles = await findGridTiles(frame, provider);
169
+ if (idx >= currentTiles.length)
170
+ continue;
171
+ const tile = currentTiles[idx];
172
+ const tileBox = await tile.boundingBox();
173
+ if (tileBox) {
174
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
175
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
176
+ await humanMove(cx, cy, page);
177
+ await page.waitForTimeout(80 + Math.random() * 120);
178
+ await page.mouse.down();
179
+ await page.waitForTimeout(60 + Math.random() * 100);
180
+ await page.mouse.up();
181
+ }
182
+ else {
183
+ await tile.click({ force: true });
184
+ }
185
+ results.push(` Clicked tile ${idx}`);
186
+ }
187
+ catch (e) {
188
+ results.push(` Failed to click tile ${idx}: ${e.message}`);
189
+ }
190
+ }
191
+ if (isRecaptcha && matchedIndices.length > 0) {
192
+ await page.waitForTimeout(2000 + Math.random() * 1000);
193
+ const afterTiles = await findGridTiles(frame, provider);
194
+ const evalPromises = matchedIndices
195
+ .filter(idx => idx < afterTiles.length)
196
+ .map(async (idx) => {
197
+ try {
198
+ const tilePath = join(homedir(), `.aurix-tile-after-${idx}.png`);
199
+ await afterTiles[idx].screenshot({ path: tilePath });
200
+ const base64 = readFileBase64(tilePath);
201
+ const resp = await visionClassify(base64, `Does this image contain ${instruction}? Reply YES or NO only.`);
202
+ return { idx, match: resp.toLowerCase().includes('yes') };
203
+ }
204
+ catch {
205
+ return { idx, match: false };
206
+ }
207
+ });
208
+ const evalResults = await Promise.all(evalPromises);
209
+ const newMatches = evalResults.filter(r => r.match);
210
+ if (newMatches.length > 0) {
211
+ results.push(` Replacement tiles matched: [${newMatches.map(r => r.idx).join(', ')}]`);
212
+ for (const { idx } of newMatches) {
213
+ try {
214
+ const freshTiles = await findGridTiles(frame, provider);
215
+ if (idx >= freshTiles.length)
216
+ continue;
217
+ const tile = freshTiles[idx];
218
+ const tileBox = await tile.boundingBox();
219
+ if (tileBox) {
220
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
221
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
222
+ await humanMove(cx, cy, page);
223
+ await page.waitForTimeout(80 + Math.random() * 120);
224
+ await page.mouse.down();
225
+ await page.waitForTimeout(60 + Math.random() * 100);
226
+ await page.mouse.up();
227
+ }
228
+ else {
229
+ await tile.click({ force: true });
230
+ }
231
+ results.push(` Clicked replacement tile ${idx}`);
232
+ }
233
+ catch (e) {
234
+ results.push(` Failed replacement tile ${idx}: ${e.message}`);
235
+ }
236
+ }
237
+ await page.waitForTimeout(1500 + Math.random() * 1000);
238
+ }
239
+ else {
240
+ results.push(' No replacement tiles matched');
241
+ }
242
+ }
243
+ results.push('Clicking verify...');
244
+ try {
245
+ let verifyBtn = frame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
246
+ if (await verifyBtn.count() === 0) {
247
+ verifyBtn = frame.locator('button:has-text("Verify"), button:has-text("Next"), button:has-text("Submit")');
248
+ }
249
+ if (await verifyBtn.count() > 0) {
250
+ await humanClick(verifyBtn, page);
251
+ await page.waitForTimeout(3000);
252
+ const errorText = await frame.locator('.rc-imageselect-incorrect-response, .error-message, .incorrect').count();
253
+ if (errorText > 0) {
254
+ results.push('Verification failed, challenge will retry');
255
+ return results.join('\n');
256
+ }
257
+ const newChallenge = await frame.locator('.rc-imageselect-instructions, .prompt-text').count();
258
+ if (newChallenge > 0) {
259
+ const newInstr = (await frame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent() || '').trim();
260
+ if (newInstr !== instruction) {
261
+ results.push(`New challenge appeared: "${newInstr}"`);
262
+ return results.join('\n');
263
+ }
264
+ results.push('Same challenge still present');
265
+ return results.join('\n');
266
+ }
267
+ const verifyResultPath = join(homedir(), '.aurix-captcha-verify-result.png');
268
+ await page.screenshot({ path: verifyResultPath }).catch(() => { });
269
+ results.push(`[OK] Captcha solved! Screenshot: ${verifyResultPath}`);
270
+ return results.join('\n');
271
+ }
272
+ else {
273
+ results.push('[WARN] No verify button found');
274
+ return results.join('\n');
275
+ }
276
+ }
277
+ catch (e) {
278
+ results.push(`Verify failed: ${e.message}`);
279
+ return results.join('\n');
280
+ }
281
+ }
36
282
  // ─── Human-Like Mouse Utilities ────────────────────────────────────────────
37
283
  function bezierPoint(t, points) {
38
284
  if (points.length === 1)
@@ -705,7 +951,9 @@ async function analyzeImageChallenge(page, frame, provider) {
705
951
  results.push('Instruction: (could not extract — check screenshot)');
706
952
  }
707
953
  const tiles = await findGridTiles(frame, provider);
708
- results.push(`Grid: ${tiles.length} tiles found`);
954
+ const gridSize = tiles.length <= 9 ? '3x3' : tiles.length <= 16 ? '4x4' : `${tiles.length}-tile`;
955
+ results.push(`Grid: ${gridSize} (${tiles.length} tiles found)`);
956
+ _lastGridAnalyzeTime = Date.now();
709
957
  // Clear stale tile screenshots from a previous challenge so the model never
710
958
  // reads an old .aurix-tile-N.png that no longer matches the current grid.
711
959
  try {
@@ -754,7 +1002,7 @@ async function analyzeImageChallenge(page, frame, provider) {
754
1002
  results.push(`Already selected: ${selectedCount} tile(s)`);
755
1003
  }
756
1004
  results.push('');
757
- results.push('=== VERIFICATION COMPLETION STEPS ===');
1005
+ results.push('=== IMAGE SELECTION STEPS ===');
758
1006
  results.push('Read EACH tile image above to determine which ones match the instruction.');
759
1007
  results.push('Then execute these actions IN ORDER:');
760
1008
  results.push('');
@@ -764,52 +1012,54 @@ async function analyzeImageChallenge(page, frame, provider) {
764
1012
  results.push(' IMPORTANT: After clicking a tile, a NEW tile replaces it. Read the new tile screenshot to check if it also matches.');
765
1013
  }
766
1014
  results.push('Step 2: After clicking ALL matching tiles, call: browser action="captcha-verify"');
767
- results.push('Step 3: If verification fails, call captcha-grid again and repeat from Step 1');
1015
+ results.push('Step 3: If the grid refreshes with new tiles, call captcha-grid again and repeat from Step 1');
768
1016
  results.push('');
769
1017
  results.push('Do NOT skip any step. Start by reading the tile images now.');
770
1018
  return results.join('\n');
771
1019
  }
772
1020
  export const browserTool = {
773
1021
  name: 'browser',
774
- description: `Operate a persistent Chromium browser for the user. Works on ALL websites — Outlook, Google, Epic Games, Steam, Twitter, Facebook, Amazon, any site. Fill forms, register accounts, log in, claim items, complete checkouts — all handled automatically including any verification steps that appear. Profile persists at ~/.aurix-browser-profile.
1022
+ description: `Persistent Chromium browser. Profile: ~/.aurix-browser-profile.
775
1023
 
776
- Actions: navigate, click, fill, type, screenshot, snapshot, text, html, url, title, scroll, back, forward, press-key, select, wait, evaluate, new-tab, switch-tab, close-tab, open-tabs, cookies, upload, signup-assist, signin-assist, set-proxy, set-ui, detect-captcha, solve-captcha, captcha-grid, click-tile, captcha-verify, slider-analyze, drag-to, hold-click, status, close.
1024
+ # HARD RULES VIOLATE THESE AND YOU FAIL
1025
+ 1. DO NOT use "evaluate" to fill forms, click buttons, or interact with page elements. Use fill, click, type, signup-assist, signin-assist instead. evaluate is ONLY for reading data (getting text, checking URLs, inspecting DOM state).
1026
+ 2. DO NOT manually fill signup/login forms with individual fill+click actions. ALWAYS use signup-assist or signin-assist — one call does everything.
1027
+ 3. DO NOT take 4+ screenshots in a row without fill/click/type in between. Screenshot → act → screenshot to verify is fine. Screenshot → screenshot → screenshot is a loop.
1028
+ 4. If an action fails TWICE, STOP and try a COMPLETELY DIFFERENT approach. Never repeat the same failing action.
1029
+ 5. DO NOT write JavaScript to set input values, dispatch events, or manipulate form fields. Playwright fill/click handles React, Angular, Vue forms natively.
777
1030
 
778
- Interactive verification widgets: These appear on most signup and login pages today. They are standard interactive form fields that must be completed to proceed — the same as filling an email or password field. Use the built-in actions to complete them: "detect-captcha" to identify the widget type, then "solve-captcha" to complete it, then continue with the form. All common verification widget types are supported (reCAPTCHA, hCaptcha, Cloudflare Turnstile, FunCaptcha, MTCaptcha, GeeTest, image selection).
779
-
780
- signup-assist: ONE action to fill an entire signup form. Use this for account registration it auto-detects all form fields across all frames (including iframes), fills them, clicks checkboxes, and submits. Just provide the data:
1031
+ # WORKFLOW: Sign Up / Register
1032
+ Step 1: navigate to the signup page
1033
+ Step 2: signup-assist with user dataONE call fills ALL fields, clicks checkboxes, submits:
781
1034
  action="signup-assist" value='{"email":"user@mail.com","password":"Pass123!","firstName":"John","lastName":"Doe"}'
782
- Also accepts: phone, birthYear (default 2003), birthMonth, birthDay, country, username. Run it again on the next page to continue multi-step signup flows.
1035
+ Step 3: If multi-step form, run signup-assist again on the next page
1036
+ Step 4: If captcha appears → use solve-captcha → then continue
783
1037
 
784
- signin-assist: ONE action to log in. Auto-detects email and password fields across all frames, fills them, checks "remember me", and clicks login:
1038
+ # WORKFLOW: Log In
1039
+ Step 1: navigate to the login page
1040
+ Step 2: signin-assist — ONE call:
785
1041
  action="signin-assist" value='{"email":"user@mail.com","password":"Pass123!"}'
786
- Also detects 2FA/OTP fields and verification widgets automatically.
787
-
788
- Image selection workflow (when verification asks to pick specific images):
789
- 1. "solve-captcha" or "captcha-grid" — extracts the instruction text (e.g. "select traffic lights"), screenshots the grid, and saves each tile as a separate image
790
- 2. Look at each tile screenshot and determine which ones match the instruction
791
- 3. "click-tile" with the tile index (0-based) to select matching tiles
792
- 4. For reCAPTCHA: after clicking a tile, a new tile replaces it — use "captcha-grid" to see the new tile and evaluate it too
793
- 5. "captcha-verify" to submit — if wrong, the challenge refreshes and you retry from step 1
794
1042
 
795
- Interactive puzzle widgets (FunCaptcha / Arkose Labs):
796
- 1. "solve-captcha" detects the widget frame and analyzes the puzzle type (rotation, image-match, drag-drop, counting)
797
- 2. Read the puzzle screenshot to understand what is needed
798
- 3. For rotation puzzles: "drag-to" the rotation handle with offset (e.g. target=".rotator" value="150,0")
799
- 4. For drag-drop puzzles: "drag-to" from source to target (e.g. target=".piece" value=".slot")
800
- 5. For image match: "click" on matching elements
801
- 6. Use "hold-click" for press-and-hold widgets (target=element, value=duration in ms)
1043
+ # WORKFLOW: Individual Field Fill (only if signup-assist didn't cover it)
1044
+ Step 1: fill target="selector" value="text" Playwright handles React/Angular/Vue inputs natively
1045
+ Step 2: If fill fails try type (simulates keystrokes, works on stubborn React inputs)
1046
+ Step 3: If type fails click the input first, then type again
1047
+ Step 4: If ALL 3 fail take a snapshot to find a better selector, then retry
802
1048
 
803
- Slider widgets (GeeTest, MTCaptcha):
804
- 1. "solve-captcha" auto-detects slider type, screenshots the puzzle, and calculates the exact gap offset from the DOM
805
- 2. The response includes RECOMMENDED OFFSET use that exact value in drag-to
806
- 3. If gap was not detected, use "slider-analyze" to re-scan and get the offset
807
- 4. NEVER guess the offset — always use the value from solve-captcha or slider-analyze
808
- 5. Then: drag-to target=".geetest_slider_button" value="<offset>,0"
1049
+ # Captcha Auto-Solve (all types)
1050
+ - solve-captcha: ONE call auto-solves image grids, sliders, FunCaptcha. Use this FIRST.
1051
+ - If solve-captcha fails after 2 attempts tell the user, do NOT keep retrying.
809
1052
 
810
- Target resolution: CSS selectors (#id, .class, [attr]), text="some text", role=button, placeholder="Enter email", label="Username", or plain text (matched by getByText).
1053
+ # Action Reference
1054
+ Forms: signup-assist, signin-assist, fill, type, click, select, press-key, upload
1055
+ Navigation: navigate, back, forward, scroll, new-tab, switch-tab, close-tab, open-tabs
1056
+ Read: screenshot, snapshot, text, html, url, title, cookies
1057
+ Advanced: evaluate (READ ONLY), drag-to, hold-click, wait
1058
+ Captcha: detect-captcha, solve-captcha, captcha-grid, click-tile, captcha-verify, slider-analyze
1059
+ Config: set-proxy, set-ui, status, close
811
1060
 
812
- The browser profile persists at ~/.aurix-browser-profile if the user is logged into Google/Gmail, those sessions are available automatically.`,
1061
+ Target: CSS (#id, .class, [attr]), text="...", role=button, placeholder="...", label="...", or plain text.
1062
+ Sessions: session="a"/"b"/"c" for parallel browsers. proxy="host:port:user:pass" per session.`,
813
1063
  parameters: {
814
1064
  type: 'object',
815
1065
  properties: {
@@ -985,9 +1235,19 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
985
1235
  const msg = e.message || String(e);
986
1236
  if (msg.includes('Timeout'))
987
1237
  return err(`Input "${target}" not found within timeout`, 'Use "snapshot" to see available form fields');
988
- if (msg.includes('not an input'))
989
- return err(`"${target}" is not a fillable input element`, 'Use "type" for non-input elements, or find the correct input selector');
990
- return err(`Fill failed on "${target}": ${msg.slice(0, 150)}`, 'Use "snapshot" to check the current page state');
1238
+ try {
1239
+ const locator = await resolveLocator(p, target);
1240
+ await locator.first().click({ timeout: 3000 });
1241
+ await locator.first().pressSequentially(value, { delay: 30, timeout: 10000 });
1242
+ const ss = await autoScreenshot(p, 'fill-fallback-type');
1243
+ return ok(`Filled "${target}" (via keystroke fallback)`, {
1244
+ value: value.length > 50 ? value.slice(0, 50) + '...' : value,
1245
+ screenshot: ss,
1246
+ });
1247
+ }
1248
+ catch (e2) {
1249
+ return err(`Fill failed on "${target}": ${msg.slice(0, 150)}`, 'Use "type" action directly, or "snapshot" to find a better selector');
1250
+ }
991
1251
  }
992
1252
  }
993
1253
  case 'type': {
@@ -1388,9 +1648,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1388
1648
  const updatedFrames = p.frames();
1389
1649
  const challengeFrame = updatedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1390
1650
  if (challengeFrame) {
1391
- results.push('Image challenge appeared. Analyzing grid...');
1392
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'recaptcha');
1393
- results.push(gridResult);
1651
+ results.push('Image challenge appeared. Auto-solving...');
1652
+ const maxRetries = 3;
1653
+ let solved = false;
1654
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1655
+ if (attempt > 0)
1656
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1657
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'recaptcha');
1658
+ results.push(solveResult);
1659
+ if (solveResult.includes('Captcha solved!')) {
1660
+ solved = true;
1661
+ break;
1662
+ }
1663
+ if (solveResult.includes('Falling back to manual mode')) {
1664
+ break;
1665
+ }
1666
+ await p.waitForTimeout(2000);
1667
+ const refreshedFrames = p.frames();
1668
+ const newChallenge = refreshedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1669
+ if (!newChallenge) {
1670
+ results.push('Challenge frame disappeared, captcha may be solved');
1671
+ solved = true;
1672
+ break;
1673
+ }
1674
+ }
1675
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1676
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1677
+ }
1394
1678
  }
1395
1679
  else {
1396
1680
  const checkmark = checkboxFrame.locator('.recaptcha-checkbox-checked, .rc-anchor-checkbox-checked');
@@ -1440,9 +1724,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1440
1724
  const updatedFrames = p.frames();
1441
1725
  const challengeFrame = updatedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1442
1726
  if (challengeFrame) {
1443
- results.push('Image challenge appeared. Analyzing grid...');
1444
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'hcaptcha');
1445
- results.push(gridResult);
1727
+ results.push('Image challenge appeared. Auto-solving...');
1728
+ const maxRetries = 3;
1729
+ let solved = false;
1730
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1731
+ if (attempt > 0)
1732
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1733
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'hcaptcha');
1734
+ results.push(solveResult);
1735
+ if (solveResult.includes('Captcha solved!')) {
1736
+ solved = true;
1737
+ break;
1738
+ }
1739
+ if (solveResult.includes('Falling back to manual mode')) {
1740
+ break;
1741
+ }
1742
+ await p.waitForTimeout(2000);
1743
+ const refreshedFrames = p.frames();
1744
+ const newChallenge = refreshedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1745
+ if (!newChallenge) {
1746
+ results.push('Challenge frame disappeared, captcha may be solved');
1747
+ solved = true;
1748
+ break;
1749
+ }
1750
+ }
1751
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1752
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1753
+ }
1446
1754
  }
1447
1755
  else {
1448
1756
  const checkmark = checkboxFrame.locator('.check.solved, #checkbox[aria-checked="true"]');
@@ -1490,66 +1798,145 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1490
1798
  }
1491
1799
  }
1492
1800
  if (captchaType === 'funcaptcha') {
1493
- results.push('FunCaptcha (Arkose Labs) detected. Analyzing puzzle...');
1801
+ results.push('FunCaptcha (Arkose Labs) detected. Auto-solving...');
1494
1802
  try {
1495
1803
  const fcFrame = funcaptchaFrame;
1496
1804
  if (fcFrame) {
1497
1805
  await p.waitForTimeout(2000);
1498
- const puzzleType = await fcFrame.evaluate(() => {
1499
- const body = document.body.innerHTML;
1500
- if (body.includes('rotate') || body.includes('rotation'))
1501
- return 'rotation';
1502
- if (body.includes('pick') || body.includes('match'))
1503
- return 'image-match';
1504
- if (body.includes('drag') || body.includes('drop'))
1505
- return 'drag-drop';
1506
- if (body.includes('count') || body.includes('how many'))
1507
- return 'counting';
1508
- if (body.includes('dice'))
1509
- return 'dice';
1510
- if (body.includes('gamemode') || body.includes('game'))
1511
- return 'game';
1512
- return 'unknown';
1513
- }).catch(() => 'unknown');
1514
1806
  const instruction = await fcFrame.evaluate(() => {
1515
1807
  const h2 = document.querySelector('h2, h3, .challenge-title, #challenge-stage .title, [class*="instruction"], [class*="prompt"]');
1516
1808
  return h2?.textContent?.trim() || '';
1517
1809
  }).catch(() => '');
1518
- results.push(`Puzzle type: ${puzzleType}`);
1519
1810
  if (instruction)
1520
1811
  results.push(`Instruction: "${instruction}"`);
1521
- const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1522
- try {
1523
- await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1524
- }
1525
- catch {
1526
- await p.screenshot({ path: screenshotPath });
1527
- }
1528
- results.push(`Puzzle screenshot: ${screenshotPath}`);
1529
- const interactiveEls = await fcFrame.evaluate(() => {
1530
- const els = [];
1531
- document.querySelectorAll('canvas, img, [class*="game"], [class*="challenge"], [class*="puzzle"], button, input[type="range"], .slider').forEach(el => {
1532
- els.push(`${el.tagName.toLowerCase()}.${el.className?.toString().slice(0, 60) || ''} [${el.getAttribute('role') || ''}]`);
1533
- });
1534
- return els;
1535
- }).catch(() => []);
1536
- if (interactiveEls.length > 0) {
1537
- results.push(`Interactive elements: ${interactiveEls.slice(0, 10).join(', ')}`);
1812
+ const maxAttempts = 3;
1813
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1814
+ if (attempt > 0)
1815
+ results.push(`\nRetry ${attempt}/${maxAttempts - 1}...`);
1816
+ const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1817
+ try {
1818
+ await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1819
+ }
1820
+ catch {
1821
+ await p.screenshot({ path: screenshotPath });
1822
+ }
1823
+ try {
1824
+ const ssBase64 = readFileBase64(screenshotPath);
1825
+ const prompt = instruction
1826
+ ? `This is a FunCaptcha puzzle. The instruction is: "${instruction}". Analyze the image and tell me EXACTLY what to do. Reply in this format:\n- For clicking: "CLICK x,y" (pixel coordinates relative to the puzzle image)\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees" (estimated rotation angle in degrees)\n- For selecting an option: "CLICK x,y" on the correct answer\nBe precise with coordinates.`
1827
+ : `This is a FunCaptcha puzzle. Analyze the image and determine what action is needed to solve it. Reply in this format:\n- For clicking: "CLICK x,y"\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees"\nBe precise with coordinates.`;
1828
+ const visionResp = await visionClassify(ssBase64, prompt);
1829
+ results.push(`Vision model: "${visionResp}"`);
1830
+ const clickMatch = visionResp.match(/CLICK\s+([\d.]+)\s*,\s*([\d.]+)/i);
1831
+ const dragMatch = visionResp.match(/DRAG\s+([\d.]+)\s*,\s*([\d.]+)\s+([\d.]+)\s*,\s*([\d.]+)/i);
1832
+ const rotateMatch = visionResp.match(/ROTATE\s+(-?[\d.]+)/i);
1833
+ const puzzleBox = await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().boundingBox().catch(() => null);
1834
+ const offsetX = puzzleBox?.x || 0;
1835
+ const offsetY = puzzleBox?.y || 0;
1836
+ if (clickMatch) {
1837
+ const cx = offsetX + parseFloat(clickMatch[1]);
1838
+ const cy = offsetY + parseFloat(clickMatch[2]);
1839
+ await humanMove(cx, cy, p);
1840
+ await p.waitForTimeout(100 + Math.random() * 150);
1841
+ await p.mouse.down();
1842
+ await p.waitForTimeout(60 + Math.random() * 80);
1843
+ await p.mouse.up();
1844
+ results.push(`Clicked at (${Math.round(cx)}, ${Math.round(cy)})`);
1845
+ await p.waitForTimeout(2000);
1846
+ }
1847
+ else if (dragMatch) {
1848
+ const fromX = offsetX + parseFloat(dragMatch[1]);
1849
+ const fromY = offsetY + parseFloat(dragMatch[2]);
1850
+ const toX = offsetX + parseFloat(dragMatch[3]);
1851
+ const toY = offsetY + parseFloat(dragMatch[4]);
1852
+ await humanMove(fromX, fromY, p);
1853
+ await p.waitForTimeout(150 + Math.random() * 200);
1854
+ await p.mouse.down();
1855
+ await p.waitForTimeout(200 + Math.random() * 300);
1856
+ const steps = 20 + Math.floor(Math.random() * 15);
1857
+ for (let i = 1; i <= steps; i++) {
1858
+ const progress = i / steps;
1859
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
1860
+ await p.mouse.move(fromX + (toX - fromX) * eased, fromY + (toY - fromY) * eased + (Math.random() - 0.5) * 2);
1861
+ await p.waitForTimeout(10 + Math.random() * 15);
1862
+ }
1863
+ await p.mouse.move(toX, toY);
1864
+ await p.waitForTimeout(150);
1865
+ await p.mouse.up();
1866
+ results.push(`Dragged from (${Math.round(fromX)},${Math.round(fromY)}) to (${Math.round(toX)},${Math.round(toY)})`);
1867
+ await p.waitForTimeout(2000);
1868
+ }
1869
+ else if (rotateMatch) {
1870
+ const degrees = parseFloat(rotateMatch[1]);
1871
+ const rotator = fcFrame.locator('.rotator, [class*="rotate"], [class*="spinner"], canvas, .game-item').first();
1872
+ if (await rotator.count() > 0) {
1873
+ const rBox = await rotator.boundingBox();
1874
+ if (rBox) {
1875
+ const cx = rBox.x + rBox.width / 2;
1876
+ const cy = rBox.y + rBox.height / 2;
1877
+ const radius = rBox.width / 2;
1878
+ const startX = cx + radius;
1879
+ const startY = cy;
1880
+ const endAngle = (degrees * Math.PI) / 180;
1881
+ const endX = cx + radius * Math.cos(endAngle);
1882
+ const endY = cy + radius * Math.sin(endAngle);
1883
+ await humanMove(startX, startY, p);
1884
+ await p.waitForTimeout(150);
1885
+ await p.mouse.down();
1886
+ await p.waitForTimeout(200);
1887
+ const steps = 30;
1888
+ for (let i = 1; i <= steps; i++) {
1889
+ const angle = (endAngle * i) / steps;
1890
+ await p.mouse.move(cx + radius * Math.cos(angle), cy + radius * Math.sin(angle));
1891
+ await p.waitForTimeout(15 + Math.random() * 10);
1892
+ }
1893
+ await p.mouse.move(endX, endY);
1894
+ await p.waitForTimeout(150);
1895
+ await p.mouse.up();
1896
+ results.push(`Rotated ${degrees}°`);
1897
+ await p.waitForTimeout(2000);
1898
+ }
1899
+ }
1900
+ else {
1901
+ results.push('[WARN] No rotatable element found');
1902
+ }
1903
+ }
1904
+ else {
1905
+ results.push(`Could not parse vision model response: "${visionResp}"`);
1906
+ results.push('Falling back to manual mode. Read the puzzle screenshot and use click/drag-to/evaluate to solve.');
1907
+ break;
1908
+ }
1909
+ const stillChallenge = await fcFrame.locator('#challenge-stage, .challenge-content').count();
1910
+ const successIndicators = await fcFrame.locator('[class*="success"], [class*="correct"], [class*="verified"], .game-success').count();
1911
+ if (successIndicators > 0) {
1912
+ results.push('[OK] FunCaptcha solved!');
1913
+ break;
1914
+ }
1915
+ if (stillChallenge === 0) {
1916
+ results.push('[OK] FunCaptcha challenge dismissed — likely solved.');
1917
+ break;
1918
+ }
1919
+ if (attempt === maxAttempts - 1) {
1920
+ results.push(`Auto-solve exhausted after ${maxAttempts} attempts. Use click/drag-to/evaluate for manual solving.`);
1921
+ }
1922
+ else {
1923
+ results.push('Attempt did not solve, retrying...');
1924
+ await p.waitForTimeout(1500);
1925
+ }
1926
+ }
1927
+ catch (e) {
1928
+ results.push(`Vision model failed: ${e.message}`);
1929
+ results.push('Auto-solve requires a vision-capable model. Read the puzzle screenshot at .aurix-funcaptcha-puzzle.png and use click/drag-to/evaluate to solve manually.');
1930
+ break;
1931
+ }
1538
1932
  }
1539
- results.push('');
1540
- results.push('To solve FunCaptcha:');
1541
- results.push('1. Read the puzzle screenshot to understand the challenge');
1542
- results.push('2. For rotation puzzles: use "drag-to" to rotate the object to the correct position');
1543
- results.push('3. For image match: use "click" on matching images');
1544
- results.push('4. For drag-drop: use "drag-to" with source and target coordinates');
1545
- results.push('5. Use "evaluate" with JavaScript if puzzle needs programmatic interaction');
1546
1933
  }
1547
1934
  else {
1548
1935
  results.push(err('FunCaptcha frame not found', 'Use "detect-captcha" to scan the page first'));
1549
1936
  }
1550
1937
  }
1551
1938
  catch (e) {
1552
- results.push(err(`FunCaptcha analysis failed: ${e.message}`));
1939
+ results.push(err(`FunCaptcha auto-solve failed: ${e.message}`));
1553
1940
  }
1554
1941
  }
1555
1942
  if (captchaType === 'mtcaptcha' || captchaType === 'geetest') {
@@ -1558,37 +1945,6 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1558
1945
  const hasSlider = await targetFrame.locator('.geetest_slider_button, .geetest_slider, [class*="slider_button"], [class*="slider-track"]').count();
1559
1946
  if (hasSlider > 0) {
1560
1947
  results.push('Type: SLIDER puzzle');
1561
- results.push('The puzzle requires dragging a piece to fill a gap.');
1562
- results.push('');
1563
- const sliderInfo = await targetFrame.evaluate(() => {
1564
- const info = {};
1565
- const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1566
- if (cut) {
1567
- const cutRect = cut.getBoundingClientRect();
1568
- const style = window.getComputedStyle(cut);
1569
- info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1570
- }
1571
- const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1572
- if (bg) {
1573
- const bgRect = bg.getBoundingClientRect();
1574
- info.bg = { left: bgRect.left, width: bgRect.width };
1575
- }
1576
- const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1577
- if (piece) {
1578
- const pieceRect = piece.getBoundingClientRect();
1579
- info.piece = { left: pieceRect.left, width: pieceRect.width };
1580
- }
1581
- const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1582
- if (slider) {
1583
- const sliderRect = slider.getBoundingClientRect();
1584
- info.slider = { left: sliderRect.left, width: sliderRect.width };
1585
- }
1586
- const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1587
- if (track) {
1588
- info.track = { width: track.getBoundingClientRect().width };
1589
- }
1590
- return info;
1591
- });
1592
1948
  const puzzleEl = targetFrame.locator('.geetest_panel, .geetest_widget, [class*="geetest_container"]').first();
1593
1949
  const screenshotPath = join(homedir(), '.aurix-slider-puzzle.png');
1594
1950
  try {
@@ -1601,44 +1957,136 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1601
1957
  await p.screenshot({ path: screenshotPath });
1602
1958
  }
1603
1959
  results.push(`Puzzle screenshot: ${screenshotPath}`);
1604
- let gapOffset = null;
1605
- if (sliderInfo.cut && sliderInfo.bg) {
1606
- if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1607
- gapOffset = Math.round(sliderInfo.cut.styleLeft);
1608
- results.push(`Gap position (CSS left): ${gapOffset}px from puzzle left edge`);
1960
+ const maxAttempts = 3;
1961
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1962
+ if (attempt > 0)
1963
+ results.push(`\nSlider retry ${attempt}/${maxAttempts - 1}...`);
1964
+ const sliderInfo = await targetFrame.evaluate(() => {
1965
+ const info = {};
1966
+ const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1967
+ if (cut) {
1968
+ const cutRect = cut.getBoundingClientRect();
1969
+ const style = window.getComputedStyle(cut);
1970
+ info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1971
+ }
1972
+ const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1973
+ if (bg) {
1974
+ const bgRect = bg.getBoundingClientRect();
1975
+ info.bg = { left: bgRect.left, width: bgRect.width };
1976
+ }
1977
+ const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1978
+ if (piece) {
1979
+ const pieceRect = piece.getBoundingClientRect();
1980
+ info.piece = { left: pieceRect.left, width: pieceRect.width };
1981
+ }
1982
+ const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1983
+ if (slider) {
1984
+ const sliderRect = slider.getBoundingClientRect();
1985
+ info.slider = { left: sliderRect.left, width: sliderRect.width, centerX: sliderRect.left + sliderRect.width / 2, centerY: sliderRect.top + sliderRect.height / 2 };
1986
+ }
1987
+ const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1988
+ if (track)
1989
+ info.track = { width: track.getBoundingClientRect().width };
1990
+ return info;
1991
+ });
1992
+ let gapOffset = null;
1993
+ if (sliderInfo.cut && sliderInfo.bg) {
1994
+ if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1995
+ gapOffset = Math.round(sliderInfo.cut.styleLeft);
1996
+ }
1997
+ else {
1998
+ gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1999
+ }
1609
2000
  }
1610
- else {
1611
- gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1612
- results.push(`Gap position (rect): ${gapOffset}px from puzzle left edge`);
2001
+ if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
2002
+ const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
2003
+ if (match)
2004
+ gapOffset = Math.round(parseFloat(match[1]));
1613
2005
  }
1614
- }
1615
- if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
1616
- const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
1617
- if (match) {
1618
- gapOffset = Math.round(parseFloat(match[1]));
1619
- results.push(`Gap position (transform): ${gapOffset}px from puzzle left edge`);
2006
+ if (gapOffset === null) {
2007
+ results.push('DOM gap detection failed, using vision model...');
2008
+ try {
2009
+ const ssBase64 = readFileBase64(screenshotPath);
2010
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
2011
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
2012
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2013
+ gapOffset = parsed;
2014
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2015
+ }
2016
+ else {
2017
+ results.push(`Vision model returned: "${visionResp}" — could not parse gap position`);
2018
+ }
2019
+ }
2020
+ catch (e) {
2021
+ results.push(`Vision model failed: ${e.message}`);
2022
+ }
2023
+ }
2024
+ if (gapOffset === null) {
2025
+ results.push('[WARN] Could not determine gap position. Use "slider-analyze" for manual analysis, then "drag-to" to slide.');
2026
+ break;
1620
2027
  }
1621
- }
1622
- if (sliderInfo.slider)
1623
- results.push(`Slider handle: x=${Math.round(sliderInfo.slider.left)}, width=${Math.round(sliderInfo.slider.width)}`);
1624
- if (sliderInfo.track)
1625
- results.push(`Track width: ${Math.round(sliderInfo.track.width)}px`);
1626
- if (gapOffset !== null) {
1627
2028
  const pieceHalf = Math.round((sliderInfo.piece?.width || 44) / 2);
1628
2029
  const adjusted = gapOffset - pieceHalf;
1629
- results.push('');
1630
- results.push(`[OK] RECOMMENDED: drag-to target=".geetest_slider_button" value="${adjusted},0"`);
1631
- results.push(`(gap ${gapOffset}px - half piece ${pieceHalf}px = ${adjusted}px drag distance)`);
1632
- }
1633
- else {
1634
- results.push('');
1635
- results.push('[WARN] Could not auto-detect gap. Look at the puzzle screenshot, find the gap/hole, and estimate the pixel offset.');
1636
- results.push('Then: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2030
+ results.push(`Gap: ${gapOffset}px, piece half: ${pieceHalf}px, drag distance: ${adjusted}px`);
2031
+ if (sliderInfo.slider) {
2032
+ try {
2033
+ const startX = sliderInfo.slider.centerX;
2034
+ const startY = sliderInfo.slider.centerY;
2035
+ const endX = startX + adjusted;
2036
+ await humanMove(startX, startY, p);
2037
+ await p.waitForTimeout(150 + Math.random() * 250);
2038
+ await p.mouse.down();
2039
+ await p.waitForTimeout(200 + Math.random() * 300);
2040
+ const steps = 25 + Math.floor(Math.random() * 20);
2041
+ for (let i = 1; i <= steps; i++) {
2042
+ const progress = i / steps;
2043
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2044
+ const x = startX + adjusted * eased + (Math.random() - 0.5) * 2;
2045
+ const y = startY + (Math.random() - 0.5) * 2;
2046
+ await p.mouse.move(x, y);
2047
+ await p.waitForTimeout(10 + Math.random() * 20);
2048
+ }
2049
+ await p.mouse.move(endX, startY);
2050
+ await p.waitForTimeout(150);
2051
+ await p.mouse.up();
2052
+ await p.waitForTimeout(2000);
2053
+ results.push('Slider dragged, checking result...');
2054
+ const successEl = await targetFrame.locator('.geetest_success, .geetest_tip_success, [class*="success"], [class*="verified"]').count();
2055
+ if (successEl > 0) {
2056
+ results.push('[OK] Slider captcha solved!');
2057
+ break;
2058
+ }
2059
+ const failEl = await targetFrame.locator('.geetest_fail, .geetest_tip_fail, [class*="fail"], [class*="error"], [class*="retry"]').count();
2060
+ if (failEl > 0) {
2061
+ results.push('Slider attempt failed, retrying...');
2062
+ const refreshBtn = targetFrame.locator('.geetest_refresh, [class*="refresh"], [class*="retry"]').first();
2063
+ if (await refreshBtn.count() > 0)
2064
+ await refreshBtn.click().catch(() => { });
2065
+ await p.waitForTimeout(1500);
2066
+ try {
2067
+ if (await puzzleEl.count() > 0)
2068
+ await puzzleEl.screenshot({ path: screenshotPath });
2069
+ }
2070
+ catch { }
2071
+ continue;
2072
+ }
2073
+ results.push('[OK] Slider dragged — outcome unconfirmed, check page state.');
2074
+ break;
2075
+ }
2076
+ catch (e) {
2077
+ results.push(`Drag failed: ${e.message}`);
2078
+ break;
2079
+ }
2080
+ }
2081
+ else {
2082
+ results.push('[WARN] Slider handle not found in DOM.');
2083
+ break;
2084
+ }
1637
2085
  }
1638
2086
  }
1639
2087
  else {
1640
2088
  results.push('Type: IMAGE challenge');
1641
- const gridResult = await analyzeImageChallenge(p, targetFrame, captchaType);
2089
+ const gridResult = await solveCaptchaGrid(p, targetFrame, captchaType);
1642
2090
  results.push(gridResult);
1643
2091
  }
1644
2092
  }
@@ -1649,11 +2097,32 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1649
2097
  const screenshotPath = join(homedir(), '.aurix-captcha-challenge.png');
1650
2098
  await imgCaptcha.first().screenshot({ path: screenshotPath });
1651
2099
  results.push(`Captcha image saved: ${screenshotPath}`);
1652
- results.push('Read the text from the screenshot and use "fill" to type it into the captcha input field.');
1653
- const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
1654
- if (await input.count() > 0) {
1655
- const name = await input.first().getAttribute('name') || await input.first().getAttribute('id') || 'captcha input';
1656
- results.push(`Captcha input field found: ${name}`);
2100
+ try {
2101
+ const ssBase64 = readFileBase64(screenshotPath);
2102
+ const visionResp = await visionClassify(ssBase64, 'Read the text/numbers in this captcha image. Reply with ONLY the exact text shown, nothing else.');
2103
+ const captchaText = visionResp.replace(/[^a-zA-Z0-9]/g, '').trim();
2104
+ if (captchaText.length >= 2) {
2105
+ const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
2106
+ if (await input.count() > 0) {
2107
+ await input.first().click();
2108
+ await input.first().fill('');
2109
+ for (const char of captchaText) {
2110
+ await input.first().type(char, { delay: 80 + Math.random() * 120 });
2111
+ }
2112
+ results.push(`[OK] Auto-filled captcha text: "${captchaText}"`);
2113
+ }
2114
+ else {
2115
+ results.push(`Vision model read: "${captchaText}" — but no captcha input field found. Use "fill" to type it manually.`);
2116
+ }
2117
+ }
2118
+ else {
2119
+ results.push(`Vision model returned: "${visionResp}" — could not read captcha text`);
2120
+ results.push('Read the screenshot and use "fill" to type the captcha text manually.');
2121
+ }
2122
+ }
2123
+ catch (e) {
2124
+ results.push(`Vision auto-fill failed: ${e.message}`);
2125
+ results.push('Read the captcha screenshot and use "fill" to type it manually.');
1657
2126
  }
1658
2127
  }
1659
2128
  else {
@@ -1712,7 +2181,8 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1712
2181
  }
1713
2182
  case 'click-tile': {
1714
2183
  const p = await ensureBrowser();
1715
- const tileIndex = parseInt(value || target || '0');
2184
+ const rawValue = (value || target || '0').toString();
2185
+ const tileIndices = rawValue.split(',').map(s => parseInt(s.trim())).filter(n => !isNaN(n));
1716
2186
  const frames = p.frames();
1717
2187
  let challengeFrame = null;
1718
2188
  let provider = 'unknown';
@@ -1736,59 +2206,111 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1736
2206
  }
1737
2207
  if (!challengeFrame)
1738
2208
  challengeFrame = p;
1739
- const tiles = await findGridTiles(challengeFrame, provider);
1740
- if (tiles.length === 0)
2209
+ const initialTiles = await findGridTiles(challengeFrame, provider);
2210
+ if (initialTiles.length === 0)
1741
2211
  return err('No grid tiles found', 'Use "captcha-grid" to scan the challenge first');
1742
- if (tileIndex < 0 || tileIndex >= tiles.length)
1743
- return err(`Tile index ${tileIndex} out of range (0-${tiles.length - 1})`);
1744
- try {
1745
- const tile = tiles[tileIndex];
1746
- const isRecaptcha = provider === 'recaptcha';
1747
- const selectedClass = isRecaptcha
1748
- ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
1749
- : '.task-image.selected, .task .selected';
1750
- const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1751
- const tileBox = await tile.boundingBox();
1752
- if (tileBox) {
1753
- const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
1754
- const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
1755
- await humanMove(clickX, clickY, p);
1756
- await p.waitForTimeout(80 + Math.random() * 120);
1757
- await p.mouse.down();
1758
- await p.waitForTimeout(60 + Math.random() * 100);
1759
- await p.mouse.up();
2212
+ for (const idx of tileIndices) {
2213
+ if (idx < 0 || idx >= initialTiles.length)
2214
+ return err(`Tile index ${idx} out of range (0-${initialTiles.length - 1})`);
2215
+ }
2216
+ const isRecaptcha = provider === 'recaptcha';
2217
+ const selectedClass = isRecaptcha
2218
+ ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
2219
+ : '.task-image.selected, .task .selected';
2220
+ let instruction = '';
2221
+ if (isRecaptcha) {
2222
+ try {
2223
+ const instrEl = challengeFrame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h');
2224
+ if (await instrEl.count() > 0)
2225
+ instruction = (await instrEl.first().textContent() || '').trim();
2226
+ if (!instruction) {
2227
+ const st = challengeFrame.locator('strong').first();
2228
+ if (await st.count() > 0)
2229
+ instruction = (await st.textContent() || '').trim();
2230
+ }
1760
2231
  }
1761
- else {
1762
- await tile.click({ force: true });
1763
- }
1764
- await p.waitForTimeout(500 + Math.random() * 400);
1765
- const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1766
- const selectionChanged = selectedCount !== selectedBefore;
1767
- const clickStatus = selectionChanged
1768
- ? `selection changed (${selectedBefore} → ${selectedCount})`
1769
- : `selection unchanged (${selectedCount}) click may not have registered, or this tile toggled off`;
1770
- if (isRecaptcha) {
1771
- await p.waitForTimeout(1500 + Math.random() * 1000);
1772
- const newTiles = await findGridTiles(challengeFrame, provider);
1773
- const screenshotPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
1774
- await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: screenshotPath }).catch(() => p.screenshot({ path: screenshotPath }));
1775
- return ok(`Clicked tile ${tileIndex}`, {
1776
- selection: clickStatus,
1777
- 'new tile': 'appeared check screenshot and evaluate',
1778
- screenshot: screenshotPath,
1779
- next: 'Use "click-tile" for next matching tile, or "captcha-verify" when done',
1780
- });
2232
+ catch { }
2233
+ }
2234
+ const results = [];
2235
+ results.push(`Clicking ${tileIndices.length} tile(s): [${tileIndices.join(', ')}]`);
2236
+ for (const tileIndex of tileIndices) {
2237
+ try {
2238
+ const currentTiles = await findGridTiles(challengeFrame, provider);
2239
+ if (tileIndex >= currentTiles.length) {
2240
+ results.push(` Tile ${tileIndex}: out of range (${currentTiles.length} tiles now), skipping`);
2241
+ continue;
2242
+ }
2243
+ const tile = currentTiles[tileIndex];
2244
+ const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2245
+ const tileBox = await tile.boundingBox();
2246
+ if (tileBox) {
2247
+ const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
2248
+ const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
2249
+ await humanMove(clickX, clickY, p);
2250
+ await p.waitForTimeout(80 + Math.random() * 120);
2251
+ await p.mouse.down();
2252
+ await p.waitForTimeout(60 + Math.random() * 100);
2253
+ await p.mouse.up();
2254
+ }
2255
+ else {
2256
+ await tile.click({ force: true });
2257
+ }
2258
+ await p.waitForTimeout(500 + Math.random() * 400);
2259
+ const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2260
+ const clickStatus = selectedCount !== selectedBefore
2261
+ ? `selected (${selectedBefore} → ${selectedCount})`
2262
+ : `unchanged (${selectedCount})`;
2263
+ results.push(` Tile ${tileIndex}: ${clickStatus}`);
2264
+ if (isRecaptcha) {
2265
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2266
+ const newTiles = await findGridTiles(challengeFrame, provider);
2267
+ const afterPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
2268
+ try {
2269
+ await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: afterPath }).catch(() => p.screenshot({ path: afterPath }));
2270
+ }
2271
+ catch { }
2272
+ if (tileIndex < newTiles.length && instruction) {
2273
+ try {
2274
+ await newTiles[tileIndex].screenshot({ path: afterPath });
2275
+ const newBase64 = readFileBase64(afterPath);
2276
+ const newResp = await visionClassify(newBase64, `Does this image contain ${instruction}? Reply YES or NO only.`);
2277
+ if (newResp.toLowerCase().includes('yes')) {
2278
+ const newTile = newTiles[tileIndex];
2279
+ const newBox = await newTile.boundingBox();
2280
+ if (newBox) {
2281
+ const nx = newBox.x + newBox.width * (0.3 + Math.random() * 0.4);
2282
+ const ny = newBox.y + newBox.height * (0.3 + Math.random() * 0.4);
2283
+ await humanMove(nx, ny, p);
2284
+ await p.waitForTimeout(80 + Math.random() * 120);
2285
+ await p.mouse.down();
2286
+ await p.waitForTimeout(60 + Math.random() * 100);
2287
+ await p.mouse.up();
2288
+ results.push(` → Replacement tile ${tileIndex} also matched, clicked`);
2289
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2290
+ }
2291
+ }
2292
+ else {
2293
+ results.push(` → Replacement tile ${tileIndex} doesn't match`);
2294
+ }
2295
+ }
2296
+ catch { }
2297
+ }
2298
+ }
2299
+ }
2300
+ catch (e) {
2301
+ results.push(` Tile ${tileIndex}: FAILED — ${e.message}`);
1781
2302
  }
1782
- const ss = await autoScreenshot(p, 'click-tile');
1783
- return ok(`Clicked tile ${tileIndex}`, {
1784
- selection: clickStatus,
1785
- screenshot: ss,
1786
- next: 'Continue clicking matching tiles, then use "captcha-verify"',
1787
- });
1788
2303
  }
1789
- catch (e) {
1790
- return err(`Failed to click tile ${tileIndex}: ${e.message}`, 'Use "captcha-grid" to re-scan the challenge');
2304
+ if (isRecaptcha) {
2305
+ results.push('');
2306
+ results.push('Use "captcha-verify" when all matching tiles are clicked, or "captcha-grid" to re-analyze.');
1791
2307
  }
2308
+ else {
2309
+ const ss = await autoScreenshot(p, 'click-tile');
2310
+ results.push(`Screenshot: ${ss}`);
2311
+ results.push('Continue clicking matching tiles, then use "captcha-verify"');
2312
+ }
2313
+ return results.join('\n');
1792
2314
  }
1793
2315
  case 'captcha-verify': {
1794
2316
  const p = await ensureBrowser();
@@ -1815,6 +2337,18 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1815
2337
  }
1816
2338
  if (!challengeFrame)
1817
2339
  challengeFrame = p;
2340
+ const timeSinceAnalyze = _lastGridAnalyzeTime > 0 ? Date.now() - _lastGridAnalyzeTime : 0;
2341
+ if (timeSinceAnalyze > 90_000 && _lastGridAnalyzeTime > 0) {
2342
+ const results = [];
2343
+ results.push(`[WARN] Grid was analyzed ${Math.round(timeSinceAnalyze / 1000)}s ago — challenge likely refreshed.`);
2344
+ results.push('Re-analyzing before verify...');
2345
+ try {
2346
+ const reAnalyze = await analyzeImageChallenge(p, challengeFrame, provider);
2347
+ results.push(reAnalyze);
2348
+ }
2349
+ catch { }
2350
+ return results.join('\n');
2351
+ }
1818
2352
  try {
1819
2353
  let verifyBtn = challengeFrame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
1820
2354
  if (await verifyBtn.count() === 0) {
@@ -1830,25 +2364,71 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1830
2364
  if (errorText > 0) {
1831
2365
  const errorMsg = await challengeFrame.locator('.rc-imageselect-incorrect-response, .error-message').first().textContent().catch(() => 'Incorrect answer');
1832
2366
  await p.screenshot({ path: screenshotPath });
1833
- return err(`Verification failed: "${errorMsg}"`, `Challenge refreshed. Use "captcha-grid" to re-analyze, then click matching tiles again. Screenshot: ${screenshotPath}`);
2367
+ const results = [];
2368
+ results.push(`Verification failed: "${errorMsg}". Auto-retrying...`);
2369
+ const maxRetries = 3;
2370
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2371
+ results.push(`\nRetry ${attempt + 1}/${maxRetries}...`);
2372
+ await p.waitForTimeout(2000);
2373
+ const currentFrames = p.frames();
2374
+ const retryFrame = currentFrames.find((f) => {
2375
+ const u = f.url();
2376
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2377
+ (u.includes('hcaptcha') && u.includes('challenge'));
2378
+ });
2379
+ if (!retryFrame) {
2380
+ results.push('Challenge frame gone — captcha may be solved');
2381
+ await p.screenshot({ path: screenshotPath });
2382
+ return results.join('\n');
2383
+ }
2384
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2385
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2386
+ results.push(solveResult);
2387
+ if (solveResult.includes('Captcha solved!')) {
2388
+ return results.join('\n');
2389
+ }
2390
+ }
2391
+ results.push(`\nAuto-retry exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
2392
+ return results.join('\n');
1834
2393
  }
1835
2394
  const newChallenge = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').count();
1836
2395
  if (newChallenge > 0) {
1837
2396
  const instruction = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent().catch(() => '');
1838
2397
  await p.screenshot({ path: screenshotPath });
1839
- return warn(`New challenge appeared: "${instruction}"`, {
1840
- screenshot: screenshotPath,
1841
- next: 'Use "captcha-grid" to analyze and "click-tile" to solve',
1842
- });
2398
+ const results = [];
2399
+ results.push(`New challenge appeared: "${instruction}". Auto-solving...`);
2400
+ const maxRetries = 3;
2401
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2402
+ if (attempt > 0)
2403
+ results.push(`\nRetry ${attempt}/${maxRetries - 1}...`);
2404
+ const currentFrames = p.frames();
2405
+ const retryFrame = currentFrames.find((f) => {
2406
+ const u = f.url();
2407
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2408
+ (u.includes('hcaptcha') && u.includes('challenge'));
2409
+ });
2410
+ if (!retryFrame) {
2411
+ results.push('Challenge frame gone — captcha may be solved');
2412
+ return results.join('\n');
2413
+ }
2414
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2415
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2416
+ results.push(solveResult);
2417
+ if (solveResult.includes('Captcha solved!'))
2418
+ return results.join('\n');
2419
+ await p.waitForTimeout(2000);
2420
+ }
2421
+ results.push(`\nAuto-solve exhausted. Use "captcha-grid" and "click-tile" manually.`);
2422
+ return results.join('\n');
1843
2423
  }
1844
2424
  await p.screenshot({ path: screenshotPath });
1845
2425
  return ok('Verification submitted', {
1846
2426
  screenshot: screenshotPath,
1847
- note: 'Check if the form/page progressed. If verification widget reappears, use "captcha-grid" again.',
2427
+ note: 'Check if the form/page progressed. If verification widget reappears, use "solve-captcha" again.',
1848
2428
  });
1849
2429
  }
1850
2430
  catch (e) {
1851
- return err(`Verify failed: ${e.message}`, 'Use "captcha-grid" to re-scan and retry');
2431
+ return err(`Verify failed: ${e.message}`, 'Use "solve-captcha" to retry automatically');
1852
2432
  }
1853
2433
  }
1854
2434
  case 'slider-analyze': {
@@ -1940,19 +2520,63 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1940
2520
  const pieceHalfWidth = Math.round((sliderInfo.piece.width || 44) / 2);
1941
2521
  const adjustedOffset = gapOffset - pieceHalfWidth;
1942
2522
  results.push('');
1943
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${adjustedOffset},0"`);
1944
- results.push(`(gap at ${gapOffset}px minus half piece width ${pieceHalfWidth}px = ${adjustedOffset}px)`);
2523
+ results.push(`[OK] Gap at ${gapOffset}px, piece half ${pieceHalfWidth}px, drag distance ${adjustedOffset}px`);
2524
+ gapOffset = adjustedOffset;
1945
2525
  }
1946
2526
  else if (gapOffset !== null) {
1947
2527
  results.push('');
1948
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${gapOffset},0"`);
2528
+ results.push(`[OK] Gap at ${gapOffset}px`);
1949
2529
  }
1950
2530
  else {
1951
2531
  results.push('');
1952
- results.push('[WARN] Could not auto-detect gap position from DOM.');
1953
- results.push('Look at the puzzle screenshot to find where the gap/hole is.');
1954
- results.push('Estimate the pixel distance from the LEFT edge of the puzzle to the CENTER of the gap.');
1955
- results.push('Then use: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2532
+ results.push('DOM gap detection failed, trying vision model...');
2533
+ try {
2534
+ const ssBase64 = readFileBase64(screenshotPath);
2535
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
2536
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
2537
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2538
+ gapOffset = parsed;
2539
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2540
+ }
2541
+ else {
2542
+ results.push(`Vision model returned: "${visionResp}" — could not parse`);
2543
+ }
2544
+ }
2545
+ catch (e) {
2546
+ results.push(`Vision model failed: ${e.message}`);
2547
+ }
2548
+ }
2549
+ if (gapOffset !== null && sliderInfo.slider) {
2550
+ results.push('Auto-dragging slider...');
2551
+ try {
2552
+ const startX = sliderInfo.slider.centerX || (sliderInfo.slider.left + sliderInfo.slider.width / 2);
2553
+ const startY = sliderInfo.slider.centerY || (sliderInfo.slider.top + sliderInfo.slider.height / 2);
2554
+ const endX = startX + gapOffset;
2555
+ await humanMove(startX, startY, p);
2556
+ await p.waitForTimeout(150 + Math.random() * 250);
2557
+ await p.mouse.down();
2558
+ await p.waitForTimeout(200 + Math.random() * 300);
2559
+ const steps = 25 + Math.floor(Math.random() * 20);
2560
+ for (let i = 1; i <= steps; i++) {
2561
+ const progress = i / steps;
2562
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2563
+ const x = startX + gapOffset * eased + (Math.random() - 0.5) * 2;
2564
+ const y = startY + (Math.random() - 0.5) * 2;
2565
+ await p.mouse.move(x, y);
2566
+ await p.waitForTimeout(10 + Math.random() * 20);
2567
+ }
2568
+ await p.mouse.move(endX, startY);
2569
+ await p.waitForTimeout(150);
2570
+ await p.mouse.up();
2571
+ await p.waitForTimeout(2000);
2572
+ results.push('[OK] Slider auto-dragged. Check page state to confirm.');
2573
+ }
2574
+ catch (e) {
2575
+ results.push(`Auto-drag failed: ${e.message}. Use: drag-to target=".geetest_slider_button" value="${gapOffset},0"`);
2576
+ }
2577
+ }
2578
+ else if (gapOffset === null) {
2579
+ results.push('Could not determine gap position. Use "drag-to" manually with estimated offset.');
1956
2580
  }
1957
2581
  if (sliderInfo.allGeeTestClasses?.length > 0) {
1958
2582
  results.push('');
@@ -2007,7 +2631,13 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
2007
2631
  results.push(` ✓ ${label}: already filled`);
2008
2632
  return true;
2009
2633
  }
2010
- await loc.fill(val, { timeout: 3000 });
2634
+ try {
2635
+ await loc.fill(val, { timeout: 3000 });
2636
+ }
2637
+ catch {
2638
+ await loc.click({ timeout: 3000 });
2639
+ await loc.pressSequentially(val, { delay: 30, timeout: 10000 });
2640
+ }
2011
2641
  results.push(` ✓ ${label}: filled`);
2012
2642
  return true;
2013
2643
  }
@@ -2290,7 +2920,13 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
2290
2920
  results.push(` ✓ ${label}: already filled`);
2291
2921
  return true;
2292
2922
  }
2293
- await loc.fill(val, { timeout: 3000 });
2923
+ try {
2924
+ await loc.fill(val, { timeout: 3000 });
2925
+ }
2926
+ catch {
2927
+ await loc.click({ timeout: 3000 });
2928
+ await loc.pressSequentially(val, { delay: 30, timeout: 10000 });
2929
+ }
2294
2930
  results.push(` ✓ ${label}: filled`);
2295
2931
  return true;
2296
2932
  }