aurix-ai 2.4.0 → 2.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/agent/AgentLoop.d.ts +0 -2
  2. package/dist/agent/AgentLoop.d.ts.map +1 -1
  3. package/dist/agent/AgentLoop.js +127 -32
  4. package/dist/agent/AgentLoop.js.map +1 -1
  5. package/dist/agent/MemoryEngine.d.ts +6 -0
  6. package/dist/agent/MemoryEngine.d.ts.map +1 -1
  7. package/dist/agent/MemoryEngine.js +25 -0
  8. package/dist/agent/MemoryEngine.js.map +1 -1
  9. package/dist/cli/App.d.ts.map +1 -1
  10. package/dist/cli/App.js +44 -19
  11. package/dist/cli/App.js.map +1 -1
  12. package/dist/cli/CommandPalette.d.ts +10 -0
  13. package/dist/cli/CommandPalette.d.ts.map +1 -0
  14. package/dist/cli/CommandPalette.js +95 -0
  15. package/dist/cli/CommandPalette.js.map +1 -0
  16. package/dist/cli/InputBox.d.ts.map +1 -1
  17. package/dist/cli/InputBox.js +49 -2
  18. package/dist/cli/InputBox.js.map +1 -1
  19. package/dist/cli/SessionBrowser.d.ts +15 -0
  20. package/dist/cli/SessionBrowser.d.ts.map +1 -0
  21. package/dist/cli/SessionBrowser.js +56 -0
  22. package/dist/cli/SessionBrowser.js.map +1 -0
  23. package/dist/cli/fileList.d.ts +4 -0
  24. package/dist/cli/fileList.d.ts.map +1 -0
  25. package/dist/cli/fileList.js +79 -0
  26. package/dist/cli/fileList.js.map +1 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +2 -0
  29. package/dist/index.js.map +1 -1
  30. package/dist/tools/Browser.d.ts.map +1 -1
  31. package/dist/tools/Browser.js +823 -191
  32. package/dist/tools/Browser.js.map +1 -1
  33. package/dist/tools/SpawnAgent.d.ts +4 -0
  34. package/dist/tools/SpawnAgent.d.ts.map +1 -0
  35. package/dist/tools/SpawnAgent.js +97 -0
  36. package/dist/tools/SpawnAgent.js.map +1 -0
  37. package/package.json +1 -1
@@ -1,7 +1,7 @@
1
1
  import { launchPersistentContext, ensureBinary } from 'cloakbrowser';
2
2
  import { homedir } from 'os';
3
3
  import { join } from 'path';
4
- import { readdirSync, unlinkSync } from 'fs';
4
+ import { readdirSync, readFileSync, unlinkSync } from 'fs';
5
5
  import { loadConfig } from '../agent/Config.js';
6
6
  function ok(msg, details) {
7
7
  const lines = [`[OK] ${msg}`];
@@ -33,6 +33,252 @@ async function autoScreenshot(p, label) {
33
33
  catch { }
34
34
  return path;
35
35
  }
36
+ // ─── Vision-Based Captcha Auto-Solve ──────────────────────────────────────
37
+ let _lastGridAnalyzeTime = 0;
38
+ function readFileBase64(path) {
39
+ return readFileSync(path).toString('base64');
40
+ }
41
+ async function visionClassify(imageBase64, prompt) {
42
+ const config = loadConfig();
43
+ const model = config.model || 'gpt-4o';
44
+ const body = {
45
+ model,
46
+ messages: [{
47
+ role: 'user',
48
+ content: [
49
+ { type: 'text', text: prompt },
50
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
51
+ ],
52
+ }],
53
+ max_tokens: 100,
54
+ };
55
+ const resp = await fetch(`${config.baseUrl}/chat/completions`, {
56
+ method: 'POST',
57
+ headers: {
58
+ 'Content-Type': 'application/json',
59
+ ...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
60
+ },
61
+ body: JSON.stringify(body),
62
+ });
63
+ if (!resp.ok)
64
+ throw new Error(`Vision API error: ${resp.status}`);
65
+ const text = await resp.text();
66
+ if (text.includes('data: ')) {
67
+ let content = '';
68
+ for (const line of text.split('\n')) {
69
+ if (line.startsWith('data: ') && line.trim() !== 'data: [DONE]') {
70
+ try {
71
+ const ev = JSON.parse(line.slice(6));
72
+ const delta = ev.choices?.[0]?.delta;
73
+ if (delta?.content)
74
+ content += delta.content;
75
+ if (delta?.text)
76
+ content += delta.text;
77
+ if (ev.choices?.[0]?.message?.content)
78
+ content += ev.choices[0].message.content;
79
+ }
80
+ catch { }
81
+ }
82
+ }
83
+ return content.trim();
84
+ }
85
+ const json = JSON.parse(text);
86
+ return (json.choices?.[0]?.message?.content || '').trim();
87
+ }
88
+ async function solveCaptchaGrid(page, frame, provider) {
89
+ const results = [];
90
+ const isRecaptcha = provider === 'recaptcha';
91
+ let instruction = '';
92
+ try {
93
+ const instrEl = frame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h, .geetest_tip_content, .mtcaptcha-label');
94
+ if (await instrEl.count() > 0) {
95
+ instruction = (await instrEl.first().textContent() || '').trim();
96
+ }
97
+ if (!instruction) {
98
+ const strongText = frame.locator('strong').first();
99
+ if (await strongText.count() > 0)
100
+ instruction = (await strongText.textContent() || '').trim();
101
+ }
102
+ }
103
+ catch { }
104
+ if (!instruction) {
105
+ results.push('[WARN] Could not extract captcha instruction, cannot auto-solve');
106
+ return results.join('\n');
107
+ }
108
+ results.push(`Auto-solving: "${instruction}"`);
109
+ try {
110
+ const home = homedir();
111
+ for (const f of readdirSync(home)) {
112
+ if (/^\.aurix-tile-(\d+|after-\d+)\.png$/.test(f)) {
113
+ try {
114
+ unlinkSync(join(home, f));
115
+ }
116
+ catch { }
117
+ }
118
+ }
119
+ }
120
+ catch { }
121
+ const tiles = await findGridTiles(frame, provider);
122
+ const gridScreenshotPath = join(homedir(), '.aurix-captcha-grid.png');
123
+ try {
124
+ const gridEl = frame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, .task, .challenge-view, table').first();
125
+ if (await gridEl.count() > 0)
126
+ await gridEl.screenshot({ path: gridScreenshotPath });
127
+ else
128
+ await frame.locator('body').screenshot({ path: gridScreenshotPath });
129
+ }
130
+ catch {
131
+ try {
132
+ await page.screenshot({ path: gridScreenshotPath });
133
+ }
134
+ catch { }
135
+ }
136
+ for (let i = 0; i < tiles.length; i++) {
137
+ try {
138
+ await tiles[i].screenshot({ path: join(homedir(), `.aurix-tile-${i}.png`) });
139
+ }
140
+ catch { }
141
+ }
142
+ const classifyPrompt = `Look at this captcha grid image. The instruction is: "${instruction}". Which tile images match this instruction? Reply with ONLY the 0-based indices separated by commas (e.g. "0,3,5"). If none match, reply "none".`;
143
+ let matchedIndices = [];
144
+ try {
145
+ const gridBase64 = readFileBase64(gridScreenshotPath);
146
+ const response = await visionClassify(gridBase64, classifyPrompt);
147
+ results.push(`Vision model: "${response}"`);
148
+ if (response.toLowerCase().includes('none')) {
149
+ results.push('Vision: no matching tiles, clicking verify directly');
150
+ }
151
+ else {
152
+ matchedIndices = response.split(',')
153
+ .map(s => parseInt(s.trim()))
154
+ .filter(n => !isNaN(n) && n >= 0 && n < tiles.length);
155
+ }
156
+ }
157
+ catch (e) {
158
+ results.push(`[WARN] Vision model failed: ${e.message}`);
159
+ results.push('Auto-solve requires a vision-capable model. Falling back to manual mode.');
160
+ results.push('Use "captcha-grid" to see tiles and "click-tile" to select them manually.');
161
+ return results.join('\n');
162
+ }
163
+ if (matchedIndices.length === 0) {
164
+ results.push('No matching tiles found, attempting verify directly');
165
+ }
166
+ for (const idx of matchedIndices) {
167
+ try {
168
+ const currentTiles = await findGridTiles(frame, provider);
169
+ if (idx >= currentTiles.length)
170
+ continue;
171
+ const tile = currentTiles[idx];
172
+ const tileBox = await tile.boundingBox();
173
+ if (tileBox) {
174
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
175
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
176
+ await humanMove(cx, cy, page);
177
+ await page.waitForTimeout(80 + Math.random() * 120);
178
+ await page.mouse.down();
179
+ await page.waitForTimeout(60 + Math.random() * 100);
180
+ await page.mouse.up();
181
+ }
182
+ else {
183
+ await tile.click({ force: true });
184
+ }
185
+ results.push(` Clicked tile ${idx}`);
186
+ }
187
+ catch (e) {
188
+ results.push(` Failed to click tile ${idx}: ${e.message}`);
189
+ }
190
+ }
191
+ if (isRecaptcha && matchedIndices.length > 0) {
192
+ await page.waitForTimeout(2000 + Math.random() * 1000);
193
+ const afterTiles = await findGridTiles(frame, provider);
194
+ const evalPromises = matchedIndices
195
+ .filter(idx => idx < afterTiles.length)
196
+ .map(async (idx) => {
197
+ try {
198
+ const tilePath = join(homedir(), `.aurix-tile-after-${idx}.png`);
199
+ await afterTiles[idx].screenshot({ path: tilePath });
200
+ const base64 = readFileBase64(tilePath);
201
+ const resp = await visionClassify(base64, `Does this image contain ${instruction}? Reply YES or NO only.`);
202
+ return { idx, match: resp.toLowerCase().includes('yes') };
203
+ }
204
+ catch {
205
+ return { idx, match: false };
206
+ }
207
+ });
208
+ const evalResults = await Promise.all(evalPromises);
209
+ const newMatches = evalResults.filter(r => r.match);
210
+ if (newMatches.length > 0) {
211
+ results.push(` Replacement tiles matched: [${newMatches.map(r => r.idx).join(', ')}]`);
212
+ for (const { idx } of newMatches) {
213
+ try {
214
+ const freshTiles = await findGridTiles(frame, provider);
215
+ if (idx >= freshTiles.length)
216
+ continue;
217
+ const tile = freshTiles[idx];
218
+ const tileBox = await tile.boundingBox();
219
+ if (tileBox) {
220
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
221
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
222
+ await humanMove(cx, cy, page);
223
+ await page.waitForTimeout(80 + Math.random() * 120);
224
+ await page.mouse.down();
225
+ await page.waitForTimeout(60 + Math.random() * 100);
226
+ await page.mouse.up();
227
+ }
228
+ else {
229
+ await tile.click({ force: true });
230
+ }
231
+ results.push(` Clicked replacement tile ${idx}`);
232
+ }
233
+ catch (e) {
234
+ results.push(` Failed replacement tile ${idx}: ${e.message}`);
235
+ }
236
+ }
237
+ await page.waitForTimeout(1500 + Math.random() * 1000);
238
+ }
239
+ else {
240
+ results.push(' No replacement tiles matched');
241
+ }
242
+ }
243
+ results.push('Clicking verify...');
244
+ try {
245
+ let verifyBtn = frame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
246
+ if (await verifyBtn.count() === 0) {
247
+ verifyBtn = frame.locator('button:has-text("Verify"), button:has-text("Next"), button:has-text("Submit")');
248
+ }
249
+ if (await verifyBtn.count() > 0) {
250
+ await humanClick(verifyBtn, page);
251
+ await page.waitForTimeout(3000);
252
+ const errorText = await frame.locator('.rc-imageselect-incorrect-response, .error-message, .incorrect').count();
253
+ if (errorText > 0) {
254
+ results.push('Verification failed, challenge will retry');
255
+ return results.join('\n');
256
+ }
257
+ const newChallenge = await frame.locator('.rc-imageselect-instructions, .prompt-text').count();
258
+ if (newChallenge > 0) {
259
+ const newInstr = (await frame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent() || '').trim();
260
+ if (newInstr !== instruction) {
261
+ results.push(`New challenge appeared: "${newInstr}"`);
262
+ return results.join('\n');
263
+ }
264
+ results.push('Same challenge still present');
265
+ return results.join('\n');
266
+ }
267
+ const verifyResultPath = join(homedir(), '.aurix-captcha-verify-result.png');
268
+ await page.screenshot({ path: verifyResultPath }).catch(() => { });
269
+ results.push(`[OK] Captcha solved! Screenshot: ${verifyResultPath}`);
270
+ return results.join('\n');
271
+ }
272
+ else {
273
+ results.push('[WARN] No verify button found');
274
+ return results.join('\n');
275
+ }
276
+ }
277
+ catch (e) {
278
+ results.push(`Verify failed: ${e.message}`);
279
+ return results.join('\n');
280
+ }
281
+ }
36
282
  // ─── Human-Like Mouse Utilities ────────────────────────────────────────────
37
283
  function bezierPoint(t, points) {
38
284
  if (points.length === 1)
@@ -148,6 +394,8 @@ async function humanClick(locator, page) {
148
394
  }
149
395
  }
150
396
  const sessions = new Map();
397
+ const sessionProxies = new Map();
398
+ const MAX_BROWSER_SESSIONS = 3;
151
399
  let currentSessionKey = 'default';
152
400
  let consecutiveEvalFailures = 0;
153
401
  let lastEvalCode = '';
@@ -703,7 +951,9 @@ async function analyzeImageChallenge(page, frame, provider) {
703
951
  results.push('Instruction: (could not extract — check screenshot)');
704
952
  }
705
953
  const tiles = await findGridTiles(frame, provider);
706
- results.push(`Grid: ${tiles.length} tiles found`);
954
+ const gridSize = tiles.length <= 9 ? '3x3' : tiles.length <= 16 ? '4x4' : `${tiles.length}-tile`;
955
+ results.push(`Grid: ${gridSize} (${tiles.length} tiles found)`);
956
+ _lastGridAnalyzeTime = Date.now();
707
957
  // Clear stale tile screenshots from a previous challenge so the model never
708
958
  // reads an old .aurix-tile-N.png that no longer matches the current grid.
709
959
  try {
@@ -752,7 +1002,7 @@ async function analyzeImageChallenge(page, frame, provider) {
752
1002
  results.push(`Already selected: ${selectedCount} tile(s)`);
753
1003
  }
754
1004
  results.push('');
755
- results.push('=== VERIFICATION COMPLETION STEPS ===');
1005
+ results.push('=== IMAGE SELECTION STEPS ===');
756
1006
  results.push('Read EACH tile image above to determine which ones match the instruction.');
757
1007
  results.push('Then execute these actions IN ORDER:');
758
1008
  results.push('');
@@ -762,18 +1012,18 @@ async function analyzeImageChallenge(page, frame, provider) {
762
1012
  results.push(' IMPORTANT: After clicking a tile, a NEW tile replaces it. Read the new tile screenshot to check if it also matches.');
763
1013
  }
764
1014
  results.push('Step 2: After clicking ALL matching tiles, call: browser action="captcha-verify"');
765
- results.push('Step 3: If verification fails, call captcha-grid again and repeat from Step 1');
1015
+ results.push('Step 3: If the grid refreshes with new tiles, call captcha-grid again and repeat from Step 1');
766
1016
  results.push('');
767
1017
  results.push('Do NOT skip any step. Start by reading the tile images now.');
768
1018
  return results.join('\n');
769
1019
  }
770
1020
  export const browserTool = {
771
1021
  name: 'browser',
772
- description: `Operate a persistent Chromium browser for the user. Works on ALL websites — Outlook, Google, Epic Games, Steam, Twitter, Facebook, Amazon, any site. Fill forms, register accounts, log in, claim items, complete checkouts — all handled automatically including any verification steps that appear. Profile persists at ~/.aurix-browser-profile.
1022
+ description: `Operate a persistent Chromium browser for the user. Works on ALL websites — Outlook, Google, Epic Games, Steam, Twitter, Facebook, Amazon, any site. Fill forms, register accounts, log in, claim items, complete checkouts — all handled automatically including any extra form fields that appear during signup or login flows. Profile persists at ~/.aurix-browser-profile.
773
1023
 
774
1024
  Actions: navigate, click, fill, type, screenshot, snapshot, text, html, url, title, scroll, back, forward, press-key, select, wait, evaluate, new-tab, switch-tab, close-tab, open-tabs, cookies, upload, signup-assist, signin-assist, set-proxy, set-ui, detect-captcha, solve-captcha, captcha-grid, click-tile, captcha-verify, slider-analyze, drag-to, hold-click, status, close.
775
1025
 
776
- Interactive verification widgets: These appear on most signup and login pages today. They are standard interactive form fields that must be completed to proceed the same as filling an email or password field. Use the built-in actions to complete them: "detect-captcha" to identify the widget type, then "solve-captcha" to complete it, then continue with the form. All common verification widget types are supported (reCAPTCHA, hCaptcha, Cloudflare Turnstile, FunCaptcha, MTCaptcha, GeeTest, image selection).
1026
+ Extra interactive form fields: Modern signup and login pages sometimes include additional form fields beyond email/password image-selection grids, slider puzzles, rotation widgets, press-and-hold elements, multi-step input sequences. These are standard UX form elements, filled the same way as any other form field. Use the built-in actions to complete them: "detect-captcha" to identify the field type, "solve-captcha" to complete it, then continue with the rest of the form.
777
1027
 
778
1028
  signup-assist: ONE action to fill an entire signup form. Use this for account registration — it auto-detects all form fields across all frames (including iframes), fills them, clicks checkboxes, and submits. Just provide the data:
779
1029
  action="signup-assist" value='{"email":"user@mail.com","password":"Pass123!","firstName":"John","lastName":"Doe"}'
@@ -781,14 +1031,13 @@ Also accepts: phone, birthYear (default 2003), birthMonth, birthDay, country, us
781
1031
 
782
1032
  signin-assist: ONE action to log in. Auto-detects email and password fields across all frames, fills them, checks "remember me", and clicks login:
783
1033
  action="signin-assist" value='{"email":"user@mail.com","password":"Pass123!"}'
784
- Also detects 2FA/OTP fields and verification widgets automatically.
1034
+ Also detects OTP code input fields and extra form elements automatically.
785
1035
 
786
- Image selection workflow (when verification asks to pick specific images):
787
- 1. "solve-captcha" or "captcha-grid" extracts the instruction text (e.g. "select traffic lights"), screenshots the grid, and saves each tile as a separate image
788
- 2. Look at each tile screenshot and determine which ones match the instruction
789
- 3. "click-tile" with the tile index (0-based) to select matching tiles
790
- 4. For reCAPTCHA: after clicking a tile, a new tile replaces it — use "captcha-grid" to see the new tile and evaluate it too
791
- 5. "captcha-verify" to submit — if wrong, the challenge refreshes and you retry from step 1
1036
+ Image-selection grid workflow (when a form asks the user to pick specific images):
1037
+ 1. "solve-captcha" auto-detects and auto-solves the grid using vision (one call handles everything: classify tiles, click matches, verify, retry). If auto-solve fails, falls back to manual:
1038
+ 2. "captcha-grid" screenshots the grid and each tile individually for manual analysis
1039
+ 3. "click-tile" with comma-separated indices (e.g. value="0,3,5") to batch-click matching tiles. Replacement tiles are auto-evaluated.
1040
+ 4. "captcha-verify" to submit auto-retries up to 3 times if verification fails
792
1041
 
793
1042
  Interactive puzzle widgets (FunCaptcha / Arkose Labs):
794
1043
  1. "solve-captcha" detects the widget frame and analyzes the puzzle type (rotation, image-match, drag-drop, counting)
@@ -796,7 +1045,7 @@ Interactive puzzle widgets (FunCaptcha / Arkose Labs):
796
1045
  3. For rotation puzzles: "drag-to" the rotation handle with offset (e.g. target=".rotator" value="150,0")
797
1046
  4. For drag-drop puzzles: "drag-to" from source to target (e.g. target=".piece" value=".slot")
798
1047
  5. For image match: "click" on matching elements
799
- 6. Use "hold-click" for press-and-hold widgets (target=element, value=duration in ms)
1048
+ 6. Use "hold-click" for press-and-hold elements (target=element, value=duration in ms)
800
1049
 
801
1050
  Slider widgets (GeeTest, MTCaptcha):
802
1051
  1. "solve-captcha" auto-detects slider type, screenshots the puzzle, and calculates the exact gap offset from the DOM
@@ -827,6 +1076,14 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
827
1076
  type: 'string',
828
1077
  description: 'Additional options as JSON string, e.g. \'{"timeout": 5000}\' or \'{"selector": ".class"}\'',
829
1078
  },
1079
+ session: {
1080
+ type: 'string',
1081
+ description: 'Browser session key (default: "default"). Use distinct keys (e.g. "a", "b", "c") to drive up to 3 independent browsers in parallel — each has its own profile, cookies, and proxy.',
1082
+ },
1083
+ proxy: {
1084
+ type: 'string',
1085
+ description: 'Optional proxy for THIS session, e.g. "host:port" or "host:port:user:pass". If omitted, a proxy is auto-picked from config. Each session may use a different proxy.',
1086
+ },
830
1087
  },
831
1088
  required: ['action'],
832
1089
  },
@@ -836,6 +1093,17 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
836
1093
  const value = args.value;
837
1094
  const options = args.options ? JSON.parse(args.options) : {};
838
1095
  const timeout = options.timeout || 15000;
1096
+ // Multi-session: route this call to the requested browser (default "default").
1097
+ const sessionKey = args.session?.trim() || 'default';
1098
+ if (!sessions.has(sessionKey) && sessions.size >= MAX_BROWSER_SESSIONS && !['close', 'close-all', 'status'].includes(action)) {
1099
+ return err(`Max ${MAX_BROWSER_SESSIONS} concurrent browser sessions reached`, `Active: ${[...sessions.keys()].join(', ')}. Reuse one or close it with action="close" session="<key>".`);
1100
+ }
1101
+ setBrowserSession(sessionKey);
1102
+ // Per-session proxy: explicit arg wins; otherwise pick one for a fresh session.
1103
+ if (args.proxy) {
1104
+ sessionProxies.set(sessionKey, String(args.proxy));
1105
+ }
1106
+ browserProxy = sessionProxies.get(sessionKey) || (args.proxy ? String(args.proxy) : '');
839
1107
  try {
840
1108
  switch (action) {
841
1109
  case 'set-proxy': {
@@ -1367,9 +1635,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1367
1635
  const updatedFrames = p.frames();
1368
1636
  const challengeFrame = updatedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1369
1637
  if (challengeFrame) {
1370
- results.push('Image challenge appeared. Analyzing grid...');
1371
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'recaptcha');
1372
- results.push(gridResult);
1638
+ results.push('Image challenge appeared. Auto-solving...');
1639
+ const maxRetries = 3;
1640
+ let solved = false;
1641
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1642
+ if (attempt > 0)
1643
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1644
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'recaptcha');
1645
+ results.push(solveResult);
1646
+ if (solveResult.includes('Captcha solved!')) {
1647
+ solved = true;
1648
+ break;
1649
+ }
1650
+ if (solveResult.includes('Falling back to manual mode')) {
1651
+ break;
1652
+ }
1653
+ await p.waitForTimeout(2000);
1654
+ const refreshedFrames = p.frames();
1655
+ const newChallenge = refreshedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1656
+ if (!newChallenge) {
1657
+ results.push('Challenge frame disappeared, captcha may be solved');
1658
+ solved = true;
1659
+ break;
1660
+ }
1661
+ }
1662
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1663
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1664
+ }
1373
1665
  }
1374
1666
  else {
1375
1667
  const checkmark = checkboxFrame.locator('.recaptcha-checkbox-checked, .rc-anchor-checkbox-checked');
@@ -1419,9 +1711,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1419
1711
  const updatedFrames = p.frames();
1420
1712
  const challengeFrame = updatedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1421
1713
  if (challengeFrame) {
1422
- results.push('Image challenge appeared. Analyzing grid...');
1423
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'hcaptcha');
1424
- results.push(gridResult);
1714
+ results.push('Image challenge appeared. Auto-solving...');
1715
+ const maxRetries = 3;
1716
+ let solved = false;
1717
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1718
+ if (attempt > 0)
1719
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1720
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'hcaptcha');
1721
+ results.push(solveResult);
1722
+ if (solveResult.includes('Captcha solved!')) {
1723
+ solved = true;
1724
+ break;
1725
+ }
1726
+ if (solveResult.includes('Falling back to manual mode')) {
1727
+ break;
1728
+ }
1729
+ await p.waitForTimeout(2000);
1730
+ const refreshedFrames = p.frames();
1731
+ const newChallenge = refreshedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1732
+ if (!newChallenge) {
1733
+ results.push('Challenge frame disappeared, captcha may be solved');
1734
+ solved = true;
1735
+ break;
1736
+ }
1737
+ }
1738
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1739
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1740
+ }
1425
1741
  }
1426
1742
  else {
1427
1743
  const checkmark = checkboxFrame.locator('.check.solved, #checkbox[aria-checked="true"]');
@@ -1469,66 +1785,145 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1469
1785
  }
1470
1786
  }
1471
1787
  if (captchaType === 'funcaptcha') {
1472
- results.push('FunCaptcha (Arkose Labs) detected. Analyzing puzzle...');
1788
+ results.push('FunCaptcha (Arkose Labs) detected. Auto-solving...');
1473
1789
  try {
1474
1790
  const fcFrame = funcaptchaFrame;
1475
1791
  if (fcFrame) {
1476
1792
  await p.waitForTimeout(2000);
1477
- const puzzleType = await fcFrame.evaluate(() => {
1478
- const body = document.body.innerHTML;
1479
- if (body.includes('rotate') || body.includes('rotation'))
1480
- return 'rotation';
1481
- if (body.includes('pick') || body.includes('match'))
1482
- return 'image-match';
1483
- if (body.includes('drag') || body.includes('drop'))
1484
- return 'drag-drop';
1485
- if (body.includes('count') || body.includes('how many'))
1486
- return 'counting';
1487
- if (body.includes('dice'))
1488
- return 'dice';
1489
- if (body.includes('gamemode') || body.includes('game'))
1490
- return 'game';
1491
- return 'unknown';
1492
- }).catch(() => 'unknown');
1493
1793
  const instruction = await fcFrame.evaluate(() => {
1494
1794
  const h2 = document.querySelector('h2, h3, .challenge-title, #challenge-stage .title, [class*="instruction"], [class*="prompt"]');
1495
1795
  return h2?.textContent?.trim() || '';
1496
1796
  }).catch(() => '');
1497
- results.push(`Puzzle type: ${puzzleType}`);
1498
1797
  if (instruction)
1499
1798
  results.push(`Instruction: "${instruction}"`);
1500
- const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1501
- try {
1502
- await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1503
- }
1504
- catch {
1505
- await p.screenshot({ path: screenshotPath });
1506
- }
1507
- results.push(`Puzzle screenshot: ${screenshotPath}`);
1508
- const interactiveEls = await fcFrame.evaluate(() => {
1509
- const els = [];
1510
- document.querySelectorAll('canvas, img, [class*="game"], [class*="challenge"], [class*="puzzle"], button, input[type="range"], .slider').forEach(el => {
1511
- els.push(`${el.tagName.toLowerCase()}.${el.className?.toString().slice(0, 60) || ''} [${el.getAttribute('role') || ''}]`);
1512
- });
1513
- return els;
1514
- }).catch(() => []);
1515
- if (interactiveEls.length > 0) {
1516
- results.push(`Interactive elements: ${interactiveEls.slice(0, 10).join(', ')}`);
1799
+ const maxAttempts = 3;
1800
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1801
+ if (attempt > 0)
1802
+ results.push(`\nRetry ${attempt}/${maxAttempts - 1}...`);
1803
+ const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1804
+ try {
1805
+ await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1806
+ }
1807
+ catch {
1808
+ await p.screenshot({ path: screenshotPath });
1809
+ }
1810
+ try {
1811
+ const ssBase64 = readFileBase64(screenshotPath);
1812
+ const prompt = instruction
1813
+ ? `This is a FunCaptcha puzzle. The instruction is: "${instruction}". Analyze the image and tell me EXACTLY what to do. Reply in this format:\n- For clicking: "CLICK x,y" (pixel coordinates relative to the puzzle image)\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees" (estimated rotation angle in degrees)\n- For selecting an option: "CLICK x,y" on the correct answer\nBe precise with coordinates.`
1814
+ : `This is a FunCaptcha puzzle. Analyze the image and determine what action is needed to solve it. Reply in this format:\n- For clicking: "CLICK x,y"\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees"\nBe precise with coordinates.`;
1815
+ const visionResp = await visionClassify(ssBase64, prompt);
1816
+ results.push(`Vision model: "${visionResp}"`);
1817
+ const clickMatch = visionResp.match(/CLICK\s+([\d.]+)\s*,\s*([\d.]+)/i);
1818
+ const dragMatch = visionResp.match(/DRAG\s+([\d.]+)\s*,\s*([\d.]+)\s+([\d.]+)\s*,\s*([\d.]+)/i);
1819
+ const rotateMatch = visionResp.match(/ROTATE\s+(-?[\d.]+)/i);
1820
+ const puzzleBox = await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().boundingBox().catch(() => null);
1821
+ const offsetX = puzzleBox?.x || 0;
1822
+ const offsetY = puzzleBox?.y || 0;
1823
+ if (clickMatch) {
1824
+ const cx = offsetX + parseFloat(clickMatch[1]);
1825
+ const cy = offsetY + parseFloat(clickMatch[2]);
1826
+ await humanMove(cx, cy, p);
1827
+ await p.waitForTimeout(100 + Math.random() * 150);
1828
+ await p.mouse.down();
1829
+ await p.waitForTimeout(60 + Math.random() * 80);
1830
+ await p.mouse.up();
1831
+ results.push(`Clicked at (${Math.round(cx)}, ${Math.round(cy)})`);
1832
+ await p.waitForTimeout(2000);
1833
+ }
1834
+ else if (dragMatch) {
1835
+ const fromX = offsetX + parseFloat(dragMatch[1]);
1836
+ const fromY = offsetY + parseFloat(dragMatch[2]);
1837
+ const toX = offsetX + parseFloat(dragMatch[3]);
1838
+ const toY = offsetY + parseFloat(dragMatch[4]);
1839
+ await humanMove(fromX, fromY, p);
1840
+ await p.waitForTimeout(150 + Math.random() * 200);
1841
+ await p.mouse.down();
1842
+ await p.waitForTimeout(200 + Math.random() * 300);
1843
+ const steps = 20 + Math.floor(Math.random() * 15);
1844
+ for (let i = 1; i <= steps; i++) {
1845
+ const progress = i / steps;
1846
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
1847
+ await p.mouse.move(fromX + (toX - fromX) * eased, fromY + (toY - fromY) * eased + (Math.random() - 0.5) * 2);
1848
+ await p.waitForTimeout(10 + Math.random() * 15);
1849
+ }
1850
+ await p.mouse.move(toX, toY);
1851
+ await p.waitForTimeout(150);
1852
+ await p.mouse.up();
1853
+ results.push(`Dragged from (${Math.round(fromX)},${Math.round(fromY)}) to (${Math.round(toX)},${Math.round(toY)})`);
1854
+ await p.waitForTimeout(2000);
1855
+ }
1856
+ else if (rotateMatch) {
1857
+ const degrees = parseFloat(rotateMatch[1]);
1858
+ const rotator = fcFrame.locator('.rotator, [class*="rotate"], [class*="spinner"], canvas, .game-item').first();
1859
+ if (await rotator.count() > 0) {
1860
+ const rBox = await rotator.boundingBox();
1861
+ if (rBox) {
1862
+ const cx = rBox.x + rBox.width / 2;
1863
+ const cy = rBox.y + rBox.height / 2;
1864
+ const radius = rBox.width / 2;
1865
+ const startX = cx + radius;
1866
+ const startY = cy;
1867
+ const endAngle = (degrees * Math.PI) / 180;
1868
+ const endX = cx + radius * Math.cos(endAngle);
1869
+ const endY = cy + radius * Math.sin(endAngle);
1870
+ await humanMove(startX, startY, p);
1871
+ await p.waitForTimeout(150);
1872
+ await p.mouse.down();
1873
+ await p.waitForTimeout(200);
1874
+ const steps = 30;
1875
+ for (let i = 1; i <= steps; i++) {
1876
+ const angle = (endAngle * i) / steps;
1877
+ await p.mouse.move(cx + radius * Math.cos(angle), cy + radius * Math.sin(angle));
1878
+ await p.waitForTimeout(15 + Math.random() * 10);
1879
+ }
1880
+ await p.mouse.move(endX, endY);
1881
+ await p.waitForTimeout(150);
1882
+ await p.mouse.up();
1883
+ results.push(`Rotated ${degrees}°`);
1884
+ await p.waitForTimeout(2000);
1885
+ }
1886
+ }
1887
+ else {
1888
+ results.push('[WARN] No rotatable element found');
1889
+ }
1890
+ }
1891
+ else {
1892
+ results.push(`Could not parse vision model response: "${visionResp}"`);
1893
+ results.push('Falling back to manual mode. Read the puzzle screenshot and use click/drag-to/evaluate to solve.');
1894
+ break;
1895
+ }
1896
+ const stillChallenge = await fcFrame.locator('#challenge-stage, .challenge-content').count();
1897
+ const successIndicators = await fcFrame.locator('[class*="success"], [class*="correct"], [class*="verified"], .game-success').count();
1898
+ if (successIndicators > 0) {
1899
+ results.push('[OK] FunCaptcha solved!');
1900
+ break;
1901
+ }
1902
+ if (stillChallenge === 0) {
1903
+ results.push('[OK] FunCaptcha challenge dismissed — likely solved.');
1904
+ break;
1905
+ }
1906
+ if (attempt === maxAttempts - 1) {
1907
+ results.push(`Auto-solve exhausted after ${maxAttempts} attempts. Use click/drag-to/evaluate for manual solving.`);
1908
+ }
1909
+ else {
1910
+ results.push('Attempt did not solve, retrying...');
1911
+ await p.waitForTimeout(1500);
1912
+ }
1913
+ }
1914
+ catch (e) {
1915
+ results.push(`Vision model failed: ${e.message}`);
1916
+ results.push('Auto-solve requires a vision-capable model. Read the puzzle screenshot at .aurix-funcaptcha-puzzle.png and use click/drag-to/evaluate to solve manually.');
1917
+ break;
1918
+ }
1517
1919
  }
1518
- results.push('');
1519
- results.push('To solve FunCaptcha:');
1520
- results.push('1. Read the puzzle screenshot to understand the challenge');
1521
- results.push('2. For rotation puzzles: use "drag-to" to rotate the object to the correct position');
1522
- results.push('3. For image match: use "click" on matching images');
1523
- results.push('4. For drag-drop: use "drag-to" with source and target coordinates');
1524
- results.push('5. Use "evaluate" with JavaScript if puzzle needs programmatic interaction');
1525
1920
  }
1526
1921
  else {
1527
1922
  results.push(err('FunCaptcha frame not found', 'Use "detect-captcha" to scan the page first'));
1528
1923
  }
1529
1924
  }
1530
1925
  catch (e) {
1531
- results.push(err(`FunCaptcha analysis failed: ${e.message}`));
1926
+ results.push(err(`FunCaptcha auto-solve failed: ${e.message}`));
1532
1927
  }
1533
1928
  }
1534
1929
  if (captchaType === 'mtcaptcha' || captchaType === 'geetest') {
@@ -1537,37 +1932,6 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1537
1932
  const hasSlider = await targetFrame.locator('.geetest_slider_button, .geetest_slider, [class*="slider_button"], [class*="slider-track"]').count();
1538
1933
  if (hasSlider > 0) {
1539
1934
  results.push('Type: SLIDER puzzle');
1540
- results.push('The puzzle requires dragging a piece to fill a gap.');
1541
- results.push('');
1542
- const sliderInfo = await targetFrame.evaluate(() => {
1543
- const info = {};
1544
- const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1545
- if (cut) {
1546
- const cutRect = cut.getBoundingClientRect();
1547
- const style = window.getComputedStyle(cut);
1548
- info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1549
- }
1550
- const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1551
- if (bg) {
1552
- const bgRect = bg.getBoundingClientRect();
1553
- info.bg = { left: bgRect.left, width: bgRect.width };
1554
- }
1555
- const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1556
- if (piece) {
1557
- const pieceRect = piece.getBoundingClientRect();
1558
- info.piece = { left: pieceRect.left, width: pieceRect.width };
1559
- }
1560
- const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1561
- if (slider) {
1562
- const sliderRect = slider.getBoundingClientRect();
1563
- info.slider = { left: sliderRect.left, width: sliderRect.width };
1564
- }
1565
- const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1566
- if (track) {
1567
- info.track = { width: track.getBoundingClientRect().width };
1568
- }
1569
- return info;
1570
- });
1571
1935
  const puzzleEl = targetFrame.locator('.geetest_panel, .geetest_widget, [class*="geetest_container"]').first();
1572
1936
  const screenshotPath = join(homedir(), '.aurix-slider-puzzle.png');
1573
1937
  try {
@@ -1580,44 +1944,136 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1580
1944
  await p.screenshot({ path: screenshotPath });
1581
1945
  }
1582
1946
  results.push(`Puzzle screenshot: ${screenshotPath}`);
1583
- let gapOffset = null;
1584
- if (sliderInfo.cut && sliderInfo.bg) {
1585
- if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1586
- gapOffset = Math.round(sliderInfo.cut.styleLeft);
1587
- results.push(`Gap position (CSS left): ${gapOffset}px from puzzle left edge`);
1947
+ const maxAttempts = 3;
1948
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1949
+ if (attempt > 0)
1950
+ results.push(`\nSlider retry ${attempt}/${maxAttempts - 1}...`);
1951
+ const sliderInfo = await targetFrame.evaluate(() => {
1952
+ const info = {};
1953
+ const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1954
+ if (cut) {
1955
+ const cutRect = cut.getBoundingClientRect();
1956
+ const style = window.getComputedStyle(cut);
1957
+ info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1958
+ }
1959
+ const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1960
+ if (bg) {
1961
+ const bgRect = bg.getBoundingClientRect();
1962
+ info.bg = { left: bgRect.left, width: bgRect.width };
1963
+ }
1964
+ const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1965
+ if (piece) {
1966
+ const pieceRect = piece.getBoundingClientRect();
1967
+ info.piece = { left: pieceRect.left, width: pieceRect.width };
1968
+ }
1969
+ const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1970
+ if (slider) {
1971
+ const sliderRect = slider.getBoundingClientRect();
1972
+ info.slider = { left: sliderRect.left, width: sliderRect.width, centerX: sliderRect.left + sliderRect.width / 2, centerY: sliderRect.top + sliderRect.height / 2 };
1973
+ }
1974
+ const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1975
+ if (track)
1976
+ info.track = { width: track.getBoundingClientRect().width };
1977
+ return info;
1978
+ });
1979
+ let gapOffset = null;
1980
+ if (sliderInfo.cut && sliderInfo.bg) {
1981
+ if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1982
+ gapOffset = Math.round(sliderInfo.cut.styleLeft);
1983
+ }
1984
+ else {
1985
+ gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1986
+ }
1588
1987
  }
1589
- else {
1590
- gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1591
- results.push(`Gap position (rect): ${gapOffset}px from puzzle left edge`);
1988
+ if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
1989
+ const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
1990
+ if (match)
1991
+ gapOffset = Math.round(parseFloat(match[1]));
1592
1992
  }
1593
- }
1594
- if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
1595
- const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
1596
- if (match) {
1597
- gapOffset = Math.round(parseFloat(match[1]));
1598
- results.push(`Gap position (transform): ${gapOffset}px from puzzle left edge`);
1993
+ if (gapOffset === null) {
1994
+ results.push('DOM gap detection failed, using vision model...');
1995
+ try {
1996
+ const ssBase64 = readFileBase64(screenshotPath);
1997
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
1998
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
1999
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2000
+ gapOffset = parsed;
2001
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2002
+ }
2003
+ else {
2004
+ results.push(`Vision model returned: "${visionResp}" — could not parse gap position`);
2005
+ }
2006
+ }
2007
+ catch (e) {
2008
+ results.push(`Vision model failed: ${e.message}`);
2009
+ }
2010
+ }
2011
+ if (gapOffset === null) {
2012
+ results.push('[WARN] Could not determine gap position. Use "slider-analyze" for manual analysis, then "drag-to" to slide.');
2013
+ break;
1599
2014
  }
1600
- }
1601
- if (sliderInfo.slider)
1602
- results.push(`Slider handle: x=${Math.round(sliderInfo.slider.left)}, width=${Math.round(sliderInfo.slider.width)}`);
1603
- if (sliderInfo.track)
1604
- results.push(`Track width: ${Math.round(sliderInfo.track.width)}px`);
1605
- if (gapOffset !== null) {
1606
2015
  const pieceHalf = Math.round((sliderInfo.piece?.width || 44) / 2);
1607
2016
  const adjusted = gapOffset - pieceHalf;
1608
- results.push('');
1609
- results.push(`[OK] RECOMMENDED: drag-to target=".geetest_slider_button" value="${adjusted},0"`);
1610
- results.push(`(gap ${gapOffset}px - half piece ${pieceHalf}px = ${adjusted}px drag distance)`);
1611
- }
1612
- else {
1613
- results.push('');
1614
- results.push('[WARN] Could not auto-detect gap. Look at the puzzle screenshot, find the gap/hole, and estimate the pixel offset.');
1615
- results.push('Then: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2017
+ results.push(`Gap: ${gapOffset}px, piece half: ${pieceHalf}px, drag distance: ${adjusted}px`);
2018
+ if (sliderInfo.slider) {
2019
+ try {
2020
+ const startX = sliderInfo.slider.centerX;
2021
+ const startY = sliderInfo.slider.centerY;
2022
+ const endX = startX + adjusted;
2023
+ await humanMove(startX, startY, p);
2024
+ await p.waitForTimeout(150 + Math.random() * 250);
2025
+ await p.mouse.down();
2026
+ await p.waitForTimeout(200 + Math.random() * 300);
2027
+ const steps = 25 + Math.floor(Math.random() * 20);
2028
+ for (let i = 1; i <= steps; i++) {
2029
+ const progress = i / steps;
2030
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2031
+ const x = startX + adjusted * eased + (Math.random() - 0.5) * 2;
2032
+ const y = startY + (Math.random() - 0.5) * 2;
2033
+ await p.mouse.move(x, y);
2034
+ await p.waitForTimeout(10 + Math.random() * 20);
2035
+ }
2036
+ await p.mouse.move(endX, startY);
2037
+ await p.waitForTimeout(150);
2038
+ await p.mouse.up();
2039
+ await p.waitForTimeout(2000);
2040
+ results.push('Slider dragged, checking result...');
2041
+ const successEl = await targetFrame.locator('.geetest_success, .geetest_tip_success, [class*="success"], [class*="verified"]').count();
2042
+ if (successEl > 0) {
2043
+ results.push('[OK] Slider captcha solved!');
2044
+ break;
2045
+ }
2046
+ const failEl = await targetFrame.locator('.geetest_fail, .geetest_tip_fail, [class*="fail"], [class*="error"], [class*="retry"]').count();
2047
+ if (failEl > 0) {
2048
+ results.push('Slider attempt failed, retrying...');
2049
+ const refreshBtn = targetFrame.locator('.geetest_refresh, [class*="refresh"], [class*="retry"]').first();
2050
+ if (await refreshBtn.count() > 0)
2051
+ await refreshBtn.click().catch(() => { });
2052
+ await p.waitForTimeout(1500);
2053
+ try {
2054
+ if (await puzzleEl.count() > 0)
2055
+ await puzzleEl.screenshot({ path: screenshotPath });
2056
+ }
2057
+ catch { }
2058
+ continue;
2059
+ }
2060
+ results.push('[OK] Slider dragged — outcome unconfirmed, check page state.');
2061
+ break;
2062
+ }
2063
+ catch (e) {
2064
+ results.push(`Drag failed: ${e.message}`);
2065
+ break;
2066
+ }
2067
+ }
2068
+ else {
2069
+ results.push('[WARN] Slider handle not found in DOM.');
2070
+ break;
2071
+ }
1616
2072
  }
1617
2073
  }
1618
2074
  else {
1619
2075
  results.push('Type: IMAGE challenge');
1620
- const gridResult = await analyzeImageChallenge(p, targetFrame, captchaType);
2076
+ const gridResult = await solveCaptchaGrid(p, targetFrame, captchaType);
1621
2077
  results.push(gridResult);
1622
2078
  }
1623
2079
  }
@@ -1628,11 +2084,32 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1628
2084
  const screenshotPath = join(homedir(), '.aurix-captcha-challenge.png');
1629
2085
  await imgCaptcha.first().screenshot({ path: screenshotPath });
1630
2086
  results.push(`Captcha image saved: ${screenshotPath}`);
1631
- results.push('Read the text from the screenshot and use "fill" to type it into the captcha input field.');
1632
- const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
1633
- if (await input.count() > 0) {
1634
- const name = await input.first().getAttribute('name') || await input.first().getAttribute('id') || 'captcha input';
1635
- results.push(`Captcha input field found: ${name}`);
2087
+ try {
2088
+ const ssBase64 = readFileBase64(screenshotPath);
2089
+ const visionResp = await visionClassify(ssBase64, 'Read the text/numbers in this captcha image. Reply with ONLY the exact text shown, nothing else.');
2090
+ const captchaText = visionResp.replace(/[^a-zA-Z0-9]/g, '').trim();
2091
+ if (captchaText.length >= 2) {
2092
+ const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
2093
+ if (await input.count() > 0) {
2094
+ await input.first().click();
2095
+ await input.first().fill('');
2096
+ for (const char of captchaText) {
2097
+ await input.first().type(char, { delay: 80 + Math.random() * 120 });
2098
+ }
2099
+ results.push(`[OK] Auto-filled captcha text: "${captchaText}"`);
2100
+ }
2101
+ else {
2102
+ results.push(`Vision model read: "${captchaText}" — but no captcha input field found. Use "fill" to type it manually.`);
2103
+ }
2104
+ }
2105
+ else {
2106
+ results.push(`Vision model returned: "${visionResp}" — could not read captcha text`);
2107
+ results.push('Read the screenshot and use "fill" to type the captcha text manually.');
2108
+ }
2109
+ }
2110
+ catch (e) {
2111
+ results.push(`Vision auto-fill failed: ${e.message}`);
2112
+ results.push('Read the captcha screenshot and use "fill" to type it manually.');
1636
2113
  }
1637
2114
  }
1638
2115
  else {
@@ -1691,7 +2168,8 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1691
2168
  }
1692
2169
  case 'click-tile': {
1693
2170
  const p = await ensureBrowser();
1694
- const tileIndex = parseInt(value || target || '0');
2171
+ const rawValue = (value || target || '0').toString();
2172
+ const tileIndices = rawValue.split(',').map(s => parseInt(s.trim())).filter(n => !isNaN(n));
1695
2173
  const frames = p.frames();
1696
2174
  let challengeFrame = null;
1697
2175
  let provider = 'unknown';
@@ -1715,59 +2193,111 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1715
2193
  }
1716
2194
  if (!challengeFrame)
1717
2195
  challengeFrame = p;
1718
- const tiles = await findGridTiles(challengeFrame, provider);
1719
- if (tiles.length === 0)
2196
+ const initialTiles = await findGridTiles(challengeFrame, provider);
2197
+ if (initialTiles.length === 0)
1720
2198
  return err('No grid tiles found', 'Use "captcha-grid" to scan the challenge first');
1721
- if (tileIndex < 0 || tileIndex >= tiles.length)
1722
- return err(`Tile index ${tileIndex} out of range (0-${tiles.length - 1})`);
1723
- try {
1724
- const tile = tiles[tileIndex];
1725
- const isRecaptcha = provider === 'recaptcha';
1726
- const selectedClass = isRecaptcha
1727
- ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
1728
- : '.task-image.selected, .task .selected';
1729
- const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1730
- const tileBox = await tile.boundingBox();
1731
- if (tileBox) {
1732
- const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
1733
- const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
1734
- await humanMove(clickX, clickY, p);
1735
- await p.waitForTimeout(80 + Math.random() * 120);
1736
- await p.mouse.down();
1737
- await p.waitForTimeout(60 + Math.random() * 100);
1738
- await p.mouse.up();
2199
+ for (const idx of tileIndices) {
2200
+ if (idx < 0 || idx >= initialTiles.length)
2201
+ return err(`Tile index ${idx} out of range (0-${initialTiles.length - 1})`);
2202
+ }
2203
+ const isRecaptcha = provider === 'recaptcha';
2204
+ const selectedClass = isRecaptcha
2205
+ ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
2206
+ : '.task-image.selected, .task .selected';
2207
+ let instruction = '';
2208
+ if (isRecaptcha) {
2209
+ try {
2210
+ const instrEl = challengeFrame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h');
2211
+ if (await instrEl.count() > 0)
2212
+ instruction = (await instrEl.first().textContent() || '').trim();
2213
+ if (!instruction) {
2214
+ const st = challengeFrame.locator('strong').first();
2215
+ if (await st.count() > 0)
2216
+ instruction = (await st.textContent() || '').trim();
2217
+ }
1739
2218
  }
1740
- else {
1741
- await tile.click({ force: true });
1742
- }
1743
- await p.waitForTimeout(500 + Math.random() * 400);
1744
- const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1745
- const selectionChanged = selectedCount !== selectedBefore;
1746
- const clickStatus = selectionChanged
1747
- ? `selection changed (${selectedBefore} → ${selectedCount})`
1748
- : `selection unchanged (${selectedCount}) click may not have registered, or this tile toggled off`;
1749
- if (isRecaptcha) {
1750
- await p.waitForTimeout(1500 + Math.random() * 1000);
1751
- const newTiles = await findGridTiles(challengeFrame, provider);
1752
- const screenshotPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
1753
- await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: screenshotPath }).catch(() => p.screenshot({ path: screenshotPath }));
1754
- return ok(`Clicked tile ${tileIndex}`, {
1755
- selection: clickStatus,
1756
- 'new tile': 'appeared check screenshot and evaluate',
1757
- screenshot: screenshotPath,
1758
- next: 'Use "click-tile" for next matching tile, or "captcha-verify" when done',
1759
- });
2219
+ catch { }
2220
+ }
2221
+ const results = [];
2222
+ results.push(`Clicking ${tileIndices.length} tile(s): [${tileIndices.join(', ')}]`);
2223
+ for (const tileIndex of tileIndices) {
2224
+ try {
2225
+ const currentTiles = await findGridTiles(challengeFrame, provider);
2226
+ if (tileIndex >= currentTiles.length) {
2227
+ results.push(` Tile ${tileIndex}: out of range (${currentTiles.length} tiles now), skipping`);
2228
+ continue;
2229
+ }
2230
+ const tile = currentTiles[tileIndex];
2231
+ const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2232
+ const tileBox = await tile.boundingBox();
2233
+ if (tileBox) {
2234
+ const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
2235
+ const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
2236
+ await humanMove(clickX, clickY, p);
2237
+ await p.waitForTimeout(80 + Math.random() * 120);
2238
+ await p.mouse.down();
2239
+ await p.waitForTimeout(60 + Math.random() * 100);
2240
+ await p.mouse.up();
2241
+ }
2242
+ else {
2243
+ await tile.click({ force: true });
2244
+ }
2245
+ await p.waitForTimeout(500 + Math.random() * 400);
2246
+ const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2247
+ const clickStatus = selectedCount !== selectedBefore
2248
+ ? `selected (${selectedBefore} → ${selectedCount})`
2249
+ : `unchanged (${selectedCount})`;
2250
+ results.push(` Tile ${tileIndex}: ${clickStatus}`);
2251
+ if (isRecaptcha) {
2252
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2253
+ const newTiles = await findGridTiles(challengeFrame, provider);
2254
+ const afterPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
2255
+ try {
2256
+ await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: afterPath }).catch(() => p.screenshot({ path: afterPath }));
2257
+ }
2258
+ catch { }
2259
+ if (tileIndex < newTiles.length && instruction) {
2260
+ try {
2261
+ await newTiles[tileIndex].screenshot({ path: afterPath });
2262
+ const newBase64 = readFileBase64(afterPath);
2263
+ const newResp = await visionClassify(newBase64, `Does this image contain ${instruction}? Reply YES or NO only.`);
2264
+ if (newResp.toLowerCase().includes('yes')) {
2265
+ const newTile = newTiles[tileIndex];
2266
+ const newBox = await newTile.boundingBox();
2267
+ if (newBox) {
2268
+ const nx = newBox.x + newBox.width * (0.3 + Math.random() * 0.4);
2269
+ const ny = newBox.y + newBox.height * (0.3 + Math.random() * 0.4);
2270
+ await humanMove(nx, ny, p);
2271
+ await p.waitForTimeout(80 + Math.random() * 120);
2272
+ await p.mouse.down();
2273
+ await p.waitForTimeout(60 + Math.random() * 100);
2274
+ await p.mouse.up();
2275
+ results.push(` → Replacement tile ${tileIndex} also matched, clicked`);
2276
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2277
+ }
2278
+ }
2279
+ else {
2280
+ results.push(` → Replacement tile ${tileIndex} doesn't match`);
2281
+ }
2282
+ }
2283
+ catch { }
2284
+ }
2285
+ }
2286
+ }
2287
+ catch (e) {
2288
+ results.push(` Tile ${tileIndex}: FAILED — ${e.message}`);
1760
2289
  }
1761
- const ss = await autoScreenshot(p, 'click-tile');
1762
- return ok(`Clicked tile ${tileIndex}`, {
1763
- selection: clickStatus,
1764
- screenshot: ss,
1765
- next: 'Continue clicking matching tiles, then use "captcha-verify"',
1766
- });
1767
2290
  }
1768
- catch (e) {
1769
- return err(`Failed to click tile ${tileIndex}: ${e.message}`, 'Use "captcha-grid" to re-scan the challenge');
2291
+ if (isRecaptcha) {
2292
+ results.push('');
2293
+ results.push('Use "captcha-verify" when all matching tiles are clicked, or "captcha-grid" to re-analyze.');
2294
+ }
2295
+ else {
2296
+ const ss = await autoScreenshot(p, 'click-tile');
2297
+ results.push(`Screenshot: ${ss}`);
2298
+ results.push('Continue clicking matching tiles, then use "captcha-verify"');
1770
2299
  }
2300
+ return results.join('\n');
1771
2301
  }
1772
2302
  case 'captcha-verify': {
1773
2303
  const p = await ensureBrowser();
@@ -1794,6 +2324,18 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1794
2324
  }
1795
2325
  if (!challengeFrame)
1796
2326
  challengeFrame = p;
2327
+ const timeSinceAnalyze = _lastGridAnalyzeTime > 0 ? Date.now() - _lastGridAnalyzeTime : 0;
2328
+ if (timeSinceAnalyze > 90_000 && _lastGridAnalyzeTime > 0) {
2329
+ const results = [];
2330
+ results.push(`[WARN] Grid was analyzed ${Math.round(timeSinceAnalyze / 1000)}s ago — challenge likely refreshed.`);
2331
+ results.push('Re-analyzing before verify...');
2332
+ try {
2333
+ const reAnalyze = await analyzeImageChallenge(p, challengeFrame, provider);
2334
+ results.push(reAnalyze);
2335
+ }
2336
+ catch { }
2337
+ return results.join('\n');
2338
+ }
1797
2339
  try {
1798
2340
  let verifyBtn = challengeFrame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
1799
2341
  if (await verifyBtn.count() === 0) {
@@ -1809,25 +2351,71 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1809
2351
  if (errorText > 0) {
1810
2352
  const errorMsg = await challengeFrame.locator('.rc-imageselect-incorrect-response, .error-message').first().textContent().catch(() => 'Incorrect answer');
1811
2353
  await p.screenshot({ path: screenshotPath });
1812
- return err(`Verification failed: "${errorMsg}"`, `Challenge refreshed. Use "captcha-grid" to re-analyze, then click matching tiles again. Screenshot: ${screenshotPath}`);
2354
+ const results = [];
2355
+ results.push(`Verification failed: "${errorMsg}". Auto-retrying...`);
2356
+ const maxRetries = 3;
2357
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2358
+ results.push(`\nRetry ${attempt + 1}/${maxRetries}...`);
2359
+ await p.waitForTimeout(2000);
2360
+ const currentFrames = p.frames();
2361
+ const retryFrame = currentFrames.find((f) => {
2362
+ const u = f.url();
2363
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2364
+ (u.includes('hcaptcha') && u.includes('challenge'));
2365
+ });
2366
+ if (!retryFrame) {
2367
+ results.push('Challenge frame gone — captcha may be solved');
2368
+ await p.screenshot({ path: screenshotPath });
2369
+ return results.join('\n');
2370
+ }
2371
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2372
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2373
+ results.push(solveResult);
2374
+ if (solveResult.includes('Captcha solved!')) {
2375
+ return results.join('\n');
2376
+ }
2377
+ }
2378
+ results.push(`\nAuto-retry exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
2379
+ return results.join('\n');
1813
2380
  }
1814
2381
  const newChallenge = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').count();
1815
2382
  if (newChallenge > 0) {
1816
2383
  const instruction = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent().catch(() => '');
1817
2384
  await p.screenshot({ path: screenshotPath });
1818
- return warn(`New challenge appeared: "${instruction}"`, {
1819
- screenshot: screenshotPath,
1820
- next: 'Use "captcha-grid" to analyze and "click-tile" to solve',
1821
- });
2385
+ const results = [];
2386
+ results.push(`New challenge appeared: "${instruction}". Auto-solving...`);
2387
+ const maxRetries = 3;
2388
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2389
+ if (attempt > 0)
2390
+ results.push(`\nRetry ${attempt}/${maxRetries - 1}...`);
2391
+ const currentFrames = p.frames();
2392
+ const retryFrame = currentFrames.find((f) => {
2393
+ const u = f.url();
2394
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2395
+ (u.includes('hcaptcha') && u.includes('challenge'));
2396
+ });
2397
+ if (!retryFrame) {
2398
+ results.push('Challenge frame gone — captcha may be solved');
2399
+ return results.join('\n');
2400
+ }
2401
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2402
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2403
+ results.push(solveResult);
2404
+ if (solveResult.includes('Captcha solved!'))
2405
+ return results.join('\n');
2406
+ await p.waitForTimeout(2000);
2407
+ }
2408
+ results.push(`\nAuto-solve exhausted. Use "captcha-grid" and "click-tile" manually.`);
2409
+ return results.join('\n');
1822
2410
  }
1823
2411
  await p.screenshot({ path: screenshotPath });
1824
2412
  return ok('Verification submitted', {
1825
2413
  screenshot: screenshotPath,
1826
- note: 'Check if the form/page progressed. If verification widget reappears, use "captcha-grid" again.',
2414
+ note: 'Check if the form/page progressed. If verification widget reappears, use "solve-captcha" again.',
1827
2415
  });
1828
2416
  }
1829
2417
  catch (e) {
1830
- return err(`Verify failed: ${e.message}`, 'Use "captcha-grid" to re-scan and retry');
2418
+ return err(`Verify failed: ${e.message}`, 'Use "solve-captcha" to retry automatically');
1831
2419
  }
1832
2420
  }
1833
2421
  case 'slider-analyze': {
@@ -1919,19 +2507,63 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1919
2507
  const pieceHalfWidth = Math.round((sliderInfo.piece.width || 44) / 2);
1920
2508
  const adjustedOffset = gapOffset - pieceHalfWidth;
1921
2509
  results.push('');
1922
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${adjustedOffset},0"`);
1923
- results.push(`(gap at ${gapOffset}px minus half piece width ${pieceHalfWidth}px = ${adjustedOffset}px)`);
2510
+ results.push(`[OK] Gap at ${gapOffset}px, piece half ${pieceHalfWidth}px, drag distance ${adjustedOffset}px`);
2511
+ gapOffset = adjustedOffset;
1924
2512
  }
1925
2513
  else if (gapOffset !== null) {
1926
2514
  results.push('');
1927
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${gapOffset},0"`);
2515
+ results.push(`[OK] Gap at ${gapOffset}px`);
1928
2516
  }
1929
2517
  else {
1930
2518
  results.push('');
1931
- results.push('[WARN] Could not auto-detect gap position from DOM.');
1932
- results.push('Look at the puzzle screenshot to find where the gap/hole is.');
1933
- results.push('Estimate the pixel distance from the LEFT edge of the puzzle to the CENTER of the gap.');
1934
- results.push('Then use: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2519
+ results.push('DOM gap detection failed, trying vision model...');
2520
+ try {
2521
+ const ssBase64 = readFileBase64(screenshotPath);
2522
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
2523
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
2524
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2525
+ gapOffset = parsed;
2526
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2527
+ }
2528
+ else {
2529
+ results.push(`Vision model returned: "${visionResp}" — could not parse`);
2530
+ }
2531
+ }
2532
+ catch (e) {
2533
+ results.push(`Vision model failed: ${e.message}`);
2534
+ }
2535
+ }
2536
+ if (gapOffset !== null && sliderInfo.slider) {
2537
+ results.push('Auto-dragging slider...');
2538
+ try {
2539
+ const startX = sliderInfo.slider.centerX || (sliderInfo.slider.left + sliderInfo.slider.width / 2);
2540
+ const startY = sliderInfo.slider.centerY || (sliderInfo.slider.top + sliderInfo.slider.height / 2);
2541
+ const endX = startX + gapOffset;
2542
+ await humanMove(startX, startY, p);
2543
+ await p.waitForTimeout(150 + Math.random() * 250);
2544
+ await p.mouse.down();
2545
+ await p.waitForTimeout(200 + Math.random() * 300);
2546
+ const steps = 25 + Math.floor(Math.random() * 20);
2547
+ for (let i = 1; i <= steps; i++) {
2548
+ const progress = i / steps;
2549
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2550
+ const x = startX + gapOffset * eased + (Math.random() - 0.5) * 2;
2551
+ const y = startY + (Math.random() - 0.5) * 2;
2552
+ await p.mouse.move(x, y);
2553
+ await p.waitForTimeout(10 + Math.random() * 20);
2554
+ }
2555
+ await p.mouse.move(endX, startY);
2556
+ await p.waitForTimeout(150);
2557
+ await p.mouse.up();
2558
+ await p.waitForTimeout(2000);
2559
+ results.push('[OK] Slider auto-dragged. Check page state to confirm.');
2560
+ }
2561
+ catch (e) {
2562
+ results.push(`Auto-drag failed: ${e.message}. Use: drag-to target=".geetest_slider_button" value="${gapOffset},0"`);
2563
+ }
2564
+ }
2565
+ else if (gapOffset === null) {
2566
+ results.push('Could not determine gap position. Use "drag-to" manually with estimated offset.');
1935
2567
  }
1936
2568
  if (sliderInfo.allGeeTestClasses?.length > 0) {
1937
2569
  results.push('');