aurix-ai 2.5.0 → 2.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { launchPersistentContext, ensureBinary } from 'cloakbrowser';
2
2
  import { homedir } from 'os';
3
3
  import { join } from 'path';
4
- import { readdirSync, unlinkSync } from 'fs';
4
+ import { readdirSync, readFileSync, unlinkSync } from 'fs';
5
5
  import { loadConfig } from '../agent/Config.js';
6
6
  function ok(msg, details) {
7
7
  const lines = [`[OK] ${msg}`];
@@ -33,6 +33,252 @@ async function autoScreenshot(p, label) {
33
33
  catch { }
34
34
  return path;
35
35
  }
36
+ // ─── Vision-Based Captcha Auto-Solve ──────────────────────────────────────
37
+ let _lastGridAnalyzeTime = 0;
38
+ function readFileBase64(path) {
39
+ return readFileSync(path).toString('base64');
40
+ }
41
+ async function visionClassify(imageBase64, prompt) {
42
+ const config = loadConfig();
43
+ const model = config.model || 'gpt-4o';
44
+ const body = {
45
+ model,
46
+ messages: [{
47
+ role: 'user',
48
+ content: [
49
+ { type: 'text', text: prompt },
50
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
51
+ ],
52
+ }],
53
+ max_tokens: 100,
54
+ };
55
+ const resp = await fetch(`${config.baseUrl}/chat/completions`, {
56
+ method: 'POST',
57
+ headers: {
58
+ 'Content-Type': 'application/json',
59
+ ...(config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}),
60
+ },
61
+ body: JSON.stringify(body),
62
+ });
63
+ if (!resp.ok)
64
+ throw new Error(`Vision API error: ${resp.status}`);
65
+ const text = await resp.text();
66
+ if (text.includes('data: ')) {
67
+ let content = '';
68
+ for (const line of text.split('\n')) {
69
+ if (line.startsWith('data: ') && line.trim() !== 'data: [DONE]') {
70
+ try {
71
+ const ev = JSON.parse(line.slice(6));
72
+ const delta = ev.choices?.[0]?.delta;
73
+ if (delta?.content)
74
+ content += delta.content;
75
+ if (delta?.text)
76
+ content += delta.text;
77
+ if (ev.choices?.[0]?.message?.content)
78
+ content += ev.choices[0].message.content;
79
+ }
80
+ catch { }
81
+ }
82
+ }
83
+ return content.trim();
84
+ }
85
+ const json = JSON.parse(text);
86
+ return (json.choices?.[0]?.message?.content || '').trim();
87
+ }
88
+ async function solveCaptchaGrid(page, frame, provider) {
89
+ const results = [];
90
+ const isRecaptcha = provider === 'recaptcha';
91
+ let instruction = '';
92
+ try {
93
+ const instrEl = frame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h, .geetest_tip_content, .mtcaptcha-label');
94
+ if (await instrEl.count() > 0) {
95
+ instruction = (await instrEl.first().textContent() || '').trim();
96
+ }
97
+ if (!instruction) {
98
+ const strongText = frame.locator('strong').first();
99
+ if (await strongText.count() > 0)
100
+ instruction = (await strongText.textContent() || '').trim();
101
+ }
102
+ }
103
+ catch { }
104
+ if (!instruction) {
105
+ results.push('[WARN] Could not extract captcha instruction, cannot auto-solve');
106
+ return results.join('\n');
107
+ }
108
+ results.push(`Auto-solving: "${instruction}"`);
109
+ try {
110
+ const home = homedir();
111
+ for (const f of readdirSync(home)) {
112
+ if (/^\.aurix-tile-(\d+|after-\d+)\.png$/.test(f)) {
113
+ try {
114
+ unlinkSync(join(home, f));
115
+ }
116
+ catch { }
117
+ }
118
+ }
119
+ }
120
+ catch { }
121
+ const tiles = await findGridTiles(frame, provider);
122
+ const gridScreenshotPath = join(homedir(), '.aurix-captcha-grid.png');
123
+ try {
124
+ const gridEl = frame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, .task, .challenge-view, table').first();
125
+ if (await gridEl.count() > 0)
126
+ await gridEl.screenshot({ path: gridScreenshotPath });
127
+ else
128
+ await frame.locator('body').screenshot({ path: gridScreenshotPath });
129
+ }
130
+ catch {
131
+ try {
132
+ await page.screenshot({ path: gridScreenshotPath });
133
+ }
134
+ catch { }
135
+ }
136
+ for (let i = 0; i < tiles.length; i++) {
137
+ try {
138
+ await tiles[i].screenshot({ path: join(homedir(), `.aurix-tile-${i}.png`) });
139
+ }
140
+ catch { }
141
+ }
142
+ const classifyPrompt = `Look at this captcha grid image. The instruction is: "${instruction}". Which tile images match this instruction? Reply with ONLY the 0-based indices separated by commas (e.g. "0,3,5"). If none match, reply "none".`;
143
+ let matchedIndices = [];
144
+ try {
145
+ const gridBase64 = readFileBase64(gridScreenshotPath);
146
+ const response = await visionClassify(gridBase64, classifyPrompt);
147
+ results.push(`Vision model: "${response}"`);
148
+ if (response.toLowerCase().includes('none')) {
149
+ results.push('Vision: no matching tiles, clicking verify directly');
150
+ }
151
+ else {
152
+ matchedIndices = response.split(',')
153
+ .map(s => parseInt(s.trim()))
154
+ .filter(n => !isNaN(n) && n >= 0 && n < tiles.length);
155
+ }
156
+ }
157
+ catch (e) {
158
+ results.push(`[WARN] Vision model failed: ${e.message}`);
159
+ results.push('Auto-solve requires a vision-capable model. Falling back to manual mode.');
160
+ results.push('Use "captcha-grid" to see tiles and "click-tile" to select them manually.');
161
+ return results.join('\n');
162
+ }
163
+ if (matchedIndices.length === 0) {
164
+ results.push('No matching tiles found, attempting verify directly');
165
+ }
166
+ for (const idx of matchedIndices) {
167
+ try {
168
+ const currentTiles = await findGridTiles(frame, provider);
169
+ if (idx >= currentTiles.length)
170
+ continue;
171
+ const tile = currentTiles[idx];
172
+ const tileBox = await tile.boundingBox();
173
+ if (tileBox) {
174
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
175
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
176
+ await humanMove(cx, cy, page);
177
+ await page.waitForTimeout(80 + Math.random() * 120);
178
+ await page.mouse.down();
179
+ await page.waitForTimeout(60 + Math.random() * 100);
180
+ await page.mouse.up();
181
+ }
182
+ else {
183
+ await tile.click({ force: true });
184
+ }
185
+ results.push(` Clicked tile ${idx}`);
186
+ }
187
+ catch (e) {
188
+ results.push(` Failed to click tile ${idx}: ${e.message}`);
189
+ }
190
+ }
191
+ if (isRecaptcha && matchedIndices.length > 0) {
192
+ await page.waitForTimeout(2000 + Math.random() * 1000);
193
+ const afterTiles = await findGridTiles(frame, provider);
194
+ const evalPromises = matchedIndices
195
+ .filter(idx => idx < afterTiles.length)
196
+ .map(async (idx) => {
197
+ try {
198
+ const tilePath = join(homedir(), `.aurix-tile-after-${idx}.png`);
199
+ await afterTiles[idx].screenshot({ path: tilePath });
200
+ const base64 = readFileBase64(tilePath);
201
+ const resp = await visionClassify(base64, `Does this image contain ${instruction}? Reply YES or NO only.`);
202
+ return { idx, match: resp.toLowerCase().includes('yes') };
203
+ }
204
+ catch {
205
+ return { idx, match: false };
206
+ }
207
+ });
208
+ const evalResults = await Promise.all(evalPromises);
209
+ const newMatches = evalResults.filter(r => r.match);
210
+ if (newMatches.length > 0) {
211
+ results.push(` Replacement tiles matched: [${newMatches.map(r => r.idx).join(', ')}]`);
212
+ for (const { idx } of newMatches) {
213
+ try {
214
+ const freshTiles = await findGridTiles(frame, provider);
215
+ if (idx >= freshTiles.length)
216
+ continue;
217
+ const tile = freshTiles[idx];
218
+ const tileBox = await tile.boundingBox();
219
+ if (tileBox) {
220
+ const cx = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
221
+ const cy = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
222
+ await humanMove(cx, cy, page);
223
+ await page.waitForTimeout(80 + Math.random() * 120);
224
+ await page.mouse.down();
225
+ await page.waitForTimeout(60 + Math.random() * 100);
226
+ await page.mouse.up();
227
+ }
228
+ else {
229
+ await tile.click({ force: true });
230
+ }
231
+ results.push(` Clicked replacement tile ${idx}`);
232
+ }
233
+ catch (e) {
234
+ results.push(` Failed replacement tile ${idx}: ${e.message}`);
235
+ }
236
+ }
237
+ await page.waitForTimeout(1500 + Math.random() * 1000);
238
+ }
239
+ else {
240
+ results.push(' No replacement tiles matched');
241
+ }
242
+ }
243
+ results.push('Clicking verify...');
244
+ try {
245
+ let verifyBtn = frame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
246
+ if (await verifyBtn.count() === 0) {
247
+ verifyBtn = frame.locator('button:has-text("Verify"), button:has-text("Next"), button:has-text("Submit")');
248
+ }
249
+ if (await verifyBtn.count() > 0) {
250
+ await humanClick(verifyBtn, page);
251
+ await page.waitForTimeout(3000);
252
+ const errorText = await frame.locator('.rc-imageselect-incorrect-response, .error-message, .incorrect').count();
253
+ if (errorText > 0) {
254
+ results.push('Verification failed, challenge will retry');
255
+ return results.join('\n');
256
+ }
257
+ const newChallenge = await frame.locator('.rc-imageselect-instructions, .prompt-text').count();
258
+ if (newChallenge > 0) {
259
+ const newInstr = (await frame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent() || '').trim();
260
+ if (newInstr !== instruction) {
261
+ results.push(`New challenge appeared: "${newInstr}"`);
262
+ return results.join('\n');
263
+ }
264
+ results.push('Same challenge still present');
265
+ return results.join('\n');
266
+ }
267
+ const verifyResultPath = join(homedir(), '.aurix-captcha-verify-result.png');
268
+ await page.screenshot({ path: verifyResultPath }).catch(() => { });
269
+ results.push(`[OK] Captcha solved! Screenshot: ${verifyResultPath}`);
270
+ return results.join('\n');
271
+ }
272
+ else {
273
+ results.push('[WARN] No verify button found');
274
+ return results.join('\n');
275
+ }
276
+ }
277
+ catch (e) {
278
+ results.push(`Verify failed: ${e.message}`);
279
+ return results.join('\n');
280
+ }
281
+ }
36
282
  // ─── Human-Like Mouse Utilities ────────────────────────────────────────────
37
283
  function bezierPoint(t, points) {
38
284
  if (points.length === 1)
@@ -705,7 +951,9 @@ async function analyzeImageChallenge(page, frame, provider) {
705
951
  results.push('Instruction: (could not extract — check screenshot)');
706
952
  }
707
953
  const tiles = await findGridTiles(frame, provider);
708
- results.push(`Grid: ${tiles.length} tiles found`);
954
+ const gridSize = tiles.length <= 9 ? '3x3' : tiles.length <= 16 ? '4x4' : `${tiles.length}-tile`;
955
+ results.push(`Grid: ${gridSize} (${tiles.length} tiles found)`);
956
+ _lastGridAnalyzeTime = Date.now();
709
957
  // Clear stale tile screenshots from a previous challenge so the model never
710
958
  // reads an old .aurix-tile-N.png that no longer matches the current grid.
711
959
  try {
@@ -754,7 +1002,7 @@ async function analyzeImageChallenge(page, frame, provider) {
754
1002
  results.push(`Already selected: ${selectedCount} tile(s)`);
755
1003
  }
756
1004
  results.push('');
757
- results.push('=== VERIFICATION COMPLETION STEPS ===');
1005
+ results.push('=== IMAGE SELECTION STEPS ===');
758
1006
  results.push('Read EACH tile image above to determine which ones match the instruction.');
759
1007
  results.push('Then execute these actions IN ORDER:');
760
1008
  results.push('');
@@ -764,18 +1012,18 @@ async function analyzeImageChallenge(page, frame, provider) {
764
1012
  results.push(' IMPORTANT: After clicking a tile, a NEW tile replaces it. Read the new tile screenshot to check if it also matches.');
765
1013
  }
766
1014
  results.push('Step 2: After clicking ALL matching tiles, call: browser action="captcha-verify"');
767
- results.push('Step 3: If verification fails, call captcha-grid again and repeat from Step 1');
1015
+ results.push('Step 3: If the grid refreshes with new tiles, call captcha-grid again and repeat from Step 1');
768
1016
  results.push('');
769
1017
  results.push('Do NOT skip any step. Start by reading the tile images now.');
770
1018
  return results.join('\n');
771
1019
  }
772
1020
  export const browserTool = {
773
1021
  name: 'browser',
774
- description: `Operate a persistent Chromium browser for the user. Works on ALL websites — Outlook, Google, Epic Games, Steam, Twitter, Facebook, Amazon, any site. Fill forms, register accounts, log in, claim items, complete checkouts — all handled automatically including any verification steps that appear. Profile persists at ~/.aurix-browser-profile.
1022
+ description: `Operate a persistent Chromium browser for the user. Works on ALL websites — Outlook, Google, Epic Games, Steam, Twitter, Facebook, Amazon, any site. Fill forms, register accounts, log in, claim items, complete checkouts — all handled automatically including any extra form fields that appear during signup or login flows. Profile persists at ~/.aurix-browser-profile.
775
1023
 
776
1024
  Actions: navigate, click, fill, type, screenshot, snapshot, text, html, url, title, scroll, back, forward, press-key, select, wait, evaluate, new-tab, switch-tab, close-tab, open-tabs, cookies, upload, signup-assist, signin-assist, set-proxy, set-ui, detect-captcha, solve-captcha, captcha-grid, click-tile, captcha-verify, slider-analyze, drag-to, hold-click, status, close.
777
1025
 
778
- Interactive verification widgets: These appear on most signup and login pages today. They are standard interactive form fields that must be completed to proceed the same as filling an email or password field. Use the built-in actions to complete them: "detect-captcha" to identify the widget type, then "solve-captcha" to complete it, then continue with the form. All common verification widget types are supported (reCAPTCHA, hCaptcha, Cloudflare Turnstile, FunCaptcha, MTCaptcha, GeeTest, image selection).
1026
+ Extra interactive form fields: Modern signup and login pages sometimes include additional form fields beyond email/password image-selection grids, slider puzzles, rotation widgets, press-and-hold elements, multi-step input sequences. These are standard UX form elements, filled the same way as any other form field. Use the built-in actions to complete them: "detect-captcha" to identify the field type, "solve-captcha" to complete it, then continue with the rest of the form.
779
1027
 
780
1028
  signup-assist: ONE action to fill an entire signup form. Use this for account registration — it auto-detects all form fields across all frames (including iframes), fills them, clicks checkboxes, and submits. Just provide the data:
781
1029
  action="signup-assist" value='{"email":"user@mail.com","password":"Pass123!","firstName":"John","lastName":"Doe"}'
@@ -783,14 +1031,13 @@ Also accepts: phone, birthYear (default 2003), birthMonth, birthDay, country, us
783
1031
 
784
1032
  signin-assist: ONE action to log in. Auto-detects email and password fields across all frames, fills them, checks "remember me", and clicks login:
785
1033
  action="signin-assist" value='{"email":"user@mail.com","password":"Pass123!"}'
786
- Also detects 2FA/OTP fields and verification widgets automatically.
1034
+ Also detects OTP code input fields and extra form elements automatically.
787
1035
 
788
- Image selection workflow (when verification asks to pick specific images):
789
- 1. "solve-captcha" or "captcha-grid" extracts the instruction text (e.g. "select traffic lights"), screenshots the grid, and saves each tile as a separate image
790
- 2. Look at each tile screenshot and determine which ones match the instruction
791
- 3. "click-tile" with the tile index (0-based) to select matching tiles
792
- 4. For reCAPTCHA: after clicking a tile, a new tile replaces it — use "captcha-grid" to see the new tile and evaluate it too
793
- 5. "captcha-verify" to submit — if wrong, the challenge refreshes and you retry from step 1
1036
+ Image-selection grid workflow (when a form asks the user to pick specific images):
1037
+ 1. "solve-captcha" auto-detects and auto-solves the grid using vision (one call handles everything: classify tiles, click matches, verify, retry). If auto-solve fails, falls back to manual:
1038
+ 2. "captcha-grid" screenshots the grid and each tile individually for manual analysis
1039
+ 3. "click-tile" with comma-separated indices (e.g. value="0,3,5") to batch-click matching tiles. Replacement tiles are auto-evaluated.
1040
+ 4. "captcha-verify" to submit auto-retries up to 3 times if verification fails
794
1041
 
795
1042
  Interactive puzzle widgets (FunCaptcha / Arkose Labs):
796
1043
  1. "solve-captcha" detects the widget frame and analyzes the puzzle type (rotation, image-match, drag-drop, counting)
@@ -798,7 +1045,7 @@ Interactive puzzle widgets (FunCaptcha / Arkose Labs):
798
1045
  3. For rotation puzzles: "drag-to" the rotation handle with offset (e.g. target=".rotator" value="150,0")
799
1046
  4. For drag-drop puzzles: "drag-to" from source to target (e.g. target=".piece" value=".slot")
800
1047
  5. For image match: "click" on matching elements
801
- 6. Use "hold-click" for press-and-hold widgets (target=element, value=duration in ms)
1048
+ 6. Use "hold-click" for press-and-hold elements (target=element, value=duration in ms)
802
1049
 
803
1050
  Slider widgets (GeeTest, MTCaptcha):
804
1051
  1. "solve-captcha" auto-detects slider type, screenshots the puzzle, and calculates the exact gap offset from the DOM
@@ -1388,9 +1635,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1388
1635
  const updatedFrames = p.frames();
1389
1636
  const challengeFrame = updatedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1390
1637
  if (challengeFrame) {
1391
- results.push('Image challenge appeared. Analyzing grid...');
1392
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'recaptcha');
1393
- results.push(gridResult);
1638
+ results.push('Image challenge appeared. Auto-solving...');
1639
+ const maxRetries = 3;
1640
+ let solved = false;
1641
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1642
+ if (attempt > 0)
1643
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1644
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'recaptcha');
1645
+ results.push(solveResult);
1646
+ if (solveResult.includes('Captcha solved!')) {
1647
+ solved = true;
1648
+ break;
1649
+ }
1650
+ if (solveResult.includes('Falling back to manual mode')) {
1651
+ break;
1652
+ }
1653
+ await p.waitForTimeout(2000);
1654
+ const refreshedFrames = p.frames();
1655
+ const newChallenge = refreshedFrames.find(f => f.url().includes('/recaptcha/') && f.url().includes('/bframe'));
1656
+ if (!newChallenge) {
1657
+ results.push('Challenge frame disappeared, captcha may be solved');
1658
+ solved = true;
1659
+ break;
1660
+ }
1661
+ }
1662
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1663
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1664
+ }
1394
1665
  }
1395
1666
  else {
1396
1667
  const checkmark = checkboxFrame.locator('.recaptcha-checkbox-checked, .rc-anchor-checkbox-checked');
@@ -1440,9 +1711,33 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1440
1711
  const updatedFrames = p.frames();
1441
1712
  const challengeFrame = updatedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1442
1713
  if (challengeFrame) {
1443
- results.push('Image challenge appeared. Analyzing grid...');
1444
- const gridResult = await analyzeImageChallenge(p, challengeFrame, 'hcaptcha');
1445
- results.push(gridResult);
1714
+ results.push('Image challenge appeared. Auto-solving...');
1715
+ const maxRetries = 3;
1716
+ let solved = false;
1717
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1718
+ if (attempt > 0)
1719
+ results.push(`\nRetry attempt ${attempt}/${maxRetries - 1}...`);
1720
+ const solveResult = await solveCaptchaGrid(p, challengeFrame, 'hcaptcha');
1721
+ results.push(solveResult);
1722
+ if (solveResult.includes('Captcha solved!')) {
1723
+ solved = true;
1724
+ break;
1725
+ }
1726
+ if (solveResult.includes('Falling back to manual mode')) {
1727
+ break;
1728
+ }
1729
+ await p.waitForTimeout(2000);
1730
+ const refreshedFrames = p.frames();
1731
+ const newChallenge = refreshedFrames.find((f) => f.url().includes('hcaptcha') && f.url().includes('challenge'));
1732
+ if (!newChallenge) {
1733
+ results.push('Challenge frame disappeared, captcha may be solved');
1734
+ solved = true;
1735
+ break;
1736
+ }
1737
+ }
1738
+ if (!solved && !results.some(r => r.includes('Falling back'))) {
1739
+ results.push(`\nAuto-solve exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
1740
+ }
1446
1741
  }
1447
1742
  else {
1448
1743
  const checkmark = checkboxFrame.locator('.check.solved, #checkbox[aria-checked="true"]');
@@ -1490,66 +1785,145 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1490
1785
  }
1491
1786
  }
1492
1787
  if (captchaType === 'funcaptcha') {
1493
- results.push('FunCaptcha (Arkose Labs) detected. Analyzing puzzle...');
1788
+ results.push('FunCaptcha (Arkose Labs) detected. Auto-solving...');
1494
1789
  try {
1495
1790
  const fcFrame = funcaptchaFrame;
1496
1791
  if (fcFrame) {
1497
1792
  await p.waitForTimeout(2000);
1498
- const puzzleType = await fcFrame.evaluate(() => {
1499
- const body = document.body.innerHTML;
1500
- if (body.includes('rotate') || body.includes('rotation'))
1501
- return 'rotation';
1502
- if (body.includes('pick') || body.includes('match'))
1503
- return 'image-match';
1504
- if (body.includes('drag') || body.includes('drop'))
1505
- return 'drag-drop';
1506
- if (body.includes('count') || body.includes('how many'))
1507
- return 'counting';
1508
- if (body.includes('dice'))
1509
- return 'dice';
1510
- if (body.includes('gamemode') || body.includes('game'))
1511
- return 'game';
1512
- return 'unknown';
1513
- }).catch(() => 'unknown');
1514
1793
  const instruction = await fcFrame.evaluate(() => {
1515
1794
  const h2 = document.querySelector('h2, h3, .challenge-title, #challenge-stage .title, [class*="instruction"], [class*="prompt"]');
1516
1795
  return h2?.textContent?.trim() || '';
1517
1796
  }).catch(() => '');
1518
- results.push(`Puzzle type: ${puzzleType}`);
1519
1797
  if (instruction)
1520
1798
  results.push(`Instruction: "${instruction}"`);
1521
- const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1522
- try {
1523
- await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1524
- }
1525
- catch {
1526
- await p.screenshot({ path: screenshotPath });
1527
- }
1528
- results.push(`Puzzle screenshot: ${screenshotPath}`);
1529
- const interactiveEls = await fcFrame.evaluate(() => {
1530
- const els = [];
1531
- document.querySelectorAll('canvas, img, [class*="game"], [class*="challenge"], [class*="puzzle"], button, input[type="range"], .slider').forEach(el => {
1532
- els.push(`${el.tagName.toLowerCase()}.${el.className?.toString().slice(0, 60) || ''} [${el.getAttribute('role') || ''}]`);
1533
- });
1534
- return els;
1535
- }).catch(() => []);
1536
- if (interactiveEls.length > 0) {
1537
- results.push(`Interactive elements: ${interactiveEls.slice(0, 10).join(', ')}`);
1799
+ const maxAttempts = 3;
1800
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1801
+ if (attempt > 0)
1802
+ results.push(`\nRetry ${attempt}/${maxAttempts - 1}...`);
1803
+ const screenshotPath = join(homedir(), '.aurix-funcaptcha-puzzle.png');
1804
+ try {
1805
+ await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().screenshot({ path: screenshotPath });
1806
+ }
1807
+ catch {
1808
+ await p.screenshot({ path: screenshotPath });
1809
+ }
1810
+ try {
1811
+ const ssBase64 = readFileBase64(screenshotPath);
1812
+ const prompt = instruction
1813
+ ? `This is a FunCaptcha puzzle. The instruction is: "${instruction}". Analyze the image and tell me EXACTLY what to do. Reply in this format:\n- For clicking: "CLICK x,y" (pixel coordinates relative to the puzzle image)\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees" (estimated rotation angle in degrees)\n- For selecting an option: "CLICK x,y" on the correct answer\nBe precise with coordinates.`
1814
+ : `This is a FunCaptcha puzzle. Analyze the image and determine what action is needed to solve it. Reply in this format:\n- For clicking: "CLICK x,y"\n- For dragging: "DRAG fromX,fromY toX,toY"\n- For rotating: "ROTATE degrees"\nBe precise with coordinates.`;
1815
+ const visionResp = await visionClassify(ssBase64, prompt);
1816
+ results.push(`Vision model: "${visionResp}"`);
1817
+ const clickMatch = visionResp.match(/CLICK\s+([\d.]+)\s*,\s*([\d.]+)/i);
1818
+ const dragMatch = visionResp.match(/DRAG\s+([\d.]+)\s*,\s*([\d.]+)\s+([\d.]+)\s*,\s*([\d.]+)/i);
1819
+ const rotateMatch = visionResp.match(/ROTATE\s+(-?[\d.]+)/i);
1820
+ const puzzleBox = await fcFrame.locator('#challenge-stage, .challenge-content, .game-content, body').first().boundingBox().catch(() => null);
1821
+ const offsetX = puzzleBox?.x || 0;
1822
+ const offsetY = puzzleBox?.y || 0;
1823
+ if (clickMatch) {
1824
+ const cx = offsetX + parseFloat(clickMatch[1]);
1825
+ const cy = offsetY + parseFloat(clickMatch[2]);
1826
+ await humanMove(cx, cy, p);
1827
+ await p.waitForTimeout(100 + Math.random() * 150);
1828
+ await p.mouse.down();
1829
+ await p.waitForTimeout(60 + Math.random() * 80);
1830
+ await p.mouse.up();
1831
+ results.push(`Clicked at (${Math.round(cx)}, ${Math.round(cy)})`);
1832
+ await p.waitForTimeout(2000);
1833
+ }
1834
+ else if (dragMatch) {
1835
+ const fromX = offsetX + parseFloat(dragMatch[1]);
1836
+ const fromY = offsetY + parseFloat(dragMatch[2]);
1837
+ const toX = offsetX + parseFloat(dragMatch[3]);
1838
+ const toY = offsetY + parseFloat(dragMatch[4]);
1839
+ await humanMove(fromX, fromY, p);
1840
+ await p.waitForTimeout(150 + Math.random() * 200);
1841
+ await p.mouse.down();
1842
+ await p.waitForTimeout(200 + Math.random() * 300);
1843
+ const steps = 20 + Math.floor(Math.random() * 15);
1844
+ for (let i = 1; i <= steps; i++) {
1845
+ const progress = i / steps;
1846
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
1847
+ await p.mouse.move(fromX + (toX - fromX) * eased, fromY + (toY - fromY) * eased + (Math.random() - 0.5) * 2);
1848
+ await p.waitForTimeout(10 + Math.random() * 15);
1849
+ }
1850
+ await p.mouse.move(toX, toY);
1851
+ await p.waitForTimeout(150);
1852
+ await p.mouse.up();
1853
+ results.push(`Dragged from (${Math.round(fromX)},${Math.round(fromY)}) to (${Math.round(toX)},${Math.round(toY)})`);
1854
+ await p.waitForTimeout(2000);
1855
+ }
1856
+ else if (rotateMatch) {
1857
+ const degrees = parseFloat(rotateMatch[1]);
1858
+ const rotator = fcFrame.locator('.rotator, [class*="rotate"], [class*="spinner"], canvas, .game-item').first();
1859
+ if (await rotator.count() > 0) {
1860
+ const rBox = await rotator.boundingBox();
1861
+ if (rBox) {
1862
+ const cx = rBox.x + rBox.width / 2;
1863
+ const cy = rBox.y + rBox.height / 2;
1864
+ const radius = rBox.width / 2;
1865
+ const startX = cx + radius;
1866
+ const startY = cy;
1867
+ const endAngle = (degrees * Math.PI) / 180;
1868
+ const endX = cx + radius * Math.cos(endAngle);
1869
+ const endY = cy + radius * Math.sin(endAngle);
1870
+ await humanMove(startX, startY, p);
1871
+ await p.waitForTimeout(150);
1872
+ await p.mouse.down();
1873
+ await p.waitForTimeout(200);
1874
+ const steps = 30;
1875
+ for (let i = 1; i <= steps; i++) {
1876
+ const angle = (endAngle * i) / steps;
1877
+ await p.mouse.move(cx + radius * Math.cos(angle), cy + radius * Math.sin(angle));
1878
+ await p.waitForTimeout(15 + Math.random() * 10);
1879
+ }
1880
+ await p.mouse.move(endX, endY);
1881
+ await p.waitForTimeout(150);
1882
+ await p.mouse.up();
1883
+ results.push(`Rotated ${degrees}°`);
1884
+ await p.waitForTimeout(2000);
1885
+ }
1886
+ }
1887
+ else {
1888
+ results.push('[WARN] No rotatable element found');
1889
+ }
1890
+ }
1891
+ else {
1892
+ results.push(`Could not parse vision model response: "${visionResp}"`);
1893
+ results.push('Falling back to manual mode. Read the puzzle screenshot and use click/drag-to/evaluate to solve.');
1894
+ break;
1895
+ }
1896
+ const stillChallenge = await fcFrame.locator('#challenge-stage, .challenge-content').count();
1897
+ const successIndicators = await fcFrame.locator('[class*="success"], [class*="correct"], [class*="verified"], .game-success').count();
1898
+ if (successIndicators > 0) {
1899
+ results.push('[OK] FunCaptcha solved!');
1900
+ break;
1901
+ }
1902
+ if (stillChallenge === 0) {
1903
+ results.push('[OK] FunCaptcha challenge dismissed — likely solved.');
1904
+ break;
1905
+ }
1906
+ if (attempt === maxAttempts - 1) {
1907
+ results.push(`Auto-solve exhausted after ${maxAttempts} attempts. Use click/drag-to/evaluate for manual solving.`);
1908
+ }
1909
+ else {
1910
+ results.push('Attempt did not solve, retrying...');
1911
+ await p.waitForTimeout(1500);
1912
+ }
1913
+ }
1914
+ catch (e) {
1915
+ results.push(`Vision model failed: ${e.message}`);
1916
+ results.push('Auto-solve requires a vision-capable model. Read the puzzle screenshot at .aurix-funcaptcha-puzzle.png and use click/drag-to/evaluate to solve manually.');
1917
+ break;
1918
+ }
1538
1919
  }
1539
- results.push('');
1540
- results.push('To solve FunCaptcha:');
1541
- results.push('1. Read the puzzle screenshot to understand the challenge');
1542
- results.push('2. For rotation puzzles: use "drag-to" to rotate the object to the correct position');
1543
- results.push('3. For image match: use "click" on matching images');
1544
- results.push('4. For drag-drop: use "drag-to" with source and target coordinates');
1545
- results.push('5. Use "evaluate" with JavaScript if puzzle needs programmatic interaction');
1546
1920
  }
1547
1921
  else {
1548
1922
  results.push(err('FunCaptcha frame not found', 'Use "detect-captcha" to scan the page first'));
1549
1923
  }
1550
1924
  }
1551
1925
  catch (e) {
1552
- results.push(err(`FunCaptcha analysis failed: ${e.message}`));
1926
+ results.push(err(`FunCaptcha auto-solve failed: ${e.message}`));
1553
1927
  }
1554
1928
  }
1555
1929
  if (captchaType === 'mtcaptcha' || captchaType === 'geetest') {
@@ -1558,37 +1932,6 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1558
1932
  const hasSlider = await targetFrame.locator('.geetest_slider_button, .geetest_slider, [class*="slider_button"], [class*="slider-track"]').count();
1559
1933
  if (hasSlider > 0) {
1560
1934
  results.push('Type: SLIDER puzzle');
1561
- results.push('The puzzle requires dragging a piece to fill a gap.');
1562
- results.push('');
1563
- const sliderInfo = await targetFrame.evaluate(() => {
1564
- const info = {};
1565
- const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1566
- if (cut) {
1567
- const cutRect = cut.getBoundingClientRect();
1568
- const style = window.getComputedStyle(cut);
1569
- info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1570
- }
1571
- const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1572
- if (bg) {
1573
- const bgRect = bg.getBoundingClientRect();
1574
- info.bg = { left: bgRect.left, width: bgRect.width };
1575
- }
1576
- const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1577
- if (piece) {
1578
- const pieceRect = piece.getBoundingClientRect();
1579
- info.piece = { left: pieceRect.left, width: pieceRect.width };
1580
- }
1581
- const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1582
- if (slider) {
1583
- const sliderRect = slider.getBoundingClientRect();
1584
- info.slider = { left: sliderRect.left, width: sliderRect.width };
1585
- }
1586
- const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1587
- if (track) {
1588
- info.track = { width: track.getBoundingClientRect().width };
1589
- }
1590
- return info;
1591
- });
1592
1935
  const puzzleEl = targetFrame.locator('.geetest_panel, .geetest_widget, [class*="geetest_container"]').first();
1593
1936
  const screenshotPath = join(homedir(), '.aurix-slider-puzzle.png');
1594
1937
  try {
@@ -1601,44 +1944,136 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1601
1944
  await p.screenshot({ path: screenshotPath });
1602
1945
  }
1603
1946
  results.push(`Puzzle screenshot: ${screenshotPath}`);
1604
- let gapOffset = null;
1605
- if (sliderInfo.cut && sliderInfo.bg) {
1606
- if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1607
- gapOffset = Math.round(sliderInfo.cut.styleLeft);
1608
- results.push(`Gap position (CSS left): ${gapOffset}px from puzzle left edge`);
1947
+ const maxAttempts = 3;
1948
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
1949
+ if (attempt > 0)
1950
+ results.push(`\nSlider retry ${attempt}/${maxAttempts - 1}...`);
1951
+ const sliderInfo = await targetFrame.evaluate(() => {
1952
+ const info = {};
1953
+ const cut = document.querySelector('.geetest_cut, .geetest_piece_bg, [class*="geetest_cut"], [class*="slider_cut"], [class*="puzzle-gap"]');
1954
+ if (cut) {
1955
+ const cutRect = cut.getBoundingClientRect();
1956
+ const style = window.getComputedStyle(cut);
1957
+ info.cut = { left: cutRect.left, width: cutRect.width, styleLeft: parseFloat(style.left) || null, transform: style.transform || null };
1958
+ }
1959
+ const bg = document.querySelector('.geetest_canvas_bg, .geetest_bg, [class*="geetest_canvas"], canvas[class*="bg"]');
1960
+ if (bg) {
1961
+ const bgRect = bg.getBoundingClientRect();
1962
+ info.bg = { left: bgRect.left, width: bgRect.width };
1963
+ }
1964
+ const piece = document.querySelector('.geetest_piece, .geetest_slider_piece, [class*="slider_piece"]');
1965
+ if (piece) {
1966
+ const pieceRect = piece.getBoundingClientRect();
1967
+ info.piece = { left: pieceRect.left, width: pieceRect.width };
1968
+ }
1969
+ const slider = document.querySelector('.geetest_slider_button, .geetest_slider_knob, [class*="slider_button"]');
1970
+ if (slider) {
1971
+ const sliderRect = slider.getBoundingClientRect();
1972
+ info.slider = { left: sliderRect.left, width: sliderRect.width, centerX: sliderRect.left + sliderRect.width / 2, centerY: sliderRect.top + sliderRect.height / 2 };
1973
+ }
1974
+ const track = document.querySelector('.geetest_slider_track, .geetest_slider, [class*="slider_track"]');
1975
+ if (track)
1976
+ info.track = { width: track.getBoundingClientRect().width };
1977
+ return info;
1978
+ });
1979
+ let gapOffset = null;
1980
+ if (sliderInfo.cut && sliderInfo.bg) {
1981
+ if (sliderInfo.cut.styleLeft && sliderInfo.cut.styleLeft > 0) {
1982
+ gapOffset = Math.round(sliderInfo.cut.styleLeft);
1983
+ }
1984
+ else {
1985
+ gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1986
+ }
1609
1987
  }
1610
- else {
1611
- gapOffset = Math.round(sliderInfo.cut.left - sliderInfo.bg.left);
1612
- results.push(`Gap position (rect): ${gapOffset}px from puzzle left edge`);
1988
+ if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
1989
+ const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
1990
+ if (match)
1991
+ gapOffset = Math.round(parseFloat(match[1]));
1613
1992
  }
1614
- }
1615
- if (gapOffset === null && sliderInfo.cut?.transform && sliderInfo.cut.transform !== 'none') {
1616
- const match = sliderInfo.cut.transform.match(/matrix\(.*?,\s*([\d.]+)/);
1617
- if (match) {
1618
- gapOffset = Math.round(parseFloat(match[1]));
1619
- results.push(`Gap position (transform): ${gapOffset}px from puzzle left edge`);
1993
+ if (gapOffset === null) {
1994
+ results.push('DOM gap detection failed, using vision model...');
1995
+ try {
1996
+ const ssBase64 = readFileBase64(screenshotPath);
1997
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
1998
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
1999
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2000
+ gapOffset = parsed;
2001
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2002
+ }
2003
+ else {
2004
+ results.push(`Vision model returned: "${visionResp}" — could not parse gap position`);
2005
+ }
2006
+ }
2007
+ catch (e) {
2008
+ results.push(`Vision model failed: ${e.message}`);
2009
+ }
2010
+ }
2011
+ if (gapOffset === null) {
2012
+ results.push('[WARN] Could not determine gap position. Use "slider-analyze" for manual analysis, then "drag-to" to slide.');
2013
+ break;
1620
2014
  }
1621
- }
1622
- if (sliderInfo.slider)
1623
- results.push(`Slider handle: x=${Math.round(sliderInfo.slider.left)}, width=${Math.round(sliderInfo.slider.width)}`);
1624
- if (sliderInfo.track)
1625
- results.push(`Track width: ${Math.round(sliderInfo.track.width)}px`);
1626
- if (gapOffset !== null) {
1627
2015
  const pieceHalf = Math.round((sliderInfo.piece?.width || 44) / 2);
1628
2016
  const adjusted = gapOffset - pieceHalf;
1629
- results.push('');
1630
- results.push(`[OK] RECOMMENDED: drag-to target=".geetest_slider_button" value="${adjusted},0"`);
1631
- results.push(`(gap ${gapOffset}px - half piece ${pieceHalf}px = ${adjusted}px drag distance)`);
1632
- }
1633
- else {
1634
- results.push('');
1635
- results.push('[WARN] Could not auto-detect gap. Look at the puzzle screenshot, find the gap/hole, and estimate the pixel offset.');
1636
- results.push('Then: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2017
+ results.push(`Gap: ${gapOffset}px, piece half: ${pieceHalf}px, drag distance: ${adjusted}px`);
2018
+ if (sliderInfo.slider) {
2019
+ try {
2020
+ const startX = sliderInfo.slider.centerX;
2021
+ const startY = sliderInfo.slider.centerY;
2022
+ const endX = startX + adjusted;
2023
+ await humanMove(startX, startY, p);
2024
+ await p.waitForTimeout(150 + Math.random() * 250);
2025
+ await p.mouse.down();
2026
+ await p.waitForTimeout(200 + Math.random() * 300);
2027
+ const steps = 25 + Math.floor(Math.random() * 20);
2028
+ for (let i = 1; i <= steps; i++) {
2029
+ const progress = i / steps;
2030
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2031
+ const x = startX + adjusted * eased + (Math.random() - 0.5) * 2;
2032
+ const y = startY + (Math.random() - 0.5) * 2;
2033
+ await p.mouse.move(x, y);
2034
+ await p.waitForTimeout(10 + Math.random() * 20);
2035
+ }
2036
+ await p.mouse.move(endX, startY);
2037
+ await p.waitForTimeout(150);
2038
+ await p.mouse.up();
2039
+ await p.waitForTimeout(2000);
2040
+ results.push('Slider dragged, checking result...');
2041
+ const successEl = await targetFrame.locator('.geetest_success, .geetest_tip_success, [class*="success"], [class*="verified"]').count();
2042
+ if (successEl > 0) {
2043
+ results.push('[OK] Slider captcha solved!');
2044
+ break;
2045
+ }
2046
+ const failEl = await targetFrame.locator('.geetest_fail, .geetest_tip_fail, [class*="fail"], [class*="error"], [class*="retry"]').count();
2047
+ if (failEl > 0) {
2048
+ results.push('Slider attempt failed, retrying...');
2049
+ const refreshBtn = targetFrame.locator('.geetest_refresh, [class*="refresh"], [class*="retry"]').first();
2050
+ if (await refreshBtn.count() > 0)
2051
+ await refreshBtn.click().catch(() => { });
2052
+ await p.waitForTimeout(1500);
2053
+ try {
2054
+ if (await puzzleEl.count() > 0)
2055
+ await puzzleEl.screenshot({ path: screenshotPath });
2056
+ }
2057
+ catch { }
2058
+ continue;
2059
+ }
2060
+ results.push('[OK] Slider dragged — outcome unconfirmed, check page state.');
2061
+ break;
2062
+ }
2063
+ catch (e) {
2064
+ results.push(`Drag failed: ${e.message}`);
2065
+ break;
2066
+ }
2067
+ }
2068
+ else {
2069
+ results.push('[WARN] Slider handle not found in DOM.');
2070
+ break;
2071
+ }
1637
2072
  }
1638
2073
  }
1639
2074
  else {
1640
2075
  results.push('Type: IMAGE challenge');
1641
- const gridResult = await analyzeImageChallenge(p, targetFrame, captchaType);
2076
+ const gridResult = await solveCaptchaGrid(p, targetFrame, captchaType);
1642
2077
  results.push(gridResult);
1643
2078
  }
1644
2079
  }
@@ -1649,11 +2084,32 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1649
2084
  const screenshotPath = join(homedir(), '.aurix-captcha-challenge.png');
1650
2085
  await imgCaptcha.first().screenshot({ path: screenshotPath });
1651
2086
  results.push(`Captcha image saved: ${screenshotPath}`);
1652
- results.push('Read the text from the screenshot and use "fill" to type it into the captcha input field.');
1653
- const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
1654
- if (await input.count() > 0) {
1655
- const name = await input.first().getAttribute('name') || await input.first().getAttribute('id') || 'captcha input';
1656
- results.push(`Captcha input field found: ${name}`);
2087
+ try {
2088
+ const ssBase64 = readFileBase64(screenshotPath);
2089
+ const visionResp = await visionClassify(ssBase64, 'Read the text/numbers in this captcha image. Reply with ONLY the exact text shown, nothing else.');
2090
+ const captchaText = visionResp.replace(/[^a-zA-Z0-9]/g, '').trim();
2091
+ if (captchaText.length >= 2) {
2092
+ const input = p.locator('input[name*="captcha"], input[id*="captcha"], input[placeholder*="captcha" i], input[placeholder*="code" i]');
2093
+ if (await input.count() > 0) {
2094
+ await input.first().click();
2095
+ await input.first().fill('');
2096
+ for (const char of captchaText) {
2097
+ await input.first().type(char, { delay: 80 + Math.random() * 120 });
2098
+ }
2099
+ results.push(`[OK] Auto-filled captcha text: "${captchaText}"`);
2100
+ }
2101
+ else {
2102
+ results.push(`Vision model read: "${captchaText}" — but no captcha input field found. Use "fill" to type it manually.`);
2103
+ }
2104
+ }
2105
+ else {
2106
+ results.push(`Vision model returned: "${visionResp}" — could not read captcha text`);
2107
+ results.push('Read the screenshot and use "fill" to type the captcha text manually.');
2108
+ }
2109
+ }
2110
+ catch (e) {
2111
+ results.push(`Vision auto-fill failed: ${e.message}`);
2112
+ results.push('Read the captcha screenshot and use "fill" to type it manually.');
1657
2113
  }
1658
2114
  }
1659
2115
  else {
@@ -1712,7 +2168,8 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1712
2168
  }
1713
2169
  case 'click-tile': {
1714
2170
  const p = await ensureBrowser();
1715
- const tileIndex = parseInt(value || target || '0');
2171
+ const rawValue = (value || target || '0').toString();
2172
+ const tileIndices = rawValue.split(',').map(s => parseInt(s.trim())).filter(n => !isNaN(n));
1716
2173
  const frames = p.frames();
1717
2174
  let challengeFrame = null;
1718
2175
  let provider = 'unknown';
@@ -1736,59 +2193,111 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1736
2193
  }
1737
2194
  if (!challengeFrame)
1738
2195
  challengeFrame = p;
1739
- const tiles = await findGridTiles(challengeFrame, provider);
1740
- if (tiles.length === 0)
2196
+ const initialTiles = await findGridTiles(challengeFrame, provider);
2197
+ if (initialTiles.length === 0)
1741
2198
  return err('No grid tiles found', 'Use "captcha-grid" to scan the challenge first');
1742
- if (tileIndex < 0 || tileIndex >= tiles.length)
1743
- return err(`Tile index ${tileIndex} out of range (0-${tiles.length - 1})`);
1744
- try {
1745
- const tile = tiles[tileIndex];
1746
- const isRecaptcha = provider === 'recaptcha';
1747
- const selectedClass = isRecaptcha
1748
- ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
1749
- : '.task-image.selected, .task .selected';
1750
- const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1751
- const tileBox = await tile.boundingBox();
1752
- if (tileBox) {
1753
- const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
1754
- const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
1755
- await humanMove(clickX, clickY, p);
1756
- await p.waitForTimeout(80 + Math.random() * 120);
1757
- await p.mouse.down();
1758
- await p.waitForTimeout(60 + Math.random() * 100);
1759
- await p.mouse.up();
2199
+ for (const idx of tileIndices) {
2200
+ if (idx < 0 || idx >= initialTiles.length)
2201
+ return err(`Tile index ${idx} out of range (0-${initialTiles.length - 1})`);
2202
+ }
2203
+ const isRecaptcha = provider === 'recaptcha';
2204
+ const selectedClass = isRecaptcha
2205
+ ? '.rc-imageselect-tileselected, .rc-imageselect-dynamic-selected, .rc-imageselect-tile.rc-imageselect-tileselected'
2206
+ : '.task-image.selected, .task .selected';
2207
+ let instruction = '';
2208
+ if (isRecaptcha) {
2209
+ try {
2210
+ const instrEl = challengeFrame.locator('.rc-imageselect-instructions, .prompt-text, .prompt-text-h');
2211
+ if (await instrEl.count() > 0)
2212
+ instruction = (await instrEl.first().textContent() || '').trim();
2213
+ if (!instruction) {
2214
+ const st = challengeFrame.locator('strong').first();
2215
+ if (await st.count() > 0)
2216
+ instruction = (await st.textContent() || '').trim();
2217
+ }
1760
2218
  }
1761
- else {
1762
- await tile.click({ force: true });
1763
- }
1764
- await p.waitForTimeout(500 + Math.random() * 400);
1765
- const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
1766
- const selectionChanged = selectedCount !== selectedBefore;
1767
- const clickStatus = selectionChanged
1768
- ? `selection changed (${selectedBefore} → ${selectedCount})`
1769
- : `selection unchanged (${selectedCount}) click may not have registered, or this tile toggled off`;
1770
- if (isRecaptcha) {
1771
- await p.waitForTimeout(1500 + Math.random() * 1000);
1772
- const newTiles = await findGridTiles(challengeFrame, provider);
1773
- const screenshotPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
1774
- await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: screenshotPath }).catch(() => p.screenshot({ path: screenshotPath }));
1775
- return ok(`Clicked tile ${tileIndex}`, {
1776
- selection: clickStatus,
1777
- 'new tile': 'appeared check screenshot and evaluate',
1778
- screenshot: screenshotPath,
1779
- next: 'Use "click-tile" for next matching tile, or "captcha-verify" when done',
1780
- });
2219
+ catch { }
2220
+ }
2221
+ const results = [];
2222
+ results.push(`Clicking ${tileIndices.length} tile(s): [${tileIndices.join(', ')}]`);
2223
+ for (const tileIndex of tileIndices) {
2224
+ try {
2225
+ const currentTiles = await findGridTiles(challengeFrame, provider);
2226
+ if (tileIndex >= currentTiles.length) {
2227
+ results.push(` Tile ${tileIndex}: out of range (${currentTiles.length} tiles now), skipping`);
2228
+ continue;
2229
+ }
2230
+ const tile = currentTiles[tileIndex];
2231
+ const selectedBefore = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2232
+ const tileBox = await tile.boundingBox();
2233
+ if (tileBox) {
2234
+ const clickX = tileBox.x + tileBox.width * (0.3 + Math.random() * 0.4);
2235
+ const clickY = tileBox.y + tileBox.height * (0.3 + Math.random() * 0.4);
2236
+ await humanMove(clickX, clickY, p);
2237
+ await p.waitForTimeout(80 + Math.random() * 120);
2238
+ await p.mouse.down();
2239
+ await p.waitForTimeout(60 + Math.random() * 100);
2240
+ await p.mouse.up();
2241
+ }
2242
+ else {
2243
+ await tile.click({ force: true });
2244
+ }
2245
+ await p.waitForTimeout(500 + Math.random() * 400);
2246
+ const selectedCount = await challengeFrame.locator(selectedClass).count().catch(() => 0);
2247
+ const clickStatus = selectedCount !== selectedBefore
2248
+ ? `selected (${selectedBefore} → ${selectedCount})`
2249
+ : `unchanged (${selectedCount})`;
2250
+ results.push(` Tile ${tileIndex}: ${clickStatus}`);
2251
+ if (isRecaptcha) {
2252
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2253
+ const newTiles = await findGridTiles(challengeFrame, provider);
2254
+ const afterPath = join(homedir(), `.aurix-tile-after-${tileIndex}.png`);
2255
+ try {
2256
+ await challengeFrame.locator('.rc-imageselect-table-33, .rc-imageselect-table-44, table').first().screenshot({ path: afterPath }).catch(() => p.screenshot({ path: afterPath }));
2257
+ }
2258
+ catch { }
2259
+ if (tileIndex < newTiles.length && instruction) {
2260
+ try {
2261
+ await newTiles[tileIndex].screenshot({ path: afterPath });
2262
+ const newBase64 = readFileBase64(afterPath);
2263
+ const newResp = await visionClassify(newBase64, `Does this image contain ${instruction}? Reply YES or NO only.`);
2264
+ if (newResp.toLowerCase().includes('yes')) {
2265
+ const newTile = newTiles[tileIndex];
2266
+ const newBox = await newTile.boundingBox();
2267
+ if (newBox) {
2268
+ const nx = newBox.x + newBox.width * (0.3 + Math.random() * 0.4);
2269
+ const ny = newBox.y + newBox.height * (0.3 + Math.random() * 0.4);
2270
+ await humanMove(nx, ny, p);
2271
+ await p.waitForTimeout(80 + Math.random() * 120);
2272
+ await p.mouse.down();
2273
+ await p.waitForTimeout(60 + Math.random() * 100);
2274
+ await p.mouse.up();
2275
+ results.push(` → Replacement tile ${tileIndex} also matched, clicked`);
2276
+ await p.waitForTimeout(1500 + Math.random() * 1000);
2277
+ }
2278
+ }
2279
+ else {
2280
+ results.push(` → Replacement tile ${tileIndex} doesn't match`);
2281
+ }
2282
+ }
2283
+ catch { }
2284
+ }
2285
+ }
2286
+ }
2287
+ catch (e) {
2288
+ results.push(` Tile ${tileIndex}: FAILED — ${e.message}`);
1781
2289
  }
1782
- const ss = await autoScreenshot(p, 'click-tile');
1783
- return ok(`Clicked tile ${tileIndex}`, {
1784
- selection: clickStatus,
1785
- screenshot: ss,
1786
- next: 'Continue clicking matching tiles, then use "captcha-verify"',
1787
- });
1788
2290
  }
1789
- catch (e) {
1790
- return err(`Failed to click tile ${tileIndex}: ${e.message}`, 'Use "captcha-grid" to re-scan the challenge');
2291
+ if (isRecaptcha) {
2292
+ results.push('');
2293
+ results.push('Use "captcha-verify" when all matching tiles are clicked, or "captcha-grid" to re-analyze.');
1791
2294
  }
2295
+ else {
2296
+ const ss = await autoScreenshot(p, 'click-tile');
2297
+ results.push(`Screenshot: ${ss}`);
2298
+ results.push('Continue clicking matching tiles, then use "captcha-verify"');
2299
+ }
2300
+ return results.join('\n');
1792
2301
  }
1793
2302
  case 'captcha-verify': {
1794
2303
  const p = await ensureBrowser();
@@ -1815,6 +2324,18 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1815
2324
  }
1816
2325
  if (!challengeFrame)
1817
2326
  challengeFrame = p;
2327
+ const timeSinceAnalyze = _lastGridAnalyzeTime > 0 ? Date.now() - _lastGridAnalyzeTime : 0;
2328
+ if (timeSinceAnalyze > 90_000 && _lastGridAnalyzeTime > 0) {
2329
+ const results = [];
2330
+ results.push(`[WARN] Grid was analyzed ${Math.round(timeSinceAnalyze / 1000)}s ago — challenge likely refreshed.`);
2331
+ results.push('Re-analyzing before verify...');
2332
+ try {
2333
+ const reAnalyze = await analyzeImageChallenge(p, challengeFrame, provider);
2334
+ results.push(reAnalyze);
2335
+ }
2336
+ catch { }
2337
+ return results.join('\n');
2338
+ }
1818
2339
  try {
1819
2340
  let verifyBtn = challengeFrame.locator('#recaptcha-verify-button, .rc-button-submit, .button-submit, [id*="verify"]');
1820
2341
  if (await verifyBtn.count() === 0) {
@@ -1830,25 +2351,71 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1830
2351
  if (errorText > 0) {
1831
2352
  const errorMsg = await challengeFrame.locator('.rc-imageselect-incorrect-response, .error-message').first().textContent().catch(() => 'Incorrect answer');
1832
2353
  await p.screenshot({ path: screenshotPath });
1833
- return err(`Verification failed: "${errorMsg}"`, `Challenge refreshed. Use "captcha-grid" to re-analyze, then click matching tiles again. Screenshot: ${screenshotPath}`);
2354
+ const results = [];
2355
+ results.push(`Verification failed: "${errorMsg}". Auto-retrying...`);
2356
+ const maxRetries = 3;
2357
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2358
+ results.push(`\nRetry ${attempt + 1}/${maxRetries}...`);
2359
+ await p.waitForTimeout(2000);
2360
+ const currentFrames = p.frames();
2361
+ const retryFrame = currentFrames.find((f) => {
2362
+ const u = f.url();
2363
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2364
+ (u.includes('hcaptcha') && u.includes('challenge'));
2365
+ });
2366
+ if (!retryFrame) {
2367
+ results.push('Challenge frame gone — captcha may be solved');
2368
+ await p.screenshot({ path: screenshotPath });
2369
+ return results.join('\n');
2370
+ }
2371
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2372
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2373
+ results.push(solveResult);
2374
+ if (solveResult.includes('Captcha solved!')) {
2375
+ return results.join('\n');
2376
+ }
2377
+ }
2378
+ results.push(`\nAuto-retry exhausted after ${maxRetries} attempts. Use "captcha-grid" and "click-tile" for manual solving.`);
2379
+ return results.join('\n');
1834
2380
  }
1835
2381
  const newChallenge = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').count();
1836
2382
  if (newChallenge > 0) {
1837
2383
  const instruction = await challengeFrame.locator('.rc-imageselect-instructions, .prompt-text').first().textContent().catch(() => '');
1838
2384
  await p.screenshot({ path: screenshotPath });
1839
- return warn(`New challenge appeared: "${instruction}"`, {
1840
- screenshot: screenshotPath,
1841
- next: 'Use "captcha-grid" to analyze and "click-tile" to solve',
1842
- });
2385
+ const results = [];
2386
+ results.push(`New challenge appeared: "${instruction}". Auto-solving...`);
2387
+ const maxRetries = 3;
2388
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
2389
+ if (attempt > 0)
2390
+ results.push(`\nRetry ${attempt}/${maxRetries - 1}...`);
2391
+ const currentFrames = p.frames();
2392
+ const retryFrame = currentFrames.find((f) => {
2393
+ const u = f.url();
2394
+ return (u.includes('/recaptcha/') && u.includes('/bframe')) ||
2395
+ (u.includes('hcaptcha') && u.includes('challenge'));
2396
+ });
2397
+ if (!retryFrame) {
2398
+ results.push('Challenge frame gone — captcha may be solved');
2399
+ return results.join('\n');
2400
+ }
2401
+ const retryProvider = retryFrame.url().includes('hcaptcha') ? 'hcaptcha' : 'recaptcha';
2402
+ const solveResult = await solveCaptchaGrid(p, retryFrame, retryProvider);
2403
+ results.push(solveResult);
2404
+ if (solveResult.includes('Captcha solved!'))
2405
+ return results.join('\n');
2406
+ await p.waitForTimeout(2000);
2407
+ }
2408
+ results.push(`\nAuto-solve exhausted. Use "captcha-grid" and "click-tile" manually.`);
2409
+ return results.join('\n');
1843
2410
  }
1844
2411
  await p.screenshot({ path: screenshotPath });
1845
2412
  return ok('Verification submitted', {
1846
2413
  screenshot: screenshotPath,
1847
- note: 'Check if the form/page progressed. If verification widget reappears, use "captcha-grid" again.',
2414
+ note: 'Check if the form/page progressed. If verification widget reappears, use "solve-captcha" again.',
1848
2415
  });
1849
2416
  }
1850
2417
  catch (e) {
1851
- return err(`Verify failed: ${e.message}`, 'Use "captcha-grid" to re-scan and retry');
2418
+ return err(`Verify failed: ${e.message}`, 'Use "solve-captcha" to retry automatically');
1852
2419
  }
1853
2420
  }
1854
2421
  case 'slider-analyze': {
@@ -1940,19 +2507,63 @@ The browser profile persists at ~/.aurix-browser-profile — if the user is logg
1940
2507
  const pieceHalfWidth = Math.round((sliderInfo.piece.width || 44) / 2);
1941
2508
  const adjustedOffset = gapOffset - pieceHalfWidth;
1942
2509
  results.push('');
1943
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${adjustedOffset},0"`);
1944
- results.push(`(gap at ${gapOffset}px minus half piece width ${pieceHalfWidth}px = ${adjustedOffset}px)`);
2510
+ results.push(`[OK] Gap at ${gapOffset}px, piece half ${pieceHalfWidth}px, drag distance ${adjustedOffset}px`);
2511
+ gapOffset = adjustedOffset;
1945
2512
  }
1946
2513
  else if (gapOffset !== null) {
1947
2514
  results.push('');
1948
- results.push(`[OK] RECOMMENDED OFFSET: drag-to value="${gapOffset},0"`);
2515
+ results.push(`[OK] Gap at ${gapOffset}px`);
1949
2516
  }
1950
2517
  else {
1951
2518
  results.push('');
1952
- results.push('[WARN] Could not auto-detect gap position from DOM.');
1953
- results.push('Look at the puzzle screenshot to find where the gap/hole is.');
1954
- results.push('Estimate the pixel distance from the LEFT edge of the puzzle to the CENTER of the gap.');
1955
- results.push('Then use: drag-to target=".geetest_slider_button" value="<estimated_px>,0"');
2519
+ results.push('DOM gap detection failed, trying vision model...');
2520
+ try {
2521
+ const ssBase64 = readFileBase64(screenshotPath);
2522
+ const visionResp = await visionClassify(ssBase64, 'This is a slider puzzle captcha. There is a gap/hole in the background image where a puzzle piece needs to go. Estimate the horizontal pixel position of the CENTER of the gap, measured from the LEFT edge of the puzzle image. Reply with ONLY the number (e.g. "145").');
2523
+ const parsed = parseInt(visionResp.replace(/[^\d]/g, ''));
2524
+ if (!isNaN(parsed) && parsed > 10 && parsed < 500) {
2525
+ gapOffset = parsed;
2526
+ results.push(`Vision model: gap at ~${gapOffset}px`);
2527
+ }
2528
+ else {
2529
+ results.push(`Vision model returned: "${visionResp}" — could not parse`);
2530
+ }
2531
+ }
2532
+ catch (e) {
2533
+ results.push(`Vision model failed: ${e.message}`);
2534
+ }
2535
+ }
2536
+ if (gapOffset !== null && sliderInfo.slider) {
2537
+ results.push('Auto-dragging slider...');
2538
+ try {
2539
+ const startX = sliderInfo.slider.centerX || (sliderInfo.slider.left + sliderInfo.slider.width / 2);
2540
+ const startY = sliderInfo.slider.centerY || (sliderInfo.slider.top + sliderInfo.slider.height / 2);
2541
+ const endX = startX + gapOffset;
2542
+ await humanMove(startX, startY, p);
2543
+ await p.waitForTimeout(150 + Math.random() * 250);
2544
+ await p.mouse.down();
2545
+ await p.waitForTimeout(200 + Math.random() * 300);
2546
+ const steps = 25 + Math.floor(Math.random() * 20);
2547
+ for (let i = 1; i <= steps; i++) {
2548
+ const progress = i / steps;
2549
+ const eased = progress < 0.5 ? 2 * progress * progress : 1 - Math.pow(-2 * progress + 2, 2) / 2;
2550
+ const x = startX + gapOffset * eased + (Math.random() - 0.5) * 2;
2551
+ const y = startY + (Math.random() - 0.5) * 2;
2552
+ await p.mouse.move(x, y);
2553
+ await p.waitForTimeout(10 + Math.random() * 20);
2554
+ }
2555
+ await p.mouse.move(endX, startY);
2556
+ await p.waitForTimeout(150);
2557
+ await p.mouse.up();
2558
+ await p.waitForTimeout(2000);
2559
+ results.push('[OK] Slider auto-dragged. Check page state to confirm.');
2560
+ }
2561
+ catch (e) {
2562
+ results.push(`Auto-drag failed: ${e.message}. Use: drag-to target=".geetest_slider_button" value="${gapOffset},0"`);
2563
+ }
2564
+ }
2565
+ else if (gapOffset === null) {
2566
+ results.push('Could not determine gap position. Use "drag-to" manually with estimated offset.');
1956
2567
  }
1957
2568
  if (sliderInfo.allGeeTestClasses?.length > 0) {
1958
2569
  results.push('');