@minded-ai/mindedjs 2.0.13 → 2.0.14-beta-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,632 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Browser Use AI Agent with Captcha Bypass
4
+ Uses the Python SDK instead of CLI for better control and captcha handling
5
+ """
6
+
7
+ import asyncio
8
+ import sys
9
+ import json
10
+ import argparse
11
+ import base64
12
+ import traceback
13
+ from typing import Optional, Dict, Any
14
+ from pathlib import Path
15
+
16
+ try:
17
+ import cv2
18
+ import pytesseract
19
+ import numpy as np
20
+ from PIL import Image
21
+ from io import BytesIO
22
+ from browser_use import Agent, Controller, ActionResult
23
+ from browser_use.llm import ChatOpenAI
24
+ from browser_use.browser.types import Page
25
+ import logging
26
+ except ImportError as e:
27
+ print(f"Error importing required packages: {e}")
28
+ print("Please install required dependencies:")
29
+ print("pip install browser-use opencv-python pytesseract pillow")
30
+ sys.exit(1)
31
+
32
# Root logging setup: INFO and above, each record stamped with time, logger
# name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


# Registry for custom agent actions; actions attach to it through the
# @controller.action decorator.
controller = Controller()
42
+
43
@controller.action('Detect and solve CAPTCHA challenges on the current page')
async def solve_captcha(page: Page) -> ActionResult:
    """
    Locate a CAPTCHA on the page, ask the vision model for its answer and
    type that answer into the CAPTCHA input field.

    The form is not submitted here; only the input field is filled.

    Args:
        page: The browser page to inspect.

    Returns:
        The ActionResult produced while filling the field when a solution was
        obtained, otherwise an ActionResult whose ``error`` describes why the
        CAPTCHA could not be solved.
    """
    try:
        logger.info("Starting CAPTCHA detection and bypass...")

        # Give the page a fixed three seconds to settle before looking.
        await page.wait_for_timeout(3000)

        # Prefer a tight crop of the CAPTCHA image; fall back to the whole page.
        image_bytes = await _capture_captcha_image(page)
        if image_bytes is None:
            logger.warning("Could not find specific CAPTCHA image, falling back to full page screenshot")
            image_bytes = await page.screenshot(full_page=True)

        outcome = await _ai_solve_captcha_with_gpt4(image_bytes)

        if not outcome.get('success'):
            # Report which screenshot was analysed even when solving failed.
            if 'screenshot_path' in outcome:
                logger.info(f"๐Ÿ” Failed analysis used screenshot: {outcome['screenshot_path']}")
            return ActionResult(
                extracted_content="No CAPTCHA detected or could not solve it",
                error=outcome.get('message', 'CAPTCHA solving failed')
            )

        if 'screenshot_path' in outcome:
            logger.info(f"๐Ÿ” Analysis used screenshot: {outcome['screenshot_path']}")

        # Type the answer into the input field and hand back that result.
        return await _fill_captcha_solution(page, outcome['solution'])

    except Exception as exc:
        logger.error(f"Error in captcha bypass: {exc}")
        return ActionResult(
            extracted_content="CAPTCHA bypass failed",
            error=f"CAPTCHA bypass error: {exc}"
        )
89
+
90
+
91
async def _capture_captcha_image(page: Page) -> Optional[bytes]:
    """
    Capture only the CAPTCHA image element for more accurate AI analysis.

    Selectors are tried in order; the first match whose bounding box is larger
    than 10x10 pixels is screenshotted. A copy of the screenshot is written to
    the ``screenshots`` directory for debugging.

    Args:
        page: The browser page to search.

    Returns:
        The screenshot bytes of the CAPTCHA element, or None when no suitable
        element was found or capturing it failed.
    """
    try:
        from datetime import datetime

        # List of common CAPTCHA image selectors
        captcha_image_selectors = [
            # Specific selector from the user's example
            '#ContentUsersPage_rc1_CaptchaImageUP',

            # Generic CAPTCHA image selectors
            'img[id*="captcha" i]',
            'img[id*="Captcha"]',
            'img[class*="captcha" i]',
            'img[src*="captcha" i]',
            'img[alt*="captcha" i]',

            # Telerik WebResource patterns (common in ASP.NET)
            'img[src*="Telerik.Web.UI.WebResource.axd"]',
            'img[src*="WebResource.axd"][src*="rca"]',

            # reCAPTCHA and other common patterns
            '.g-recaptcha img',
            '.h-captcha img',
            'canvas[aria-label*="captcha" i]',

            # Generic patterns by size (typical CAPTCHA dimensions)
            'img[style*="height:50px"]',
            'img[style*="width:180px"]',
        ]

        captcha_element = None
        successful_selector = None

        for selector in captcha_image_selectors:
            try:
                candidate = await page.query_selector(selector)
                if not candidate:
                    continue
                bounding_box = await candidate.bounding_box()
            except Exception as e:
                logger.debug(f"Selector '{selector}' failed: {str(e)}")
                continue

            # Only accept the candidate once it has passed the size check, so a
            # hidden or tiny match is never screenshotted after the loop ends.
            if bounding_box and bounding_box['width'] > 10 and bounding_box['height'] > 10:
                captcha_element = candidate
                successful_selector = selector
                logger.info(f"๐Ÿ“ธ Found CAPTCHA image using selector: {selector}")
                logger.info(f"๐Ÿ“ CAPTCHA dimensions: {bounding_box['width']}x{bounding_box['height']} pixels")
                break

        if captcha_element is None:
            logger.warning("โŒ Could not find CAPTCHA image element")
            return None

        # Take screenshot of just the CAPTCHA element
        logger.info("๐Ÿ“ธ Taking screenshot of CAPTCHA image element only...")
        captcha_screenshot = await captcha_element.screenshot()

        # Save a debug copy. This is best-effort: failing to write the file
        # must not throw away a screenshot that was captured successfully.
        try:
            screenshots_dir = Path("screenshots")
            screenshots_dir.mkdir(exist_ok=True)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
            debug_path = screenshots_dir / f"captcha_element_{timestamp}.png"

            with open(debug_path, "wb") as f:
                f.write(captcha_screenshot)

            logger.info(f"๐ŸŽฏ CAPTCHA element screenshot saved: {debug_path}")
        except OSError as e:
            logger.warning(f"Could not save CAPTCHA debug screenshot: {str(e)}")

        logger.info(f"โœ… Successfully captured CAPTCHA using selector: {successful_selector}")

        return captcha_screenshot

    except Exception as e:
        logger.error(f"Error capturing CAPTCHA image: {str(e)}")
        return None
167
+
168
+
169
async def _ai_solve_captcha_with_gpt4(screenshot: bytes) -> Dict[str, Any]:
    """
    Use GPT-4 Vision to solve CAPTCHA challenges.

    The screenshot is saved to the ``screenshots`` directory, sent to the
    "gpt-4o" chat model as a base64 data URL, and the model's reply is written
    to a companion ``*_analysis.txt`` file.

    Args:
        screenshot: Image bytes showing the CAPTCHA (element crop or full page).

    Returns:
        A dict with a boolean ``success`` key. On success it also contains
        ``solution``, ``method`` and ``screenshot_path``. On failure it
        contains ``message`` and, if the screenshot had already been saved,
        ``screenshot_path``.
    """
    # Set once the screenshot is on disk, so the error path can still report it.
    screenshot_path = None

    try:
        from openai import AsyncOpenAI
        import os
        from datetime import datetime

        # Initialize OpenAI client
        client = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))

        # Save screenshot for debugging
        screenshots_dir = Path("screenshots")
        screenshots_dir.mkdir(exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]  # microseconds to milliseconds

        # Determine if this is an element screenshot or full page
        screenshot_type = "captcha_element" if len(screenshot) < 500000 else "full_page"  # rough size estimate
        screenshot_path = screenshots_dir / f"{screenshot_type}_{timestamp}.png"

        with open(screenshot_path, "wb") as f:
            f.write(screenshot)

        logger.info(f"๐Ÿ“ธ Screenshot saved to: {screenshot_path}")

        # Convert screenshot to base64
        screenshot_b64 = base64.b64encode(screenshot).decode('utf-8')

        logger.info("๐Ÿง  Sending CAPTCHA image to GPT-4 Vision for analysis...")

        # Create prompt for GPT-4 Vision
        prompt = """
        This image contains a CAPTCHA challenge. Please analyze it and provide the solution.

        Common CAPTCHA types:
        1. **Text CAPTCHA**: Distorted letters/numbers - read the exact text
        2. **Math CAPTCHA**: Arithmetic problems (e.g., "3 + 7 = ?") - solve and provide the answer
        3. **Simple Logic**: Basic questions or patterns

        Instructions:
        - Look carefully at all characters, including distorted or stylized text
        - For math problems, calculate the answer
        - Ignore background noise, lines, or visual distortions
        - Focus only on the actual content that needs to be solved

        Respond with ONLY the solution text/answer. Examples:
        - "ABC123" (for text CAPTCHA showing these characters)
        - "10" (for math problem "3 + 7 = ?")
        - "HELLO" (for distorted text showing these letters)

        If the image is unclear or you cannot determine the answer, respond with: "UNCLEAR"
        """

        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{screenshot_b64}",
                                "detail": "high"
                            }
                        }
                    ]
                }
            ],
            max_tokens=300,
            temperature=0.1
        )

        # The reply content can be missing; treat that like an empty answer.
        raw_reply = response.choices[0].message.content or ""

        # The prompt shows its example answers inside quotes, so the model may
        # echo the quotes back. Strip them so they are not typed into the form.
        solution = raw_reply.strip().strip('"\'`').strip()
        logger.info(f"GPT-4 Vision response: {solution}")

        # An empty reply or the UNCLEAR sentinel (optionally with a trailing
        # period) both mean the model produced no usable answer.
        solved = bool(solution) and solution.upper().rstrip('.') != "UNCLEAR"

        # Save the analysis result alongside the screenshot. UTF-8 is forced
        # because the reply is not guaranteed to be ASCII.
        analysis_path = screenshots_dir / f"{screenshot_type}_{timestamp}_analysis.txt"
        with open(analysis_path, "w", encoding="utf-8") as f:
            f.write(f"Screenshot: {screenshot_path}\n")
            f.write(f"Timestamp: {datetime.now()}\n")
            f.write(f"GPT-4 Response: {solution}\n")
            f.write(f"Success: {solved}\n")

        logger.info(f"๐Ÿ“ Analysis result saved to: {analysis_path}")

        if not solved:
            return {
                'success': False,
                'message': 'Could not determine CAPTCHA solution from image',
                'screenshot_path': str(screenshot_path)
            }

        logger.info(f"๐ŸŽฏ GPT-4 solved CAPTCHA: '{solution}'")
        return {
            'success': True,
            'solution': solution,
            'method': 'gpt4-vision',
            'screenshot_path': str(screenshot_path)
        }

    except Exception as e:
        logger.error(f"Error in GPT-4 CAPTCHA solving: {str(e)}")
        failure: Dict[str, Any] = {
            'success': False,
            'message': f'GPT-4 CAPTCHA solving error: {str(e)}'
        }
        if screenshot_path is not None:
            failure['screenshot_path'] = str(screenshot_path)
        return failure
282
+
283
+
284
async def _fill_captcha_solution(page, solution: str) -> ActionResult:
    """
    Find CAPTCHA input field and fill the solution (without submitting).

    Selectors are tried in order and the first match that is actually rendered
    is used. Matches without a bounding box are skipped so that hidden inputs
    (for example a CAPTCHA token field) are never chosen as the target.

    Args:
        page: The browser page containing the CAPTCHA form.
        solution: The text to type into the CAPTCHA field.

    Returns:
        An ActionResult whose ``extracted_content`` starts with
        "Successfully filled CAPTCHA field" on success; otherwise an
        ActionResult with ``error`` set.
    """
    try:
        # Common CAPTCHA input field selectors
        captcha_selectors = [
            # Case-insensitive CAPTCHA selectors
            'input[name*="captcha" i]',
            'input[id*="captcha" i]',
            'input[class*="captcha" i]',
            'input[placeholder*="captcha" i]',

            # ASP.NET specific patterns
            'input[name*="CaptchaTextBox"]',
            'input[id*="CaptchaTextBox"]',
            'input[name*="Captcha"]',
            'input[id*="Captcha"]',

            # Hebrew title attribute (for Israeli/Hebrew sites)
            'input[title*="תווים בתמונה"]',  # "characters in the image"
            'input[title*="תמונה"]',  # "image"
            'input[title*="אימות"]',  # "verification"

            # Generic patterns
            'input[type="text"][name*="code"]',
            'input[type="text"][id*="code"]',
            'input[type="text"][maxlength="6"]',  # Common CAPTCHA length
            'input[type="text"][maxlength="5"]',  # Common CAPTCHA length
            'input[type="text"][maxlength="4"]',  # Common CAPTCHA length

            # Class and structure patterns
            '.captcha input',
            '#captcha',
            '[data-captcha] input',
            'input[autocomplete="off"][maxlength]',  # Often CAPTCHAs disable autocomplete

            # Specific selector for your element
            '#ContentUsersPage_rc1_CaptchaTextBox',
            'input[name="ctl00\\$ContentUsersPage\\$rc1\\$CaptchaTextBox"]',  # Escaped $ for CSS
            'input[id="ContentUsersPage_rc1_CaptchaTextBox"]'
        ]

        captcha_input = None

        for selector in captcha_selectors:
            try:
                candidate = await page.query_selector(selector)
                if not candidate:
                    continue

                # No bounding box means the element is not rendered; it cannot
                # be the field the user is expected to type into.
                if await candidate.bounding_box() is None:
                    logger.debug(f"Selector '{selector}' matched a hidden element, skipping")
                    continue

                captcha_input = candidate
                logger.info(f"โœ… Found CAPTCHA input using selector: {selector}")

                # Log element details for debugging
                element_name = await captcha_input.get_attribute('name') or 'no-name'
                element_id = await captcha_input.get_attribute('id') or 'no-id'
                element_maxlength = await captcha_input.get_attribute('maxlength') or 'no-limit'
                logger.info(f"๐Ÿ“ CAPTCHA element details - Name: {element_name}, ID: {element_id}, MaxLength: {element_maxlength}")
                break
            except Exception as e:
                logger.debug(f"Selector '{selector}' failed: {str(e)}")
                continue

        if not captcha_input:
            return ActionResult(
                extracted_content="Could not find CAPTCHA input field",
                error="CAPTCHA input field not found"
            )

        # Clear the input and enter the solution
        await captcha_input.fill("")
        await captcha_input.fill(solution)
        logger.info(f"โœ… Entered CAPTCHA solution: '{solution}' into field")

        # Verify the value was set correctly
        filled_value = await captcha_input.input_value()
        if filled_value == solution:
            logger.info(f"โœ… Confirmed CAPTCHA field contains: '{filled_value}'")
        else:
            logger.warning(f"โš ๏ธ CAPTCHA field shows '{filled_value}' but expected '{solution}'")

        # Wait a moment for the input to register
        await page.wait_for_timeout(500)

        return ActionResult(
            extracted_content=f"Successfully filled CAPTCHA field with: {solution}",
            include_in_memory=True
        )

    except Exception as e:
        logger.error(f"Error filling CAPTCHA solution: {str(e)}")
        return ActionResult(
            extracted_content=f"Error filling CAPTCHA: {str(e)}",
            error=str(e)
        )
379
+
380
+
381
async def captcha_detection_hook(agent):
    """
    Lifecycle hook to automatically detect and solve CAPTCHAs.

    The current page's HTML is scanned for CAPTCHA keywords. When one is found
    the CAPTCHA is solved, unless its field is already filled and no rejection
    message is present. Attempts are counted per page URL on the agent object
    (``agent._captcha_attempts``) and capped at 10.

    Args:
        agent: The running agent; must expose
            ``browser_session.get_current_page()``.

    Returns:
        None. All errors are logged and swallowed so the hook never interrupts
        the agent run.
    """
    # Phrases taken to mean the previously entered CAPTCHA answer was rejected.
    captcha_error_indicators = [
        'הקלדה שגוייה',  # Hebrew: "wrong input"
        'incorrect captcha',
        'invalid captcha',
        'wrong captcha',
        'captcha error',
        'try again'
    ]

    # Keywords taken to mean the page contains a CAPTCHA at all.
    captcha_indicators = [
        'captcha',
        'recaptcha',
        'hcaptcha',
        'verification',
        'security check',
        'prove you are human'
    ]

    # Limit attempts to prevent infinite loops
    max_attempts = 10

    try:
        page = await agent.browser_session.get_current_page()

        page_content = await page.content()
        page_content_lower = page_content.lower()

        has_captcha = any(indicator in page_content_lower for indicator in captcha_indicators)
        if not has_captcha:
            return

        has_captcha_error = any(indicator in page_content_lower for indicator in captcha_error_indicators)

        if has_captcha_error:
            # A rejection message means we must retry regardless of field content.
            logger.warning("๐Ÿ”„ CAPTCHA error detected! Clearing field and retrying...")
            await _clear_captcha_field(page)
        elif await _is_captcha_already_filled(page):
            # Already filled and no error shown: do not solve it again.
            logger.info("โœ… CAPTCHA appears to be already filled and no errors detected, skipping automatic solving")
            return

        # Initialize attempt tracking if not exists
        if not hasattr(agent, '_captcha_attempts'):
            agent._captcha_attempts = {}

        # The page URL is the key under which attempts are counted.
        current_url = page.url
        attempt_count = agent._captcha_attempts.get(current_url, 0)

        if attempt_count >= max_attempts:
            logger.error(f"โŒ Maximum CAPTCHA attempts ({max_attempts}) reached for {current_url}")
            return

        agent._captcha_attempts[current_url] = attempt_count + 1
        logger.info(f"๐ŸŽฏ CAPTCHA attempt {attempt_count + 1}/{max_attempts} for page")

        # Use the captcha solving action directly
        result = await solve_captcha(page=page)

        # Success means no error and the fill step's success marker in the content.
        if result.error is None and "Successfully filled CAPTCHA" in (result.extracted_content or ""):
            logger.info(f"CAPTCHA filled successfully: {result.extracted_content}")
            logger.info("CAPTCHA field is ready - browser-use can continue with the task")
        else:
            error_msg = result.error or result.extracted_content or "Unknown error"
            logger.warning(f"CAPTCHA solving failed: {error_msg}")

    except Exception as e:
        logger.error(f"Error in CAPTCHA detection hook: {str(e)}")
458
+
459
+
460
async def _is_captcha_already_filled(page: Page) -> bool:
    """
    Check if CAPTCHA input field is already filled to avoid solving it repeatedly.

    Only rendered inputs are considered. A match without a bounding box (for
    example a hidden CAPTCHA token field that always carries a value) is
    ignored, because counting it would make the CAPTCHA look permanently
    filled.

    Args:
        page: The browser page to inspect.

    Returns:
        True if a rendered input matching one of the CAPTCHA selectors holds a
        non-blank value; False otherwise, including when the check fails.
    """
    try:
        # Common CAPTCHA input field selectors
        captcha_selectors = [
            # Case-insensitive CAPTCHA selectors
            'input[name*="captcha" i]',
            'input[id*="captcha" i]',
            'input[class*="captcha" i]',
            'input[placeholder*="captcha" i]',

            # ASP.NET specific patterns
            'input[name*="CaptchaTextBox"]',
            'input[id*="CaptchaTextBox"]',
            'input[name*="Captcha"]',
            'input[id*="Captcha"]',

            # Hebrew title attribute (for Israeli/Hebrew sites)
            'input[title*="תווים בתמונה"]',  # "characters in the image"
            'input[title*="תמונה"]',  # "image"
            'input[title*="אימות"]',  # "verification"

            # Generic patterns
            'input[type="text"][name*="code"]',
            'input[type="text"][id*="code"]',
            'input[type="text"][maxlength="6"]',  # Common CAPTCHA length
            'input[type="text"][maxlength="5"]',  # Common CAPTCHA length
            'input[type="text"][maxlength="4"]',  # Common CAPTCHA length

            # Class and structure patterns
            '.captcha input',
            '#captcha',
            '[data-captcha] input',
            'input[autocomplete="off"][maxlength]',  # Often CAPTCHAs disable autocomplete

            # Specific selector for your element
            '#ContentUsersPage_rc1_CaptchaTextBox',
            'input[name="ctl00\\$ContentUsersPage\\$rc1\\$CaptchaTextBox"]',  # Escaped $ for CSS
            'input[id="ContentUsersPage_rc1_CaptchaTextBox"]'
        ]

        for selector in captcha_selectors:
            try:
                captcha_input = await page.query_selector(selector)
                if not captcha_input:
                    continue

                # Skip elements that are not rendered (no bounding box).
                if await captcha_input.bounding_box() is None:
                    continue

                # Check if the input field has a value
                current_value = await captcha_input.input_value()
                if current_value and current_value.strip():
                    logger.info(f"๐Ÿ” CAPTCHA field '{selector}' already contains: '{current_value}'")
                    return True
            except Exception as e:
                logger.debug(f"Error checking selector '{selector}': {str(e)}")
                continue

        return False

    except Exception as e:
        logger.error(f"Error checking if CAPTCHA is filled: {str(e)}")
        return False  # If we can't check, assume it's not filled to be safe
521
+
522
+
523
async def _clear_captcha_field(page: Page) -> None:
    """
    Empty the CAPTCHA input so a fresh answer can be entered after a rejection.

    Only the first selector that matches an element and can be cleared is
    acted on. The selector list here is a short subset of the one used when
    filling the field.

    Args:
        page: The browser page containing the CAPTCHA field.
    """
    try:
        field_selectors = (
            '#ContentUsersPage_rc1_CaptchaTextBox',
            'input[name*="captcha" i]',
            'input[id*="captcha" i]',
            'input[name*="CaptchaTextBox"]',
            'input[id*="CaptchaTextBox"]',
        )

        for selector in field_selectors:
            try:
                field = await page.query_selector(selector)
                if not field:
                    continue
                # Filling with an empty string clears the current value.
                await field.fill("")
                logger.info(f"๐Ÿงน Cleared CAPTCHA field: {selector}")
                return
            except Exception as exc:
                # A failing selector is not fatal; move on to the next one.
                logger.debug(f"Error clearing selector '{selector}': {exc}")

    except Exception as exc:
        logger.error(f"Error clearing CAPTCHA field: {exc}")
550
+
551
+
552
async def run_browser_task(task: str, max_steps: int = 30) -> str:
    """
    Run one browser task through the agent with the CAPTCHA hook attached.

    Args:
        task: Natural-language description of what the agent should do.
        max_steps: Upper bound on agent steps passed to ``agent.run``.

    Returns:
        The agent's final output, or "Task completed successfully" when the
        run produced no (or an empty) final output.

    Raises:
        Exception: Any failure, re-raised with the original message and the
            formatted traceback as its text.
    """
    try:
        # The agent drives the browser with gpt-4o and can call the custom
        # actions registered on the module-level controller.
        agent = Agent(
            task=task,
            llm=ChatOpenAI(model="gpt-4o"),
            controller=controller,
            max_actions_per_step=3
        )

        logger.info(f"Starting browser task: {task}")

        # The hook runs at the start of every step to handle CAPTCHAs.
        history = await agent.run(
            max_steps=max_steps,
            on_step_start=captcha_detection_hook
        )

        logger.info("Task completed successfully")

        # Pull the final answer out of whatever shape the run result has.
        final_result_attr = getattr(history, 'final_result', None)
        if callable(final_result_attr):
            final_output = final_result_attr()
        elif hasattr(history, 'message'):
            final_output = history.message
        else:
            final_output = str(history)

        if final_output:
            return final_output
        return "Task completed successfully"

    except Exception as e:
        error_msg = f"Error executing browser task: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_msg)
        raise Exception(error_msg)
592
+
593
+
594
async def main():
    """
    Command-line entry point: parse arguments, run the task, print the outcome.

    With ``--output-format json`` both results and errors are printed to
    stdout as a JSON object; in text mode results go to stdout and errors to
    stderr. The process exits with status 1 on any error.
    """
    parser = argparse.ArgumentParser(description='Browser Use Agent with CAPTCHA Bypass')
    parser.add_argument('-p', '--prompt', required=True, help='Task prompt for the browser agent')
    parser.add_argument('--max-steps', type=int, default=30, help='Maximum number of steps')
    parser.add_argument('--output-format', choices=['text', 'json'], default='text', help='Output format')
    args = parser.parse_args()

    wants_json = args.output_format == 'json'

    try:
        result = await run_browser_task(args.prompt, args.max_steps)

        if not wants_json:
            print(result)
        else:
            success_payload = {
                'success': True,
                'result': result,
                'task': args.prompt
            }
            print(json.dumps(success_payload, indent=2))

    except Exception as exc:
        if not wants_json:
            print(f"Error: {exc}", file=sys.stderr)
        else:
            failure_payload = {
                'success': False,
                'error': str(exc),
                'task': args.prompt
            }
            print(json.dumps(failure_payload, indent=2))
        sys.exit(1)


# Run the async entry point only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())