PyPI - owl-browser - Versions diffs - 1.0.0__tar.gz - Mend

owl-browser 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

owl_browser-1.0.0/PKG-INFO +784 -0
owl_browser-1.0.0/README.md +748 -0
owl_browser-1.0.0/owl_browser/__init__.py +284 -0
owl_browser-1.0.0/owl_browser/async_browser.py +691 -0
owl_browser-1.0.0/owl_browser/browser.py +511 -0
owl_browser-1.0.0/owl_browser/context.py +1314 -0
owl_browser-1.0.0/owl_browser/core.py +852 -0
owl_browser-1.0.0/owl_browser/exceptions.py +252 -0
owl_browser-1.0.0/owl_browser/http_client.py +796 -0
owl_browser-1.0.0/owl_browser/jwt.py +451 -0
owl_browser-1.0.0/owl_browser/py.typed +2 -0
owl_browser-1.0.0/owl_browser/types.py +695 -0
owl_browser-1.0.0/owl_browser/ws_client.py +705 -0
owl_browser-1.0.0/owl_browser.egg-info/PKG-INFO +784 -0
owl_browser-1.0.0/owl_browser.egg-info/SOURCES.txt +18 -0
owl_browser-1.0.0/owl_browser.egg-info/dependency_links.txt +1 -0
owl_browser-1.0.0/owl_browser.egg-info/requires.txt +8 -0
owl_browser-1.0.0/owl_browser.egg-info/top_level.txt +1 -0
owl_browser-1.0.0/pyproject.toml +98 -0
owl_browser-1.0.0/setup.cfg +4 -0

owl_browser-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,784 @@
+Metadata-Version: 2.4
+Name: owl-browser
+Version: 1.0.0
+Summary: AI-first browser automation SDK with on-device vision model and natural language selectors
+Author-email: Olib AI <hello@olib.ai>
+License: MIT
+Project-URL: Homepage, https://www.owlbrowser.net
+Project-URL: Documentation, https://github.com/Olib-AI/owl-browser#readme
+Project-URL: Repository, https://github.com/Olib-AI/owl-browser
+Project-URL: Issues, https://github.com/Olib-AI/owl-browser/issues
+Keywords: browser,automation,scraping,ai,llm,vision,selenium-alternative,playwright-alternative,puppeteer-alternative,web-scraping,headless-browser,stealth,anti-detection,owl-browser
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Software Development :: Testing
+Classifier: Typing :: Typed
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
+Requires-Dist: black>=23.0; extra == "dev"
+Requires-Dist: isort>=5.0; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Requires-Dist: ruff>=0.1; extra == "dev"
+# Owl Browser SDK for Python
+AI-first browser automation SDK with on-device vision model, natural language selectors, and comprehensive stealth features.
+## Features
+- **Natural Language Selectors** - Click, type, and interact using descriptions like "search button" or "login form"
+- **On-Device Vision Model** - Built-in Qwen 3 model for page understanding and CAPTCHA solving
+- **Stealth Mode** - Proxy support with timezone spoofing, WebRTC blocking, and anti-detection
+- **Thread-Safe** - Designed for concurrent usage with multiple pages
+- **Simple API** - Minimal code required for common tasks
+- **Dual Mode** - Connect to local browser binary OR remote HTTP server
+## Installation
+```bash
+pip install owl-browser
+```
+**Note:** You must also build the Owl Browser binary. See the main project README for build instructions.
+## Quick Start
+```python
+from owl_browser import Browser
+# Simple usage with context manager
+with Browser() as browser:
+    page = browser.new_page()
+    page.goto("https://example.com")
+    # Natural language selectors
+    page.click("search button")
+    page.type("search input", "hello world")
+    # Take screenshot
+    page.screenshot("screenshot.png")
+```
+## Remote Mode (HTTP Server)
+The SDK supports connecting to a remote Owl Browser HTTP server, enabling:
+- **Cloud deployment** - Run browser on remote servers
+- **Distributed scraping** - Connect multiple clients to one browser
+- **Resource optimization** - Share browser resources across applications
+### Basic Remote Usage
+```python
+from owl_browser import Browser, RemoteConfig
+# Connect to remote browser server
+browser = Browser(remote=RemoteConfig(
+    url="http://192.168.1.100:8080",
+    token="your-secret-token"
+))
+browser.launch()
+# API is identical to local mode!
+page = browser.new_page()
+page.goto("https://example.com")
+page.click("search button")
+page.type("search input", "hello world")
+page.screenshot("screenshot.png")
+browser.close()
+```
+### Remote with Context Manager
+```python
+from owl_browser import Browser, RemoteConfig
+with Browser(remote=RemoteConfig(
+    url="http://localhost:8080",
+    token="secret-token"
+)) as browser:
+    page = browser.new_page()
+    page.goto("https://example.com")
+    page.screenshot("screenshot.png")
+```
+### Async Remote Usage
+```python
+import asyncio
+from owl_browser import AsyncBrowser, RemoteConfig
+async def main():
+    async with AsyncBrowser(remote=RemoteConfig(
+        url="http://192.168.1.100:8080",
+        token="your-secret-token"
+    )) as browser:
+        page = await browser.new_page()
+        await page.goto("https://example.com")
+        await page.screenshot("screenshot.png")
+asyncio.run(main())
+```
+### JWT Authentication
+For enhanced security, the SDK supports JWT (JSON Web Token) authentication with RSA signing. The SDK can automatically generate and refresh JWT tokens using your private key:
+```python
+from owl_browser import Browser, RemoteConfig, JWTConfig, AuthMode
+# Connect with JWT authentication (auto-generated tokens)
+browser = Browser(remote=RemoteConfig(
+    url="http://192.168.1.100:8080",
+    auth_mode=AuthMode.JWT,
+    jwt=JWTConfig(
+        private_key="/path/to/private.pem",  # RSA private key
+        expires_in=3600,                      # Token validity (1 hour)
+        refresh_threshold=300,                # Refresh 5 min before expiry
+        issuer="my-app",                      # Optional claims
+        subject="user-123"
+    )
+))
+browser.launch()
+```
+You can also use the JWT utilities directly:
+```python
+from owl_browser import generate_jwt, decode_jwt, JWTManager, generate_key_pair
+# Generate a single token
+token = generate_jwt('/path/to/private.pem', expires_in=7200, issuer='my-app')
+# Decode a token (without verification)
+decoded = decode_jwt(token)
+print(f"Expires at: {decoded['payload']['exp']}")
+# Use JWTManager for auto-refresh
+jwt_manager = JWTManager('/path/to/private.pem', expires_in=3600, refresh_threshold=300)
+token = jwt_manager.get_token()  # Auto-refreshes when needed
+# Generate new RSA key pair
+private_key, public_key = generate_key_pair()
+with open('private.pem', 'w') as f:
+    f.write(private_key)
+with open('public.pem', 'w') as f:
+    f.write(public_key)
+```
+### WebSocket Transport
+For lower latency and persistent connections, use WebSocket transport instead of HTTP:
+```python
+from owl_browser import Browser, RemoteConfig, TransportMode, ReconnectConfig
+# WebSocket mode - real-time communication
+browser = Browser(remote=RemoteConfig(
+    url="http://192.168.1.100:8080",
+    token="your-secret-token",
+    transport=TransportMode.WEBSOCKET,  # Use WebSocket instead of HTTP
+    reconnect=ReconnectConfig(
+        enabled=True,
+        max_attempts=5,
+        initial_delay_ms=1000,
+        max_delay_ms=30000
+    )
+))
+browser.launch()
+```
+### High-Performance Configuration
+For high-concurrency workloads, configure retry and concurrency limits:
+```python
+from owl_browser import Browser, RemoteConfig, RetryConfig, ConcurrencyConfig
+# High-performance HTTP configuration
+browser = Browser(remote=RemoteConfig(
+    url="http://192.168.1.100:8080",
+    token="your-secret-token",
+    timeout=30000,
+    # Retry configuration with exponential backoff
+    retry=RetryConfig(
+        max_retries=5,
+        initial_delay_ms=100,
+        max_delay_ms=10000,
+        backoff_multiplier=2.0,
+        jitter_factor=0.1
+    ),
+    # Concurrency limiting
+    concurrency=ConcurrencyConfig(
+        max_concurrent=50
+    )
+))
+browser.launch()
+```
+### RemoteConfig Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `url` | str | *required* | Base URL of HTTP server (e.g., `http://localhost:8080`) |
+| `token` | str | - | Bearer token (required for TOKEN mode) |
+| `auth_mode` | AuthMode | TOKEN | Authentication mode (TOKEN or JWT) |
+| `jwt` | JWTConfig | - | JWT configuration (required for JWT mode) |
+| `transport` | TransportMode | HTTP | Transport mode (HTTP or WEBSOCKET) |
+| `timeout` | int | 30000 | Request timeout in milliseconds |
+| `verify_ssl` | bool | True | Verify SSL certificates |
+| `retry` | RetryConfig | - | Retry configuration for HTTP transport |
+| `reconnect` | ReconnectConfig | - | Reconnection config for WebSocket |
+| `concurrency` | ConcurrencyConfig | - | Concurrency limiting config |
+### JWTConfig Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `private_key` | str | *required* | Path to RSA private key or PEM string |
+| `expires_in` | int | 3600 | Token validity in seconds |
+| `refresh_threshold` | int | 300 | Seconds before expiry to refresh |
+| `issuer` | str | - | Issuer claim (iss) |
+| `subject` | str | - | Subject claim (sub) |
+| `audience` | str | - | Audience claim (aud) |
+| `claims` | dict | - | Additional custom claims |
+### RetryConfig Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `max_retries` | int | 3 | Maximum number of retry attempts |
+| `initial_delay_ms` | int | 100 | Initial delay in milliseconds |
+| `max_delay_ms` | int | 10000 | Maximum delay cap in milliseconds |
+| `backoff_multiplier` | float | 2.0 | Multiplier for exponential backoff |
+| `jitter_factor` | float | 0.1 | Random jitter factor (0-1) |
+### ReconnectConfig Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `enabled` | bool | True | Whether auto-reconnection is enabled |
+| `max_attempts` | int | 5 | Maximum reconnection attempts (0 = infinite) |
+| `initial_delay_ms` | int | 1000 | Initial delay in milliseconds |
+| `max_delay_ms` | int | 30000 | Maximum delay cap in milliseconds |
+| `backoff_multiplier` | float | 2.0 | Multiplier for exponential backoff |
+| `jitter_factor` | float | 0.1 | Random jitter factor (0-1) |
+### ConcurrencyConfig Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `max_concurrent` | int | 10 | Maximum concurrent requests |
+### Checking Connection Mode
+```python
+from owl_browser import Browser, RemoteConfig, ConnectionMode
+browser = Browser(remote=RemoteConfig(url="...", token="..."))
+browser.launch()
+# Check the connection mode
+print(f"Mode: {browser.mode}")  # ConnectionMode.REMOTE
+print(f"Is Remote: {browser.is_remote}")  # True
+browser.close()
+```
+### Setting Up the HTTP Server
+See `http-server/README.md` for instructions on deploying the Owl Browser HTTP server.
+```bash
+# Start the HTTP server
+./owl_browser --http --port 8080 --token "your-secret-token"
+```
+## Usage Examples
+### Basic Navigation and Interaction
+```python
+from owl_browser import Browser
+browser = Browser()
+browser.launch()
+page = browser.new_page()
+page.goto("https://example.com")
+# Click using various selector types
+page.click("#submit")           # CSS selector
+page.click("100x200")           # Coordinates
+page.click("login button")      # Natural language
+# Type into inputs
+page.type("#email", "user@example.com")
+page.type("password field", "secret123")
+# Select from dropdowns
+page.pick("country dropdown", "United States")
+# Press special keys
+from owl_browser import KeyName
+page.press_key(KeyName.ENTER)
+browser.close()
+```
+### AI-Powered Features
+```python
+from owl_browser import Browser
+with Browser() as browser:
+    page = browser.new_page()
+    page.goto("https://news.example.com")
+    # Query the page using LLM
+    summary = page.query_page("What are the main headlines?")
+    print(summary)
+    # Get structured page summary
+    summary = page.summarize_page()
+    print(summary)
+    # Execute natural language commands
+    page.execute_nla("scroll down and click the first article")
+    # Auto-solve CAPTCHAs
+    result = page.solve_captcha()
+    if result.get("success"):
+        print("CAPTCHA solved!")
+```
+### Concurrent Scraping
+```python
+from owl_browser import Browser
+from concurrent.futures import ThreadPoolExecutor
+browser = Browser()
+browser.launch()
+def scrape_url(url):
+    """Scrape a single URL - each call gets its own isolated page."""
+    page = browser.new_page()
+    try:
+        page.goto(url)
+        return {
+            "url": url,
+            "title": page.get_title(),
+            "text": page.extract_text("main content")
+        }
+    finally:
+        page.close()
+urls = [
+    "https://example1.com",
+    "https://example2.com",
+    "https://example3.com",
+]
+# Scrape up to 5 pages concurrently
+with ThreadPoolExecutor(max_workers=5) as executor:
+    results = list(executor.map(scrape_url, urls))
+browser.close()
+for result in results:
+    print(f"{result['title']}: {result['text'][:100]}...")
+```
+### Proxy with Stealth
+```python
+from owl_browser import Browser, ProxyConfig, ProxyType
+with Browser() as browser:
+    # Create page with proxy and timezone spoofing
+    page = browser.new_page(proxy=ProxyConfig(
+        type=ProxyType.SOCKS5H,  # SOCKS5 with remote DNS
+        host="proxy.example.com",
+        port=1080,
+        username="user",
+        password="pass",
+        stealth=True,           # Block WebRTC leaks
+        timezone_override="America/New_York"  # Match proxy location
+    ))
+    page.goto("https://whatismyip.com")
+    page.screenshot("proxy-test.png")
+    # Check proxy status
+    status = page.get_proxy_status()
+    print(f"Proxy connected: {status.connected}")
+```
+### Cookie Management
+```python
+from owl_browser import Browser
+with Browser() as browser:
+    page = browser.new_page()
+    page.goto("https://example.com")
+    # Get all cookies
+    cookies = page.get_cookies()
+    for cookie in cookies:
+        print(f"{cookie.name}: {cookie.value}")
+    # Set a cookie
+    page.set_cookie(
+        url="https://example.com",
+        name="session",
+        value="abc123",
+        secure=True,
+        http_only=True
+    )
+    # Delete specific cookie
+    page.delete_cookies("https://example.com", "session")
+    # Delete all cookies
+    page.delete_cookies()
+```
+### Video Recording
+```python
+from owl_browser import Browser
+with Browser() as browser:
+    page = browser.new_page()
+    # Start recording
+    page.start_video_recording(fps=30)
+    page.goto("https://example.com")
+    page.click("some button")
+    page.type("input field", "test data")
+    # Stop and get video path
+    video_path = page.stop_video_recording()
+    print(f"Video saved to: {video_path}")
+```
+### Content Extraction
+```python
+from owl_browser import Browser, CleanLevel, ExtractionTemplate
+with Browser() as browser:
+    page = browser.new_page()
+    page.goto("https://example.com")
+    # Extract text
+    text = page.extract_text()
+    article = page.extract_text("main article")
+    # Get as Markdown
+    markdown = page.get_markdown(include_links=True, include_images=False)
+    # Get clean HTML
+    html = page.get_html(CleanLevel.AGGRESSIVE)
+    # Extract structured JSON (auto-detects template)
+    data = page.extract_json()
+    # Use specific template
+    data = page.extract_json(ExtractionTemplate.GOOGLE_SEARCH)
+```
+### Test Execution
+Run tests exported from the Developer Playground:
+```python
+from owl_browser import Browser
+with Browser() as browser:
+    page = browser.new_page()
+    # Run test from JSON file
+    result = page.run_test("my-test.json", verbose=True)
+    # Or define inline
+    result = page.run_test({
+        "name": "Login Test",
+        "steps": [
+            {"type": "navigate", "url": "https://example.com/login"},
+            {"type": "type", "selector": "#email", "text": "user@example.com"},
+            {"type": "type", "selector": "#password", "text": "password123"},
+            {"type": "click", "selector": "button[type='submit']"},
+            {"type": "wait", "duration": 2000},
+            {"type": "screenshot", "filename": "logged-in.png"}
+        ]
+    })
+    print(f"Test: {result.test_name}")
+    print(f"Success: {result.successful_steps}/{result.total_steps}")
+    print(f"Time: {result.execution_time}ms")
+```
+### Quick Utilities
+For simple one-off operations:
+```python
+from owl_browser import quick_screenshot, quick_extract, quick_query
+# Take a quick screenshot
+quick_screenshot("https://example.com", "example.png")
+# Extract text quickly
+text = quick_extract("https://example.com", "main content")
+# Query a page
+answer = quick_query("https://news.com", "What is the top headline?")
+```
+### Async/Await Usage
+For asyncio-based applications:
+```python
+import asyncio
+from owl_browser import AsyncBrowser
+async def main():
+    async with AsyncBrowser() as browser:
+        page = await browser.new_page()
+        await page.goto("https://example.com")
+        # All methods are async
+        await page.click("search button")
+        await page.type("search input", "hello world")
+        text = await page.extract_text()
+        await page.screenshot("screenshot.png")
+asyncio.run(main())
+```
+#### Concurrent Async Scraping
+```python
+import asyncio
+from owl_browser import AsyncBrowser, ProxyConfig, ProxyType
+async def scrape_url(browser, url):
+    """Scrape a single URL."""
+    page = await browser.new_page()
+    try:
+        await page.goto(url)
+        return {
+            "url": url,
+            "title": await page.get_title(),
+            "text": await page.extract_text("main")
+        }
+    finally:
+        await page.close()
+async def main():
+    urls = [
+        "https://example1.com",
+        "https://example2.com",
+        "https://example3.com",
+    ]
+    async with AsyncBrowser() as browser:
+        # Scrape all URLs concurrently
+        results = await asyncio.gather(*[
+            scrape_url(browser, url) for url in urls
+        ])
+    for result in results:
+        print(f"{result['title']}: {result['text'][:50]}...")
+asyncio.run(main())
+```
+#### Quick Async Utilities
+```python
+import asyncio
+from owl_browser import async_screenshot, async_extract, async_query
+async def main():
+    # Quick screenshot
+    await async_screenshot("https://example.com", "example.png")
+    # Quick extraction
+    text = await async_extract("https://example.com", "main content")
+    # Quick LLM query
+    answer = await async_query("https://news.com", "What is the top headline?")
+asyncio.run(main())
+```
+## API Reference
+### Browser
+| Method | Description |
+|--------|-------------|
+| `launch()` | Start the browser process |
+| `new_page(proxy?, llm?)` | Create a new page (context) |
+| `pages()` | Get all active pages |
+| `get_llm_status()` | Check LLM availability |
+| `get_demographics()` | Get location, time, weather |
+| `close()` | Close browser and all pages |
+### BrowserContext (Page)
+#### Navigation
+| Method | Description |
+|--------|-------------|
+| `goto(url)` | Navigate to URL |
+| `reload(ignore_cache?)` | Reload page |
+| `go_back()` | Navigate back |
+| `go_forward()` | Navigate forward |
+#### Interaction
+| Method | Description |
+|--------|-------------|
+| `click(selector)` | Click element |
+| `type(selector, text)` | Type into input |
+| `pick(selector, value)` | Select from dropdown |
+| `press_key(key)` | Press special key |
+| `submit_form()` | Submit focused form |
+| `highlight(selector)` | Highlight element |
+#### Content
+| Method | Description |
+|--------|-------------|
+| `extract_text(selector?)` | Extract text content |
+| `get_html(clean_level?)` | Get HTML |
+| `get_markdown(...)` | Get as Markdown |
+| `extract_json(template?)` | Extract structured JSON |
+| `summarize_page()` | Get LLM page summary |
+#### AI Features
+| Method | Description |
+|--------|-------------|
+| `query_page(query)` | Ask LLM about page |
+| `execute_nla(command)` | Execute NL command |
+| `solve_captcha()` | Auto-solve CAPTCHA |
+#### Screenshot & Video
+| Method | Description |
+|--------|-------------|
+| `screenshot(path?)` | Take screenshot |
+| `start_video_recording(fps?)` | Start recording |
+| `stop_video_recording()` | Stop and save video |
+#### Cookies & Proxy
+| Method | Description |
+|--------|-------------|
+| `get_cookies(url?)` | Get cookies |
+| `set_cookie(...)` | Set a cookie |
+| `delete_cookies(...)` | Delete cookies |
+| `set_proxy(config)` | Configure proxy |
+| `get_proxy_status()` | Get proxy status |
+| `connect_proxy()` | Enable proxy |
+| `disconnect_proxy()` | Disable proxy |
+## Error Handling
+The SDK provides specific exception types for different error scenarios:
+```python
+from owl_browser import (
+    Browser, RemoteConfig, JWTConfig, AuthMode,
+    AuthenticationError, RateLimitError, IPBlockedError,
+    LicenseError, BrowserInitializationError
+)
+try:
+    browser = Browser(remote=RemoteConfig(
+        url="http://localhost:8080",
+        auth_mode=AuthMode.JWT,
+        jwt=JWTConfig(private_key="/path/to/private.pem")
+    ))
+    browser.launch()
+    page = browser.new_page()
+    page.goto("https://example.com")
+except AuthenticationError as e:
+    # 401 - Invalid or expired token
+    print(f"Auth failed: {e.message}")
+    print(f"Reason: {e.reason}")
+except RateLimitError as e:
+    # 429 - Too many requests
+    print(f"Rate limited. Retry after: {e.retry_after} seconds")
+except IPBlockedError as e:
+    # 403 - IP not whitelisted
+    print(f"IP blocked: {e.ip_address}")
+except LicenseError as e:
+    # License validation failed
+    print(f"License error: {e.status}")
+except BrowserInitializationError as e:
+    print(f"Failed to start browser: {e}")
+finally:
+    browser.close()
+```
+### Exception Types
+| Exception | HTTP Code | Description |
+|-----------|-----------|-------------|
+| `AuthenticationError` | 401 | Invalid/expired token or JWT signature mismatch |
+| `RateLimitError` | 429 | Too many requests, includes `retry_after` in seconds |
+| `IPBlockedError` | 403 | Client IP not in whitelist |
+| `LicenseError` | 503 | Browser license validation failed |
+| `BrowserInitializationError` | - | Failed to start/connect to browser |
+| `CommandTimeoutError` | - | Operation timed out |
+| `ElementNotFoundError` | - | Element not found on page |
+## Thread Safety
+The SDK is designed for concurrent usage:
+- **Browser** instance can be shared across threads
+- Each **BrowserContext** (page) is isolated
+- Multiple pages can run operations simultaneously
+- IPC communication is thread-safe with proper locking
+Best practice for concurrent scraping:
+1. Create one `Browser` instance
+2. Create separate pages for each concurrent task
+3. Close pages when done to free resources
+## Requirements
+- Python 3.8+
+- macOS or Linux
+- Built Owl Browser binary
+## License
+MIT License - see the main project for details.