crawlix-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. crawlix-0.1.0/.gitignore +38 -0
  2. crawlix-0.1.0/AGENTS.md +384 -0
  3. crawlix-0.1.0/CHANGELOG.md +24 -0
  4. crawlix-0.1.0/CONTRIBUTING.md +53 -0
  5. crawlix-0.1.0/LICENSE +21 -0
  6. crawlix-0.1.0/PKG-INFO +189 -0
  7. crawlix-0.1.0/README.md +153 -0
  8. crawlix-0.1.0/SECURITY.md +20 -0
  9. crawlix-0.1.0/docs/superpowers/plans/2025-05-17-crawlix-implementation.md +2597 -0
  10. crawlix-0.1.0/docs/superpowers/specs/2025-05-17-crawlix-design.md +430 -0
  11. crawlix-0.1.0/pyproject.toml +76 -0
  12. crawlix-0.1.0/src/crawlix/__init__.py +28 -0
  13. crawlix-0.1.0/src/crawlix/_version.py +1 -0
  14. crawlix-0.1.0/src/crawlix/async_api.py +82 -0
  15. crawlix-0.1.0/src/crawlix/backends/__init__.py +79 -0
  16. crawlix-0.1.0/src/crawlix/backends/httpx.py +92 -0
  17. crawlix-0.1.0/src/crawlix/backends/playwright.py +223 -0
  18. crawlix-0.1.0/src/crawlix/backends/protocol.py +176 -0
  19. crawlix-0.1.0/src/crawlix/backends/requests.py +139 -0
  20. crawlix-0.1.0/src/crawlix/backends/selenium.py +270 -0
  21. crawlix-0.1.0/src/crawlix/browser.py +71 -0
  22. crawlix-0.1.0/src/crawlix/element.py +171 -0
  23. crawlix-0.1.0/src/crawlix/exceptions.py +26 -0
  24. crawlix-0.1.0/src/crawlix/page.py +223 -0
  25. crawlix-0.1.0/src/crawlix/utils.py +36 -0
  26. crawlix-0.1.0/tests/__init__.py +0 -0
  27. crawlix-0.1.0/tests/conftest.py +4 -0
  28. crawlix-0.1.0/tests/test_async_api.py +49 -0
  29. crawlix-0.1.0/tests/test_backends/__init__.py +0 -0
  30. crawlix-0.1.0/tests/test_backends/test_playwright.py +9 -0
  31. crawlix-0.1.0/tests/test_backends/test_requests.py +80 -0
  32. crawlix-0.1.0/tests/test_backends/test_selenium.py +11 -0
  33. crawlix-0.1.0/tests/test_browser.py +86 -0
  34. crawlix-0.1.0/tests/test_element.py +138 -0
  35. crawlix-0.1.0/tests/test_page.py +125 -0
crawlix-0.1.0/.gitignore ADDED
@@ -0,0 +1,38 @@
+ __pycache__/
+ *.py[cod]
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ .pytest_cache/
+ .coverage
+ .coverage.*
+ htmlcov/
+ .mypy_cache/
+ .ruff_cache/
+ *.log
+ .env
+ .venv/
+ venv/
+ ENV/
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ *~
+ .DS_Store
+ node_modules/
crawlix-0.1.0/AGENTS.md ADDED
@@ -0,0 +1,384 @@
+ # crawlix
+
+ > One API. Any backend. Full browser automation to lightweight scraping.
+
+ **PyPI**: `pip install crawlix`
+ **Author**: keylordelrey
+ **License**: MIT
+ **Python**: 3.10+
+
+ ---
+
+ ## What crawlix is
+
+ crawlix is a Python browser automation and web scraping library with a unified API across multiple backends. The same code works whether you're doing simple HTTP scraping or full Playwright-powered browser automation — you switch backends, not code.
+
+ ```python
+ # Zero setup
+ from crawlix import Browser
+
+ with Browser() as b:
+     page = b.open("https://example.com")
+     print(page.find("h1").text)
+
+ # Full browser automation — same API
+ with Browser(backend="playwright") as b:
+     page = b.open("https://example.com")
+     page.click("#login")
+     page.type("#email", "user@example.com")
+     page.type("#password", "secret")
+     page.submit("form")
+     page.wait_for(".dashboard")
+     page.screenshot("result.png")
+ ```
+
+ ---
+
+ ## Install
+
+ ```bash
+ # Core — lightweight HTTP scraping
+ pip install crawlix
+
+ # With specific backend
+ pip install crawlix[requests]    # requests + BeautifulSoup4
+ pip install crawlix[playwright]  # full browser via Playwright
+ pip install crawlix[selenium]    # full browser via Selenium
+ pip install crawlix[async]       # async support via httpx
+ pip install crawlix[full]        # everything
+ ```
+
+ ---
+
+ ## Core Design Rules
+
+ 1. **Same API across all backends** — switching backend never requires rewriting user code
+ 2. **Auto-detect best available backend** — no config needed, crawlix figures it out
+ 3. **Zero hard dependencies** — `pip install crawlix` always succeeds
+ 4. **Fail with helpful errors** — if an operation needs a browser backend, the error tells you exactly what to install
+ 5. **Context manager always** — resources always cleaned up properly
+ 6. **Stealth on by default** — realistic headers, UA rotation, no bot fingerprint out of the box (see the sketch after this list)
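
> Editor's note: the diff above does not show crawlix's actual stealth code, so here is a minimal sketch of what "realistic headers + UA rotation" typically means. The function name and header values are illustrative, not crawlix internals.

```python
import random

# Hypothetical UA pool; a real implementation would keep this list current.
_USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/17.4 Safari/605.1.15",
]

def stealth_headers() -> dict:
    """Return a browser-like header set with a randomly rotated User-Agent."""
    return {
        "User-Agent": random.choice(_USER_AGENTS),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
    }
```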
+
+ ---
+
+ ## Backend Priority (auto-detect order)
+
+ ```
+ playwright → selenium → httpx → requests+bs4 → error with install hint
+ ```
+
+ Override anytime:
+ ```python
+ Browser(backend="playwright")
+ Browser(backend="requests")
+ Browser(backend="selenium")
+ Browser(backend="httpx")
+ ```
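
> Editor's note: the detection itself is simple to picture. Below is a minimal sketch of priority-ordered probing using only the standard library; crawlix's real logic lives in `src/crawlix/backends/__init__.py` and may differ.

```python
import importlib.util

# Probe order mirrors the priority list above; the requests backend
# also needs bs4 (BeautifulSoup4) for parsing.
_PRIORITY = [("playwright",), ("selenium",), ("httpx",), ("requests", "bs4")]

def detect_backend() -> str:
    for modules in _PRIORITY:
        if all(importlib.util.find_spec(m) is not None for m in modules):
            return modules[0]
    raise RuntimeError(
        "No backend available; install one, e.g. pip install crawlix[playwright]"
    )
```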
+
+ ---
+
+ ## Full API
+
+ ### Browser
+
+ ```python
+ Browser(
+     backend="auto",     # backend selection
+     headless=True,      # browser backends
+     stealth=True,       # realistic headers + UA rotation
+     timeout=30,         # seconds, applies to all operations
+     proxy=None,         # "http://user:pass@host:port"
+     locale="en-US",     # browser locale
+     user_agent=None,    # override UA
+ )
+
+ b.open(url)        # → Page
+ b.new_page()       # → blank Page
+ b.close()
+ b.backend_name     # → str, which backend is active
+ b.supports_js      # → bool
+ ```
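
> Editor's note: the two read-only properties make backend-agnostic code easy to write. A hedged usage sketch, assuming the API behaves as documented above:

```python
from crawlix import Browser

with Browser() as b:                       # auto-detects whatever is installed
    print("active backend:", b.backend_name)
    page = b.open("https://example.com")
    if b.supports_js:
        page.evaluate("document.title")    # JS path on browser backends
    else:
        h1 = page.find("h1")               # static extraction still works
        print(h1.text if h1 else "no <h1> found")
```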
+
+ ### Page
+
+ ```python
+ # Navigation
+ page.goto(url)      # → Page (chainable)
+ page.reload()       # → Page
+ page.back()         # → Page
+ page.forward()      # → Page
+
+ # Properties
+ page.url        # → str
+ page.title      # → str
+ page.html       # → str, full HTML
+ page.text       # → str, visible text only
+ page.status     # → int, HTTP status code
+ page.headers    # → dict
+
+ # Querying
+ page.find(selector)        # → Element | None
+ page.find_all(selector)    # → list[Element]
+ page.find_text(text)       # → Element | None
+ page.xpath(expr)           # → list[Element]
+
+ # Interaction (browser backends)
+ page.click(selector)
+ page.double_click(selector)
+ page.right_click(selector)
+ page.type(selector, text)
+ page.clear(selector)
+ page.submit(selector="form")
+ page.select(selector, value)
+ page.hover(selector)
+ page.focus(selector)
+ page.blur(selector)
+ page.scroll(x=0, y=500)
+ page.scroll_to(selector)
+ page.drag(source, target)
+ page.key(key)                  # e.g. "Enter", "Tab", "Escape"
+ page.upload(selector, path)    # file upload
+
+ # Waiting (browser backends)
+ page.wait_for(selector, timeout=10)
+ page.wait_for_text(text, timeout=10)
+ page.wait_for_url(pattern, timeout=10)
+ page.wait_for_load(timeout=30)
+ page.wait_for_network_idle(timeout=30)
+ page.sleep(seconds)
+
+ # JavaScript (browser backends)
+ page.evaluate(js_code)           # → any
+ page.evaluate_on(selector, js)   # → any
+
+ # Network
+ page.set_headers(headers)
+ page.set_cookies(cookies)
+ page.get_cookies()     # → list[dict]
+ page.clear_cookies()
+ page.intercept(pattern, handler)   # intercept requests (playwright)
+
+ # Extraction helpers
+ page.links()     # → list[str], all hrefs
+ page.images()    # → list[str], all srcs
+ page.tables()    # → list[list[list[str]]]
+ page.forms()     # → list[dict]
+ page.json()      # → dict, parse response as JSON
+ page.meta()      # → dict, all meta tags
+
+ # Output
+ page.screenshot(path=None)   # → bytes, saves if path given
+ page.pdf(path=None)          # → bytes
+ page.save(path)              # save HTML to file
+ page.show()                  # print pretty HTML (debug)
+ ```
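
> Editor's note: the extraction helpers at the end of that list get no dedicated example later in the file, so here is a short sketch built on the return shapes documented above; the URL and output handling are illustrative.

```python
from crawlix import Browser

with Browser() as b:
    page = b.open("https://example.com")
    # meta() is documented to return a dict of meta tags
    print(page.meta().get("description", "(no description tag)"))
    for form in page.forms():    # each form described as a dict
        print(form)
    page.save("example.html")    # keep the raw HTML for offline parsing
```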
+
+ ### Element
+
+ ```python
+ # Properties
+ el.text          # → str, inner text
+ el.html          # → str, inner HTML
+ el.outer_html    # → str, outer HTML
+ el.tag           # → str
+ el.id            # → str
+ el.classes       # → list[str]
+
+ # Attributes
+ el.attr(name, default="")   # → str
+ el.attrs                    # → dict, all attributes
+ el.has_attr(name)           # → bool
+
+ # Traversal
+ el.find(selector)       # → Element | None
+ el.find_all(selector)   # → list[Element]
+ el.parent()             # → Element | None
+ el.children()           # → list[Element]
+ el.siblings()           # → list[Element]
+ el.next()               # → Element | None
+ el.prev()               # → Element | None
+
+ # Interaction (browser backends)
+ el.click()
+ el.double_click()
+ el.type(text)
+ el.clear()
+ el.hover()
+ el.focus()
+ el.scroll_into_view()
+ el.is_visible()     # → bool
+ el.is_enabled()     # → bool
+ el.is_checked()     # → bool (checkboxes)
+ el.bounding_box()   # → dict {x, y, width, height}
+ el.screenshot(path=None)   # → bytes, screenshot of element only
+
+ # Magic
+ el.__str__()    # → .text
+ el.__repr__()   # → "Element(<tag> .class #id)"
+ el.__bool__()   # → True (so `if page.find("x"):` works naturally)
+ ```
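
> Editor's note: the traversal methods compose naturally. A sketch walking `<dt>`/`<dd>` pairs in a definition list; the selectors and page structure are illustrative, and `next()` is assumed to return the following element sibling as documented above.

```python
from crawlix import Browser

with Browser() as b:
    page = b.open("https://example.com/specs")   # hypothetical page
    term = page.find("dl > dt")
    while term is not None:
        value = term.next()                      # the adjacent <dd>, if any
        if value is not None and value.tag == "dd":
            print(term.text, "=", value.text)
        term = value.next() if value else None
```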
+
+ ---
+
+ ## Convenience Functions
+
+ ```python
+ from crawlix import get, fetch, browse
+
+ # One-liner scraping
+ page = get("https://example.com")       # → Page
+ html = fetch("https://example.com")     # → str HTML
+ page = browse("https://example.com")    # → Page, forces browser backend
+
+ # Async variants
+ from crawlix.async_api import aget, afetch
+
+ page = await aget("https://example.com")
+ html = await afetch("https://example.com")
+ ```
+
+ ---
+
+ ## Exceptions
+
+ ```python
+ from crawlix.exceptions import (
+     CrawlixError,      # base — catch-all
+     BackendError,      # backend unavailable or op not supported
+     TimeoutError,      # wait exceeded timeout
+     NavigationError,   # page failed to load
+     SelectorError,     # invalid selector or element not found
+     NetworkError,      # connection error
+     JavaScriptError,   # JS evaluation failed
+ )
+ ```
+
+ `BackendError` always includes an install hint:
+ ```
+ BackendError: screenshot() requires a browser backend.
+ Install one:
+   pip install crawlix[playwright]   ← recommended
+   pip install crawlix[selenium]
+ ```
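
> Editor's note: in practice that hierarchy supports graceful degradation. A hedged sketch: catch `BackendError` and fall back to what HTTP backends can do.

```python
from crawlix import Browser
from crawlix.exceptions import BackendError

with Browser() as b:                   # may auto-detect an HTTP-only backend
    page = b.open("https://example.com")
    try:
        page.screenshot("page.png")    # browser-only operation
    except BackendError as err:
        print(err)                     # the message carries the install hint
        page.save("page.html")         # fall back to saving the HTML
```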
+
+ ---
+
+ ## Usage Examples
+
+ ```python
+ # Basic scraping
+ from crawlix import Browser
+
+ with Browser() as b:
+     page = b.open("https://news.ycombinator.com")
+     for item in page.find_all(".titleline > a"):
+         print(item.text, item.attr("href"))
+
+ # Login flow
+ with Browser(backend="playwright") as b:
+     page = b.open("https://github.com/login")
+     page.type("#login_field", "username")
+     page.type("#password", "password")
+     page.click("[type=submit]")
+     page.wait_for(".dashboard-sidebar")
+     print("logged in:", page.url)
+
+ # Screenshot
+ with Browser(backend="playwright", headless=True) as b:
+     page = b.open("https://github.com/keylordelrey/crawlix")
+     page.screenshot("crawlix.png")
+
+ # Intercept network requests
+ with Browser(backend="playwright") as b:
+     page = b.open("https://example.com")
+     page.intercept("**/api/**", lambda req: req.respond({"mocked": True}))
+
+ # JSON API
+ with Browser() as b:
+     data = b.open("https://api.github.com/users/keyreyla").json()
+
+ # Async
+ import asyncio
+ from crawlix.async_api import AsyncBrowser
+
+ async def main():
+     async with AsyncBrowser() as b:
+         page = await b.open("https://example.com")
+         items = await page.find_all("a")
+         print(len(items))
+
+ asyncio.run(main())
+
+ # Proxy
+ with Browser(proxy="http://user:pass@proxy:8080") as b:
+     page = b.open("https://ipinfo.io/json")
+     print(page.json()["ip"])
+
+ # Table extraction
+ with Browser() as b:
+     page = b.open("https://en.wikipedia.org/wiki/Python_(programming_language)")
+     for row in page.tables()[0]:
+         print(row)
+
+ # JavaScript evaluation
+ with Browser(backend="playwright") as b:
+     page = b.open("https://example.com")
+     title = page.evaluate("document.title")
+     count = page.evaluate_on("ul > li", "el => el.childElementCount")
+
+ # File upload
+ with Browser(backend="playwright") as b:
+     page = b.open("https://example.com/upload")
+     page.upload("#file-input", "/path/to/file.pdf")
+     page.click("#submit")
+     page.wait_for(".success")
+
+ # Chaining
+ with Browser(backend="playwright") as b:
+     page = (
+         b.open("https://example.com/login")
+         .type("#email", "user@example.com")
+         .type("#password", "secret")
+         .click("[type=submit]")
+         .wait_for(".dashboard")
+     )
+     print(page.title)
+ ```
+
+ ---
+
+ ## Publish to TestPyPI & PyPI
+
+ ```bash
+ # Build
+ rm -rf dist/
+ python -m build
+
+ # TestPyPI first — always
+ twine upload --repository testpypi dist/*
+ pip install --index-url https://test.pypi.org/simple/ crawlix
+
+ # Real PyPI
+ twine upload dist/*
+ ```
+
+ Publishing is automated via GitHub Actions on every `git tag v*` push.
+
+ ---
+
+ ## Agent Rules
+
+ When implementing crawlix, always follow these non-negotiable rules:
+
+ 1. Core `crawlix/` package must have **zero imports from optional libraries at module level** — all optional imports live inside functions/methods only, wrapped in try/except ImportError (see the sketch after this list)
+ 2. Every method that requires a browser backend must raise `BackendError` with a helpful install message on HTTP backends — never `NotImplementedError`, never silent failure
+ 3. All public Page methods return `self` for chaining — `page.type(...).click(...).wait_for(...)` must always work
+ 4. `Browser.__exit__` must always call `close()` even if an exception occurred inside the block
+ 5. `Element.__bool__` always returns `True` — `if page.find("x"):` works naturally
+ 6. Type hints on all public API, Python 3.10 compatible — modern union syntax such as `str | None` and built-in generics like `list[Element]` are fine
+ 7. Backend auto-detection runs once at `Browser.__init__` and is cached — never re-detect per request
+ 8. Stealth headers are applied by default unless `stealth=False` is explicitly passed
+ 9. All wait methods use the `timeout` set at the `Browser()` level as their default, overridable per call
+ 10. Unit tests use mocked HTTP responses only — never hit real URLs in the test suite
+ 11. Single source of truth for the version — `src/crawlix/_version.py` only
+ 12. `pyproject.toml` uses hatchling, with the version read dynamically from `_version.py`
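
> Editor's note: for reference, a minimal sketch of rules 1 and 2 together: the optional import stays inside the method, and failure surfaces as `BackendError` with an install hint. The class and method names are illustrative, not crawlix internals.

```python
from crawlix.exceptions import BackendError

class PlaywrightBackendSketch:
    def launch(self) -> None:
        try:
            # Rule 1: optional dependency imported lazily, inside the method.
            from playwright.sync_api import sync_playwright
        except ImportError as exc:
            raise BackendError(
                "playwright is not installed: pip install crawlix[playwright]"
            ) from exc
        self._pw = sync_playwright().start()

class HttpBackendSketch:
    def screenshot(self, path: str | None = None) -> bytes:
        # Rule 2: never NotImplementedError, never silent failure.
        raise BackendError(
            "screenshot() requires a browser backend.\n"
            "Install one:\n"
            "  pip install crawlix[playwright]   <- recommended\n"
            "  pip install crawlix[selenium]"
        )
```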
crawlix-0.1.0/CHANGELOG.md ADDED
@@ -0,0 +1,24 @@
+ # Changelog
+
+ All notable changes to crawlix will be documented in this file.
+
+ ## [0.1.0] - 2026-05-17
+
+ ### Added
+
+ - Initial release
+ - `Browser` class with context manager support and auto-detect backend
+ - `Page` class with navigation, querying, interaction, and chaining support
+ - `Element` class with traversal, attributes, and magic methods
+ - `Backend` ABC with strategy pattern for multiple backends
+ - `RequestsBackend` — lightweight HTTP scraping via requests + BeautifulSoup4
+ - `PlaywrightBackend` — full browser automation via Playwright
+ - `SeleniumBackend` — full browser automation via Selenium
+ - `HttpxBackend` — async HTTP support via httpx
+ - `AsyncBrowser` and `AsyncPage` for async workflows
+ - `aget()` and `afetch()` convenience functions
+ - Auto-detect backend (playwright > selenium > requests)
+ - Stealth mode with realistic headers and UA rotation
+ - BackendError with helpful install hints for browser-only features
+ - Full type hints (Python 3.10+)
+ - Comprehensive test suite with mocked responses
crawlix-0.1.0/CONTRIBUTING.md ADDED
@@ -0,0 +1,53 @@
+ # Contributing to crawlix
+
+ Thank you for considering contributing to crawlix! This document outlines the guidelines for contributing.
+
+ ## Development Setup
+
+ ```bash
+ git clone https://github.com/keylordelrey/crawlix.git
+ cd crawlix
+ python -m venv .venv
+ source .venv/bin/activate
+ pip install -e ".[full]"
+ pip install pytest pytest-mock responses ruff mypy build
+ ```
+
+ ## Running Tests
+
+ ```bash
+ pytest
+ pytest -v      # verbose
+ pytest --cov   # coverage
+ ```
+
+ ## Code Quality
+
+ ```bash
+ ruff check src/
+ mypy src/
+ ```
+
+ ## Pull Request Process
+
+ 1. Fork the repository
+ 2. Create a feature branch (`git checkout -b feat/my-feature`)
+ 3. Commit your changes using conventional commits
+ 4. Ensure all tests pass
+ 5. Open a pull request
+
+ ## Conventional Commits
+
+ - `feat:` — new feature
+ - `fix:` — bug fix
+ - `docs:` — documentation
+ - `test:` — tests
+ - `refactor:` — code restructuring
+ - `chore:` — maintenance
+
+ ## Code Style
+
+ - Python 3.10+ type hints
+ - Ruff for linting and formatting
+ - Line length: 100
+ - Quotes: double
crawlix-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 keylordelrey
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
crawlix-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,189 @@
+ Metadata-Version: 2.4
+ Name: crawlix
+ Version: 0.1.0
+ Summary: One API. Any backend. Full browser automation to lightweight scraping.
+ Project-URL: Homepage, https://github.com/keylordelrey/crawlix
+ Project-URL: Source, https://github.com/keylordelrey/crawlix
+ Project-URL: Issues, https://github.com/keylordelrey/crawlix/issues
+ Author: keylordelrey
+ License: MIT
+ License-File: LICENSE
+ Keywords: browser-automation,playwright,selenium,web-scraping
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.10
+ Requires-Dist: beautifulsoup4>=4.12
+ Requires-Dist: requests>=2.28
+ Provides-Extra: async
+ Requires-Dist: httpx>=0.24; extra == 'async'
+ Provides-Extra: full
+ Requires-Dist: httpx>=0.24; extra == 'full'
+ Requires-Dist: playwright>=1.40; extra == 'full'
+ Requires-Dist: selenium>=4.15; extra == 'full'
+ Provides-Extra: playwright
+ Requires-Dist: playwright>=1.40; extra == 'playwright'
+ Provides-Extra: selenium
+ Requires-Dist: selenium>=4.15; extra == 'selenium'
+ Description-Content-Type: text/markdown
+
+ # crawlix
+
+ > One API. Any backend. Full browser automation to lightweight scraping.
+
+ **PyPI**: `pip install crawlix`
+ **Author**: keylordelrey
+ **License**: MIT
+ **Python**: 3.10+
+
+ ---
+
+ ## What crawlix is
+
+ crawlix is a Python browser automation and web scraping library with a unified API across multiple backends. The same code works whether you are doing simple HTTP scraping or full Playwright-powered browser automation — you switch backends, not code.
+
+ ```python
+ from crawlix import Browser
+
+ with Browser() as b:
+     page = b.open("https://example.com")
+     print(page.find("h1").text)
+
+ with Browser(backend="playwright") as b:
+     page = b.open("https://example.com")
+     page.click("#login")
+     page.type("#email", "user@example.com")
+     page.submit("form")
+     page.wait_for(".dashboard")
+     page.screenshot("result.png")
+ ```
+
+ ---
+
+ ## Install
+
+ ```bash
+ pip install crawlix
+ pip install crawlix[playwright]
+ pip install crawlix[selenium]
+ pip install crawlix[async]
+ pip install crawlix[full]
+ ```
+
+ ---
+
+ ## Core Design Rules
+
+ 1. **Same API across all backends** — switching backend never requires rewriting user code
+ 2. **Auto-detect best available backend** — no config needed, crawlix figures it out
+ 3. **Zero hard dependencies** — `pip install crawlix` always succeeds
+ 4. **Fail with helpful errors** — BackendError tells you exactly what to install
+ 5. **Context manager always** — resources always cleaned up properly
+ 6. **Stealth on by default** — realistic headers, UA rotation, no bot fingerprint
+
+ ---
+
+ ## Backend Priority
+
+ ```
+ playwright > selenium > requests+bs4 (core)
+ ```
+
+ Override anytime:
+ ```python
+ Browser(backend="playwright")
+ Browser(backend="requests")
+ Browser(backend="selenium")
+ ```
+
+ ---
+
+ ## Quick Examples
+
+ ```python
+ from crawlix import Browser, get, fetch
+
+ with Browser() as b:
+     page = b.open("https://news.ycombinator.com")
+     for item in page.find_all(".titleline > a"):
+         print(item.text, item.attr("href"))
+
+ data = get("https://api.github.com/users/keyreyla").json()
+ html = fetch("https://example.com")
+ ```
+
+ For async:
+ ```python
+ import asyncio
+ from crawlix.async_api import AsyncBrowser
+
+ async def main():
+     async with AsyncBrowser() as b:
+         page = await b.open("https://example.com")
+         print(page.html)
+
+ asyncio.run(main())
+ ```
+
+ ---
+
+ ## API Overview
+
+ ### Browser
+ ```python
+ Browser(backend="auto", headless=True, stealth=True, timeout=30, proxy=None, locale="en-US", user_agent=None)
+ b.open(url) -> Page
+ b.new_page() -> Page
+ b.close()
+ b.backend_name -> str
+ b.supports_js -> bool
+ ```
+
+ ### Page (all methods return `self` for chaining)
+ ```python
+ page.find(selector) -> Element | None
+ page.find_all(selector) -> list[Element]
+ page.click(selector) -> Page
+ page.type(selector, text) -> Page
+ page.screenshot(path=None) -> bytes
+ page.html -> str
+ page.text -> str
+ page.json() -> dict
+ page.links() -> list[str]
+ ```
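
> Editor's note: the return-`self` contract means interactions compose into a single expression. A short sketch, assuming the methods above; the URL and selectors are illustrative.

```python
from crawlix import Browser

with Browser(backend="playwright") as b:
    page = (
        b.open("https://example.com/login")    # hypothetical login page
        .type("#email", "user@example.com")
        .click("[type=submit]")                # each call returns the Page
    )
    print(page.html[:100])
```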
+
+ ### Element
+ ```python
+ el.text -> str
+ el.attr(name) -> str
+ el.attrs -> dict
+ el.find(selector) -> Element | None
+ el.click() -> Element
+ bool(el)   # always True
+ ```
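
> Editor's note: `bool(el)` being always `True` matters because `page.find()` returns `None` on a miss, so a plain truthiness test cleanly separates found from not-found. A sketch, assuming the convenience `get()` shown earlier:

```python
from crawlix import get

page = get("https://example.com")
el = page.find(".price")           # Element | None
if el:                             # any Element is truthy; None is not
    print(el.attr("data-value"))
else:
    print("no .price element on this page")
```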
+
+ ---
+
+ ## Exceptions
+
+ ```python
+ from crawlix.exceptions import CrawlixError, BackendError, TimeoutError, NavigationError, SelectorError, NetworkError, JavaScriptError
+ ```
+
+ ---
+
+ ## Development
+
+ ```bash
+ git clone https://github.com/keylordelrey/crawlix.git
+ cd crawlix
+ pip install -e ".[full]"
+ pytest
+ ```