scrapling 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {scrapling-0.4.2/scrapling.egg-info → scrapling-0.4.4}/PKG-INFO +76 -30
  2. {scrapling-0.4.2 → scrapling-0.4.4}/README.md +70 -25
  3. {scrapling-0.4.2 → scrapling-0.4.4}/pyproject.toml +8 -7
  4. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/__init__.py +1 -1
  5. scrapling-0.4.4/scrapling/cli.py +637 -0
  6. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/ai.py +370 -158
  7. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/custom_types.py +3 -3
  8. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/shell.py +37 -2
  9. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_base.py +48 -12
  10. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_controllers.py +35 -7
  11. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_stealth.py +37 -13
  12. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_types.py +2 -0
  13. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_validators.py +9 -0
  14. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/static.py +1 -0
  15. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/convertor.py +39 -22
  16. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/custom.py +13 -1
  17. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/proxy_rotation.py +1 -1
  18. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/engine.py +85 -6
  19. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/result.py +2 -0
  20. scrapling-0.4.4/scrapling/spiders/robotstxt.py +77 -0
  21. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/session.py +4 -2
  22. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/spider.py +3 -0
  23. {scrapling-0.4.2 → scrapling-0.4.4/scrapling.egg-info}/PKG-INFO +76 -30
  24. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/SOURCES.txt +1 -0
  25. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/requires.txt +5 -4
  26. {scrapling-0.4.2 → scrapling-0.4.4}/setup.cfg +1 -1
  27. scrapling-0.4.2/scrapling/cli.py +0 -826
  28. {scrapling-0.4.2 → scrapling-0.4.4}/LICENSE +0 -0
  29. {scrapling-0.4.2 → scrapling-0.4.4}/MANIFEST.in +0 -0
  30. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/__init__.py +0 -0
  31. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/_shell_signatures.py +0 -0
  32. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/_types.py +0 -0
  33. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/mixins.py +0 -0
  34. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/storage.py +0 -0
  35. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/translator.py +0 -0
  36. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/utils/__init__.py +0 -0
  37. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/utils/_shell.py +0 -0
  38. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/core/utils/_utils.py +0 -0
  39. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/__init__.py +0 -0
  40. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/__init__.py +0 -0
  41. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_config_tools.py +0 -0
  42. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/_browsers/_page.py +0 -0
  43. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/constants.py +0 -0
  44. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/__init__.py +0 -0
  45. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/fingerprints.py +0 -0
  46. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/engines/toolbelt/navigation.py +0 -0
  47. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/fetchers/__init__.py +0 -0
  48. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/fetchers/chrome.py +0 -0
  49. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/fetchers/requests.py +0 -0
  50. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/fetchers/stealth_chrome.py +0 -0
  51. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/parser.py +0 -0
  52. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/py.typed +0 -0
  53. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/__init__.py +0 -0
  54. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/checkpoint.py +0 -0
  55. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/request.py +0 -0
  56. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling/spiders/scheduler.py +0 -0
  57. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/dependency_links.txt +0 -0
  58. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/entry_points.txt +0 -0
  59. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/not-zip-safe +0 -0
  60. {scrapling-0.4.2 → scrapling-0.4.4}/scrapling.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapling
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -70,19 +70,20 @@ Description-Content-Type: text/markdown
70
70
  License-File: LICENSE
71
71
  Requires-Dist: lxml>=6.0.2
72
72
  Requires-Dist: cssselect>=1.4.0
73
- Requires-Dist: orjson>=3.11.7
73
+ Requires-Dist: orjson>=3.11.8
74
74
  Requires-Dist: tld>=0.13.2
75
- Requires-Dist: w3lib>=2.4.0
75
+ Requires-Dist: w3lib>=2.4.1
76
76
  Requires-Dist: typing_extensions
77
77
  Provides-Extra: fetchers
78
78
  Requires-Dist: click>=8.3.0; extra == "fetchers"
79
- Requires-Dist: curl_cffi>=0.14.0; extra == "fetchers"
79
+ Requires-Dist: curl_cffi>=0.15.0; extra == "fetchers"
80
80
  Requires-Dist: playwright==1.58.0; extra == "fetchers"
81
81
  Requires-Dist: patchright==1.58.2; extra == "fetchers"
82
82
  Requires-Dist: browserforge>=1.2.4; extra == "fetchers"
83
- Requires-Dist: apify-fingerprint-datapoints>=0.11.0; extra == "fetchers"
83
+ Requires-Dist: apify-fingerprint-datapoints>=0.12.0; extra == "fetchers"
84
84
  Requires-Dist: msgspec>=0.20.0; extra == "fetchers"
85
85
  Requires-Dist: anyio>=4.12.1; extra == "fetchers"
86
+ Requires-Dist: protego>=0.6.0; extra == "fetchers"
86
87
  Provides-Extra: ai
87
88
  Requires-Dist: mcp>=1.26.0; extra == "ai"
88
89
  Requires-Dist: markdownify>=1.2.0; extra == "ai"
@@ -111,14 +112,13 @@ Dynamic: license-file
111
112
  <p align="center">
112
113
  <a href="https://trendshift.io/repositories/14244" target="_blank"><img src="https://trendshift.io/api/badge/repositories/14244" alt="D4Vinci%2FScrapling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
113
114
  <br/>
114
- <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_AR.md">العربيه</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_ES.md">Español</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_FR.md">Français</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_DE.md">Deutsch</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_CN.md">简体中文</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_JP.md">日本語</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_RU.md">Русский</a>
115
+ <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_AR.md">العربيه</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_ES.md">Español</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_FR.md">Français</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_DE.md">Deutsch</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_CN.md">简体中文</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_JP.md">日本語</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_RU.md">Русский</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_KR.md">한국어</a>
115
116
  <br/>
116
117
  <a href="https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml" alt="Tests">
117
118
  <img alt="Tests" src="https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml/badge.svg"></a>
118
119
  <a href="https://badge.fury.io/py/Scrapling" alt="PyPI version">
119
120
  <img alt="PyPI version" src="https://badge.fury.io/py/Scrapling.svg"></a>
120
- <a href="https://pepy.tech/project/scrapling" alt="PyPI Downloads">
121
- <img alt="PyPI Downloads" src="https://static.pepy.tech/personalized-badge/scrapling?period=total&units=INTERNATIONAL_SYSTEM&left_color=GREY&right_color=GREEN&left_text=Downloads"></a>
121
+ <a href="https://clickpy.clickhouse.com/dashboard/scrapling" rel="nofollow"><img src="https://img.shields.io/pypi/dm/scrapling" alt="PyPI package downloads"></a>
122
122
  <a href="https://github.com/D4Vinci/Scrapling/tree/main/agent-skill" alt="AI Agent Skill directory">
123
123
  <img alt="Static Badge" src="https://img.shields.io/badge/Skill-black?style=flat&label=Agent&link=https%3A%2F%2Fgithub.com%2FD4Vinci%2FScrapling%2Ftree%2Fmain%2Fagent-skill"></a>
124
124
  <a href="https://clawhub.ai/D4Vinci/scrapling-official" alt="OpenClaw Skill">
@@ -136,22 +136,22 @@ Dynamic: license-file
136
136
  </p>
137
137
 
138
138
  <p align="center">
139
- <a href="https://scrapling.readthedocs.io/en/latest/parsing/selection/"><strong>Selection methods</strong></a>
139
+ <a href="https://scrapling.readthedocs.io/en/latest/parsing/selection.html"><strong>Selection methods</strong></a>
140
140
  &middot;
141
- <a href="https://scrapling.readthedocs.io/en/latest/fetching/choosing/"><strong>Fetchers</strong></a>
141
+ <a href="https://scrapling.readthedocs.io/en/latest/fetching/choosing.html"><strong>Fetchers</strong></a>
142
142
  &middot;
143
143
  <a href="https://scrapling.readthedocs.io/en/latest/spiders/architecture.html"><strong>Spiders</strong></a>
144
144
  &middot;
145
145
  <a href="https://scrapling.readthedocs.io/en/latest/spiders/proxy-blocking.html"><strong>Proxy Rotation</strong></a>
146
146
  &middot;
147
- <a href="https://scrapling.readthedocs.io/en/latest/cli/overview/"><strong>CLI</strong></a>
147
+ <a href="https://scrapling.readthedocs.io/en/latest/cli/overview.html"><strong>CLI</strong></a>
148
148
  &middot;
149
- <a href="https://scrapling.readthedocs.io/en/latest/ai/mcp-server/"><strong>MCP</strong></a>
149
+ <a href="https://scrapling.readthedocs.io/en/latest/ai/mcp-server.html"><strong>MCP</strong></a>
150
150
  </p>
151
151
 
152
152
  Scrapling is an adaptive Web Scraping framework that handles everything from a single request to a full-scale crawl.
153
153
 
154
- Its parser learns from website changes and automatically relocates your elements when pages update. Its fetchers bypass anti-bot systems like Cloudflare Turnstile out of the box. And its spider framework lets you scale up to concurrent, multi-session crawls with pause/resume and automatic proxy rotation — all in a few lines of Python. One library, zero compromises.
154
+ Its parser learns from website changes and automatically relocates your elements when pages update. Its fetchers bypass anti-bot systems like Cloudflare Turnstile out of the box. And its spider framework lets you scale up to concurrent, multi-session crawls with pause/resume and automatic proxy rotation - all in a few lines of Python. One library, zero compromises.
155
155
 
156
156
  Blazing fast crawls with real-time stats and streaming. Built by Web Scrapers for Web Scrapers and regular users, there's something for everyone.
157
157
 
@@ -189,7 +189,6 @@ MySpider().start()
189
189
  <td width="200">
190
190
  <a href="https://hypersolutions.co/?utm_source=github&utm_medium=readme&utm_campaign=scrapling" target="_blank" title="Bot Protection Bypass API for Akamai, DataDome, Incapsula & Kasada">
191
191
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/HyperSolutions.png">
192
- <br />
193
192
  </a>
194
193
  </td>
195
194
  <td> Scrapling handles Cloudflare Turnstile. For enterprise-grade protection, <a href="https://hypersolutions.co?utm_source=github&utm_medium=readme&utm_campaign=scrapling">
@@ -200,7 +199,6 @@ MySpider().start()
200
199
  <td width="200">
201
200
  <a href="https://birdproxies.com/t/scrapling" target="_blank" title="At Bird Proxies, we eliminate your pains such as banned IPs, geo restriction, and high costs so you can focus on your work.">
202
201
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/BirdProxies.jpg">
203
- <br />
204
202
  </a>
205
203
  </td>
206
204
  <td>Hey, we built <a href="https://birdproxies.com/t/scrapling">
@@ -213,7 +211,6 @@ MySpider().start()
213
211
  <td width="200">
214
212
  <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB">
215
213
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png">
216
- <br />
217
214
  </a>
218
215
  </td>
219
216
  <td>
@@ -225,13 +222,63 @@ MySpider().start()
225
222
  </tr>
226
223
  <tr>
227
224
  <td width="200">
228
- <a href="https://tikhub.io/?ref=KarimShoair" target="_blank" title="Unlock the Power of Social Media Data & AI">
225
+ <a href="https://tikhub.io/?utm_source=github.com/D4Vinci/Scrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad" target="_blank" title="Unlock the Power of Social Media Data & AI">
229
226
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/TikHub.jpg">
230
- <br />
231
227
  </a>
232
228
  </td>
233
229
  <td>
234
- <a href="https://tikhub.io/?ref=KarimShoair" target="_blank">TikHub.io</a> provides 900+ stable APIs across 16+ platforms including TikTok, X, YouTube & Instagram, with 40M+ datasets. <br /> Also offers <a href="https://ai.tikhub.io/?ref=KarimShoair" target="_blank">DISCOUNTED AI models</a> Claude, GPT, GEMINI & more up to 71% off.
230
+ <a href="https://tikhub.io/?utm_source=github.com/D4Vinci/Scrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad" target="_blank">TikHub.io</a> provides 900+ stable APIs across 16+ platforms including TikTok, X, YouTube & Instagram, with 40M+ datasets. <br /> Also offers <a href="https://ai.tikhub.io/?ref=KarimShoair" target="_blank">DISCOUNTED AI models</a> - Claude, GPT, GEMINI & more up to 71% off.
231
+ </td>
232
+ </tr>
233
+ <tr>
234
+ <td width="200">
235
+ <a href="https://www.nsocks.com/?keyword=2p67aivg" target="_blank" title="Scalable Web Data Access for AI Applications">
236
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/nsocks.png">
237
+ </a>
238
+ </td>
239
+ <td>
240
+ <a href="https://www.nsocks.com/?keyword=2p67aivg" target="_blank">Nsocks</a> provides fast Residential and ISP proxies for developers and scrapers. Global IP coverage, high anonymity, smart rotation, and reliable performance for automation and data extraction. Use <a href="https://www.xcrawl.com/?keyword=2p67aivg" target="_blank">Xcrawl</a> to simplify large-scale web crawling.
241
+ </td>
242
+ </tr>
243
+ <tr>
244
+ <td width="200">
245
+ <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting.">
246
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png">
247
+ </a>
248
+ </td>
249
+ <td>
250
+ Close your laptop. Your scrapers keep running. <br />
251
+ <a href="https://petrosky.io/d4vinci" target="_blank">PetroSky VPS</a> - cloud servers built for nonstop automation. Windows and Linux machines with full control. From €6.99/mo.
252
+ </td>
253
+ </tr>
254
+ <tr>
255
+ <td width="200">
256
+ <a href="https://substack.thewebscraping.club/p/scrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling" target="_blank" title="The #1 newsletter dedicated to Web Scraping">
257
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/TWSC.png">
258
+ </a>
259
+ </td>
260
+ <td>
261
+ Read a full review of <a href="https://substack.thewebscraping.club/p/scrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling" target="_blank">Scrapling on The Web Scraping Club</a> (Nov 2025), the #1 newsletter dedicated to Web Scraping.
262
+ </td>
263
+ </tr>
264
+ <tr>
265
+ <td width="200">
266
+ <a href="https://proxy-seller.com/?partner=CU9CAA5TBYFFT2" target="_blank" title="Proxy-Seller provides reliable proxy infrastructure for Web Scraping">
267
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/ProxySeller.png">
268
+ </a>
269
+ </td>
270
+ <td>
271
+ <a href="https://proxy-seller.com/?partner=CU9CAA5TBYFFT2" target="_blank">Proxy-Seller</a> provides reliable proxy infrastructure for web scraping, offering IPv4, IPv6, ISP, Residential, and Mobile proxies with stable performance, broad geo coverage, and flexible plans for business-scale data collection.
272
+ </td>
273
+ </tr>
274
+ <tr>
275
+ <td width="200">
276
+ <a href="http://mangoproxy.com/?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci" target="_blank" title="Proxies You Can Rely On: Residential, Server, and Mobile">
277
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/MangoProxy.png">
278
+ </a>
279
+ </td>
280
+ <td>
281
+ <a href="http://mangoproxy.com/?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci" target="_blank">Stable proxies</a> for scraping, automation, and multi-accounting. Clean IPs, fast response, and reliable performance under load. Built for scalable workflows.
235
282
  </td>
236
283
  </tr>
237
284
  </table>
@@ -241,15 +288,13 @@ MySpider().start()
241
288
 
242
289
  <!-- sponsors -->
243
290
 
291
+
244
292
  <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
245
293
  <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
246
- <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
247
294
  <a href="https://hasdata.com/?utm_source=github&utm_medium=banner&utm_campaign=D4Vinci" target="_blank" title="The web scraping service that actually beats anti-bot systems!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/hasdata.png"></a>
248
295
  <a href="https://proxyempire.io/?ref=scrapling&utm_source=scrapling" target="_blank" title="Collect The Data Your Project Needs with the Best Residential Proxies"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/ProxyEmpire.png"></a>
249
-
250
-
251
- <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
252
- <a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
296
+ <a href="https://www.webshare.io/?referral_code=48r2m2cd5uz1" target="_blank" title="The Most Reliable Proxy with Unparalleled Performance"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/webshare.png"></a>
297
+ <a href="https://www.crawleo.dev/?utm_source=github&utm_medium=sponsor&utm_campaign=scrapling" target="_blank" title="Supercharge your AI with Real-Time Web Intelligence"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/crawleo.png"></a>
253
298
  <a href="https://browser.cash/?utm_source=D4Vinci&utm_medium=referral" target="_blank" title="Browser Automation & AI Browser Agent Platform"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/browserCash.png"></a>
254
299
 
255
300
  <!-- /sponsors -->
@@ -260,13 +305,14 @@ MySpider().start()
260
305
 
261
306
  ## Key Features
262
307
 
263
- ### Spiders — A Full Crawling Framework
308
+ ### Spiders - A Full Crawling Framework
264
309
  - 🕷️ **Scrapy-like Spider API**: Define spiders with `start_urls`, async `parse` callbacks, and `Request`/`Response` objects.
265
310
  - ⚡ **Concurrent Crawling**: Configurable concurrency limits, per-domain throttling, and download delays.
266
- - 🔄 **Multi-Session Support**: Unified interface for HTTP requests, and stealthy headless browsers in a single spider — route requests to different sessions by ID.
311
+ - 🔄 **Multi-Session Support**: Unified interface for HTTP requests, and stealthy headless browsers in a single spider - route requests to different sessions by ID.
267
312
  - 💾 **Pause & Resume**: Checkpoint-based crawl persistence. Press Ctrl+C for a graceful shutdown; restart to resume from where you left off.
268
- - 📡 **Streaming Mode**: Stream scraped items as they arrive via `async for item in spider.stream()` with real-time stats — ideal for UI, pipelines, and long-running crawls.
313
+ - 📡 **Streaming Mode**: Stream scraped items as they arrive via `async for item in spider.stream()` with real-time stats - ideal for UI, pipelines, and long-running crawls.
269
314
  - 🛡️ **Blocked Request Detection**: Automatic detection and retry of blocked requests with customizable logic.
315
+ - 🤖 **Robots.txt Compliance**: Optional `robots_txt_obey` flag that respects `Disallow`, `Crawl-delay`, and `Request-rate` directives with per-domain caching.
270
316
  - 📦 **Built-in Export**: Export results through hooks and your own pipeline or the built-in JSON/JSONL with `result.items.to_json()` / `result.items.to_jsonl()` respectively.
271
317
 
272
318
  ### Advanced Websites Fetching with Session Support
@@ -392,7 +438,7 @@ Pause and resume long crawls with checkpoints by running the spider like this:
392
438
  ```python
393
439
  QuotesSpider(crawldir="./crawl_data").start()
394
440
  ```
395
- Press Ctrl+C to pause gracefully — progress is saved automatically. Later, when you start the spider again, pass the same `crawldir`, and it will resume from where it stopped.
441
+ Press Ctrl+C to pause gracefully - progress is saved automatically. Later, when you start the spider again, pass the same `crawldir`, and it will resume from where it stopped.
396
442
 
397
443
  ### Advanced Parsing & Navigation
398
444
  ```python
@@ -477,7 +523,7 @@ scrapling extract stealthy-fetch 'https://nopecha.com/demo/cloudflare' captchas.
477
523
 
478
524
  ## Performance Benchmarks
479
525
 
480
- Scrapling isn't just powerful—it's also blazing fast. The following benchmarks compare Scrapling's parser with the latest versions of other popular libraries.
526
+ Scrapling isn't just powerful-it's also blazing fast. The following benchmarks compare Scrapling's parser with the latest versions of other popular libraries.
481
527
 
482
528
  ### Text Extraction Speed Test (5000 nested elements)
483
529
 
@@ -589,7 +635,7 @@ This work is licensed under the BSD-3-Clause License.
589
635
  ## Acknowledgments
590
636
 
591
637
  This project includes code adapted from:
592
- - Parsel (BSD License)—Used for [translator](https://github.com/D4Vinci/Scrapling/blob/main/scrapling/core/translator.py) submodule
638
+ - Parsel (BSD License)-Used for [translator](https://github.com/D4Vinci/Scrapling/blob/main/scrapling/core/translator.py) submodule
593
639
 
594
640
  ---
595
641
  <div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>
@@ -14,14 +14,13 @@
14
14
  <p align="center">
15
15
  <a href="https://trendshift.io/repositories/14244" target="_blank"><img src="https://trendshift.io/api/badge/repositories/14244" alt="D4Vinci%2FScrapling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
16
16
  <br/>
17
- <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_AR.md">العربيه</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_ES.md">Español</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_FR.md">Français</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_DE.md">Deutsch</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_CN.md">简体中文</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_JP.md">日本語</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_RU.md">Русский</a>
17
+ <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_AR.md">العربيه</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_ES.md">Español</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_FR.md">Français</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_DE.md">Deutsch</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_CN.md">简体中文</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_JP.md">日本語</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_RU.md">Русский</a> | <a href="https://github.com/D4Vinci/Scrapling/blob/main/docs/README_KR.md">한국어</a>
18
18
  <br/>
19
19
  <a href="https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml" alt="Tests">
20
20
  <img alt="Tests" src="https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml/badge.svg"></a>
21
21
  <a href="https://badge.fury.io/py/Scrapling" alt="PyPI version">
22
22
  <img alt="PyPI version" src="https://badge.fury.io/py/Scrapling.svg"></a>
23
- <a href="https://pepy.tech/project/scrapling" alt="PyPI Downloads">
24
- <img alt="PyPI Downloads" src="https://static.pepy.tech/personalized-badge/scrapling?period=total&units=INTERNATIONAL_SYSTEM&left_color=GREY&right_color=GREEN&left_text=Downloads"></a>
23
+ <a href="https://clickpy.clickhouse.com/dashboard/scrapling" rel="nofollow"><img src="https://img.shields.io/pypi/dm/scrapling" alt="PyPI package downloads"></a>
25
24
  <a href="https://github.com/D4Vinci/Scrapling/tree/main/agent-skill" alt="AI Agent Skill directory">
26
25
  <img alt="Static Badge" src="https://img.shields.io/badge/Skill-black?style=flat&label=Agent&link=https%3A%2F%2Fgithub.com%2FD4Vinci%2FScrapling%2Ftree%2Fmain%2Fagent-skill"></a>
27
26
  <a href="https://clawhub.ai/D4Vinci/scrapling-official" alt="OpenClaw Skill">
@@ -39,22 +38,22 @@
39
38
  </p>
40
39
 
41
40
  <p align="center">
42
- <a href="https://scrapling.readthedocs.io/en/latest/parsing/selection/"><strong>Selection methods</strong></a>
41
+ <a href="https://scrapling.readthedocs.io/en/latest/parsing/selection.html"><strong>Selection methods</strong></a>
43
42
  &middot;
44
- <a href="https://scrapling.readthedocs.io/en/latest/fetching/choosing/"><strong>Fetchers</strong></a>
43
+ <a href="https://scrapling.readthedocs.io/en/latest/fetching/choosing.html"><strong>Fetchers</strong></a>
45
44
  &middot;
46
45
  <a href="https://scrapling.readthedocs.io/en/latest/spiders/architecture.html"><strong>Spiders</strong></a>
47
46
  &middot;
48
47
  <a href="https://scrapling.readthedocs.io/en/latest/spiders/proxy-blocking.html"><strong>Proxy Rotation</strong></a>
49
48
  &middot;
50
- <a href="https://scrapling.readthedocs.io/en/latest/cli/overview/"><strong>CLI</strong></a>
49
+ <a href="https://scrapling.readthedocs.io/en/latest/cli/overview.html"><strong>CLI</strong></a>
51
50
  &middot;
52
- <a href="https://scrapling.readthedocs.io/en/latest/ai/mcp-server/"><strong>MCP</strong></a>
51
+ <a href="https://scrapling.readthedocs.io/en/latest/ai/mcp-server.html"><strong>MCP</strong></a>
53
52
  </p>
54
53
 
55
54
  Scrapling is an adaptive Web Scraping framework that handles everything from a single request to a full-scale crawl.
56
55
 
57
- Its parser learns from website changes and automatically relocates your elements when pages update. Its fetchers bypass anti-bot systems like Cloudflare Turnstile out of the box. And its spider framework lets you scale up to concurrent, multi-session crawls with pause/resume and automatic proxy rotation — all in a few lines of Python. One library, zero compromises.
56
+ Its parser learns from website changes and automatically relocates your elements when pages update. Its fetchers bypass anti-bot systems like Cloudflare Turnstile out of the box. And its spider framework lets you scale up to concurrent, multi-session crawls with pause/resume and automatic proxy rotation - all in a few lines of Python. One library, zero compromises.
58
57
 
59
58
  Blazing fast crawls with real-time stats and streaming. Built by Web Scrapers for Web Scrapers and regular users, there's something for everyone.
60
59
 
@@ -92,7 +91,6 @@ MySpider().start()
92
91
  <td width="200">
93
92
  <a href="https://hypersolutions.co/?utm_source=github&utm_medium=readme&utm_campaign=scrapling" target="_blank" title="Bot Protection Bypass API for Akamai, DataDome, Incapsula & Kasada">
94
93
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/HyperSolutions.png">
95
- <br />
96
94
  </a>
97
95
  </td>
98
96
  <td> Scrapling handles Cloudflare Turnstile. For enterprise-grade protection, <a href="https://hypersolutions.co?utm_source=github&utm_medium=readme&utm_campaign=scrapling">
@@ -103,7 +101,6 @@ MySpider().start()
103
101
  <td width="200">
104
102
  <a href="https://birdproxies.com/t/scrapling" target="_blank" title="At Bird Proxies, we eliminate your pains such as banned IPs, geo restriction, and high costs so you can focus on your work.">
105
103
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/BirdProxies.jpg">
106
- <br />
107
104
  </a>
108
105
  </td>
109
106
  <td>Hey, we built <a href="https://birdproxies.com/t/scrapling">
@@ -116,7 +113,6 @@ MySpider().start()
116
113
  <td width="200">
117
114
  <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB">
118
115
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png">
119
- <br />
120
116
  </a>
121
117
  </td>
122
118
  <td>
@@ -128,13 +124,63 @@ MySpider().start()
128
124
  </tr>
129
125
  <tr>
130
126
  <td width="200">
131
- <a href="https://tikhub.io/?ref=KarimShoair" target="_blank" title="Unlock the Power of Social Media Data & AI">
127
+ <a href="https://tikhub.io/?utm_source=github.com/D4Vinci/Scrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad" target="_blank" title="Unlock the Power of Social Media Data & AI">
132
128
  <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/TikHub.jpg">
133
- <br />
134
129
  </a>
135
130
  </td>
136
131
  <td>
137
- <a href="https://tikhub.io/?ref=KarimShoair" target="_blank">TikHub.io</a> provides 900+ stable APIs across 16+ platforms including TikTok, X, YouTube & Instagram, with 40M+ datasets. <br /> Also offers <a href="https://ai.tikhub.io/?ref=KarimShoair" target="_blank">DISCOUNTED AI models</a> Claude, GPT, GEMINI & more up to 71% off.
132
+ <a href="https://tikhub.io/?utm_source=github.com/D4Vinci/Scrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad" target="_blank">TikHub.io</a> provides 900+ stable APIs across 16+ platforms including TikTok, X, YouTube & Instagram, with 40M+ datasets. <br /> Also offers <a href="https://ai.tikhub.io/?ref=KarimShoair" target="_blank">DISCOUNTED AI models</a> - Claude, GPT, GEMINI & more up to 71% off.
133
+ </td>
134
+ </tr>
135
+ <tr>
136
+ <td width="200">
137
+ <a href="https://www.nsocks.com/?keyword=2p67aivg" target="_blank" title="Scalable Web Data Access for AI Applications">
138
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/nsocks.png">
139
+ </a>
140
+ </td>
141
+ <td>
142
+ <a href="https://www.nsocks.com/?keyword=2p67aivg" target="_blank">Nsocks</a> provides fast Residential and ISP proxies for developers and scrapers. Global IP coverage, high anonymity, smart rotation, and reliable performance for automation and data extraction. Use <a href="https://www.xcrawl.com/?keyword=2p67aivg" target="_blank">Xcrawl</a> to simplify large-scale web crawling.
143
+ </td>
144
+ </tr>
145
+ <tr>
146
+ <td width="200">
147
+ <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting.">
148
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png">
149
+ </a>
150
+ </td>
151
+ <td>
152
+ Close your laptop. Your scrapers keep running. <br />
153
+ <a href="https://petrosky.io/d4vinci" target="_blank">PetroSky VPS</a> - cloud servers built for nonstop automation. Windows and Linux machines with full control. From €6.99/mo.
154
+ </td>
155
+ </tr>
156
+ <tr>
157
+ <td width="200">
158
+ <a href="https://substack.thewebscraping.club/p/scrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling" target="_blank" title="The #1 newsletter dedicated to Web Scraping">
159
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/TWSC.png">
160
+ </a>
161
+ </td>
162
+ <td>
163
+ Read a full review of <a href="https://substack.thewebscraping.club/p/scrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling" target="_blank">Scrapling on The Web Scraping Club</a> (Nov 2025), the #1 newsletter dedicated to Web Scraping.
164
+ </td>
165
+ </tr>
166
+ <tr>
167
+ <td width="200">
168
+ <a href="https://proxy-seller.com/?partner=CU9CAA5TBYFFT2" target="_blank" title="Proxy-Seller provides reliable proxy infrastructure for Web Scraping">
169
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/ProxySeller.png">
170
+ </a>
171
+ </td>
172
+ <td>
173
+ <a href="https://proxy-seller.com/?partner=CU9CAA5TBYFFT2" target="_blank">Proxy-Seller</a> provides reliable proxy infrastructure for web scraping, offering IPv4, IPv6, ISP, Residential, and Mobile proxies with stable performance, broad geo coverage, and flexible plans for business-scale data collection.
174
+ </td>
175
+ </tr>
176
+ <tr>
177
+ <td width="200">
178
+ <a href="http://mangoproxy.com/?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci" target="_blank" title="Proxies You Can Rely On: Residential, Server, and Mobile">
179
+ <img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/MangoProxy.png">
180
+ </a>
181
+ </td>
182
+ <td>
183
+ <a href="http://mangoproxy.com/?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci" target="_blank">Stable proxies</a> for scraping, automation, and multi-accounting. Clean IPs, fast response, and reliable performance under load. Built for scalable workflows.
138
184
  </td>
139
185
  </tr>
140
186
  </table>
@@ -144,15 +190,13 @@ MySpider().start()
144
190
 
145
191
  <!-- sponsors -->
146
192
 
193
+
147
194
  <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
148
195
  <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
149
- <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
150
196
  <a href="https://hasdata.com/?utm_source=github&utm_medium=banner&utm_campaign=D4Vinci" target="_blank" title="The web scraping service that actually beats anti-bot systems!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/hasdata.png"></a>
151
197
  <a href="https://proxyempire.io/?ref=scrapling&utm_source=scrapling" target="_blank" title="Collect The Data Your Project Needs with the Best Residential Proxies"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/ProxyEmpire.png"></a>
152
-
153
-
154
- <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
155
- <a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
198
+ <a href="https://www.webshare.io/?referral_code=48r2m2cd5uz1" target="_blank" title="The Most Reliable Proxy with Unparalleled Performance"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/webshare.png"></a>
199
+ <a href="https://www.crawleo.dev/?utm_source=github&utm_medium=sponsor&utm_campaign=scrapling" target="_blank" title="Supercharge your AI with Real-Time Web Intelligence"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/crawleo.png"></a>
156
200
  <a href="https://browser.cash/?utm_source=D4Vinci&utm_medium=referral" target="_blank" title="Browser Automation & AI Browser Agent Platform"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/browserCash.png"></a>
157
201
 
158
202
  <!-- /sponsors -->
@@ -163,13 +207,14 @@ MySpider().start()
163
207
 
164
208
  ## Key Features
165
209
 
166
- ### Spiders — A Full Crawling Framework
210
+ ### Spiders - A Full Crawling Framework
167
211
  - 🕷️ **Scrapy-like Spider API**: Define spiders with `start_urls`, async `parse` callbacks, and `Request`/`Response` objects.
168
212
  - ⚡ **Concurrent Crawling**: Configurable concurrency limits, per-domain throttling, and download delays.
169
- - 🔄 **Multi-Session Support**: Unified interface for HTTP requests, and stealthy headless browsers in a single spider — route requests to different sessions by ID.
213
+ - 🔄 **Multi-Session Support**: Unified interface for HTTP requests, and stealthy headless browsers in a single spider - route requests to different sessions by ID.
170
214
  - 💾 **Pause & Resume**: Checkpoint-based crawl persistence. Press Ctrl+C for a graceful shutdown; restart to resume from where you left off.
171
- - 📡 **Streaming Mode**: Stream scraped items as they arrive via `async for item in spider.stream()` with real-time stats — ideal for UI, pipelines, and long-running crawls.
215
+ - 📡 **Streaming Mode**: Stream scraped items as they arrive via `async for item in spider.stream()` with real-time stats - ideal for UI, pipelines, and long-running crawls.
172
216
  - 🛡️ **Blocked Request Detection**: Automatic detection and retry of blocked requests with customizable logic.
217
+ - 🤖 **Robots.txt Compliance**: Optional `robots_txt_obey` flag that respects `Disallow`, `Crawl-delay`, and `Request-rate` directives with per-domain caching.
173
218
  - 📦 **Built-in Export**: Export results through hooks and your own pipeline or the built-in JSON/JSONL with `result.items.to_json()` / `result.items.to_jsonl()` respectively.
174
219
 
175
220
  ### Advanced Websites Fetching with Session Support
@@ -295,7 +340,7 @@ Pause and resume long crawls with checkpoints by running the spider like this:
295
340
  ```python
296
341
  QuotesSpider(crawldir="./crawl_data").start()
297
342
  ```
298
- Press Ctrl+C to pause gracefully — progress is saved automatically. Later, when you start the spider again, pass the same `crawldir`, and it will resume from where it stopped.
343
+ Press Ctrl+C to pause gracefully - progress is saved automatically. Later, when you start the spider again, pass the same `crawldir`, and it will resume from where it stopped.
299
344
 
300
345
  ### Advanced Parsing & Navigation
301
346
  ```python
@@ -380,7 +425,7 @@ scrapling extract stealthy-fetch 'https://nopecha.com/demo/cloudflare' captchas.
380
425
 
381
426
  ## Performance Benchmarks
382
427
 
383
- Scrapling isn't just powerful—it's also blazing fast. The following benchmarks compare Scrapling's parser with the latest versions of other popular libraries.
428
+ Scrapling isn't just powerful - it's also blazing fast. The following benchmarks compare Scrapling's parser with the latest versions of other popular libraries.
384
429
 
385
430
  ### Text Extraction Speed Test (5000 nested elements)
386
431
 
@@ -492,7 +537,7 @@ This work is licensed under the BSD-3-Clause License.
492
537
  ## Acknowledgments
493
538
 
494
539
  This project includes code adapted from:
495
- - Parsel (BSD License)—Used for [translator](https://github.com/D4Vinci/Scrapling/blob/main/scrapling/core/translator.py) submodule
540
+ - Parsel (BSD License) - Used for [translator](https://github.com/D4Vinci/Scrapling/blob/main/scrapling/core/translator.py) submodule
496
541
 
497
542
  ---
498
543
  <div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "scrapling"
7
7
  # Static version instead of a dynamic version so we can get better layer caching while building docker, check the docker file to understand
8
- version = "0.4.2"
8
+ version = "0.4.4"
9
9
  description = "Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!"
10
10
  readme = {file = "README.md", content-type = "text/markdown"}
11
11
  license = {file = "LICENSE"}
@@ -63,22 +63,23 @@ classifiers = [
63
63
  dependencies = [
64
64
  "lxml>=6.0.2",
65
65
  "cssselect>=1.4.0",
66
- "orjson>=3.11.7",
66
+ "orjson>=3.11.8",
67
67
  "tld>=0.13.2",
68
- "w3lib>=2.4.0",
69
- "typing_extensions",
68
+ "w3lib>=2.4.1",
69
+ "typing_extensions"
70
70
  ]
71
71
 
72
72
  [project.optional-dependencies]
73
73
  fetchers = [
74
74
  "click>=8.3.0",
75
- "curl_cffi>=0.14.0",
75
+ "curl_cffi>=0.15.0",
76
76
  "playwright==1.58.0",
77
77
  "patchright==1.58.2",
78
78
  "browserforge>=1.2.4",
79
- "apify-fingerprint-datapoints>=0.11.0",
79
+ "apify-fingerprint-datapoints>=0.12.0",
80
80
  "msgspec>=0.20.0",
81
- "anyio>=4.12.1"
81
+ "anyio>=4.12.1",
82
+ "protego>=0.6.0",
82
83
  ]
83
84
  ai = [
84
85
  "mcp>=1.26.0",
@@ -1,5 +1,5 @@
1
1
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
2
- __version__ = "0.4.2"
2
+ __version__ = "0.4.4"
3
3
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
4
4
 
5
5
  from typing import Any, TYPE_CHECKING