scrapling 0.3.tar.gz → 0.3.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scrapling-0.3/scrapling.egg-info → scrapling-0.3.2}/PKG-INFO +57 -47
- {scrapling-0.3 → scrapling-0.3.2}/README.md +44 -37
- {scrapling-0.3 → scrapling-0.3.2}/pyproject.toml +10 -6
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/__init__.py +1 -1
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/cli.py +38 -51
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/_html_utils.py +3 -9
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/ai.py +5 -13
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/custom_types.py +19 -61
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/mixins.py +6 -28
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/shell.py +51 -129
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/storage.py +2 -8
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/translator.py +8 -20
- scrapling-0.3.2/scrapling/core/utils/__init__.py +10 -0
- scrapling-0.3.2/scrapling/core/utils/_shell.py +48 -0
- scrapling-0.3/scrapling/core/utils.py → scrapling-0.3.2/scrapling/core/utils/_utils.py +5 -21
- scrapling-0.3.2/scrapling/engines/__init__.py +0 -0
- scrapling-0.3.2/scrapling/engines/_browsers/_base.py +297 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/_camoufox.py +238 -293
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/_config_tools.py +2 -1
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/_controllers.py +220 -278
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/_page.py +37 -15
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/_validators.py +29 -15
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/constants.py +3 -6
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/static.py +25 -75
- scrapling-0.3.2/scrapling/engines/toolbelt/__init__.py +1 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/convertor.py +95 -86
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/custom.py +7 -99
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/fingerprints.py +1 -3
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/navigation.py +4 -58
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/fetchers.py +41 -24
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/parser.py +45 -122
- {scrapling-0.3 → scrapling-0.3.2/scrapling.egg-info}/PKG-INFO +57 -47
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/SOURCES.txt +4 -1
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/requires.txt +14 -10
- {scrapling-0.3 → scrapling-0.3.2}/setup.cfg +1 -1
- scrapling-0.3/scrapling/engines/__init__.py +0 -16
- scrapling-0.3/scrapling/engines/toolbelt/__init__.py +0 -20
- {scrapling-0.3 → scrapling-0.3.2}/LICENSE +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/MANIFEST.in +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/__init__.py +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/core/_types.py +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/_browsers/__init__.py +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling/py.typed +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/dependency_links.txt +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/entry_points.txt +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/not-zip-safe +0 -0
- {scrapling-0.3 → scrapling-0.3.2}/scrapling.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scrapling
-Version: 0.3
+Version: 0.3.2
 Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -64,23 +64,26 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: lxml>=6.0.
+Requires-Dist: lxml>=6.0.1
 Requires-Dist: cssselect>=1.3.0
-Requires-Dist:
-Requires-Dist: orjson>=3.11.2
+Requires-Dist: orjson>=3.11.3
 Requires-Dist: tldextract>=5.3.0
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Provides-Extra: fetchers
+Requires-Dist: click>=8.2.1; extra == "fetchers"
+Requires-Dist: curl_cffi>=0.13.0; extra == "fetchers"
+Requires-Dist: playwright>=1.52.0; extra == "fetchers"
+Requires-Dist: rebrowser-playwright>=1.52.0; extra == "fetchers"
+Requires-Dist: camoufox>=0.4.11; extra == "fetchers"
+Requires-Dist: geoip2>=5.1.0; extra == "fetchers"
+Requires-Dist: msgspec>=0.19.0; extra == "fetchers"
 Provides-Extra: ai
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.14.0; extra == "ai"
 Requires-Dist: markdownify>=1.2.0; extra == "ai"
+Requires-Dist: scrapling[fetchers]; extra == "ai"
 Provides-Extra: shell
 Requires-Dist: IPython>=8.37; extra == "shell"
 Requires-Dist: markdownify>=1.2.0; extra == "shell"
+Requires-Dist: scrapling[fetchers]; extra == "shell"
 Provides-Extra: all
 Requires-Dist: scrapling[ai,shell]; extra == "all"
 Dynamic: license-file
@@ -155,9 +158,10 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 <!-- sponsors -->
 
 <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
-<a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
+<a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
+<a href="https://www.nstproxy.com/?type=flow&utm_source=scrapling" target="_blank" title="One Proxy Service, Infinite Solutions at Unbeatable Prices!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/NSTproxy.png"></a>
 
 <!-- /sponsors -->
 
@@ -178,7 +182,7 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 - 🔄 **Smart Element Tracking**: Relocate elements after website changes using intelligent similarity algorithms.
 - 🎯 **Smart Flexible Selection**: CSS selectors, XPath selectors, filter-based search, text search, regex search, and more.
 - 🔍 **Find Similar Elements**: Automatically locate elements similar to found elements.
-- 🤖 **MCP Server to be used with AI**: Built-in MCP server for AI-assisted Web Scraping and data extraction. The MCP server features custom, powerful capabilities that utilize Scrapling to extract targeted content before passing it to the AI (Claude/Cursor/etc), thereby speeding up operations and reducing costs by minimizing token usage.
+- 🤖 **MCP Server to be used with AI**: Built-in MCP server for AI-assisted Web Scraping and data extraction. The MCP server features custom, powerful capabilities that utilize Scrapling to extract targeted content before passing it to the AI (Claude/Cursor/etc), thereby speeding up operations and reducing costs by minimizing token usage. ([demo video](https://www.youtube.com/watch?v=qyFk3ZNwOxE))
 
 ### High-Performance & battle-tested Architecture
 - 🚀 **Lightning Fast**: Optimized performance outperforming most Python scraping libraries.
@@ -220,7 +224,7 @@ quotes = page.css('.quote .text::text')
 
 # Advanced stealth mode (Keep the browser open until you finish)
 with StealthySession(headless=True, solve_cloudflare=True) as session:
-    page = session.fetch('https://nopecha.com/demo/cloudflare')
+    page = session.fetch('https://nopecha.com/demo/cloudflare', google_search=False)
     data = page.css('#padded_content a')
 
 # Or use one-off request style, it opens the browser for this request, then closes it after finishing
@@ -229,7 +233,7 @@ data = page.css('#padded_content a')
 
 # Full browser automation (Keep the browser open until you finish)
 with DynamicSession(headless=True, disable_resources=False, network_idle=True) as session:
-    page = session.fetch('https://quotes.toscrape.com/')
+    page = session.fetch('https://quotes.toscrape.com/', load_dom=False)
     data = page.xpath('//span[@class="text"]/text()') # XPath selector if you prefer it
 
 # Or use one-off request style, it opens the browser for this request, then closes it after finishing
@@ -273,7 +277,7 @@ from scrapling.parser import Selector
 
 page = Selector("<html>...</html>")
 ```
-And it works
+And it works precisely the same way!
 
 ### Async Session Management Examples
 ```python
@@ -302,6 +306,8 @@ async with AsyncStealthySession(max_pages=2) as session:
 
 Scrapling v0.3 includes a powerful command-line interface:
 
+[](https://asciinema.org/a/736339)
+
 ```bash
 # Launch interactive Web Scraping shell
 scrapling shell
@@ -320,20 +326,20 @@ scrapling extract stealthy-fetch 'https://nopecha.com/demo/cloudflare' captchas.
 
 ## Performance Benchmarks
 
-Scrapling isn't just powerful—it's also blazing fast, and version 0.3
+Scrapling isn't just powerful—it's also blazing fast, and the updates since version 0.3 deliver exceptional performance improvements across all operations!
 
 ### Text Extraction Speed Test (5000 nested elements)
 
 | # | Library | Time (ms) | vs Scrapling |
 |---|:-----------------:|:---------:|:------------:|
-| 1 | Scrapling | 1.
-| 2 | Parsel/Scrapy | 1.
-| 3 | Raw Lxml | 2.
-| 4 | PyQuery | 20.
-| 5 | Selectolax |
-| 6 |
-| 7 |
-| 8 | BS4 with html5lib |
+| 1 | Scrapling | 1.92 | 1.0x |
+| 2 | Parsel/Scrapy | 1.99 | 1.036x |
+| 3 | Raw Lxml | 2.33 | 1.214x |
+| 4 | PyQuery | 20.61 | ~11x |
+| 5 | Selectolax | 80.65 | ~42x |
+| 6 | BS4 with Lxml | 1283.21 | ~698x |
+| 7 | MechanicalSoup | 1304.57 | ~679x |
+| 8 | BS4 with html5lib | 3331.96 | ~1735x |
 
 ### Element Similarity & Text Search Performance
 
@@ -341,8 +347,8 @@ Scrapling's adaptive element finding capabilities significantly outperform alter
 
 | Library | Time (ms) | vs Scrapling |
 |-------------|:---------:|:------------:|
-| Scrapling |
-| AutoScraper | 10.
+| Scrapling | 1.87 | 1.0x |
+| AutoScraper | 10.24 | 5.476x |
 
 
 > All benchmarks represent averages of 100+ runs. See [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology.
@@ -355,29 +361,33 @@ Scrapling requires Python 3.10 or higher:
 pip install scrapling
 ```
 
-
-
-If you are going to use any of the fetchers or their classes, then install browser dependencies with
-```bash
-scrapling install
-```
-
-This downloads all browsers with their system dependencies and fingerprint manipulation dependencies.
+Starting with v0.3.2, this installation only includes the parser engine and its dependencies, without any fetchers or commandline dependencies.
 
 ### Optional Dependencies
 
-
-```bash
-pip install "scrapling[
-
-
-```
-
-
-
-
-
-```
+1. If you are going to use any of the extra features below, the fetchers, or their classes, then you need to install fetchers' dependencies, and then install their browser dependencies with
+```bash
+pip install "scrapling[fetchers]"
+
+scrapling install
+```
+
+This downloads all browsers with their system dependencies and fingerprint manipulation dependencies.
+
+2. Extra features:
+- Install the MCP server feature:
+```bash
+pip install "scrapling[ai]"
+```
+- Install shell features (Web Scraping shell and the `extract` command):
+```bash
+pip install "scrapling[shell]"
+```
+- Install everything:
+```bash
+pip install "scrapling[all]"
+```
+Don't forget that you need to install the browser dependencies with `scrapling install` after any of these extras (if you didn't already)
 
 ## Contributing
 
README.md

@@ -68,9 +68,10 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 <!-- sponsors -->
 
 <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
-<a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
+<a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
+<a href="https://www.nstproxy.com/?type=flow&utm_source=scrapling" target="_blank" title="One Proxy Service, Infinite Solutions at Unbeatable Prices!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/NSTproxy.png"></a>
 
 <!-- /sponsors -->
 
@@ -91,7 +92,7 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 - 🔄 **Smart Element Tracking**: Relocate elements after website changes using intelligent similarity algorithms.
 - 🎯 **Smart Flexible Selection**: CSS selectors, XPath selectors, filter-based search, text search, regex search, and more.
 - 🔍 **Find Similar Elements**: Automatically locate elements similar to found elements.
-- 🤖 **MCP Server to be used with AI**: Built-in MCP server for AI-assisted Web Scraping and data extraction. The MCP server features custom, powerful capabilities that utilize Scrapling to extract targeted content before passing it to the AI (Claude/Cursor/etc), thereby speeding up operations and reducing costs by minimizing token usage.
+- 🤖 **MCP Server to be used with AI**: Built-in MCP server for AI-assisted Web Scraping and data extraction. The MCP server features custom, powerful capabilities that utilize Scrapling to extract targeted content before passing it to the AI (Claude/Cursor/etc), thereby speeding up operations and reducing costs by minimizing token usage. ([demo video](https://www.youtube.com/watch?v=qyFk3ZNwOxE))
 
 ### High-Performance & battle-tested Architecture
 - 🚀 **Lightning Fast**: Optimized performance outperforming most Python scraping libraries.
@@ -133,7 +134,7 @@ quotes = page.css('.quote .text::text')
 
 # Advanced stealth mode (Keep the browser open until you finish)
 with StealthySession(headless=True, solve_cloudflare=True) as session:
-    page = session.fetch('https://nopecha.com/demo/cloudflare')
+    page = session.fetch('https://nopecha.com/demo/cloudflare', google_search=False)
     data = page.css('#padded_content a')
 
 # Or use one-off request style, it opens the browser for this request, then closes it after finishing
@@ -142,7 +143,7 @@ data = page.css('#padded_content a')
 
 # Full browser automation (Keep the browser open until you finish)
 with DynamicSession(headless=True, disable_resources=False, network_idle=True) as session:
-    page = session.fetch('https://quotes.toscrape.com/')
+    page = session.fetch('https://quotes.toscrape.com/', load_dom=False)
     data = page.xpath('//span[@class="text"]/text()') # XPath selector if you prefer it
 
 # Or use one-off request style, it opens the browser for this request, then closes it after finishing
@@ -186,7 +187,7 @@ from scrapling.parser import Selector
 
 page = Selector("<html>...</html>")
 ```
-And it works
+And it works precisely the same way!
 
 ### Async Session Management Examples
 ```python
@@ -215,6 +216,8 @@ async with AsyncStealthySession(max_pages=2) as session:
 
 Scrapling v0.3 includes a powerful command-line interface:
 
+[](https://asciinema.org/a/736339)
+
 ```bash
 # Launch interactive Web Scraping shell
 scrapling shell
@@ -233,20 +236,20 @@ scrapling extract stealthy-fetch 'https://nopecha.com/demo/cloudflare' captchas.
 
 ## Performance Benchmarks
 
-Scrapling isn't just powerful—it's also blazing fast, and version 0.3
+Scrapling isn't just powerful—it's also blazing fast, and the updates since version 0.3 deliver exceptional performance improvements across all operations!
 
 ### Text Extraction Speed Test (5000 nested elements)
 
 | # | Library | Time (ms) | vs Scrapling |
 |---|:-----------------:|:---------:|:------------:|
-| 1 | Scrapling | 1.
-| 2 | Parsel/Scrapy | 1.
-| 3 | Raw Lxml | 2.
-| 4 | PyQuery | 20.
-| 5 | Selectolax |
-| 6 |
-| 7 |
-| 8 | BS4 with html5lib |
+| 1 | Scrapling | 1.92 | 1.0x |
+| 2 | Parsel/Scrapy | 1.99 | 1.036x |
+| 3 | Raw Lxml | 2.33 | 1.214x |
+| 4 | PyQuery | 20.61 | ~11x |
+| 5 | Selectolax | 80.65 | ~42x |
+| 6 | BS4 with Lxml | 1283.21 | ~698x |
+| 7 | MechanicalSoup | 1304.57 | ~679x |
+| 8 | BS4 with html5lib | 3331.96 | ~1735x |
 
 ### Element Similarity & Text Search Performance
 
@@ -254,8 +257,8 @@ Scrapling's adaptive element finding capabilities significantly outperform alter
 
 | Library | Time (ms) | vs Scrapling |
 |-------------|:---------:|:------------:|
-| Scrapling |
-| AutoScraper | 10.
+| Scrapling | 1.87 | 1.0x |
+| AutoScraper | 10.24 | 5.476x |
 
 
 > All benchmarks represent averages of 100+ runs. See [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology.
@@ -268,29 +271,33 @@ Scrapling requires Python 3.10 or higher:
 pip install scrapling
 ```
 
-
-
-If you are going to use any of the fetchers or their classes, then install browser dependencies with
-```bash
-scrapling install
-```
-
-This downloads all browsers with their system dependencies and fingerprint manipulation dependencies.
+Starting with v0.3.2, this installation only includes the parser engine and its dependencies, without any fetchers or commandline dependencies.
 
 ### Optional Dependencies
 
-
-```bash
-pip install "scrapling[
-
-
-```
-
-
-
-
-
-```
+1. If you are going to use any of the extra features below, the fetchers, or their classes, then you need to install fetchers' dependencies, and then install their browser dependencies with
+```bash
+pip install "scrapling[fetchers]"
+
+scrapling install
+```
+
+This downloads all browsers with their system dependencies and fingerprint manipulation dependencies.
+
+2. Extra features:
+- Install the MCP server feature:
+```bash
+pip install "scrapling[ai]"
+```
+- Install shell features (Web Scraping shell and the `extract` command):
+```bash
+pip install "scrapling[shell]"
+```
+- Install everything:
+```bash
+pip install "scrapling[all]"
+```
+Don't forget that you need to install the browser dependencies with `scrapling install` after any of these extras (if you didn't already)
 
 ## Contributing
 
@@ -319,4 +326,4 @@ This project includes code adapted from:
 - [rebrowser-patches](https://github.com/rebrowser/rebrowser-patches) for stealth improvements
 
 ---
-<div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>
+<div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>
pyproject.toml

@@ -56,11 +56,15 @@ classifiers = [
     "Typing :: Typed",
 ]
 dependencies = [
-    "lxml>=6.0.
+    "lxml>=6.0.1",
     "cssselect>=1.3.0",
-    "
-    "orjson>=3.11.2",
+    "orjson>=3.11.3",
     "tldextract>=5.3.0",
+]
+
+[project.optional-dependencies]
+fetchers = [
+    "click>=8.2.1",
     "curl_cffi>=0.13.0",
     "playwright>=1.52.0",
     "rebrowser-playwright>=1.52.0",
@@ -68,15 +72,15 @@ dependencies = [
     "geoip2>=5.1.0",
     "msgspec>=0.19.0",
 ]
-
-[project.optional-dependencies]
 ai = [
-    "mcp>=1.
+    "mcp>=1.14.0",
     "markdownify>=1.2.0",
+    "scrapling[fetchers]",
 ]
 shell = [
     "IPython>=8.37", # The last version that supports Python 3.10
     "markdownify>=1.2.0",
+    "scrapling[fetchers]",
 ]
 all = [
     "scrapling[ai,shell]",
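The net effect of the pyproject.toml change above: the browser and HTTP stack (click, curl_cffi, playwright, rebrowser-playwright, camoufox, geoip2, msgspec) moves out of the core `dependencies` list into a new `fetchers` extra, and the `ai` and `shell` extras pull it back in through the self-referential `scrapling[fetchers]` requirement. Below is a minimal, hypothetical sketch (not part of the package) of how downstream code could probe for that optional stack at runtime; the import names are assumed to match the distribution names listed in the diff.

```python
# Hypothetical downstream check, not part of Scrapling itself.
from importlib.util import find_spec

# Import names assumed to match the distributions in the new "fetchers" extra.
_FETCHER_MODULES = ("click", "curl_cffi", "playwright", "camoufox")

# True only when `pip install "scrapling[fetchers]"` (or a superset extra) was used.
HAS_FETCHERS = all(find_spec(name) is not None for name in _FETCHER_MODULES)

if HAS_FETCHERS:
    from scrapling.fetchers import Fetcher
else:
    Fetcher = None  # parser-only install: plain `pip install scrapling`
```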
scrapling/cli.py

@@ -2,14 +2,18 @@ from pathlib import Path
 from subprocess import check_output
 from sys import executable as python_executable
 
-from scrapling.
-from scrapling.
+from scrapling.engines.toolbelt.custom import Response
+from scrapling.core.utils import log, _CookieParser, _ParseHeaders
 from scrapling.core._types import List, Optional, Dict, Tuple, Any, Callable
-from scrapling.fetchers import Fetcher, DynamicFetcher, StealthyFetcher
-from scrapling.core.shell import Convertor, _CookieParser, _ParseHeaders
 
 from orjson import loads as json_loads, JSONDecodeError
-
+
+try:
+    from click import command, option, Choice, group, argument
+except (ImportError, ModuleNotFoundError) as e:
+    raise ModuleNotFoundError(
+        "You need to install scrapling with any of the extras to enable Shell commands. See: https://scrapling.readthedocs.io/en/latest/#installation"
+    ) from e
 
 __OUTPUT_FILE_HELP__ = "The output file path can be an HTML file, a Markdown file of the HTML content, or the text content itself. Use file extensions (`.html`/`.md`/`.txt`) respectively."
 __PACKAGE_DIR__ = Path(__file__).parent
@@ -40,6 +44,8 @@ def __Request_and_Save(
     **kwargs,
 ) -> None:
     """Make a request using the specified fetcher function and save the result"""
+    from scrapling.core.shell import Convertor
+
     # Handle relative paths - convert to an absolute path based on the current working directory
     output_path = Path(output_file)
     if not output_path.is_absolute():
@@ -72,14 +78,10 @@ def __ParseExtractArguments(
     return parsed_headers, parsed_cookies, parsed_params, parsed_json
 
 
-def __BuildRequest(
-    headers: List[str], cookies: str, params: str, json: Optional[str] = None, **kwargs
-) -> Dict:
+def __BuildRequest(headers: List[str], cookies: str, params: str, json: Optional[str] = None, **kwargs) -> Dict:
     """Build a request object using the specified arguments"""
     # Parse parameters
-    parsed_headers, parsed_cookies, parsed_params, parsed_json = (
-        __ParseExtractArguments(headers, cookies, params, json)
-    )
+    parsed_headers, parsed_cookies, parsed_params, parsed_json = __ParseExtractArguments(headers, cookies, params, json)
     # Build request arguments
     request_kwargs = {
         "headers": parsed_headers if parsed_headers else None,
@@ -106,10 +108,7 @@ def __BuildRequest(
     help="Force Scrapling to reinstall all Fetchers dependencies",
 )
 def install(force): # pragma: no cover
-    if (
-        force
-        or not __PACKAGE_DIR__.joinpath(".scrapling_dependencies_installed").exists()
-    ):
+    if force or not __PACKAGE_DIR__.joinpath(".scrapling_dependencies_installed").exists():
         __Execute(
             [python_executable, "-m", "playwright", "install", "chromium"],
             "Playwright browsers",
@@ -158,9 +157,7 @@ def mcp():
     "level",
     is_flag=False,
     default="debug",
-    type=Choice(
-        ["debug", "info", "warning", "error", "critical", "fatal"], case_sensitive=False
-    ),
+    type=Choice(["debug", "info", "warning", "error", "critical", "fatal"], case_sensitive=False),
     help="Log level (default: DEBUG)",
 )
 def shell(code, level):
@@ -178,9 +175,7 @@ def extract():
     pass
 
 
-@extract.command(
-    help=f"Perform a GET request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Perform a GET request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option(
@@ -190,9 +185,7 @@ def extract():
     help='HTTP headers in format "Key: Value" (can be used multiple times)',
 )
 @option("--cookies", help='Cookies string in format "name1=value1; name2=value2"')
-@option(
-    "--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)"
-)
+@option("--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)")
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--css-selector",
@@ -264,12 +257,12 @@ def get(
         impersonate=impersonate,
         proxy=proxy,
     )
+    from scrapling.fetchers import Fetcher
+
     __Request_and_Save(Fetcher.get, url, output_file, css_selector, **kwargs)
 
 
-@extract.command(
-    help=f"Perform a POST request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Perform a POST request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option(
@@ -285,9 +278,7 @@ def get(
     help='HTTP headers in format "Key: Value" (can be used multiple times)',
 )
 @option("--cookies", help='Cookies string in format "name1=value1; name2=value2"')
-@option(
-    "--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)"
-)
+@option("--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)")
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--css-selector",
@@ -364,12 +355,12 @@ def post(
         proxy=proxy,
         data=data,
     )
+    from scrapling.fetchers import Fetcher
+
     __Request_and_Save(Fetcher.post, url, output_file, css_selector, **kwargs)
 
 
-@extract.command(
-    help=f"Perform a PUT request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Perform a PUT request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option("--data", "-d", help="Form data to include in the request body")
@@ -381,9 +372,7 @@ def post(
     help='HTTP headers in format "Key: Value" (can be used multiple times)',
 )
 @option("--cookies", help='Cookies string in format "name1=value1; name2=value2"')
-@option(
-    "--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)"
-)
+@option("--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)")
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--css-selector",
@@ -460,12 +449,12 @@ def put(
         proxy=proxy,
         data=data,
     )
+    from scrapling.fetchers import Fetcher
+
     __Request_and_Save(Fetcher.put, url, output_file, css_selector, **kwargs)
 
 
-@extract.command(
-    help=f"Perform a DELETE request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Perform a DELETE request and save the content to a file.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option(
@@ -475,9 +464,7 @@ def put(
     help='HTTP headers in format "Key: Value" (can be used multiple times)',
 )
 @option("--cookies", help='Cookies string in format "name1=value1; name2=value2"')
-@option(
-    "--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)"
-)
+@option("--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)")
 @option("--proxy", help='Proxy URL in format "http://username:password@host:port"')
 @option(
     "--css-selector",
@@ -549,12 +536,12 @@ def delete(
         impersonate=impersonate,
         proxy=proxy,
     )
+    from scrapling.fetchers import Fetcher
+
     __Request_and_Save(Fetcher.delete, url, output_file, css_selector, **kwargs)
 
 
-@extract.command(
-    help=f"Use DynamicFetcher to fetch content with browser automation.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Use DynamicFetcher to fetch content with browser automation.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option(
@@ -591,9 +578,7 @@ def delete(
 )
 @option("--wait-selector", help="CSS selector to wait for before proceeding")
 @option("--locale", default="en-US", help="Browser locale (default: en-US)")
-@option(
-    "--stealth/--no-stealth", default=False, help="Enable stealth mode (default: False)"
-)
+@option("--stealth/--no-stealth", default=False, help="Enable stealth mode (default: False)")
 @option(
     "--hide-canvas/--show-canvas",
     default=False,
@@ -672,12 +657,12 @@ def fetch(
     if parsed_headers:
         kwargs["extra_headers"] = parsed_headers
 
+    from scrapling.fetchers import DynamicFetcher
+
     __Request_and_Save(DynamicFetcher.fetch, url, output_file, css_selector, **kwargs)
 
 
-@extract.command(
-    help=f"Use StealthyFetcher to fetch content with advanced stealth features.\n\n{__OUTPUT_FILE_HELP__}"
-)
+@extract.command(help=f"Use StealthyFetcher to fetch content with advanced stealth features.\n\n{__OUTPUT_FILE_HELP__}")
 @argument("url", required=True)
 @argument("output_file", required=True)
 @option(
@@ -821,6 +806,8 @@ def stealthy_fetch(
     if parsed_headers:
         kwargs["extra_headers"] = parsed_headers
 
+    from scrapling.fetchers import StealthyFetcher
+
     __Request_and_Save(StealthyFetcher.fetch, url, output_file, css_selector, **kwargs)
 
 
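The cli.py hunks above boil down to one pattern: fail early with a clear ModuleNotFoundError when click (now only installed with the extras) is missing, and defer the `scrapling.fetchers` imports into the command bodies so that a parser-only install can still import the package. Here is a condensed, illustrative sketch of that pattern under those assumptions; the command and its options are simplified stand-ins, not the real CLI surface.

```python
# Illustrative sketch of the guarded/deferred import pattern from the new cli.py.
try:
    # click is only present when an extra such as "fetchers"/"shell" is installed
    from click import group, argument, option
except (ImportError, ModuleNotFoundError) as e:
    raise ModuleNotFoundError(
        "You need to install scrapling with any of the extras to enable Shell commands."
    ) from e


@group()
def cli():
    """Hypothetical top-level command group."""


@cli.command()
@argument("url", required=True)
@option("--timeout", type=int, default=30, help="Request timeout in seconds (default: 30)")
def get(url, timeout):
    # Deferred import: the fetcher stack is only loaded when the command actually runs.
    from scrapling.fetchers import Fetcher

    page = Fetcher.get(url, timeout=timeout)
    print(page.status)


if __name__ == "__main__":
    cli()
```

In the real module, the commands additionally funnel their arguments through the shared `__BuildRequest` and `__Request_and_Save` helpers shown in the hunks above.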
scrapling/core/_html_utils.py

@@ -269,17 +269,13 @@ name2codepoint = {
 }
 
 
-def to_unicode(
-    text: StrOrBytes, encoding: Optional[str] = None, errors: str = "strict"
-) -> str:
+def to_unicode(text: StrOrBytes, encoding: Optional[str] = None, errors: str = "strict") -> str:
     """Return the Unicode representation of a bytes object `text`. If `text`
     is already a Unicode object, return it as-is."""
     if isinstance(text, str):
         return text
     if not isinstance(text, (bytes, str)):
-        raise TypeError(
-            f"to_unicode must receive bytes or str, got {type(text).__name__}"
-        )
+        raise TypeError(f"to_unicode must receive bytes or str, got {type(text).__name__}")
     if encoding is None:
         encoding = "utf-8"
     return text.decode(encoding, errors)
@@ -328,9 +324,7 @@ def _replace_entities(
     entity_name = groups["named"]
     if entity_name.lower() in keep:
         return m.group(0)
-    number = name2codepoint.get(entity_name) or name2codepoint.get(
-        entity_name.lower()
-    )
+    number = name2codepoint.get(entity_name) or name2codepoint.get(entity_name.lower())
     if number is not None:
         # Browsers typically
         # interpret numeric character references in the 80-9F range as representing the characters mapped