PyPI - scrapingbee-cli - Versions diffs - 1.4.0__tar.gz → 1.4.2__tar.gz - Mend

scrapingbee-cli 1.4.0 (tar.gz) → 1.4.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

{scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.2}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,14 @@
 Metadata-Version: 2.4
 Name: scrapingbee-cli
-Version: 1.4.0
+Version: 1.4.2
 Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
 Author: ScrapingBee
 License-Expression: MIT
 Project-URL: Homepage, https://www.scrapingbee.com/
 Project-URL: Documentation, https://www.scrapingbee.com/documentation/
 Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
+Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
+Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
 Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "scrapingbee-cli"
-version = "1.4.0"
+version = "1.4.2"
 description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
 readme = "README.md"
 license = "MIT"
@@ -48,6 +48,8 @@ dependencies = [
 Homepage = "https://www.scrapingbee.com/"
 Documentation = "https://www.scrapingbee.com/documentation/"
 Repository = "https://github.com/ScrapingBee/scrapingbee-cli"
+Changelog = "https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md"
+Issues = "https://github.com/ScrapingBee/scrapingbee-cli/issues"
 [project.optional-dependencies]
 dev = [

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/__init__.py RENAMED Viewed

@@ -3,7 +3,7 @@
 import platform
 import sys
-__version__ = "1.4.0"
+__version__ = "1.4.2"
 def user_agent_headers() -> dict[str, str]:
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
     Returns a dict of headers:
         User-Agent: ScrapingBee/CLI
         User-Agent-Client: scrapingbee-cli
-        User-Agent-Client-Version: 1.4.0
+        User-Agent-Client-Version: 1.4.2
         User-Agent-Environment: python
         User-Agent-Environment-Version: 3.14.2
         User-Agent-OS: Darwin arm64

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/cli_utils.py RENAMED Viewed

@@ -1304,6 +1304,7 @@ def build_scrape_kwargs(
     device: str | None = None,
     custom_google: str | None = None,
     transparent_status_code: str | None = None,
+    tag: str | None = None,
     body: str | None = None,
     scraping_config: str | None = None,
 ) -> dict[str, Any]:
@@ -1344,6 +1345,7 @@ def build_scrape_kwargs(
         "device": device,
         "custom_google": parse_bool(custom_google),
         "transparent_status_code": parse_bool(transparent_status_code),
+        "tag": tag,
         "body": body,
         "scraping_config": scraping_config,
     }
@@ -1564,6 +1566,7 @@ def write_output(
             ("spb-cost", "Credit Cost"),
             ("spb-resolved-url", "Resolved URL"),
             ("spb-initial-status-code", "Initial Status Code"),
+            ("tag", "Tag"),
         ]:
             if key in headers_lower:
                 _, val = headers_lower[key]

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/client.py RENAMED Viewed

@@ -176,6 +176,7 @@ class Client:
         device: str | None = None,
         custom_google: bool | None = None,
         transparent_status_code: bool | None = None,
+        tag: str | None = None,
         body: str | None = None,
         scraping_config: str | None = None,
         retries: int = 3,
@@ -218,6 +219,7 @@ class Client:
             ("device", device),
             ("custom_google", self._bool(custom_google)),
             ("transparent_status_code", self._bool(transparent_status_code)),
+            ("tag", tag),
             ("scraping_config", scraping_config),
         ]:
             if v is not None:
@@ -290,6 +292,8 @@ class Client:
         extra_params: str | None = None,
         add_html: bool | None = None,
         light_request: bool | None = None,
+        tag: str | None = None,
+        date_range: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -304,6 +308,8 @@ class Client:
             "extra_params": extra_params,
             "add_html": self._bool(add_html),
             "light_request": self._bool(light_request),
+            "tag": tag,
+            "date_range": date_range,
         }
         return await self._get_with_retry(
             "/google",
@@ -318,6 +324,7 @@ class Client:
         page: int | None = None,
         country_code: str | None = None,
         language: str | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -326,6 +333,7 @@ class Client:
             "page": page if page is not None else None,
             "country_code": country_code,
             "language": language,
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/fast_search",
@@ -346,6 +354,7 @@ class Client:
         add_html: bool | None = None,
         light_request: bool | None = None,
         screenshot: bool | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -360,6 +369,7 @@ class Client:
             "add_html": self._bool(add_html),
             "light_request": self._bool(light_request),
             "screenshot": self._bool(screenshot),
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/amazon/product",
@@ -386,6 +396,7 @@ class Client:
         add_html: bool | None = None,
         light_request: bool | None = None,
         screenshot: bool | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -406,6 +417,7 @@ class Client:
             "add_html": self._bool(add_html),
             "light_request": self._bool(light_request),
             "screenshot": self._bool(screenshot),
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/amazon/search",
@@ -430,6 +442,7 @@ class Client:
         add_html: bool | None = None,
         light_request: bool | None = None,
         screenshot: bool | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -448,6 +461,7 @@ class Client:
             "add_html": self._bool(add_html),
             "light_request": self._bool(light_request),
             "screenshot": self._bool(screenshot),
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/walmart/search",
@@ -466,6 +480,7 @@ class Client:
         add_html: bool | None = None,
         light_request: bool | None = None,
         screenshot: bool | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -478,6 +493,7 @@ class Client:
             "add_html": self._bool(add_html),
             "light_request": self._bool(light_request),
             "screenshot": self._bool(screenshot),
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/walmart/product",
@@ -504,6 +520,7 @@ class Client:
         location: bool | None = None,
         vr180: bool | None = None,
         purchased: bool | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -524,6 +541,7 @@ class Client:
             "location": self._bool(location),
             "vr180": self._bool(vr180),
             "purchased": self._bool(purchased),
+            "tag": tag,
         }
         return await self._get_with_retry(
             "/youtube/search",
@@ -535,12 +553,13 @@ class Client:
     async def youtube_metadata(
         self,
         video_id: str,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
         return await self._get_with_retry(
             "/youtube/metadata",
-            {"video_id": video_id},
+            {"video_id": video_id, "tag": tag},
             retries=retries,
             backoff=backoff,
         )
@@ -551,6 +570,7 @@ class Client:
         search: bool | None = None,
         add_html: bool | None = None,
         country_code: str | None = None,
+        tag: str | None = None,
         retries: int = 3,
         backoff: float = 2.0,
     ) -> tuple[bytes, dict, int]:
@@ -561,6 +581,8 @@ class Client:
             params["add_html"] = str(add_html).lower()
         if country_code is not None:
             params["country_code"] = country_code
+        if tag is not None:
+            params["tag"] = tag
         return await self._get_with_retry(
             "/chatgpt",
             params,

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/amazon.py RENAMED Viewed

@@ -66,6 +66,12 @@ AMAZON_SORT_BY = [
 )
 @optgroup.option("--light-request", type=str, default=None, help="Light request mode (true/false).")
 @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def amazon_product_cmd(
@@ -80,6 +86,7 @@ def amazon_product_cmd(
     add_html: str | None,
     light_request: str | None,
     screenshot: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Fetch Amazon product details by ASIN."""
@@ -125,6 +132,7 @@ def amazon_product_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -167,6 +175,7 @@ def amazon_product_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -224,6 +233,12 @@ def amazon_product_cmd(
 @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
 @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
 @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def amazon_search_cmd(
@@ -244,6 +259,7 @@ def amazon_search_cmd(
     add_html: str | None,
     light_request: str | None,
     screenshot: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Search Amazon products."""
@@ -297,6 +313,7 @@ def amazon_search_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -345,6 +362,7 @@ def amazon_search_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/chatgpt.py RENAMED Viewed

@@ -46,6 +46,12 @@ from ..config import BASE_URL, get_api_key
     default=None,
     help="Country code for geolocation (ISO 3166-1).",
 )
+@click.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options  # must be after command-specific options
 @click.pass_obj
 def chatgpt_cmd(
@@ -54,6 +60,7 @@ def chatgpt_cmd(
     search: str | None,
     add_html: str | None,
     country_code: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Send a prompt to the ChatGPT API."""
@@ -93,6 +100,7 @@ def chatgpt_cmd(
                 search=parse_bool(search),
                 add_html=parse_bool(add_html),
                 country_code=country_code,
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -131,6 +139,7 @@ def chatgpt_cmd(
                 search=parse_bool(search),
                 add_html=parse_bool(add_html),
                 country_code=country_code,
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/crawl.py RENAMED Viewed

@@ -60,6 +60,7 @@ def _crawl_build_params(
     device: str | None,
     custom_google: str | None,
     transparent_status_code: str | None,
+    tag: str | None = None,
     scraping_config: str | None = None,
 ) -> dict[str, str]:
     """Build ScrapingBee API params dict from crawl options (quick-crawl URL mode)."""
@@ -98,6 +99,7 @@ def _crawl_build_params(
         device=device,
         custom_google=custom_google,
         transparent_status_code=transparent_status_code,
+        tag=tag,
         body=None,
         scraping_config=scraping_config,
     )
@@ -248,6 +250,12 @@ def _crawl_build_params(
     default=None,
     help="Return target status as-is (true/false).",
 )
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @optgroup.group("Crawl", help="Quick-crawl: depth, pages, output, throttling")
 @optgroup.option(
     "--max-depth",
@@ -372,6 +380,7 @@ def crawl_cmd(
     device: str | None,
     custom_google: str | None,
     transparent_status_code: str | None,
+    tag: str | None,
     max_depth: int,
     max_pages: int,
     allowed_domains: str | None,
@@ -500,6 +509,7 @@ def crawl_cmd(
                 device=device,
                 custom_google=custom_google,
                 transparent_status_code=transparent_status_code,
+                tag=tag,
                 scraping_config=scraping_config,
             )
         except ValueError as e:
@@ -602,6 +612,7 @@ def crawl_cmd(
             "--device": device,
             "--custom-google": custom_google,
             "--transparent-status-code": transparent_status_code,
+            "--tag": tag,
         }
         used = [flag for flag, val in api_flags.items() if val is not None]
         if headers:

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/fast_search.py RENAMED Viewed

@@ -38,6 +38,12 @@ from ..config import BASE_URL, get_api_key
     help="Country code for results (ISO 3166-1, e.g. us, fr).",
 )
 @optgroup.option("--language", type=str, default=None, help="Language code (e.g. en, fr).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def fast_search_cmd(
@@ -46,6 +52,7 @@ def fast_search_cmd(
     page: int | None,
     country_code: str | None,
     language: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Search using the Fast Search API (sub-second results)."""
@@ -86,6 +93,7 @@ def fast_search_cmd(
                 page=page,
                 country_code=country_code,
                 language=language,
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -122,6 +130,7 @@ def fast_search_cmd(
                 page=page,
                 country_code=country_code,
                 language=language,
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/google.py RENAMED Viewed

@@ -83,6 +83,15 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
     default=None,
     help="Language code for results (e.g. en, fr, de). Default: en.",
 )
+@optgroup.option(
+    "--date-range",
+    type=NormalizedChoice(
+        ["past-hour", "past-day", "past-week", "past-month", "past-year"],
+        case_sensitive=False,
+    ),
+    default=None,
+    help="Restrict results to the past hour/day/week/month/year.",
+)
 @optgroup.group("Filters", help="Autocorrection, extra params, and response format")
 @optgroup.option("--nfpr", type=str, default=None, help="Disable autocorrection (true/false).")
 @optgroup.option(
@@ -97,6 +106,12 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
     default=None,
     help="Light request mode, 10 credits (true/false). Fewer data than regular.",
 )
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def google_cmd(
@@ -111,6 +126,8 @@ def google_cmd(
     extra_params: str | None,
     add_html: str | None,
     light_request: str | None,
+    tag: str | None,
+    date_range: str | None,
     **kwargs,
 ) -> None:
     """Search Google using the Google Search API."""
@@ -157,6 +174,8 @@ def google_cmd(
                 extra_params=extra_params,
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
+                tag=tag,
+                date_range=norm_val(date_range),
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -199,6 +218,8 @@ def google_cmd(
                 extra_params=extra_params,
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
+                tag=tag,
+                date_range=norm_val(date_range),
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/scrape.py RENAMED Viewed

@@ -293,6 +293,12 @@ SCRAPE_PRESETS = (
     default=None,
     help="Return target status/body as-is (true/false). No retry on 500.",
 )
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @optgroup.option(
     "-X",
     "--method",
@@ -351,6 +357,7 @@ def scrape_cmd(
     device: str | None,
     custom_google: str | None,
     transparent_status_code: str | None,
+    tag: str | None,
     method: str,
     body: str | None,
     escalate_proxy: bool,
@@ -476,6 +483,7 @@ def scrape_cmd(
             device=device,
             custom_google=custom_google,
             transparent_status_code=transparent_status_code,
+            tag=tag,
             body=body,
             scraping_config=scraping_config,
         )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/walmart.py RENAMED Viewed

@@ -72,6 +72,12 @@ WALMART_SORT_BY = ["best-match", "price-low", "price-high", "best-seller"]
 @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
 @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
 @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def walmart_search_cmd(
@@ -90,6 +96,7 @@ def walmart_search_cmd(
     add_html: str | None,
     light_request: str | None,
     screenshot: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Search Walmart products."""
@@ -141,6 +148,7 @@ def walmart_search_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -187,6 +195,7 @@ def walmart_search_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -225,6 +234,12 @@ def walmart_search_cmd(
 @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
 @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
 @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def walmart_product_cmd(
@@ -237,6 +252,7 @@ def walmart_product_cmd(
     add_html: str | None,
     light_request: str | None,
     screenshot: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Fetch Walmart product details by product ID."""
@@ -280,6 +296,7 @@ def walmart_product_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -320,6 +337,7 @@ def walmart_product_cmd(
                 add_html=parse_bool(add_html),
                 light_request=parse_bool(light_request),
                 screenshot=parse_bool(screenshot),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/youtube.py RENAMED Viewed

@@ -155,6 +155,12 @@ YOUTUBE_SORT_BY = ["relevance", "rating", "view-count", "upload-date"]
 @optgroup.option("--location", type=str, default=None, help="With location (true/false).")
 @optgroup.option("--vr180", type=str, default=None, help="VR180 only (true/false).")
 @optgroup.option("--purchased", type=str, default=None, help="Purchased only (true/false).")
+@optgroup.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def youtube_search_cmd(
@@ -175,6 +181,7 @@ def youtube_search_cmd(
     location: str | None,
     vr180: str | None,
     purchased: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Search YouTube videos."""
@@ -227,6 +234,7 @@ def youtube_search_cmd(
                 location=parse_bool(location),
                 vr180=parse_bool(vr180),
                 purchased=parse_bool(purchased),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -276,6 +284,7 @@ def youtube_search_cmd(
                 location=parse_bool(location),
                 vr180=parse_bool(vr180),
                 purchased=parse_bool(purchased),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -299,11 +308,18 @@ def youtube_search_cmd(
 @click.command("youtube-metadata")
 @click.argument("video_id", required=False)
+@click.option(
+    "--tag",
+    type=str,
+    default=None,
+    help="Optional label included in API response headers.",
+)
 @_batch_options
 @click.pass_obj
 def youtube_metadata_cmd(
     obj: dict,
     video_id: str | None,
+    tag: str | None,
     **kwargs,
 ) -> None:
     """Fetch YouTube video metadata."""
@@ -340,6 +356,7 @@ def youtube_metadata_cmd(
         async def api_call(client, vid):
             return await client.youtube_metadata(
                 _extract_video_id(vid),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )
@@ -373,6 +390,7 @@ def youtube_metadata_cmd(
         async with Client(key, BASE_URL) as client:
             data, headers, status_code = await client.youtube_metadata(
                 _extract_video_id(video_id),
+                tag=tag,
                 retries=int(obj.get("retries") or 3),
                 backoff=float(obj.get("backoff") or 2.0),
             )

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/crawl.py RENAMED Viewed

@@ -90,7 +90,8 @@ def _params_for_discovery(params: dict[str, Any]) -> dict[str, Any]:
 def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | None:
     """Return extension when scrape params force a response type (skip detection).
     Priority: screenshot+json_response -> json; screenshot -> png;
-    return_page_markdown -> md; return_page_text -> txt; json_response -> json.
+    return_page_markdown -> md; return_page_text -> txt;
+    json_response / extract_rules / ai_extract_rules / ai_query -> json.
     """
     if _param_truthy(params, "screenshot") and _param_truthy(params, "json_response"):
         return "json"
@@ -102,6 +103,11 @@ def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | Non
         return "txt"
     if _param_truthy(params, "json_response"):
         return "json"
+    # extract_rules, ai_extract_rules, ai_query always return JSON regardless of URL.
+    # Without this, URLs ending in .html would be saved as .html despite JSON body
+    # (the URL-path heuristic in extension_for_crawl wins before body sniff).
+    if params.get("extract_rules") or params.get("ai_extract_rules") or params.get("ai_query"):
+        return "json"
     return None

{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2/src/scrapingbee_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,14 @@
 Metadata-Version: 2.4
 Name: scrapingbee-cli
-Version: 1.4.0
+Version: 1.4.2
 Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
 Author: ScrapingBee
 License-Expression: MIT
 Project-URL: Homepage, https://www.scrapingbee.com/
 Project-URL: Documentation, https://www.scrapingbee.com/documentation/
 Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
+Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
+Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
 Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console