scrapingbee-cli 1.4.0__tar.gz → 1.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.1}/PKG-INFO +3 -1
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/pyproject.toml +3 -1
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/__init__.py +2 -2
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/crawl.py +7 -1
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1/src/scrapingbee_cli.egg-info}/PKG-INFO +3 -1
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/LICENSE +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/README.md +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/setup.cfg +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/audit.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/batch.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/cli.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/cli_utils.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/client.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/__init__.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/amazon.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/auth.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/chatgpt.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/crawl.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/export.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/fast_search.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/google.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/schedule.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/scrape.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/tutorial.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/unsafe.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/usage.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/walmart.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/youtube.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/config.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/credits.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/exec_gate.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/extract.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/__init__.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/runner.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/steps.py +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/SOURCES.txt +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
- {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.1
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://www.scrapingbee.com/
|
|
8
8
|
Project-URL: Documentation, https://www.scrapingbee.com/documentation/
|
|
9
9
|
Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
|
|
10
|
+
Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
|
|
11
|
+
Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
|
|
10
12
|
Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
|
|
11
13
|
Classifier: Development Status :: 4 - Beta
|
|
12
14
|
Classifier: Environment :: Console
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "scrapingbee-cli"
|
|
7
|
-
version = "1.4.0"
|
|
7
|
+
version = "1.4.1"
|
|
8
8
|
description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -48,6 +48,8 @@ dependencies = [
|
|
|
48
48
|
Homepage = "https://www.scrapingbee.com/"
|
|
49
49
|
Documentation = "https://www.scrapingbee.com/documentation/"
|
|
50
50
|
Repository = "https://github.com/ScrapingBee/scrapingbee-cli"
|
|
51
|
+
Changelog = "https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md"
|
|
52
|
+
Issues = "https://github.com/ScrapingBee/scrapingbee-cli/issues"
|
|
51
53
|
|
|
52
54
|
[project.optional-dependencies]
|
|
53
55
|
dev = [
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import platform
|
|
4
4
|
import sys
|
|
5
5
|
|
|
6
|
-
__version__ = "1.4.0"
|
|
6
|
+
__version__ = "1.4.1"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def user_agent_headers() -> dict[str, str]:
|
|
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
|
|
|
12
12
|
Returns a dict of headers:
|
|
13
13
|
User-Agent: ScrapingBee/CLI
|
|
14
14
|
User-Agent-Client: scrapingbee-cli
|
|
15
|
-
User-Agent-Client-Version: 1.4.0
|
|
15
|
+
User-Agent-Client-Version: 1.4.1
|
|
16
16
|
User-Agent-Environment: python
|
|
17
17
|
User-Agent-Environment-Version: 3.14.2
|
|
18
18
|
User-Agent-OS: Darwin arm64
|
|
@@ -90,7 +90,8 @@ def _params_for_discovery(params: dict[str, Any]) -> dict[str, Any]:
|
|
|
90
90
|
def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | None:
|
|
91
91
|
"""Return extension when scrape params force a response type (skip detection).
|
|
92
92
|
Priority: screenshot+json_response -> json; screenshot -> png;
|
|
93
|
-
return_page_markdown -> md; return_page_text -> txt;
|
|
93
|
+
return_page_markdown -> md; return_page_text -> txt;
|
|
94
|
+
json_response / extract_rules / ai_extract_rules / ai_query -> json.
|
|
94
95
|
"""
|
|
95
96
|
if _param_truthy(params, "screenshot") and _param_truthy(params, "json_response"):
|
|
96
97
|
return "json"
|
|
@@ -102,6 +103,11 @@ def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | Non
|
|
|
102
103
|
return "txt"
|
|
103
104
|
if _param_truthy(params, "json_response"):
|
|
104
105
|
return "json"
|
|
106
|
+
# extract_rules, ai_extract_rules, ai_query always return JSON regardless of URL.
|
|
107
|
+
# Without this, URLs ending in .html would be saved as .html despite JSON body
|
|
108
|
+
# (the URL-path heuristic in extension_for_crawl wins before body sniff).
|
|
109
|
+
if params.get("extract_rules") or params.get("ai_extract_rules") or params.get("ai_query"):
|
|
110
|
+
return "json"
|
|
105
111
|
return None
|
|
106
112
|
|
|
107
113
|
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.1
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://www.scrapingbee.com/
|
|
8
8
|
Project-URL: Documentation, https://www.scrapingbee.com/documentation/
|
|
9
9
|
Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
|
|
10
|
+
Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
|
|
11
|
+
Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
|
|
10
12
|
Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
|
|
11
13
|
Classifier: Development Status :: 4 - Beta
|
|
12
14
|
Classifier: Environment :: Console
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|