scrapingbee-cli 1.4.0__tar.gz → 1.4.1__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (40):
  1. {scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.1}/PKG-INFO +3 -1
  2. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/pyproject.toml +3 -1
  3. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/__init__.py +2 -2
  4. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/crawl.py +7 -1
  5. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1/src/scrapingbee_cli.egg-info}/PKG-INFO +3 -1
  6. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/LICENSE +0 -0
  7. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/README.md +0 -0
  8. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/setup.cfg +0 -0
  9. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/audit.py +0 -0
  10. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/batch.py +0 -0
  11. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/cli.py +0 -0
  12. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/cli_utils.py +0 -0
  13. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/client.py +0 -0
  14. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/__init__.py +0 -0
  15. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/amazon.py +0 -0
  16. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/auth.py +0 -0
  17. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/chatgpt.py +0 -0
  18. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/crawl.py +0 -0
  19. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/export.py +0 -0
  20. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/fast_search.py +0 -0
  21. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/google.py +0 -0
  22. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/schedule.py +0 -0
  23. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/scrape.py +0 -0
  24. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/tutorial.py +0 -0
  25. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/unsafe.py +0 -0
  26. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/usage.py +0 -0
  27. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/walmart.py +0 -0
  28. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/commands/youtube.py +0 -0
  29. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/config.py +0 -0
  30. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/credits.py +0 -0
  31. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/exec_gate.py +0 -0
  32. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/extract.py +0 -0
  33. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/__init__.py +0 -0
  34. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/runner.py +0 -0
  35. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli/tutorial/steps.py +0 -0
  36. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/SOURCES.txt +0 -0
  37. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
  38. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
  39. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
  40. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.1}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.4.0
3
+ Version: 1.4.1
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://www.scrapingbee.com/
8
8
  Project-URL: Documentation, https://www.scrapingbee.com/documentation/
9
9
  Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
10
+ Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
11
+ Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
10
12
  Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
11
13
  Classifier: Development Status :: 4 - Beta
12
14
  Classifier: Environment :: Console
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scrapingbee-cli"
7
- version = "1.4.0"
7
+ version = "1.4.1"
8
8
  description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -48,6 +48,8 @@ dependencies = [
48
48
  Homepage = "https://www.scrapingbee.com/"
49
49
  Documentation = "https://www.scrapingbee.com/documentation/"
50
50
  Repository = "https://github.com/ScrapingBee/scrapingbee-cli"
51
+ Changelog = "https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md"
52
+ Issues = "https://github.com/ScrapingBee/scrapingbee-cli/issues"
51
53
 
52
54
  [project.optional-dependencies]
53
55
  dev = [
@@ -3,7 +3,7 @@
3
3
  import platform
4
4
  import sys
5
5
 
6
- __version__ = "1.4.0"
6
+ __version__ = "1.4.1"
7
7
 
8
8
 
9
9
  def user_agent_headers() -> dict[str, str]:
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
12
12
  Returns a dict of headers:
13
13
  User-Agent: ScrapingBee/CLI
14
14
  User-Agent-Client: scrapingbee-cli
15
- User-Agent-Client-Version: 1.4.0
15
+ User-Agent-Client-Version: 1.4.1
16
16
  User-Agent-Environment: python
17
17
  User-Agent-Environment-Version: 3.14.2
18
18
  User-Agent-OS: Darwin arm64
@@ -90,7 +90,8 @@ def _params_for_discovery(params: dict[str, Any]) -> dict[str, Any]:
90
90
  def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | None:
91
91
  """Return extension when scrape params force a response type (skip detection).
92
92
  Priority: screenshot+json_response -> json; screenshot -> png;
93
- return_page_markdown -> md; return_page_text -> txt; json_response -> json.
93
+ return_page_markdown -> md; return_page_text -> txt;
94
+ json_response / extract_rules / ai_extract_rules / ai_query -> json.
94
95
  """
95
96
  if _param_truthy(params, "screenshot") and _param_truthy(params, "json_response"):
96
97
  return "json"
@@ -102,6 +103,11 @@ def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | Non
102
103
  return "txt"
103
104
  if _param_truthy(params, "json_response"):
104
105
  return "json"
106
+ # extract_rules, ai_extract_rules, ai_query always return JSON regardless of URL.
107
+ # Without this, URLs ending in .html would be saved as .html despite JSON body
108
+ # (the URL-path heuristic in extension_for_crawl wins before body sniff).
109
+ if params.get("extract_rules") or params.get("ai_extract_rules") or params.get("ai_query"):
110
+ return "json"
105
111
  return None
106
112
 
107
113
 
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.4.0
3
+ Version: 1.4.1
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://www.scrapingbee.com/
8
8
  Project-URL: Documentation, https://www.scrapingbee.com/documentation/
9
9
  Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
10
+ Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
11
+ Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
10
12
  Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
11
13
  Classifier: Development Status :: 4 - Beta
12
14
  Classifier: Environment :: Console
File without changes