scrapingbee-cli 1.2.3__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {scrapingbee_cli-1.2.3/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.3.1}/PKG-INFO +13 -2
  2. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/README.md +12 -1
  3. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/pyproject.toml +1 -1
  4. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/__init__.py +1 -1
  5. scrapingbee_cli-1.3.1/src/scrapingbee_cli/audit.py +60 -0
  6. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/batch.py +7 -0
  7. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/cli_utils.py +28 -3
  8. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/client.py +8 -0
  9. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/__init__.py +3 -0
  10. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/amazon.py +2 -1
  11. scrapingbee_cli-1.3.1/src/scrapingbee_cli/commands/auth.py +264 -0
  12. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/crawl.py +11 -1
  13. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/google.py +2 -1
  14. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/schedule.py +7 -1
  15. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/scrape.py +8 -0
  16. scrapingbee_cli-1.3.1/src/scrapingbee_cli/commands/unsafe.py +84 -0
  17. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/walmart.py +19 -3
  18. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/youtube.py +9 -4
  19. scrapingbee_cli-1.3.1/src/scrapingbee_cli/exec_gate.py +192 -0
  20. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1/src/scrapingbee_cli.egg-info}/PKG-INFO +13 -2
  21. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli.egg-info/SOURCES.txt +3 -0
  22. scrapingbee_cli-1.2.3/src/scrapingbee_cli/commands/auth.py +0 -141
  23. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/LICENSE +0 -0
  24. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/setup.cfg +0 -0
  25. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/cli.py +0 -0
  26. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/chatgpt.py +0 -0
  27. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/export.py +0 -0
  28. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/fast_search.py +0 -0
  29. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/commands/usage.py +0 -0
  30. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/config.py +0 -0
  31. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/crawl.py +0 -0
  32. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli/credits.py +0 -0
  33. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
  34. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
  35. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
  36. {scrapingbee_cli-1.2.3 → scrapingbee_cli-1.3.1}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.2.3
3
+ Version: 1.3.1
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
@@ -81,7 +81,9 @@ scrapingbee [command] [arguments] [options]
81
81
  - **`scrapingbee --help`** – List all commands.
82
82
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
83
83
 
84
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
84
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
85
+
86
+ **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
85
87
 
86
88
  ### Commands
87
89
 
@@ -117,6 +119,7 @@ scrapingbee [command] [arguments] [options]
117
119
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
118
120
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
119
121
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
122
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/).
120
123
 
121
124
  ### Examples
122
125
 
@@ -133,6 +136,14 @@ scrapingbee schedule --every 1d --name price-tracker scrape --input-file product
133
136
  scrapingbee schedule --list
134
137
  ```
135
138
 
139
+ ## Security
140
+
141
+ The `--post-process` and `--on-complete` options and the `schedule` command execute arbitrary shell commands on your machine. These features are **disabled by default** and require explicit human setup to enable.
142
+
143
+ To set up these advanced features, see the Security section in our [CLI documentation](https://www.scrapingbee.com/documentation/cli/).
144
+
145
+ **Do not enable these features in AI agent environments** where commands may be constructed from scraped web content. ScrapingBee is not responsible for any damages caused by shell execution features. Use at your own discretion.
146
+
136
147
  ## More information
137
148
 
138
149
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
@@ -44,7 +44,9 @@ scrapingbee [command] [arguments] [options]
44
44
  - **`scrapingbee --help`** – List all commands.
45
45
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
46
46
 
47
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
47
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
48
+
49
+ **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
48
50
 
49
51
  ### Commands
50
52
 
@@ -80,6 +82,7 @@ scrapingbee [command] [arguments] [options]
80
82
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
81
83
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
82
84
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
85
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/).
83
86
 
84
87
  ### Examples
85
88
 
@@ -96,6 +99,14 @@ scrapingbee schedule --every 1d --name price-tracker scrape --input-file product
96
99
  scrapingbee schedule --list
97
100
  ```
98
101
 
102
+ ## Security
103
+
104
+ The `--post-process` and `--on-complete` options and the `schedule` command execute arbitrary shell commands on your machine. These features are **disabled by default** and require explicit human setup to enable.
105
+
106
+ To set up these advanced features, see the Security section in our [CLI documentation](https://www.scrapingbee.com/documentation/cli/).
107
+
108
+ **Do not enable these features in AI agent environments** where commands may be constructed from scraped web content. ScrapingBee is not responsible for any damages caused by shell execution features. Use at your own discretion.
109
+
99
110
  ## More information
100
111
 
101
112
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scrapingbee-cli"
7
- version = "1.2.3"
7
+ version = "1.3.1"
8
8
  description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -3,7 +3,7 @@
3
3
  import platform
4
4
  import sys
5
5
 
6
- __version__ = "1.2.3"
6
+ __version__ = "1.3.1"
7
7
 
8
8
 
9
9
  def user_agent() -> str:
@@ -0,0 +1,60 @@
1
+ """Audit logging for exec features (--post-process, --on-complete, schedule).
2
+
3
+ Logs every shell command execution to a fixed location for forensics
4
+ and guard skill monitoring.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+
12
+ AUDIT_LOG_PATH = Path.home() / ".config" / "scrapingbee-cli" / "audit.log"
13
+ MAX_LINES = 10_000
14
+
15
+
16
def log_exec(
    feature: str,
    command: str,
    *,
    input_source: str = "",
    output_dir: str = "",
) -> None:
    """Append one entry to the audit log (best-effort).

    Format: ISO_TIMESTAMP | FEATURE | COMMAND | INPUT | OUTPUT_DIR

    Every entry occupies exactly one line: carriage returns and line feeds
    inside a field are written as visible backslash escapes, so a multi-line
    command cannot forge additional log entries.

    Args:
        feature: Name of the exec feature that triggered the command.
        command: The shell command being executed.
        input_source: Optional description of the input the command works on.
        output_dir: Optional output directory associated with the run.

    Any ``OSError`` (directory creation, append, rotation) is swallowed on
    purpose: a failure to audit must not abort the caller.
    """

    def _one_line(value: str) -> str:
        # Keep the record on a single physical line.
        return str(value).replace("\r", "\\r").replace("\n", "\\n")

    timestamp = datetime.now(timezone.utc).isoformat()
    fields = (feature, command, input_source, output_dir)
    entry = " | ".join([timestamp, *(_one_line(field) for field in fields)]) + "\n"
    try:
        # Creating the directory can fail with OSError too, so it belongs
        # inside the best-effort guard with the write itself.
        AUDIT_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(AUDIT_LOG_PATH, "a", encoding="utf-8") as f:
            f.write(entry)
        _rotate_if_needed()
    except OSError:
        pass
36
+
37
+
38
def read_audit_log(n: int = 50) -> str:
    """Return the last ``n`` lines of the audit log as one string.

    Args:
        n: Maximum number of trailing lines to return. Values of zero or
            less yield an empty string.

    Returns:
        The concatenated tail of the log, ``"No audit log found."`` when the
        log file does not exist, or ``"Could not read audit log."`` when the
        file cannot be read.
    """
    from collections import deque

    if not AUDIT_LOG_PATH.is_file():
        return "No audit log found."
    if n <= 0:
        # A slice like lines[-0:] would return the whole log, not nothing.
        return ""
    try:
        with open(AUDIT_LOG_PATH, encoding="utf-8") as f:
            # A bounded deque keeps only the newest n lines while streaming.
            return "".join(deque(f, maxlen=n))
    except OSError:
        return "Could not read audit log."
49
+
50
+
51
def _rotate_if_needed() -> None:
    """Trim the audit log to its newest MAX_LINES lines; I/O errors are ignored."""
    try:
        with open(AUDIT_LOG_PATH, encoding="utf-8") as reader:
            entries = list(reader)
        overflow = len(entries) - MAX_LINES
        if overflow <= 0:
            # Still within the limit: nothing to rewrite.
            return
        with open(AUDIT_LOG_PATH, "w", encoding="utf-8") as writer:
            writer.writelines(entries[overflow:])
    except OSError:
        pass
@@ -705,6 +705,13 @@ def apply_post_process(body: bytes, cmd: str) -> bytes:
705
705
  """Run shell command with body as stdin, return stdout. On failure, return original body."""
706
706
  import subprocess
707
707
 
708
+ from .audit import log_exec
709
+ from .exec_gate import require_exec
710
+
711
+ require_exec("--post-process", cmd)
712
+ log_exec("post-process", cmd)
713
+ click.echo(f"⚠ Executing: {cmd.split()[0] if cmd.split() else cmd} (whitelisted)", err=True)
714
+
708
715
  try:
709
716
  result = subprocess.run(
710
717
  cmd,
@@ -9,6 +9,23 @@ from typing import Any
9
9
  import click
10
10
 
11
11
 
12
class NormalizedChoice(click.Choice):
    """A ``click.Choice`` that treats underscores in the input as hyphens.

    The supplied value has every underscore replaced by a hyphen before the
    regular ``click.Choice`` validation runs, so ``--sort-by price_low`` is
    validated exactly like ``--sort-by price-low``.
    """

    def convert(self, value: str, param: Any, ctx: Any) -> str:
        """Replace underscores with hyphens, then defer to ``click.Choice``."""
        # None is passed through untouched; the parent class decides what to do.
        hyphenated = None if value is None else value.replace("_", "-")
        return super().convert(hyphenated, param, ctx)
27
+
28
+
12
29
  def _output_options(f: Any) -> Any:
13
30
  """Output + Retry options (for commands without batch support)."""
14
31
  f = click.option(
@@ -105,7 +122,7 @@ def _batch_options(f: Any) -> Any:
105
122
  "post_process",
106
123
  type=str,
107
124
  default=None,
108
- help="Batch: pipe each result through a shell command (e.g. 'jq .title').",
125
+ help="[Advanced] Batch: pipe each result through a shell command (e.g. 'jq .title'). Requires unsafe mode.",
109
126
  )(f)
110
127
  f = click.option(
111
128
  "--update-csv",
@@ -132,7 +149,7 @@ def _batch_options(f: Any) -> Any:
132
149
  "on_complete",
133
150
  type=str,
134
151
  default=None,
135
- help="Batch: shell command to run after completion.",
152
+ help="[Advanced] Batch: shell command to run after completion. Requires unsafe mode.",
136
153
  )(f)
137
154
  f = click.option("--retries", type=int, default=3, help="Retry on errors (default: 3).")(f)
138
155
  f = click.option(
@@ -385,6 +402,7 @@ def build_scrape_kwargs(
385
402
  custom_google: str | None = None,
386
403
  transparent_status_code: str | None = None,
387
404
  body: str | None = None,
405
+ scraping_config: str | None = None,
388
406
  ) -> dict[str, Any]:
389
407
  """Build kwargs for Client.scrape() from scrape command options.
390
408
  Single source of parse_bool for bool-like opts."""
@@ -424,6 +442,7 @@ def build_scrape_kwargs(
424
442
  "custom_google": parse_bool(custom_google),
425
443
  "transparent_status_code": parse_bool(transparent_status_code),
426
444
  "body": body,
445
+ "scraping_config": scraping_config,
427
446
  }
428
447
 
429
448
 
@@ -595,11 +614,17 @@ def run_on_complete(
595
614
  import os
596
615
  import subprocess
597
616
 
617
+ from .audit import log_exec
618
+ from .exec_gate import require_exec
619
+
620
+ require_exec("--on-complete", cmd)
621
+ log_exec("on-complete", cmd, output_dir=output_dir)
622
+ click.echo(f"⚠ Executing: {cmd.split()[0] if cmd.split() else cmd} (whitelisted)", err=True)
623
+
598
624
  env = os.environ.copy()
599
625
  env["SCRAPINGBEE_OUTPUT_DIR"] = output_dir
600
626
  env["SCRAPINGBEE_SUCCEEDED"] = str(succeeded)
601
627
  env["SCRAPINGBEE_FAILED"] = str(failed)
602
- click.echo(f"[on-complete] Running: {cmd}", err=True)
603
628
  result = subprocess.run(cmd, shell=True, env=env) # noqa: S602
604
629
  if result.returncode != 0:
605
630
  click.echo(f"[on-complete] Exit code: {result.returncode}", err=True)
@@ -177,6 +177,7 @@ class Client:
177
177
  custom_google: bool | None = None,
178
178
  transparent_status_code: bool | None = None,
179
179
  body: str | None = None,
180
+ scraping_config: str | None = None,
180
181
  retries: int = 3,
181
182
  backoff: float = 2.0,
182
183
  **kwargs: Any,
@@ -217,6 +218,7 @@ class Client:
217
218
  ("device", device),
218
219
  ("custom_google", self._bool(custom_google)),
219
220
  ("transparent_status_code", self._bool(transparent_status_code)),
221
+ ("scraping_config", scraping_config),
220
222
  ]:
221
223
  if v is not None:
222
224
  params[k] = str(v) if not isinstance(v, str) else v
@@ -415,6 +417,7 @@ class Client:
415
417
  async def walmart_search(
416
418
  self,
417
419
  query: str,
420
+ start_page: int | None = None,
418
421
  min_price: int | None = None,
419
422
  max_price: int | None = None,
420
423
  sort_by: str | None = None,
@@ -432,6 +435,7 @@ class Client:
432
435
  ) -> tuple[bytes, dict, int]:
433
436
  params = {
434
437
  "query": query,
438
+ "start_page": start_page if start_page is not None else None,
435
439
  "min_price": min_price if min_price is not None else None,
436
440
  "max_price": max_price if max_price is not None else None,
437
441
  "sort_by": sort_by,
@@ -455,6 +459,7 @@ class Client:
455
459
  async def walmart_product(
456
460
  self,
457
461
  product_id: str,
462
+ device: str | None = None,
458
463
  domain: str | None = None,
459
464
  delivery_zip: str | None = None,
460
465
  store_id: str | None = None,
@@ -466,6 +471,7 @@ class Client:
466
471
  ) -> tuple[bytes, dict, int]:
467
472
  params = {
468
473
  "product_id": product_id,
474
+ "device": device,
469
475
  "domain": domain,
470
476
  "delivery_zip": delivery_zip,
471
477
  "store_id": store_id,
@@ -497,6 +503,7 @@ class Client:
497
503
  hdr: bool | None = None,
498
504
  location: bool | None = None,
499
505
  vr180: bool | None = None,
506
+ purchased: bool | None = None,
500
507
  retries: int = 3,
501
508
  backoff: float = 2.0,
502
509
  ) -> tuple[bytes, dict, int]:
@@ -516,6 +523,7 @@ class Client:
516
523
  "hdr": self._bool(hdr),
517
524
  "location": self._bool(location),
518
525
  "vr180": self._bool(vr180),
526
+ "purchased": self._bool(purchased),
519
527
  }
520
528
  return await self._get_with_retry(
521
529
  "/youtube/search",
@@ -35,3 +35,6 @@ def register_commands(cli: click.Group) -> None:
35
35
  chatgpt.register(cli)
36
36
  export.register(cli)
37
37
  schedule.register(cli)
38
+ from . import unsafe
39
+
40
+ unsafe.register(cli)
@@ -17,6 +17,7 @@ from ..batch import (
17
17
  )
18
18
  from ..cli_utils import (
19
19
  DEVICE_DESKTOP_MOBILE_TABLET,
20
+ NormalizedChoice,
20
21
  _batch_options,
21
22
  _validate_page,
22
23
  check_api_response,
@@ -191,7 +192,7 @@ def amazon_product_cmd(
191
192
  @optgroup.option("--pages", type=int, default=None, help="Number of pages to fetch.")
192
193
  @optgroup.option(
193
194
  "--sort-by",
194
- type=click.Choice(AMAZON_SORT_BY, case_sensitive=False),
195
+ type=NormalizedChoice(AMAZON_SORT_BY, case_sensitive=False),
195
196
  default=None,
196
197
  help="Sort order.",
197
198
  )
@@ -0,0 +1,264 @@
1
+ """Auth, docs, and logout commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import getpass
7
+
8
+ import click
9
+
10
+ from ..client import Client
11
+ from ..config import (
12
+ BASE_URL,
13
+ auth_config_path,
14
+ get_api_key_if_set,
15
+ remove_api_key_from_dotenv,
16
+ save_api_key_to_dotenv,
17
+ )
18
+
19
+ DOCS_URL = "https://www.scrapingbee.com/documentation/"
20
+
21
+
22
def _validate_api_key(key: str) -> bool:
    """Check an API key against the usage endpoint.

    Returns True only when the usage call completes with HTTP status 200;
    any exception raised along the way counts as an invalid key.
    """

    async def _usage_status() -> int:
        async with Client(key, BASE_URL) as api:
            _, _, http_status = await api.usage(retries=1, backoff=1.0)
        return http_status

    try:
        return asyncio.run(_usage_status()) == 200
    except Exception:
        return False
35
+
36
+
37
+ _UNSAFE_DISCLAIMER = """
38
+ ════════════════════════════════════════════════════════════════
39
+ ⚠ WARNING: UNSAFE MODE
40
+ ════════════════════════════════════════════════════════════════
41
+
42
+ You are enabling shell execution features (--post-process,
43
+ --on-complete, and the schedule command). These execute ARBITRARY SHELL COMMANDS
44
+ on your machine.
45
+
46
+ RISKS:
47
+ • Data exfiltration (SSH keys, credentials, files)
48
+ • Arbitrary code execution
49
+ • Persistent backdoors via cron scheduling
50
+
51
+ DO NOT enable this in AI agent environments where commands
52
+ may be constructed from scraped web content.
53
+
54
+ ScrapingBee is NOT responsible for any damages caused by
55
+ these features. Use at your own discretion.
56
+
57
+ ════════════════════════════════════════════════════════════════
58
+ """
59
+
60
+
61
def _wipe_api_key_everywhere() -> None:
    """Remove the API key from config .env, cwd .env, and os.environ.

    In the cwd ``.env`` only lines that actually assign the API key variable
    (``NAME=...`` or ``export NAME=...``) are dropped. Other variables and
    comments that merely mention the name are kept, and the file is left
    untouched when it holds no such assignment. Read/write problems on the
    cwd ``.env`` are ignored (best-effort).
    """
    import os
    from pathlib import Path

    from ..config import ENV_API_KEY

    def _assigns_api_key(line: str) -> bool:
        """Return True when *line* is an assignment to ENV_API_KEY."""
        candidate = line.lstrip()
        if candidate.startswith("export "):
            candidate = candidate[len("export "):].lstrip()
        name, separator, _ = candidate.partition("=")
        return bool(separator) and name.strip() == ENV_API_KEY

    # Remove from config .env
    remove_api_key_from_dotenv()

    # Remove from cwd .env if present
    cwd_env = Path.cwd() / ".env"
    if cwd_env.is_file():
        try:
            with open(cwd_env, encoding="utf-8") as f:
                original_lines = f.readlines()
            kept_lines = [line for line in original_lines if not _assigns_api_key(line)]
            # Rewrite only when something was removed, so an unrelated .env
            # is never truncated and rewritten needlessly.
            if len(kept_lines) != len(original_lines):
                with open(cwd_env, "w", encoding="utf-8") as f:
                    f.writelines(kept_lines)
        except (OSError, UnicodeError):
            # Unreadable or non-UTF-8 project .env: skip it rather than crash.
            pass

    # Remove from current process env
    os.environ.pop(ENV_API_KEY, None)
87
+
88
+
89
@click.command()
@click.option(
    "--api-key",
    "auth_api_key",
    default=None,
    help="API key to save (non-interactive); otherwise uses env/.env or prompt.",
)
@click.option(
    "--show",
    "show_path_only",
    is_flag=True,
    default=False,
    help="Only show the path where the API key is or would be stored; do not save.",
)
# --unsafe is declared with hidden=True, so it is omitted from --help output.
@click.option(
    "--unsafe",
    "unsafe_mode",
    is_flag=True,
    default=False,
    hidden=True,
    help="Enable advanced shell execution features.",
)
@click.pass_obj
def auth_cmd(obj: dict, auth_api_key: str | None, show_path_only: bool, unsafe_mode: bool) -> None:
    """Save API key to ~/.config/scrapingbee-cli/.env (from --api-key, env/.env, or prompt)."""
    # Three mutually exclusive paths, checked in this order:
    #   1. --show:   print the config path and return without saving anything.
    #   2. --unsafe: interactive flow that wipes the key, shows the disclaimer,
    #                asks for consent, re-prompts for the key, validates it and
    #                then marks unsafe mode as verified.
    #   3. default:  take the key from --api-key, the environment/.env, or a
    #                prompt, validate it and save it.
    # Every failure path exits with status 1 via SystemExit.
    from ..exec_gate import is_exec_enabled, require_auth_unsafe, set_unsafe_verified

    path = auth_config_path()

    if show_path_only:
        click.echo(str(path))
        return

    if unsafe_mode:
        # Gate: check env vars are set (vague error if not)
        if not require_auth_unsafe():
            raise SystemExit(1)

        # Gate: reject --api-key (must be interactive only)
        # The message is intentionally generic and does not name the cause.
        if auth_api_key:
            click.echo("Something went wrong. Please try again later.", err=True)
            raise SystemExit(1)

        # Wipe API key from everywhere
        # NOTE(review): this runs before the consent prompt, so declining or
        # aborting below leaves the user without a stored key — confirm that
        # this ordering is intended.
        _wipe_api_key_everywhere()
        click.echo("API key removed for security re-authentication.", err=True)

        # Show disclaimer
        click.echo(_UNSAFE_DISCLAIMER, err=True)

        # Require acceptance
        # Only the exact answer "yes" (case-insensitive, surrounding whitespace
        # ignored) is accepted; EOF or Ctrl-C aborts.
        try:
            answer = input("Do you accept the risks? (yes/no): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            click.echo("\nAborted.", err=True)
            raise SystemExit(1)
        if answer != "yes":
            click.echo("Aborted. Unsafe mode not enabled.", err=True)
            raise SystemExit(1)

        # Prompt for API key (interactive only)
        try:
            raw = getpass.getpass("ScrapingBee API key: ")
        except (EOFError, KeyboardInterrupt):
            click.echo("\nAborted.", err=True)
            raise SystemExit(1)
        key = raw.strip()
        if not key:
            click.echo("No API key entered.", err=True)
            raise SystemExit(1)

        click.echo("Validating API key...", err=True)
        if not _validate_api_key(key):
            click.echo("Invalid API key.", err=True)
            raise SystemExit(1)

        # Save key and set unsafe verified
        save_api_key_to_dotenv(key)
        set_unsafe_verified()
        click.echo("API key saved. Unsafe mode enabled.", err=True)
        return

    # Normal auth flow (show warning if unsafe is enabled)
    if is_exec_enabled():
        click.echo("⚠ Unsafe mode is active. Shell execution features are enabled.", err=True)

    # An explicit --api-key wins; otherwise fall back to get_api_key_if_set(None),
    # and prompt only when neither yields a key.
    key = auth_api_key or get_api_key_if_set(None)
    if not key:
        try:
            raw = getpass.getpass("ScrapingBee API key: ")
        except (EOFError, KeyboardInterrupt):
            click.echo(
                "Cannot read API key (non-interactive). Use --api-key KEY or set SCRAPINGBEE_API_KEY.",
                err=True,
            )
            raise SystemExit(1)
        key = raw.strip()
    if not key:
        click.echo("No API key entered.", err=True)
        raise SystemExit(1)
    # The key is saved only after _validate_api_key() accepts it.
    click.echo("Validating API key...", err=True)
    if not _validate_api_key(key):
        click.echo("Invalid API key. Please check your key and try again.", err=True)
        raise SystemExit(1)
    # save_api_key_to_dotenv() returns the location that is reported to the user.
    path = save_api_key_to_dotenv(key)
    click.echo(f"API key saved to {path}. You can now run scrapingbee commands.")
195
+
196
+
197
@click.command()
@click.option(
    "--open/--no-open",
    "open_browser",
    default=False,
    help="Open the documentation URL in the default browser.",
)
def docs_cmd(open_browser: bool) -> None:
    """Print or open the ScrapingBee API documentation URL."""
    # The URL is always printed; the browser is launched only on request.
    click.echo(DOCS_URL)
    if not open_browser:
        return

    import webbrowser

    webbrowser.open(DOCS_URL)
211
+
212
+
213
+ @click.command()
214
+ @click.pass_obj
215
+ def logout_cmd(obj: dict) -> None:
216
+ """Remove stored API key from ~/.config/scrapingbee-cli/.env."""
217
+ import json
218
+ from pathlib import Path
219
+
220
+ # Check for active schedules
221
+ registry_path = Path.home() / ".config" / "scrapingbee-cli" / "schedules.json"
222
+ try:
223
+ registry = json.loads(registry_path.read_text(encoding="utf-8"))
224
+ except Exception:
225
+ registry = {}
226
+
227
+ if registry:
228
+ names = ", ".join(registry)
229
+ click.echo(
230
+ f"Warning: you have {len(registry)} active schedule(s): {names}.\n"
231
+ "These will fail without an API key.",
232
+ err=True,
233
+ )
234
+ if not click.confirm("Stop all schedules and logout?"):
235
+ click.echo("Logout cancelled.", err=True)
236
+ return
237
+ # Stop all schedules
238
+ from .schedule import _remove_cron_entry, _save_registry
239
+
240
+ for name in list(registry):
241
+ _remove_cron_entry(name)
242
+ click.echo(f" Stopped schedule '{name}'.", err=True)
243
+ _save_registry({})
244
+
245
+ removed = remove_api_key_from_dotenv()
246
+
247
+ # Also remove unsafe verified flag
248
+ from ..exec_gate import remove_unsafe_verified
249
+
250
+ remove_unsafe_verified()
251
+
252
+ if removed:
253
+ click.echo(f"API key removed from {auth_config_path()}.")
254
+ else:
255
+ click.echo(f"No stored API key found in {auth_config_path()}.")
256
+ click.echo(
257
+ "If you set SCRAPINGBEE_API_KEY in your shell, unset it with: unset SCRAPINGBEE_API_KEY"
258
+ )
259
+
260
+
261
def register(cli: click.Group) -> None:
    """Attach the auth, docs, and logout commands to the given CLI group."""
    commands = (
        (auth_cmd, "auth"),
        (docs_cmd, "docs"),
        (logout_cmd, "logout"),
    )
    for command, command_name in commands:
        cli.add_command(command, command_name)
@@ -59,6 +59,7 @@ def _crawl_build_params(
59
59
  device: str | None,
60
60
  custom_google: str | None,
61
61
  transparent_status_code: str | None,
62
+ scraping_config: str | None = None,
62
63
  ) -> dict[str, str]:
63
64
  """Build ScrapingBee API params dict from crawl options (quick-crawl URL mode)."""
64
65
  kwargs = build_scrape_kwargs(
@@ -97,6 +98,7 @@ def _crawl_build_params(
97
98
  custom_google=custom_google,
98
99
  transparent_status_code=transparent_status_code,
99
100
  body=None,
101
+ scraping_config=scraping_config,
100
102
  )
101
103
  return scrape_kwargs_to_api_params(kwargs)
102
104
 
@@ -117,6 +119,12 @@ def _crawl_build_params(
117
119
  default=None,
118
120
  help="Path to Scrapy project. Spider mode only.",
119
121
  )
122
+ @click.option(
123
+ "--scraping-config",
124
+ type=str,
125
+ default=None,
126
+ help="Apply a pre-saved scraping configuration by name. Create configs in the ScrapingBee dashboard. Inline options override config settings.",
127
+ )
120
128
  @optgroup.group("Rendering", help="JavaScript rendering and viewport options")
121
129
  @optgroup.option(
122
130
  "--render-js",
@@ -314,7 +322,7 @@ def _crawl_build_params(
314
322
  "on_complete",
315
323
  type=str,
316
324
  default=None,
317
- help="Shell command to run after crawl completes.",
325
+ help="[Advanced] Shell command to run after crawl completes. Requires unsafe mode.",
318
326
  )
319
327
  @_output_options
320
328
  @click.pass_obj
@@ -323,6 +331,7 @@ def crawl_cmd(
323
331
  target: tuple[str, ...],
324
332
  from_sitemap: str | None,
325
333
  project: str | None,
334
+ scraping_config: str | None,
326
335
  render_js: str | None,
327
336
  js_scenario: str | None,
328
337
  wait: int | None,
@@ -467,6 +476,7 @@ def crawl_cmd(
467
476
  device=device,
468
477
  custom_google=custom_google,
469
478
  transparent_status_code=transparent_status_code,
479
+ scraping_config=scraping_config,
470
480
  )
471
481
  except ValueError as e:
472
482
  click.echo(str(e), err=True)
@@ -17,6 +17,7 @@ from ..batch import (
17
17
  )
18
18
  from ..cli_utils import (
19
19
  DEVICE_DESKTOP_MOBILE,
20
+ NormalizedChoice,
20
21
  _batch_options,
21
22
  _validate_page,
22
23
  check_api_response,
@@ -56,7 +57,7 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
56
57
  @optgroup.group("Search", help="Search type, locale, and pagination")
57
58
  @optgroup.option(
58
59
  "--search-type",
59
- type=click.Choice(
60
+ type=NormalizedChoice(
60
61
  ["classic", "news", "maps", "lens", "shopping", "images", "ai-mode"],
61
62
  case_sensitive=False,
62
63
  ),