scrapingbee-cli 1.4.0__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.2}/PKG-INFO +3 -1
  2. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/pyproject.toml +3 -1
  3. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/__init__.py +2 -2
  4. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/cli_utils.py +3 -0
  5. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/client.py +23 -1
  6. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/amazon.py +18 -0
  7. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/chatgpt.py +9 -0
  8. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/crawl.py +11 -0
  9. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/fast_search.py +9 -0
  10. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/google.py +21 -0
  11. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/scrape.py +8 -0
  12. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/walmart.py +18 -0
  13. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/youtube.py +18 -0
  14. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/crawl.py +7 -1
  15. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2/src/scrapingbee_cli.egg-info}/PKG-INFO +3 -1
  16. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/LICENSE +0 -0
  17. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/README.md +0 -0
  18. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/setup.cfg +0 -0
  19. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/audit.py +0 -0
  20. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/batch.py +0 -0
  21. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/cli.py +0 -0
  22. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/__init__.py +0 -0
  23. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/auth.py +0 -0
  24. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/export.py +0 -0
  25. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/schedule.py +0 -0
  26. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/tutorial.py +0 -0
  27. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/unsafe.py +0 -0
  28. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/usage.py +0 -0
  29. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/config.py +0 -0
  30. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/credits.py +0 -0
  31. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/exec_gate.py +0 -0
  32. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/extract.py +0 -0
  33. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/__init__.py +0 -0
  34. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/runner.py +0 -0
  35. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/steps.py +0 -0
  36. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/SOURCES.txt +0 -0
  37. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
  38. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
  39. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
  40. {scrapingbee_cli-1.4.0 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://www.scrapingbee.com/
8
8
  Project-URL: Documentation, https://www.scrapingbee.com/documentation/
9
9
  Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
10
+ Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
11
+ Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
10
12
  Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
11
13
  Classifier: Development Status :: 4 - Beta
12
14
  Classifier: Environment :: Console
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scrapingbee-cli"
7
- version = "1.4.0"
7
+ version = "1.4.2"
8
8
  description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -48,6 +48,8 @@ dependencies = [
48
48
  Homepage = "https://www.scrapingbee.com/"
49
49
  Documentation = "https://www.scrapingbee.com/documentation/"
50
50
  Repository = "https://github.com/ScrapingBee/scrapingbee-cli"
51
+ Changelog = "https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md"
52
+ Issues = "https://github.com/ScrapingBee/scrapingbee-cli/issues"
51
53
 
52
54
  [project.optional-dependencies]
53
55
  dev = [
@@ -3,7 +3,7 @@
3
3
  import platform
4
4
  import sys
5
5
 
6
- __version__ = "1.4.0"
6
+ __version__ = "1.4.2"
7
7
 
8
8
 
9
9
  def user_agent_headers() -> dict[str, str]:
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
12
12
  Returns a dict of headers:
13
13
  User-Agent: ScrapingBee/CLI
14
14
  User-Agent-Client: scrapingbee-cli
15
- User-Agent-Client-Version: 1.4.0
15
+ User-Agent-Client-Version: 1.4.2
16
16
  User-Agent-Environment: python
17
17
  User-Agent-Environment-Version: 3.14.2
18
18
  User-Agent-OS: Darwin arm64
@@ -1304,6 +1304,7 @@ def build_scrape_kwargs(
1304
1304
  device: str | None = None,
1305
1305
  custom_google: str | None = None,
1306
1306
  transparent_status_code: str | None = None,
1307
+ tag: str | None = None,
1307
1308
  body: str | None = None,
1308
1309
  scraping_config: str | None = None,
1309
1310
  ) -> dict[str, Any]:
@@ -1344,6 +1345,7 @@ def build_scrape_kwargs(
1344
1345
  "device": device,
1345
1346
  "custom_google": parse_bool(custom_google),
1346
1347
  "transparent_status_code": parse_bool(transparent_status_code),
1348
+ "tag": tag,
1347
1349
  "body": body,
1348
1350
  "scraping_config": scraping_config,
1349
1351
  }
@@ -1564,6 +1566,7 @@ def write_output(
1564
1566
  ("spb-cost", "Credit Cost"),
1565
1567
  ("spb-resolved-url", "Resolved URL"),
1566
1568
  ("spb-initial-status-code", "Initial Status Code"),
1569
+ ("tag", "Tag"),
1567
1570
  ]:
1568
1571
  if key in headers_lower:
1569
1572
  _, val = headers_lower[key]
@@ -176,6 +176,7 @@ class Client:
176
176
  device: str | None = None,
177
177
  custom_google: bool | None = None,
178
178
  transparent_status_code: bool | None = None,
179
+ tag: str | None = None,
179
180
  body: str | None = None,
180
181
  scraping_config: str | None = None,
181
182
  retries: int = 3,
@@ -218,6 +219,7 @@ class Client:
218
219
  ("device", device),
219
220
  ("custom_google", self._bool(custom_google)),
220
221
  ("transparent_status_code", self._bool(transparent_status_code)),
222
+ ("tag", tag),
221
223
  ("scraping_config", scraping_config),
222
224
  ]:
223
225
  if v is not None:
@@ -290,6 +292,8 @@ class Client:
290
292
  extra_params: str | None = None,
291
293
  add_html: bool | None = None,
292
294
  light_request: bool | None = None,
295
+ tag: str | None = None,
296
+ date_range: str | None = None,
293
297
  retries: int = 3,
294
298
  backoff: float = 2.0,
295
299
  ) -> tuple[bytes, dict, int]:
@@ -304,6 +308,8 @@ class Client:
304
308
  "extra_params": extra_params,
305
309
  "add_html": self._bool(add_html),
306
310
  "light_request": self._bool(light_request),
311
+ "tag": tag,
312
+ "date_range": date_range,
307
313
  }
308
314
  return await self._get_with_retry(
309
315
  "/google",
@@ -318,6 +324,7 @@ class Client:
318
324
  page: int | None = None,
319
325
  country_code: str | None = None,
320
326
  language: str | None = None,
327
+ tag: str | None = None,
321
328
  retries: int = 3,
322
329
  backoff: float = 2.0,
323
330
  ) -> tuple[bytes, dict, int]:
@@ -326,6 +333,7 @@ class Client:
326
333
  "page": page if page is not None else None,
327
334
  "country_code": country_code,
328
335
  "language": language,
336
+ "tag": tag,
329
337
  }
330
338
  return await self._get_with_retry(
331
339
  "/fast_search",
@@ -346,6 +354,7 @@ class Client:
346
354
  add_html: bool | None = None,
347
355
  light_request: bool | None = None,
348
356
  screenshot: bool | None = None,
357
+ tag: str | None = None,
349
358
  retries: int = 3,
350
359
  backoff: float = 2.0,
351
360
  ) -> tuple[bytes, dict, int]:
@@ -360,6 +369,7 @@ class Client:
360
369
  "add_html": self._bool(add_html),
361
370
  "light_request": self._bool(light_request),
362
371
  "screenshot": self._bool(screenshot),
372
+ "tag": tag,
363
373
  }
364
374
  return await self._get_with_retry(
365
375
  "/amazon/product",
@@ -386,6 +396,7 @@ class Client:
386
396
  add_html: bool | None = None,
387
397
  light_request: bool | None = None,
388
398
  screenshot: bool | None = None,
399
+ tag: str | None = None,
389
400
  retries: int = 3,
390
401
  backoff: float = 2.0,
391
402
  ) -> tuple[bytes, dict, int]:
@@ -406,6 +417,7 @@ class Client:
406
417
  "add_html": self._bool(add_html),
407
418
  "light_request": self._bool(light_request),
408
419
  "screenshot": self._bool(screenshot),
420
+ "tag": tag,
409
421
  }
410
422
  return await self._get_with_retry(
411
423
  "/amazon/search",
@@ -430,6 +442,7 @@ class Client:
430
442
  add_html: bool | None = None,
431
443
  light_request: bool | None = None,
432
444
  screenshot: bool | None = None,
445
+ tag: str | None = None,
433
446
  retries: int = 3,
434
447
  backoff: float = 2.0,
435
448
  ) -> tuple[bytes, dict, int]:
@@ -448,6 +461,7 @@ class Client:
448
461
  "add_html": self._bool(add_html),
449
462
  "light_request": self._bool(light_request),
450
463
  "screenshot": self._bool(screenshot),
464
+ "tag": tag,
451
465
  }
452
466
  return await self._get_with_retry(
453
467
  "/walmart/search",
@@ -466,6 +480,7 @@ class Client:
466
480
  add_html: bool | None = None,
467
481
  light_request: bool | None = None,
468
482
  screenshot: bool | None = None,
483
+ tag: str | None = None,
469
484
  retries: int = 3,
470
485
  backoff: float = 2.0,
471
486
  ) -> tuple[bytes, dict, int]:
@@ -478,6 +493,7 @@ class Client:
478
493
  "add_html": self._bool(add_html),
479
494
  "light_request": self._bool(light_request),
480
495
  "screenshot": self._bool(screenshot),
496
+ "tag": tag,
481
497
  }
482
498
  return await self._get_with_retry(
483
499
  "/walmart/product",
@@ -504,6 +520,7 @@ class Client:
504
520
  location: bool | None = None,
505
521
  vr180: bool | None = None,
506
522
  purchased: bool | None = None,
523
+ tag: str | None = None,
507
524
  retries: int = 3,
508
525
  backoff: float = 2.0,
509
526
  ) -> tuple[bytes, dict, int]:
@@ -524,6 +541,7 @@ class Client:
524
541
  "location": self._bool(location),
525
542
  "vr180": self._bool(vr180),
526
543
  "purchased": self._bool(purchased),
544
+ "tag": tag,
527
545
  }
528
546
  return await self._get_with_retry(
529
547
  "/youtube/search",
@@ -535,12 +553,13 @@ class Client:
535
553
  async def youtube_metadata(
536
554
  self,
537
555
  video_id: str,
556
+ tag: str | None = None,
538
557
  retries: int = 3,
539
558
  backoff: float = 2.0,
540
559
  ) -> tuple[bytes, dict, int]:
541
560
  return await self._get_with_retry(
542
561
  "/youtube/metadata",
543
- {"video_id": video_id},
562
+ {"video_id": video_id, "tag": tag},
544
563
  retries=retries,
545
564
  backoff=backoff,
546
565
  )
@@ -551,6 +570,7 @@ class Client:
551
570
  search: bool | None = None,
552
571
  add_html: bool | None = None,
553
572
  country_code: str | None = None,
573
+ tag: str | None = None,
554
574
  retries: int = 3,
555
575
  backoff: float = 2.0,
556
576
  ) -> tuple[bytes, dict, int]:
@@ -561,6 +581,8 @@ class Client:
561
581
  params["add_html"] = str(add_html).lower()
562
582
  if country_code is not None:
563
583
  params["country_code"] = country_code
584
+ if tag is not None:
585
+ params["tag"] = tag
564
586
  return await self._get_with_retry(
565
587
  "/chatgpt",
566
588
  params,
@@ -66,6 +66,12 @@ AMAZON_SORT_BY = [
66
66
  )
67
67
  @optgroup.option("--light-request", type=str, default=None, help="Light request mode (true/false).")
68
68
  @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
69
+ @optgroup.option(
70
+ "--tag",
71
+ type=str,
72
+ default=None,
73
+ help="Optional label included in API response headers.",
74
+ )
69
75
  @_batch_options
70
76
  @click.pass_obj
71
77
  def amazon_product_cmd(
@@ -80,6 +86,7 @@ def amazon_product_cmd(
80
86
  add_html: str | None,
81
87
  light_request: str | None,
82
88
  screenshot: str | None,
89
+ tag: str | None,
83
90
  **kwargs,
84
91
  ) -> None:
85
92
  """Fetch Amazon product details by ASIN."""
@@ -125,6 +132,7 @@ def amazon_product_cmd(
125
132
  add_html=parse_bool(add_html),
126
133
  light_request=parse_bool(light_request),
127
134
  screenshot=parse_bool(screenshot),
135
+ tag=tag,
128
136
  retries=int(obj.get("retries") or 3),
129
137
  backoff=float(obj.get("backoff") or 2.0),
130
138
  )
@@ -167,6 +175,7 @@ def amazon_product_cmd(
167
175
  add_html=parse_bool(add_html),
168
176
  light_request=parse_bool(light_request),
169
177
  screenshot=parse_bool(screenshot),
178
+ tag=tag,
170
179
  retries=int(obj.get("retries") or 3),
171
180
  backoff=float(obj.get("backoff") or 2.0),
172
181
  )
@@ -224,6 +233,12 @@ def amazon_product_cmd(
224
233
  @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
225
234
  @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
226
235
  @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
236
+ @optgroup.option(
237
+ "--tag",
238
+ type=str,
239
+ default=None,
240
+ help="Optional label included in API response headers.",
241
+ )
227
242
  @_batch_options
228
243
  @click.pass_obj
229
244
  def amazon_search_cmd(
@@ -244,6 +259,7 @@ def amazon_search_cmd(
244
259
  add_html: str | None,
245
260
  light_request: str | None,
246
261
  screenshot: str | None,
262
+ tag: str | None,
247
263
  **kwargs,
248
264
  ) -> None:
249
265
  """Search Amazon products."""
@@ -297,6 +313,7 @@ def amazon_search_cmd(
297
313
  add_html=parse_bool(add_html),
298
314
  light_request=parse_bool(light_request),
299
315
  screenshot=parse_bool(screenshot),
316
+ tag=tag,
300
317
  retries=int(obj.get("retries") or 3),
301
318
  backoff=float(obj.get("backoff") or 2.0),
302
319
  )
@@ -345,6 +362,7 @@ def amazon_search_cmd(
345
362
  add_html=parse_bool(add_html),
346
363
  light_request=parse_bool(light_request),
347
364
  screenshot=parse_bool(screenshot),
365
+ tag=tag,
348
366
  retries=int(obj.get("retries") or 3),
349
367
  backoff=float(obj.get("backoff") or 2.0),
350
368
  )
@@ -46,6 +46,12 @@ from ..config import BASE_URL, get_api_key
46
46
  default=None,
47
47
  help="Country code for geolocation (ISO 3166-1).",
48
48
  )
49
+ @click.option(
50
+ "--tag",
51
+ type=str,
52
+ default=None,
53
+ help="Optional label included in API response headers.",
54
+ )
49
55
  @_batch_options # must be after command-specific options
50
56
  @click.pass_obj
51
57
  def chatgpt_cmd(
@@ -54,6 +60,7 @@ def chatgpt_cmd(
54
60
  search: str | None,
55
61
  add_html: str | None,
56
62
  country_code: str | None,
63
+ tag: str | None,
57
64
  **kwargs,
58
65
  ) -> None:
59
66
  """Send a prompt to the ChatGPT API."""
@@ -93,6 +100,7 @@ def chatgpt_cmd(
93
100
  search=parse_bool(search),
94
101
  add_html=parse_bool(add_html),
95
102
  country_code=country_code,
103
+ tag=tag,
96
104
  retries=int(obj.get("retries") or 3),
97
105
  backoff=float(obj.get("backoff") or 2.0),
98
106
  )
@@ -131,6 +139,7 @@ def chatgpt_cmd(
131
139
  search=parse_bool(search),
132
140
  add_html=parse_bool(add_html),
133
141
  country_code=country_code,
142
+ tag=tag,
134
143
  retries=int(obj.get("retries") or 3),
135
144
  backoff=float(obj.get("backoff") or 2.0),
136
145
  )
@@ -60,6 +60,7 @@ def _crawl_build_params(
60
60
  device: str | None,
61
61
  custom_google: str | None,
62
62
  transparent_status_code: str | None,
63
+ tag: str | None = None,
63
64
  scraping_config: str | None = None,
64
65
  ) -> dict[str, str]:
65
66
  """Build ScrapingBee API params dict from crawl options (quick-crawl URL mode)."""
@@ -98,6 +99,7 @@ def _crawl_build_params(
98
99
  device=device,
99
100
  custom_google=custom_google,
100
101
  transparent_status_code=transparent_status_code,
102
+ tag=tag,
101
103
  body=None,
102
104
  scraping_config=scraping_config,
103
105
  )
@@ -248,6 +250,12 @@ def _crawl_build_params(
248
250
  default=None,
249
251
  help="Return target status as-is (true/false).",
250
252
  )
253
+ @optgroup.option(
254
+ "--tag",
255
+ type=str,
256
+ default=None,
257
+ help="Optional label included in API response headers.",
258
+ )
251
259
  @optgroup.group("Crawl", help="Quick-crawl: depth, pages, output, throttling")
252
260
  @optgroup.option(
253
261
  "--max-depth",
@@ -372,6 +380,7 @@ def crawl_cmd(
372
380
  device: str | None,
373
381
  custom_google: str | None,
374
382
  transparent_status_code: str | None,
383
+ tag: str | None,
375
384
  max_depth: int,
376
385
  max_pages: int,
377
386
  allowed_domains: str | None,
@@ -500,6 +509,7 @@ def crawl_cmd(
500
509
  device=device,
501
510
  custom_google=custom_google,
502
511
  transparent_status_code=transparent_status_code,
512
+ tag=tag,
503
513
  scraping_config=scraping_config,
504
514
  )
505
515
  except ValueError as e:
@@ -602,6 +612,7 @@ def crawl_cmd(
602
612
  "--device": device,
603
613
  "--custom-google": custom_google,
604
614
  "--transparent-status-code": transparent_status_code,
615
+ "--tag": tag,
605
616
  }
606
617
  used = [flag for flag, val in api_flags.items() if val is not None]
607
618
  if headers:
@@ -38,6 +38,12 @@ from ..config import BASE_URL, get_api_key
38
38
  help="Country code for results (ISO 3166-1, e.g. us, fr).",
39
39
  )
40
40
  @optgroup.option("--language", type=str, default=None, help="Language code (e.g. en, fr).")
41
+ @optgroup.option(
42
+ "--tag",
43
+ type=str,
44
+ default=None,
45
+ help="Optional label included in API response headers.",
46
+ )
41
47
  @_batch_options
42
48
  @click.pass_obj
43
49
  def fast_search_cmd(
@@ -46,6 +52,7 @@ def fast_search_cmd(
46
52
  page: int | None,
47
53
  country_code: str | None,
48
54
  language: str | None,
55
+ tag: str | None,
49
56
  **kwargs,
50
57
  ) -> None:
51
58
  """Search using the Fast Search API (sub-second results)."""
@@ -86,6 +93,7 @@ def fast_search_cmd(
86
93
  page=page,
87
94
  country_code=country_code,
88
95
  language=language,
96
+ tag=tag,
89
97
  retries=int(obj.get("retries") or 3),
90
98
  backoff=float(obj.get("backoff") or 2.0),
91
99
  )
@@ -122,6 +130,7 @@ def fast_search_cmd(
122
130
  page=page,
123
131
  country_code=country_code,
124
132
  language=language,
133
+ tag=tag,
125
134
  retries=int(obj.get("retries") or 3),
126
135
  backoff=float(obj.get("backoff") or 2.0),
127
136
  )
@@ -83,6 +83,15 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
83
83
  default=None,
84
84
  help="Language code for results (e.g. en, fr, de). Default: en.",
85
85
  )
86
+ @optgroup.option(
87
+ "--date-range",
88
+ type=NormalizedChoice(
89
+ ["past-hour", "past-day", "past-week", "past-month", "past-year"],
90
+ case_sensitive=False,
91
+ ),
92
+ default=None,
93
+ help="Restrict results to the past hour/day/week/month/year.",
94
+ )
86
95
  @optgroup.group("Filters", help="Autocorrection, extra params, and response format")
87
96
  @optgroup.option("--nfpr", type=str, default=None, help="Disable autocorrection (true/false).")
88
97
  @optgroup.option(
@@ -97,6 +106,12 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
97
106
  default=None,
98
107
  help="Light request mode, 10 credits (true/false). Fewer data than regular.",
99
108
  )
109
+ @optgroup.option(
110
+ "--tag",
111
+ type=str,
112
+ default=None,
113
+ help="Optional label included in API response headers.",
114
+ )
100
115
  @_batch_options
101
116
  @click.pass_obj
102
117
  def google_cmd(
@@ -111,6 +126,8 @@ def google_cmd(
111
126
  extra_params: str | None,
112
127
  add_html: str | None,
113
128
  light_request: str | None,
129
+ tag: str | None,
130
+ date_range: str | None,
114
131
  **kwargs,
115
132
  ) -> None:
116
133
  """Search Google using the Google Search API."""
@@ -157,6 +174,8 @@ def google_cmd(
157
174
  extra_params=extra_params,
158
175
  add_html=parse_bool(add_html),
159
176
  light_request=parse_bool(light_request),
177
+ tag=tag,
178
+ date_range=norm_val(date_range),
160
179
  retries=int(obj.get("retries") or 3),
161
180
  backoff=float(obj.get("backoff") or 2.0),
162
181
  )
@@ -199,6 +218,8 @@ def google_cmd(
199
218
  extra_params=extra_params,
200
219
  add_html=parse_bool(add_html),
201
220
  light_request=parse_bool(light_request),
221
+ tag=tag,
222
+ date_range=norm_val(date_range),
202
223
  retries=int(obj.get("retries") or 3),
203
224
  backoff=float(obj.get("backoff") or 2.0),
204
225
  )
@@ -293,6 +293,12 @@ SCRAPE_PRESETS = (
293
293
  default=None,
294
294
  help="Return target status/body as-is (true/false). No retry on 500.",
295
295
  )
296
+ @optgroup.option(
297
+ "--tag",
298
+ type=str,
299
+ default=None,
300
+ help="Optional label included in API response headers.",
301
+ )
296
302
  @optgroup.option(
297
303
  "-X",
298
304
  "--method",
@@ -351,6 +357,7 @@ def scrape_cmd(
351
357
  device: str | None,
352
358
  custom_google: str | None,
353
359
  transparent_status_code: str | None,
360
+ tag: str | None,
354
361
  method: str,
355
362
  body: str | None,
356
363
  escalate_proxy: bool,
@@ -476,6 +483,7 @@ def scrape_cmd(
476
483
  device=device,
477
484
  custom_google=custom_google,
478
485
  transparent_status_code=transparent_status_code,
486
+ tag=tag,
479
487
  body=body,
480
488
  scraping_config=scraping_config,
481
489
  )
@@ -72,6 +72,12 @@ WALMART_SORT_BY = ["best-match", "price-low", "price-high", "best-seller"]
72
72
  @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
73
73
  @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
74
74
  @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
75
+ @optgroup.option(
76
+ "--tag",
77
+ type=str,
78
+ default=None,
79
+ help="Optional label included in API response headers.",
80
+ )
75
81
  @_batch_options
76
82
  @click.pass_obj
77
83
  def walmart_search_cmd(
@@ -90,6 +96,7 @@ def walmart_search_cmd(
90
96
  add_html: str | None,
91
97
  light_request: str | None,
92
98
  screenshot: str | None,
99
+ tag: str | None,
93
100
  **kwargs,
94
101
  ) -> None:
95
102
  """Search Walmart products."""
@@ -141,6 +148,7 @@ def walmart_search_cmd(
141
148
  add_html=parse_bool(add_html),
142
149
  light_request=parse_bool(light_request),
143
150
  screenshot=parse_bool(screenshot),
151
+ tag=tag,
144
152
  retries=int(obj.get("retries") or 3),
145
153
  backoff=float(obj.get("backoff") or 2.0),
146
154
  )
@@ -187,6 +195,7 @@ def walmart_search_cmd(
187
195
  add_html=parse_bool(add_html),
188
196
  light_request=parse_bool(light_request),
189
197
  screenshot=parse_bool(screenshot),
198
+ tag=tag,
190
199
  retries=int(obj.get("retries") or 3),
191
200
  backoff=float(obj.get("backoff") or 2.0),
192
201
  )
@@ -225,6 +234,12 @@ def walmart_search_cmd(
225
234
  @optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
226
235
  @optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
227
236
  @optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
237
+ @optgroup.option(
238
+ "--tag",
239
+ type=str,
240
+ default=None,
241
+ help="Optional label included in API response headers.",
242
+ )
228
243
  @_batch_options
229
244
  @click.pass_obj
230
245
  def walmart_product_cmd(
@@ -237,6 +252,7 @@ def walmart_product_cmd(
237
252
  add_html: str | None,
238
253
  light_request: str | None,
239
254
  screenshot: str | None,
255
+ tag: str | None,
240
256
  **kwargs,
241
257
  ) -> None:
242
258
  """Fetch Walmart product details by product ID."""
@@ -280,6 +296,7 @@ def walmart_product_cmd(
280
296
  add_html=parse_bool(add_html),
281
297
  light_request=parse_bool(light_request),
282
298
  screenshot=parse_bool(screenshot),
299
+ tag=tag,
283
300
  retries=int(obj.get("retries") or 3),
284
301
  backoff=float(obj.get("backoff") or 2.0),
285
302
  )
@@ -320,6 +337,7 @@ def walmart_product_cmd(
320
337
  add_html=parse_bool(add_html),
321
338
  light_request=parse_bool(light_request),
322
339
  screenshot=parse_bool(screenshot),
340
+ tag=tag,
323
341
  retries=int(obj.get("retries") or 3),
324
342
  backoff=float(obj.get("backoff") or 2.0),
325
343
  )
@@ -155,6 +155,12 @@ YOUTUBE_SORT_BY = ["relevance", "rating", "view-count", "upload-date"]
155
155
  @optgroup.option("--location", type=str, default=None, help="With location (true/false).")
156
156
  @optgroup.option("--vr180", type=str, default=None, help="VR180 only (true/false).")
157
157
  @optgroup.option("--purchased", type=str, default=None, help="Purchased only (true/false).")
158
+ @optgroup.option(
159
+ "--tag",
160
+ type=str,
161
+ default=None,
162
+ help="Optional label included in API response headers.",
163
+ )
158
164
  @_batch_options
159
165
  @click.pass_obj
160
166
  def youtube_search_cmd(
@@ -175,6 +181,7 @@ def youtube_search_cmd(
175
181
  location: str | None,
176
182
  vr180: str | None,
177
183
  purchased: str | None,
184
+ tag: str | None,
178
185
  **kwargs,
179
186
  ) -> None:
180
187
  """Search YouTube videos."""
@@ -227,6 +234,7 @@ def youtube_search_cmd(
227
234
  location=parse_bool(location),
228
235
  vr180=parse_bool(vr180),
229
236
  purchased=parse_bool(purchased),
237
+ tag=tag,
230
238
  retries=int(obj.get("retries") or 3),
231
239
  backoff=float(obj.get("backoff") or 2.0),
232
240
  )
@@ -276,6 +284,7 @@ def youtube_search_cmd(
276
284
  location=parse_bool(location),
277
285
  vr180=parse_bool(vr180),
278
286
  purchased=parse_bool(purchased),
287
+ tag=tag,
279
288
  retries=int(obj.get("retries") or 3),
280
289
  backoff=float(obj.get("backoff") or 2.0),
281
290
  )
@@ -299,11 +308,18 @@ def youtube_search_cmd(
299
308
 
300
309
  @click.command("youtube-metadata")
301
310
  @click.argument("video_id", required=False)
311
+ @click.option(
312
+ "--tag",
313
+ type=str,
314
+ default=None,
315
+ help="Optional label included in API response headers.",
316
+ )
302
317
  @_batch_options
303
318
  @click.pass_obj
304
319
  def youtube_metadata_cmd(
305
320
  obj: dict,
306
321
  video_id: str | None,
322
+ tag: str | None,
307
323
  **kwargs,
308
324
  ) -> None:
309
325
  """Fetch YouTube video metadata."""
@@ -340,6 +356,7 @@ def youtube_metadata_cmd(
340
356
  async def api_call(client, vid):
341
357
  return await client.youtube_metadata(
342
358
  _extract_video_id(vid),
359
+ tag=tag,
343
360
  retries=int(obj.get("retries") or 3),
344
361
  backoff=float(obj.get("backoff") or 2.0),
345
362
  )
@@ -373,6 +390,7 @@ def youtube_metadata_cmd(
373
390
  async with Client(key, BASE_URL) as client:
374
391
  data, headers, status_code = await client.youtube_metadata(
375
392
  _extract_video_id(video_id),
393
+ tag=tag,
376
394
  retries=int(obj.get("retries") or 3),
377
395
  backoff=float(obj.get("backoff") or 2.0),
378
396
  )
@@ -90,7 +90,8 @@ def _params_for_discovery(params: dict[str, Any]) -> dict[str, Any]:
90
90
  def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | None:
91
91
  """Return extension when scrape params force a response type (skip detection).
92
92
  Priority: screenshot+json_response -> json; screenshot -> png;
93
- return_page_markdown -> md; return_page_text -> txt; json_response -> json.
93
+ return_page_markdown -> md; return_page_text -> txt;
94
+ json_response / extract_rules / ai_extract_rules / ai_query -> json.
94
95
  """
95
96
  if _param_truthy(params, "screenshot") and _param_truthy(params, "json_response"):
96
97
  return "json"
@@ -102,6 +103,11 @@ def _preferred_extension_from_scrape_params(params: dict[str, Any]) -> str | Non
102
103
  return "txt"
103
104
  if _param_truthy(params, "json_response"):
104
105
  return "json"
106
+ # extract_rules, ai_extract_rules, ai_query always return JSON regardless of URL.
107
+ # Without this, URLs ending in .html would be saved as .html despite JSON body
108
+ # (the URL-path heuristic in extension_for_crawl wins before body sniff).
109
+ if params.get("extract_rules") or params.get("ai_extract_rules") or params.get("ai_query"):
110
+ return "json"
105
111
  return None
106
112
 
107
113
 
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://www.scrapingbee.com/
8
8
  Project-URL: Documentation, https://www.scrapingbee.com/documentation/
9
9
  Project-URL: Repository, https://github.com/ScrapingBee/scrapingbee-cli
10
+ Project-URL: Changelog, https://github.com/ScrapingBee/scrapingbee-cli/blob/main/CHANGELOG.md
11
+ Project-URL: Issues, https://github.com/ScrapingBee/scrapingbee-cli/issues
10
12
  Keywords: scrapingbee,scraping,crawl,scrapy,batch,google-search,amazon,walmart,youtube,chatgpt,cli,api
11
13
  Classifier: Development Status :: 4 - Beta
12
14
  Classifier: Environment :: Console
File without changes