scrapingbee-cli 1.4.1__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scrapingbee_cli-1.4.1/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.3}/PKG-INFO +2 -2
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/README.md +1 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/pyproject.toml +1 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/__init__.py +2 -2
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/cli.py +1 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/cli_utils.py +4 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/client.py +63 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/amazon.py +173 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/chatgpt.py +9 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/crawl.py +11 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/fast_search.py +9 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/google.py +53 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/scrape.py +8 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/walmart.py +18 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/youtube.py +18 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/credits.py +1 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3/src/scrapingbee_cli.egg-info}/PKG-INFO +2 -2
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/LICENSE +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/setup.cfg +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/audit.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/batch.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/__init__.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/auth.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/export.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/schedule.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/tutorial.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/unsafe.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/commands/usage.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/config.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/crawl.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/exec_gate.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/extract.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/tutorial/__init__.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/tutorial/runner.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli/tutorial/steps.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/SOURCES.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
@@ -97,7 +97,7 @@ scrapingbee [command] [arguments] [options]
|
|
|
97
97
|
| `scrape [url]` | Scrape a URL (HTML, JS, screenshot, extract) |
|
|
98
98
|
| `crawl` | Crawl sites following links, with AI extraction and save-pattern filtering |
|
|
99
99
|
| `google` / `fast-search` | Search SERP APIs |
|
|
100
|
-
| `amazon-product` / `amazon-search` | Amazon product and search |
|
|
100
|
+
| `amazon-product` / `amazon-pricing` / `amazon-search` | Amazon product, pricing and search |
|
|
101
101
|
| `walmart-search` / `walmart-product` | Walmart search and product |
|
|
102
102
|
| `youtube-search` / `youtube-metadata` | YouTube search and video metadata |
|
|
103
103
|
| `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
|
|
@@ -58,7 +58,7 @@ scrapingbee [command] [arguments] [options]
|
|
|
58
58
|
| `scrape [url]` | Scrape a URL (HTML, JS, screenshot, extract) |
|
|
59
59
|
| `crawl` | Crawl sites following links, with AI extraction and save-pattern filtering |
|
|
60
60
|
| `google` / `fast-search` | Search SERP APIs |
|
|
61
|
-
| `amazon-product` / `amazon-search` | Amazon product and search |
|
|
61
|
+
| `amazon-product` / `amazon-pricing` / `amazon-search` | Amazon product, pricing and search |
|
|
62
62
|
| `walmart-search` / `walmart-product` | Walmart search and product |
|
|
63
63
|
| `youtube-search` / `youtube-metadata` | YouTube search and video metadata |
|
|
64
64
|
| `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "scrapingbee-cli"
|
|
7
|
-
version = "1.4.1"
|
|
7
|
+
version = "1.4.3"
|
|
8
8
|
description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import platform
|
|
4
4
|
import sys
|
|
5
5
|
|
|
6
|
-
__version__ = "1.4.1"
|
|
6
|
+
__version__ = "1.4.3"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def user_agent_headers() -> dict[str, str]:
|
|
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
|
|
|
12
12
|
Returns a dict of headers:
|
|
13
13
|
User-Agent: ScrapingBee/CLI
|
|
14
14
|
User-Agent-Client: scrapingbee-cli
|
|
15
|
-
User-Agent-Client-Version: 1.4.1
|
|
15
|
+
User-Agent-Client-Version: 1.4.3
|
|
16
16
|
User-Agent-Environment: python
|
|
17
17
|
User-Agent-Environment-Version: 3.14.2
|
|
18
18
|
User-Agent-OS: Darwin arm64
|
|
@@ -1231,7 +1231,7 @@ def _validate_page(value: int | None, name: str = "page") -> None:
|
|
|
1231
1231
|
raise SystemExit(1)
|
|
1232
1232
|
|
|
1233
1233
|
|
|
1234
|
-
def _validate_price_range(min_price:
|
|
1234
|
+
def _validate_price_range(min_price: float | None, max_price: float | None) -> None:
|
|
1235
1235
|
"""Validate min_price/max_price: non-negative and min <= max."""
|
|
1236
1236
|
if min_price is not None and min_price < 0:
|
|
1237
1237
|
click.echo("min_price must be >= 0", err=True)
|
|
@@ -1304,6 +1304,7 @@ def build_scrape_kwargs(
|
|
|
1304
1304
|
device: str | None = None,
|
|
1305
1305
|
custom_google: str | None = None,
|
|
1306
1306
|
transparent_status_code: str | None = None,
|
|
1307
|
+
tag: str | None = None,
|
|
1307
1308
|
body: str | None = None,
|
|
1308
1309
|
scraping_config: str | None = None,
|
|
1309
1310
|
) -> dict[str, Any]:
|
|
@@ -1344,6 +1345,7 @@ def build_scrape_kwargs(
|
|
|
1344
1345
|
"device": device,
|
|
1345
1346
|
"custom_google": parse_bool(custom_google),
|
|
1346
1347
|
"transparent_status_code": parse_bool(transparent_status_code),
|
|
1348
|
+
"tag": tag,
|
|
1347
1349
|
"body": body,
|
|
1348
1350
|
"scraping_config": scraping_config,
|
|
1349
1351
|
}
|
|
@@ -1564,6 +1566,7 @@ def write_output(
|
|
|
1564
1566
|
("spb-cost", "Credit Cost"),
|
|
1565
1567
|
("spb-resolved-url", "Resolved URL"),
|
|
1566
1568
|
("spb-initial-status-code", "Initial Status Code"),
|
|
1569
|
+
("tag", "Tag"),
|
|
1567
1570
|
]:
|
|
1568
1571
|
if key in headers_lower:
|
|
1569
1572
|
_, val = headers_lower[key]
|
|
@@ -176,6 +176,7 @@ class Client:
|
|
|
176
176
|
device: str | None = None,
|
|
177
177
|
custom_google: bool | None = None,
|
|
178
178
|
transparent_status_code: bool | None = None,
|
|
179
|
+
tag: str | None = None,
|
|
179
180
|
body: str | None = None,
|
|
180
181
|
scraping_config: str | None = None,
|
|
181
182
|
retries: int = 3,
|
|
@@ -218,6 +219,7 @@ class Client:
|
|
|
218
219
|
("device", device),
|
|
219
220
|
("custom_google", self._bool(custom_google)),
|
|
220
221
|
("transparent_status_code", self._bool(transparent_status_code)),
|
|
222
|
+
("tag", tag),
|
|
221
223
|
("scraping_config", scraping_config),
|
|
222
224
|
]:
|
|
223
225
|
if v is not None:
|
|
@@ -290,6 +292,11 @@ class Client:
|
|
|
290
292
|
extra_params: str | None = None,
|
|
291
293
|
add_html: bool | None = None,
|
|
292
294
|
light_request: bool | None = None,
|
|
295
|
+
tag: str | None = None,
|
|
296
|
+
date_range: str | None = None,
|
|
297
|
+
sort_by: str | None = None,
|
|
298
|
+
min_price: float | None = None,
|
|
299
|
+
max_price: float | None = None,
|
|
293
300
|
retries: int = 3,
|
|
294
301
|
backoff: float = 2.0,
|
|
295
302
|
) -> tuple[bytes, dict, int]:
|
|
@@ -304,6 +311,11 @@ class Client:
|
|
|
304
311
|
"extra_params": extra_params,
|
|
305
312
|
"add_html": self._bool(add_html),
|
|
306
313
|
"light_request": self._bool(light_request),
|
|
314
|
+
"tag": tag,
|
|
315
|
+
"date_range": date_range,
|
|
316
|
+
"sort_by": sort_by,
|
|
317
|
+
"min_price": min_price,
|
|
318
|
+
"max_price": max_price,
|
|
307
319
|
}
|
|
308
320
|
return await self._get_with_retry(
|
|
309
321
|
"/google",
|
|
@@ -318,6 +330,7 @@ class Client:
|
|
|
318
330
|
page: int | None = None,
|
|
319
331
|
country_code: str | None = None,
|
|
320
332
|
language: str | None = None,
|
|
333
|
+
tag: str | None = None,
|
|
321
334
|
retries: int = 3,
|
|
322
335
|
backoff: float = 2.0,
|
|
323
336
|
) -> tuple[bytes, dict, int]:
|
|
@@ -326,6 +339,7 @@ class Client:
|
|
|
326
339
|
"page": page if page is not None else None,
|
|
327
340
|
"country_code": country_code,
|
|
328
341
|
"language": language,
|
|
342
|
+
"tag": tag,
|
|
329
343
|
}
|
|
330
344
|
return await self._get_with_retry(
|
|
331
345
|
"/fast_search",
|
|
@@ -346,6 +360,7 @@ class Client:
|
|
|
346
360
|
add_html: bool | None = None,
|
|
347
361
|
light_request: bool | None = None,
|
|
348
362
|
screenshot: bool | None = None,
|
|
363
|
+
tag: str | None = None,
|
|
349
364
|
retries: int = 3,
|
|
350
365
|
backoff: float = 2.0,
|
|
351
366
|
) -> tuple[bytes, dict, int]:
|
|
@@ -360,6 +375,7 @@ class Client:
|
|
|
360
375
|
"add_html": self._bool(add_html),
|
|
361
376
|
"light_request": self._bool(light_request),
|
|
362
377
|
"screenshot": self._bool(screenshot),
|
|
378
|
+
"tag": tag,
|
|
363
379
|
}
|
|
364
380
|
return await self._get_with_retry(
|
|
365
381
|
"/amazon/product",
|
|
@@ -368,6 +384,40 @@ class Client:
|
|
|
368
384
|
backoff=backoff,
|
|
369
385
|
)
|
|
370
386
|
|
|
387
|
+
async def amazon_pricing(
|
|
388
|
+
self,
|
|
389
|
+
asin: str,
|
|
390
|
+
device: str | None = None,
|
|
391
|
+
domain: str | None = None,
|
|
392
|
+
country: str | None = None,
|
|
393
|
+
zip_code: str | None = None,
|
|
394
|
+
language: str | None = None,
|
|
395
|
+
currency: str | None = None,
|
|
396
|
+
add_html: bool | None = None,
|
|
397
|
+
light_request: bool | None = None,
|
|
398
|
+
tag: str | None = None,
|
|
399
|
+
retries: int = 3,
|
|
400
|
+
backoff: float = 2.0,
|
|
401
|
+
) -> tuple[bytes, dict, int]:
|
|
402
|
+
params = {
|
|
403
|
+
"asin": asin,
|
|
404
|
+
"device": device,
|
|
405
|
+
"domain": domain,
|
|
406
|
+
"country": country,
|
|
407
|
+
"zip_code": zip_code,
|
|
408
|
+
"language": language,
|
|
409
|
+
"currency": currency,
|
|
410
|
+
"add_html": self._bool(add_html),
|
|
411
|
+
"light_request": self._bool(light_request),
|
|
412
|
+
"tag": tag,
|
|
413
|
+
}
|
|
414
|
+
return await self._get_with_retry(
|
|
415
|
+
"/amazon/pricing",
|
|
416
|
+
params,
|
|
417
|
+
retries=retries,
|
|
418
|
+
backoff=backoff,
|
|
419
|
+
)
|
|
420
|
+
|
|
371
421
|
async def amazon_search(
|
|
372
422
|
self,
|
|
373
423
|
query: str,
|
|
@@ -386,6 +436,7 @@ class Client:
|
|
|
386
436
|
add_html: bool | None = None,
|
|
387
437
|
light_request: bool | None = None,
|
|
388
438
|
screenshot: bool | None = None,
|
|
439
|
+
tag: str | None = None,
|
|
389
440
|
retries: int = 3,
|
|
390
441
|
backoff: float = 2.0,
|
|
391
442
|
) -> tuple[bytes, dict, int]:
|
|
@@ -406,6 +457,7 @@ class Client:
|
|
|
406
457
|
"add_html": self._bool(add_html),
|
|
407
458
|
"light_request": self._bool(light_request),
|
|
408
459
|
"screenshot": self._bool(screenshot),
|
|
460
|
+
"tag": tag,
|
|
409
461
|
}
|
|
410
462
|
return await self._get_with_retry(
|
|
411
463
|
"/amazon/search",
|
|
@@ -430,6 +482,7 @@ class Client:
|
|
|
430
482
|
add_html: bool | None = None,
|
|
431
483
|
light_request: bool | None = None,
|
|
432
484
|
screenshot: bool | None = None,
|
|
485
|
+
tag: str | None = None,
|
|
433
486
|
retries: int = 3,
|
|
434
487
|
backoff: float = 2.0,
|
|
435
488
|
) -> tuple[bytes, dict, int]:
|
|
@@ -448,6 +501,7 @@ class Client:
|
|
|
448
501
|
"add_html": self._bool(add_html),
|
|
449
502
|
"light_request": self._bool(light_request),
|
|
450
503
|
"screenshot": self._bool(screenshot),
|
|
504
|
+
"tag": tag,
|
|
451
505
|
}
|
|
452
506
|
return await self._get_with_retry(
|
|
453
507
|
"/walmart/search",
|
|
@@ -466,6 +520,7 @@ class Client:
|
|
|
466
520
|
add_html: bool | None = None,
|
|
467
521
|
light_request: bool | None = None,
|
|
468
522
|
screenshot: bool | None = None,
|
|
523
|
+
tag: str | None = None,
|
|
469
524
|
retries: int = 3,
|
|
470
525
|
backoff: float = 2.0,
|
|
471
526
|
) -> tuple[bytes, dict, int]:
|
|
@@ -478,6 +533,7 @@ class Client:
|
|
|
478
533
|
"add_html": self._bool(add_html),
|
|
479
534
|
"light_request": self._bool(light_request),
|
|
480
535
|
"screenshot": self._bool(screenshot),
|
|
536
|
+
"tag": tag,
|
|
481
537
|
}
|
|
482
538
|
return await self._get_with_retry(
|
|
483
539
|
"/walmart/product",
|
|
@@ -504,6 +560,7 @@ class Client:
|
|
|
504
560
|
location: bool | None = None,
|
|
505
561
|
vr180: bool | None = None,
|
|
506
562
|
purchased: bool | None = None,
|
|
563
|
+
tag: str | None = None,
|
|
507
564
|
retries: int = 3,
|
|
508
565
|
backoff: float = 2.0,
|
|
509
566
|
) -> tuple[bytes, dict, int]:
|
|
@@ -524,6 +581,7 @@ class Client:
|
|
|
524
581
|
"location": self._bool(location),
|
|
525
582
|
"vr180": self._bool(vr180),
|
|
526
583
|
"purchased": self._bool(purchased),
|
|
584
|
+
"tag": tag,
|
|
527
585
|
}
|
|
528
586
|
return await self._get_with_retry(
|
|
529
587
|
"/youtube/search",
|
|
@@ -535,12 +593,13 @@ class Client:
|
|
|
535
593
|
async def youtube_metadata(
|
|
536
594
|
self,
|
|
537
595
|
video_id: str,
|
|
596
|
+
tag: str | None = None,
|
|
538
597
|
retries: int = 3,
|
|
539
598
|
backoff: float = 2.0,
|
|
540
599
|
) -> tuple[bytes, dict, int]:
|
|
541
600
|
return await self._get_with_retry(
|
|
542
601
|
"/youtube/metadata",
|
|
543
|
-
{"video_id": video_id},
|
|
602
|
+
{"video_id": video_id, "tag": tag},
|
|
544
603
|
retries=retries,
|
|
545
604
|
backoff=backoff,
|
|
546
605
|
)
|
|
@@ -551,6 +610,7 @@ class Client:
|
|
|
551
610
|
search: bool | None = None,
|
|
552
611
|
add_html: bool | None = None,
|
|
553
612
|
country_code: str | None = None,
|
|
613
|
+
tag: str | None = None,
|
|
554
614
|
retries: int = 3,
|
|
555
615
|
backoff: float = 2.0,
|
|
556
616
|
) -> tuple[bytes, dict, int]:
|
|
@@ -561,6 +621,8 @@ class Client:
|
|
|
561
621
|
params["add_html"] = str(add_html).lower()
|
|
562
622
|
if country_code is not None:
|
|
563
623
|
params["country_code"] = country_code
|
|
624
|
+
if tag is not None:
|
|
625
|
+
params["tag"] = tag
|
|
564
626
|
return await self._get_with_retry(
|
|
565
627
|
"/chatgpt",
|
|
566
628
|
params,
|
|
@@ -66,6 +66,12 @@ AMAZON_SORT_BY = [
|
|
|
66
66
|
)
|
|
67
67
|
@optgroup.option("--light-request", type=str, default=None, help="Light request mode (true/false).")
|
|
68
68
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
69
|
+
@optgroup.option(
|
|
70
|
+
"--tag",
|
|
71
|
+
type=str,
|
|
72
|
+
default=None,
|
|
73
|
+
help="Optional label included in API response headers.",
|
|
74
|
+
)
|
|
69
75
|
@_batch_options
|
|
70
76
|
@click.pass_obj
|
|
71
77
|
def amazon_product_cmd(
|
|
@@ -80,6 +86,7 @@ def amazon_product_cmd(
|
|
|
80
86
|
add_html: str | None,
|
|
81
87
|
light_request: str | None,
|
|
82
88
|
screenshot: str | None,
|
|
89
|
+
tag: str | None,
|
|
83
90
|
**kwargs,
|
|
84
91
|
) -> None:
|
|
85
92
|
"""Fetch Amazon product details by ASIN."""
|
|
@@ -125,6 +132,7 @@ def amazon_product_cmd(
|
|
|
125
132
|
add_html=parse_bool(add_html),
|
|
126
133
|
light_request=parse_bool(light_request),
|
|
127
134
|
screenshot=parse_bool(screenshot),
|
|
135
|
+
tag=tag,
|
|
128
136
|
retries=int(obj.get("retries") or 3),
|
|
129
137
|
backoff=float(obj.get("backoff") or 2.0),
|
|
130
138
|
)
|
|
@@ -167,6 +175,7 @@ def amazon_product_cmd(
|
|
|
167
175
|
add_html=parse_bool(add_html),
|
|
168
176
|
light_request=parse_bool(light_request),
|
|
169
177
|
screenshot=parse_bool(screenshot),
|
|
178
|
+
tag=tag,
|
|
170
179
|
retries=int(obj.get("retries") or 3),
|
|
171
180
|
backoff=float(obj.get("backoff") or 2.0),
|
|
172
181
|
)
|
|
@@ -189,6 +198,160 @@ def amazon_product_cmd(
|
|
|
189
198
|
asyncio.run(_single())
|
|
190
199
|
|
|
191
200
|
|
|
201
|
+
@click.command("amazon-pricing")
|
|
202
|
+
@click.argument("asin", required=False)
|
|
203
|
+
@optgroup.group("Locale", help="Device, domain, country, language, and currency")
|
|
204
|
+
@optgroup.option(
|
|
205
|
+
"--device",
|
|
206
|
+
type=click.Choice(["desktop"], case_sensitive=False),
|
|
207
|
+
default=None,
|
|
208
|
+
help="Device: desktop.",
|
|
209
|
+
)
|
|
210
|
+
@optgroup.option(
|
|
211
|
+
"--domain", type=str, default=None, help="Amazon domain (e.g. com, co.uk, de, fr)."
|
|
212
|
+
)
|
|
213
|
+
@optgroup.option("--country", type=str, default=None, help="Country code (e.g. us, gb, de).")
|
|
214
|
+
@optgroup.option(
|
|
215
|
+
"--zip-code", type=str, default=None, help="ZIP code for local availability/pricing."
|
|
216
|
+
)
|
|
217
|
+
@optgroup.option(
|
|
218
|
+
"--language", type=str, default=None, help="Language code (e.g. en_US, es_US, fr_FR)."
|
|
219
|
+
)
|
|
220
|
+
@optgroup.option("--currency", type=str, default=None, help="Currency code (e.g. USD, EUR, GBP).")
|
|
221
|
+
@optgroup.group("Output", help="Response format options")
|
|
222
|
+
@optgroup.option(
|
|
223
|
+
"--add-html", type=str, default=None, help="Include full HTML in response (true/false)."
|
|
224
|
+
)
|
|
225
|
+
@optgroup.option("--light-request", type=str, default=None, help="Light request mode (true/false).")
|
|
226
|
+
@optgroup.option(
|
|
227
|
+
"--tag",
|
|
228
|
+
type=str,
|
|
229
|
+
default=None,
|
|
230
|
+
help="Optional label included in API response headers.",
|
|
231
|
+
)
|
|
232
|
+
@_batch_options
|
|
233
|
+
@click.pass_obj
|
|
234
|
+
def amazon_pricing_cmd(
|
|
235
|
+
obj: dict,
|
|
236
|
+
asin: str | None,
|
|
237
|
+
device: str | None,
|
|
238
|
+
domain: str | None,
|
|
239
|
+
country: str | None,
|
|
240
|
+
zip_code: str | None,
|
|
241
|
+
language: str | None,
|
|
242
|
+
currency: str | None,
|
|
243
|
+
add_html: str | None,
|
|
244
|
+
light_request: str | None,
|
|
245
|
+
tag: str | None,
|
|
246
|
+
**kwargs,
|
|
247
|
+
) -> None:
|
|
248
|
+
"""Fetch Amazon pricing details by ASIN."""
|
|
249
|
+
store_common_options(obj, **kwargs)
|
|
250
|
+
input_file = obj.get("input_file")
|
|
251
|
+
try:
|
|
252
|
+
key = get_api_key(None)
|
|
253
|
+
except ValueError as e:
|
|
254
|
+
click.echo(str(e), err=True)
|
|
255
|
+
raise SystemExit(1)
|
|
256
|
+
|
|
257
|
+
if input_file:
|
|
258
|
+
if asin:
|
|
259
|
+
click.echo("cannot use both --input-file and positional ASIN", err=True)
|
|
260
|
+
raise SystemExit(1)
|
|
261
|
+
try:
|
|
262
|
+
inputs = read_input_file(input_file, input_column=obj.get("input_column"))
|
|
263
|
+
except ValueError as e:
|
|
264
|
+
click.echo(str(e), err=True)
|
|
265
|
+
raise SystemExit(1)
|
|
266
|
+
inputs = prepare_batch_inputs(inputs, obj)
|
|
267
|
+
usage_info = get_batch_usage(None)
|
|
268
|
+
try:
|
|
269
|
+
validate_batch_run(obj["concurrency"], len(inputs), usage_info)
|
|
270
|
+
except ValueError as e:
|
|
271
|
+
click.echo(str(e), err=True)
|
|
272
|
+
raise SystemExit(1)
|
|
273
|
+
concurrency = resolve_batch_concurrency(obj["concurrency"], usage_info, len(inputs))
|
|
274
|
+
|
|
275
|
+
skip_n = (
|
|
276
|
+
_find_completed_n(obj.get("output_dir") or "") if obj.get("resume") else frozenset()
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
async def api_call(client, a):
|
|
280
|
+
return await client.amazon_pricing(
|
|
281
|
+
a,
|
|
282
|
+
device=device,
|
|
283
|
+
domain=domain,
|
|
284
|
+
country=country,
|
|
285
|
+
zip_code=zip_code,
|
|
286
|
+
language=language,
|
|
287
|
+
currency=currency,
|
|
288
|
+
add_html=parse_bool(add_html),
|
|
289
|
+
light_request=parse_bool(light_request),
|
|
290
|
+
tag=tag,
|
|
291
|
+
retries=int(obj.get("retries") or 3),
|
|
292
|
+
backoff=float(obj.get("backoff") or 2.0),
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
run_api_batch(
|
|
296
|
+
key=key,
|
|
297
|
+
inputs=inputs,
|
|
298
|
+
concurrency=concurrency,
|
|
299
|
+
from_user=obj["concurrency"] > 0,
|
|
300
|
+
skip_n=skip_n,
|
|
301
|
+
output_dir=obj.get("output_dir") or None,
|
|
302
|
+
verbose=obj["verbose"],
|
|
303
|
+
show_progress=obj.get("progress", True),
|
|
304
|
+
api_call=api_call,
|
|
305
|
+
on_complete=obj.get("on_complete"),
|
|
306
|
+
output_format=obj.get("output_format"),
|
|
307
|
+
post_process=obj.get("post_process"),
|
|
308
|
+
update_csv_path=input_file if obj.get("update_csv") else None,
|
|
309
|
+
input_column=obj.get("input_column"),
|
|
310
|
+
output_file=obj.get("output_file") or None,
|
|
311
|
+
extract_field=obj.get("extract_field"),
|
|
312
|
+
fields=obj.get("fields"),
|
|
313
|
+
)
|
|
314
|
+
return
|
|
315
|
+
|
|
316
|
+
if not asin:
|
|
317
|
+
click.echo("expected one ASIN, or use --input-file for batch", err=True)
|
|
318
|
+
raise SystemExit(1)
|
|
319
|
+
|
|
320
|
+
async def _single() -> None:
|
|
321
|
+
async with Client(key, BASE_URL) as client:
|
|
322
|
+
data, headers, status_code = await client.amazon_pricing(
|
|
323
|
+
asin,
|
|
324
|
+
device=device,
|
|
325
|
+
domain=domain,
|
|
326
|
+
country=country,
|
|
327
|
+
zip_code=zip_code,
|
|
328
|
+
language=language,
|
|
329
|
+
currency=currency,
|
|
330
|
+
add_html=parse_bool(add_html),
|
|
331
|
+
light_request=parse_bool(light_request),
|
|
332
|
+
tag=tag,
|
|
333
|
+
retries=int(obj.get("retries") or 3),
|
|
334
|
+
backoff=float(obj.get("backoff") or 2.0),
|
|
335
|
+
)
|
|
336
|
+
check_api_response(data, status_code)
|
|
337
|
+
from ..credits import amazon_credits
|
|
338
|
+
|
|
339
|
+
write_output(
|
|
340
|
+
data,
|
|
341
|
+
headers,
|
|
342
|
+
status_code,
|
|
343
|
+
obj["output_file"],
|
|
344
|
+
obj["verbose"],
|
|
345
|
+
smart_extract=obj.get("smart_extract"),
|
|
346
|
+
extract_field=obj.get("extract_field"),
|
|
347
|
+
fields=obj.get("fields"),
|
|
348
|
+
command="amazon-pricing",
|
|
349
|
+
credit_cost=amazon_credits(parse_bool(light_request)),
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
asyncio.run(_single())
|
|
353
|
+
|
|
354
|
+
|
|
192
355
|
@click.command("amazon-search")
|
|
193
356
|
@click.argument("query", required=False)
|
|
194
357
|
@optgroup.group("Pagination & sort", help="Pages and sort order")
|
|
@@ -224,6 +387,12 @@ def amazon_product_cmd(
|
|
|
224
387
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
225
388
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
226
389
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
390
|
+
@optgroup.option(
|
|
391
|
+
"--tag",
|
|
392
|
+
type=str,
|
|
393
|
+
default=None,
|
|
394
|
+
help="Optional label included in API response headers.",
|
|
395
|
+
)
|
|
227
396
|
@_batch_options
|
|
228
397
|
@click.pass_obj
|
|
229
398
|
def amazon_search_cmd(
|
|
@@ -244,6 +413,7 @@ def amazon_search_cmd(
|
|
|
244
413
|
add_html: str | None,
|
|
245
414
|
light_request: str | None,
|
|
246
415
|
screenshot: str | None,
|
|
416
|
+
tag: str | None,
|
|
247
417
|
**kwargs,
|
|
248
418
|
) -> None:
|
|
249
419
|
"""Search Amazon products."""
|
|
@@ -297,6 +467,7 @@ def amazon_search_cmd(
|
|
|
297
467
|
add_html=parse_bool(add_html),
|
|
298
468
|
light_request=parse_bool(light_request),
|
|
299
469
|
screenshot=parse_bool(screenshot),
|
|
470
|
+
tag=tag,
|
|
300
471
|
retries=int(obj.get("retries") or 3),
|
|
301
472
|
backoff=float(obj.get("backoff") or 2.0),
|
|
302
473
|
)
|
|
@@ -345,6 +516,7 @@ def amazon_search_cmd(
|
|
|
345
516
|
add_html=parse_bool(add_html),
|
|
346
517
|
light_request=parse_bool(light_request),
|
|
347
518
|
screenshot=parse_bool(screenshot),
|
|
519
|
+
tag=tag,
|
|
348
520
|
retries=int(obj.get("retries") or 3),
|
|
349
521
|
backoff=float(obj.get("backoff") or 2.0),
|
|
350
522
|
)
|
|
@@ -369,4 +541,5 @@ def amazon_search_cmd(
|
|
|
369
541
|
|
|
370
542
|
def register(cli: click.Group) -> None:
|
|
371
543
|
cli.add_command(amazon_product_cmd, "amazon-product")
|
|
544
|
+
cli.add_command(amazon_pricing_cmd, "amazon-pricing")
|
|
372
545
|
cli.add_command(amazon_search_cmd, "amazon-search")
|
|
@@ -46,6 +46,12 @@ from ..config import BASE_URL, get_api_key
|
|
|
46
46
|
default=None,
|
|
47
47
|
help="Country code for geolocation (ISO 3166-1).",
|
|
48
48
|
)
|
|
49
|
+
@click.option(
|
|
50
|
+
"--tag",
|
|
51
|
+
type=str,
|
|
52
|
+
default=None,
|
|
53
|
+
help="Optional label included in API response headers.",
|
|
54
|
+
)
|
|
49
55
|
@_batch_options # must be after command-specific options
|
|
50
56
|
@click.pass_obj
|
|
51
57
|
def chatgpt_cmd(
|
|
@@ -54,6 +60,7 @@ def chatgpt_cmd(
|
|
|
54
60
|
search: str | None,
|
|
55
61
|
add_html: str | None,
|
|
56
62
|
country_code: str | None,
|
|
63
|
+
tag: str | None,
|
|
57
64
|
**kwargs,
|
|
58
65
|
) -> None:
|
|
59
66
|
"""Send a prompt to the ChatGPT API."""
|
|
@@ -93,6 +100,7 @@ def chatgpt_cmd(
|
|
|
93
100
|
search=parse_bool(search),
|
|
94
101
|
add_html=parse_bool(add_html),
|
|
95
102
|
country_code=country_code,
|
|
103
|
+
tag=tag,
|
|
96
104
|
retries=int(obj.get("retries") or 3),
|
|
97
105
|
backoff=float(obj.get("backoff") or 2.0),
|
|
98
106
|
)
|
|
@@ -131,6 +139,7 @@ def chatgpt_cmd(
|
|
|
131
139
|
search=parse_bool(search),
|
|
132
140
|
add_html=parse_bool(add_html),
|
|
133
141
|
country_code=country_code,
|
|
142
|
+
tag=tag,
|
|
134
143
|
retries=int(obj.get("retries") or 3),
|
|
135
144
|
backoff=float(obj.get("backoff") or 2.0),
|
|
136
145
|
)
|
|
@@ -60,6 +60,7 @@ def _crawl_build_params(
|
|
|
60
60
|
device: str | None,
|
|
61
61
|
custom_google: str | None,
|
|
62
62
|
transparent_status_code: str | None,
|
|
63
|
+
tag: str | None = None,
|
|
63
64
|
scraping_config: str | None = None,
|
|
64
65
|
) -> dict[str, str]:
|
|
65
66
|
"""Build ScrapingBee API params dict from crawl options (quick-crawl URL mode)."""
|
|
@@ -98,6 +99,7 @@ def _crawl_build_params(
|
|
|
98
99
|
device=device,
|
|
99
100
|
custom_google=custom_google,
|
|
100
101
|
transparent_status_code=transparent_status_code,
|
|
102
|
+
tag=tag,
|
|
101
103
|
body=None,
|
|
102
104
|
scraping_config=scraping_config,
|
|
103
105
|
)
|
|
@@ -248,6 +250,12 @@ def _crawl_build_params(
|
|
|
248
250
|
default=None,
|
|
249
251
|
help="Return target status as-is (true/false).",
|
|
250
252
|
)
|
|
253
|
+
@optgroup.option(
|
|
254
|
+
"--tag",
|
|
255
|
+
type=str,
|
|
256
|
+
default=None,
|
|
257
|
+
help="Optional label included in API response headers.",
|
|
258
|
+
)
|
|
251
259
|
@optgroup.group("Crawl", help="Quick-crawl: depth, pages, output, throttling")
|
|
252
260
|
@optgroup.option(
|
|
253
261
|
"--max-depth",
|
|
@@ -372,6 +380,7 @@ def crawl_cmd(
|
|
|
372
380
|
device: str | None,
|
|
373
381
|
custom_google: str | None,
|
|
374
382
|
transparent_status_code: str | None,
|
|
383
|
+
tag: str | None,
|
|
375
384
|
max_depth: int,
|
|
376
385
|
max_pages: int,
|
|
377
386
|
allowed_domains: str | None,
|
|
@@ -500,6 +509,7 @@ def crawl_cmd(
|
|
|
500
509
|
device=device,
|
|
501
510
|
custom_google=custom_google,
|
|
502
511
|
transparent_status_code=transparent_status_code,
|
|
512
|
+
tag=tag,
|
|
503
513
|
scraping_config=scraping_config,
|
|
504
514
|
)
|
|
505
515
|
except ValueError as e:
|
|
@@ -602,6 +612,7 @@ def crawl_cmd(
|
|
|
602
612
|
"--device": device,
|
|
603
613
|
"--custom-google": custom_google,
|
|
604
614
|
"--transparent-status-code": transparent_status_code,
|
|
615
|
+
"--tag": tag,
|
|
605
616
|
}
|
|
606
617
|
used = [flag for flag, val in api_flags.items() if val is not None]
|
|
607
618
|
if headers:
|
|
@@ -38,6 +38,12 @@ from ..config import BASE_URL, get_api_key
|
|
|
38
38
|
help="Country code for results (ISO 3166-1, e.g. us, fr).",
|
|
39
39
|
)
|
|
40
40
|
@optgroup.option("--language", type=str, default=None, help="Language code (e.g. en, fr).")
|
|
41
|
+
@optgroup.option(
|
|
42
|
+
"--tag",
|
|
43
|
+
type=str,
|
|
44
|
+
default=None,
|
|
45
|
+
help="Optional label included in API response headers.",
|
|
46
|
+
)
|
|
41
47
|
@_batch_options
|
|
42
48
|
@click.pass_obj
|
|
43
49
|
def fast_search_cmd(
|
|
@@ -46,6 +52,7 @@ def fast_search_cmd(
|
|
|
46
52
|
page: int | None,
|
|
47
53
|
country_code: str | None,
|
|
48
54
|
language: str | None,
|
|
55
|
+
tag: str | None,
|
|
49
56
|
**kwargs,
|
|
50
57
|
) -> None:
|
|
51
58
|
"""Search using the Fast Search API (sub-second results)."""
|
|
@@ -86,6 +93,7 @@ def fast_search_cmd(
|
|
|
86
93
|
page=page,
|
|
87
94
|
country_code=country_code,
|
|
88
95
|
language=language,
|
|
96
|
+
tag=tag,
|
|
89
97
|
retries=int(obj.get("retries") or 3),
|
|
90
98
|
backoff=float(obj.get("backoff") or 2.0),
|
|
91
99
|
)
|
|
@@ -122,6 +130,7 @@ def fast_search_cmd(
|
|
|
122
130
|
page=page,
|
|
123
131
|
country_code=country_code,
|
|
124
132
|
language=language,
|
|
133
|
+
tag=tag,
|
|
125
134
|
retries=int(obj.get("retries") or 3),
|
|
126
135
|
backoff=float(obj.get("backoff") or 2.0),
|
|
127
136
|
)
|
|
@@ -20,6 +20,7 @@ from ..cli_utils import (
|
|
|
20
20
|
NormalizedChoice,
|
|
21
21
|
_batch_options,
|
|
22
22
|
_validate_page,
|
|
23
|
+
_validate_price_range,
|
|
23
24
|
check_api_response,
|
|
24
25
|
norm_val,
|
|
25
26
|
parse_bool,
|
|
@@ -83,6 +84,36 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
|
|
|
83
84
|
default=None,
|
|
84
85
|
help="Language code for results (e.g. en, fr, de). Default: en.",
|
|
85
86
|
)
|
|
87
|
+
@optgroup.option(
|
|
88
|
+
"--date-range",
|
|
89
|
+
type=NormalizedChoice(
|
|
90
|
+
["past-hour", "past-day", "past-week", "past-month", "past-year"],
|
|
91
|
+
case_sensitive=False,
|
|
92
|
+
),
|
|
93
|
+
default=None,
|
|
94
|
+
help="Restrict results to the past hour/day/week/month/year.",
|
|
95
|
+
)
|
|
96
|
+
@optgroup.group("Shopping", help="Options for --search-type shopping only")
|
|
97
|
+
@optgroup.option(
|
|
98
|
+
"--sort-by",
|
|
99
|
+
type=NormalizedChoice(
|
|
100
|
+
["relevance", "reviews", "price-asc", "price-desc"], case_sensitive=False
|
|
101
|
+
),
|
|
102
|
+
default=None,
|
|
103
|
+
help="Sort Shopping results: relevance, reviews, price-asc, price-desc.",
|
|
104
|
+
)
|
|
105
|
+
@optgroup.option(
|
|
106
|
+
"--min-price",
|
|
107
|
+
type=float,
|
|
108
|
+
default=None,
|
|
109
|
+
help="Minimum price filter, in the marketplace's native currency.",
|
|
110
|
+
)
|
|
111
|
+
@optgroup.option(
|
|
112
|
+
"--max-price",
|
|
113
|
+
type=float,
|
|
114
|
+
default=None,
|
|
115
|
+
help="Maximum price filter, in the marketplace's native currency.",
|
|
116
|
+
)
|
|
86
117
|
@optgroup.group("Filters", help="Autocorrection, extra params, and response format")
|
|
87
118
|
@optgroup.option("--nfpr", type=str, default=None, help="Disable autocorrection (true/false).")
|
|
88
119
|
@optgroup.option(
|
|
@@ -97,6 +128,12 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
|
|
|
97
128
|
default=None,
|
|
98
129
|
help="Light request mode, 10 credits (true/false). Fewer data than regular.",
|
|
99
130
|
)
|
|
131
|
+
@optgroup.option(
|
|
132
|
+
"--tag",
|
|
133
|
+
type=str,
|
|
134
|
+
default=None,
|
|
135
|
+
help="Optional label included in API response headers.",
|
|
136
|
+
)
|
|
100
137
|
@_batch_options
|
|
101
138
|
@click.pass_obj
|
|
102
139
|
def google_cmd(
|
|
@@ -111,6 +148,11 @@ def google_cmd(
|
|
|
111
148
|
extra_params: str | None,
|
|
112
149
|
add_html: str | None,
|
|
113
150
|
light_request: str | None,
|
|
151
|
+
tag: str | None,
|
|
152
|
+
date_range: str | None,
|
|
153
|
+
sort_by: str | None,
|
|
154
|
+
min_price: float | None,
|
|
155
|
+
max_price: float | None,
|
|
114
156
|
**kwargs,
|
|
115
157
|
) -> None:
|
|
116
158
|
"""Search Google using the Google Search API."""
|
|
@@ -122,6 +164,7 @@ def google_cmd(
|
|
|
122
164
|
click.echo(str(e), err=True)
|
|
123
165
|
raise SystemExit(1)
|
|
124
166
|
_validate_page(page)
|
|
167
|
+
_validate_price_range(min_price, max_price)
|
|
125
168
|
|
|
126
169
|
if input_file:
|
|
127
170
|
if query:
|
|
@@ -157,6 +200,11 @@ def google_cmd(
|
|
|
157
200
|
extra_params=extra_params,
|
|
158
201
|
add_html=parse_bool(add_html),
|
|
159
202
|
light_request=parse_bool(light_request),
|
|
203
|
+
tag=tag,
|
|
204
|
+
date_range=norm_val(date_range),
|
|
205
|
+
sort_by=norm_val(sort_by),
|
|
206
|
+
min_price=min_price,
|
|
207
|
+
max_price=max_price,
|
|
160
208
|
retries=int(obj.get("retries") or 3),
|
|
161
209
|
backoff=float(obj.get("backoff") or 2.0),
|
|
162
210
|
)
|
|
@@ -199,6 +247,11 @@ def google_cmd(
|
|
|
199
247
|
extra_params=extra_params,
|
|
200
248
|
add_html=parse_bool(add_html),
|
|
201
249
|
light_request=parse_bool(light_request),
|
|
250
|
+
tag=tag,
|
|
251
|
+
date_range=norm_val(date_range),
|
|
252
|
+
sort_by=norm_val(sort_by),
|
|
253
|
+
min_price=min_price,
|
|
254
|
+
max_price=max_price,
|
|
202
255
|
retries=int(obj.get("retries") or 3),
|
|
203
256
|
backoff=float(obj.get("backoff") or 2.0),
|
|
204
257
|
)
|
|
@@ -293,6 +293,12 @@ SCRAPE_PRESETS = (
|
|
|
293
293
|
default=None,
|
|
294
294
|
help="Return target status/body as-is (true/false). No retry on 500.",
|
|
295
295
|
)
|
|
296
|
+
@optgroup.option(
|
|
297
|
+
"--tag",
|
|
298
|
+
type=str,
|
|
299
|
+
default=None,
|
|
300
|
+
help="Optional label included in API response headers.",
|
|
301
|
+
)
|
|
296
302
|
@optgroup.option(
|
|
297
303
|
"-X",
|
|
298
304
|
"--method",
|
|
@@ -351,6 +357,7 @@ def scrape_cmd(
|
|
|
351
357
|
device: str | None,
|
|
352
358
|
custom_google: str | None,
|
|
353
359
|
transparent_status_code: str | None,
|
|
360
|
+
tag: str | None,
|
|
354
361
|
method: str,
|
|
355
362
|
body: str | None,
|
|
356
363
|
escalate_proxy: bool,
|
|
@@ -476,6 +483,7 @@ def scrape_cmd(
|
|
|
476
483
|
device=device,
|
|
477
484
|
custom_google=custom_google,
|
|
478
485
|
transparent_status_code=transparent_status_code,
|
|
486
|
+
tag=tag,
|
|
479
487
|
body=body,
|
|
480
488
|
scraping_config=scraping_config,
|
|
481
489
|
)
|
|
@@ -72,6 +72,12 @@ WALMART_SORT_BY = ["best-match", "price-low", "price-high", "best-seller"]
|
|
|
72
72
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
73
73
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
74
74
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
75
|
+
@optgroup.option(
|
|
76
|
+
"--tag",
|
|
77
|
+
type=str,
|
|
78
|
+
default=None,
|
|
79
|
+
help="Optional label included in API response headers.",
|
|
80
|
+
)
|
|
75
81
|
@_batch_options
|
|
76
82
|
@click.pass_obj
|
|
77
83
|
def walmart_search_cmd(
|
|
@@ -90,6 +96,7 @@ def walmart_search_cmd(
|
|
|
90
96
|
add_html: str | None,
|
|
91
97
|
light_request: str | None,
|
|
92
98
|
screenshot: str | None,
|
|
99
|
+
tag: str | None,
|
|
93
100
|
**kwargs,
|
|
94
101
|
) -> None:
|
|
95
102
|
"""Search Walmart products."""
|
|
@@ -141,6 +148,7 @@ def walmart_search_cmd(
|
|
|
141
148
|
add_html=parse_bool(add_html),
|
|
142
149
|
light_request=parse_bool(light_request),
|
|
143
150
|
screenshot=parse_bool(screenshot),
|
|
151
|
+
tag=tag,
|
|
144
152
|
retries=int(obj.get("retries") or 3),
|
|
145
153
|
backoff=float(obj.get("backoff") or 2.0),
|
|
146
154
|
)
|
|
@@ -187,6 +195,7 @@ def walmart_search_cmd(
|
|
|
187
195
|
add_html=parse_bool(add_html),
|
|
188
196
|
light_request=parse_bool(light_request),
|
|
189
197
|
screenshot=parse_bool(screenshot),
|
|
198
|
+
tag=tag,
|
|
190
199
|
retries=int(obj.get("retries") or 3),
|
|
191
200
|
backoff=float(obj.get("backoff") or 2.0),
|
|
192
201
|
)
|
|
@@ -225,6 +234,12 @@ def walmart_search_cmd(
|
|
|
225
234
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
226
235
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
227
236
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
237
|
+
@optgroup.option(
|
|
238
|
+
"--tag",
|
|
239
|
+
type=str,
|
|
240
|
+
default=None,
|
|
241
|
+
help="Optional label included in API response headers.",
|
|
242
|
+
)
|
|
228
243
|
@_batch_options
|
|
229
244
|
@click.pass_obj
|
|
230
245
|
def walmart_product_cmd(
|
|
@@ -237,6 +252,7 @@ def walmart_product_cmd(
|
|
|
237
252
|
add_html: str | None,
|
|
238
253
|
light_request: str | None,
|
|
239
254
|
screenshot: str | None,
|
|
255
|
+
tag: str | None,
|
|
240
256
|
**kwargs,
|
|
241
257
|
) -> None:
|
|
242
258
|
"""Fetch Walmart product details by product ID."""
|
|
@@ -280,6 +296,7 @@ def walmart_product_cmd(
|
|
|
280
296
|
add_html=parse_bool(add_html),
|
|
281
297
|
light_request=parse_bool(light_request),
|
|
282
298
|
screenshot=parse_bool(screenshot),
|
|
299
|
+
tag=tag,
|
|
283
300
|
retries=int(obj.get("retries") or 3),
|
|
284
301
|
backoff=float(obj.get("backoff") or 2.0),
|
|
285
302
|
)
|
|
@@ -320,6 +337,7 @@ def walmart_product_cmd(
|
|
|
320
337
|
add_html=parse_bool(add_html),
|
|
321
338
|
light_request=parse_bool(light_request),
|
|
322
339
|
screenshot=parse_bool(screenshot),
|
|
340
|
+
tag=tag,
|
|
323
341
|
retries=int(obj.get("retries") or 3),
|
|
324
342
|
backoff=float(obj.get("backoff") or 2.0),
|
|
325
343
|
)
|
|
@@ -155,6 +155,12 @@ YOUTUBE_SORT_BY = ["relevance", "rating", "view-count", "upload-date"]
|
|
|
155
155
|
@optgroup.option("--location", type=str, default=None, help="With location (true/false).")
|
|
156
156
|
@optgroup.option("--vr180", type=str, default=None, help="VR180 only (true/false).")
|
|
157
157
|
@optgroup.option("--purchased", type=str, default=None, help="Purchased only (true/false).")
|
|
158
|
+
@optgroup.option(
|
|
159
|
+
"--tag",
|
|
160
|
+
type=str,
|
|
161
|
+
default=None,
|
|
162
|
+
help="Optional label included in API response headers.",
|
|
163
|
+
)
|
|
158
164
|
@_batch_options
|
|
159
165
|
@click.pass_obj
|
|
160
166
|
def youtube_search_cmd(
|
|
@@ -175,6 +181,7 @@ def youtube_search_cmd(
|
|
|
175
181
|
location: str | None,
|
|
176
182
|
vr180: str | None,
|
|
177
183
|
purchased: str | None,
|
|
184
|
+
tag: str | None,
|
|
178
185
|
**kwargs,
|
|
179
186
|
) -> None:
|
|
180
187
|
"""Search YouTube videos."""
|
|
@@ -227,6 +234,7 @@ def youtube_search_cmd(
|
|
|
227
234
|
location=parse_bool(location),
|
|
228
235
|
vr180=parse_bool(vr180),
|
|
229
236
|
purchased=parse_bool(purchased),
|
|
237
|
+
tag=tag,
|
|
230
238
|
retries=int(obj.get("retries") or 3),
|
|
231
239
|
backoff=float(obj.get("backoff") or 2.0),
|
|
232
240
|
)
|
|
@@ -276,6 +284,7 @@ def youtube_search_cmd(
|
|
|
276
284
|
location=parse_bool(location),
|
|
277
285
|
vr180=parse_bool(vr180),
|
|
278
286
|
purchased=parse_bool(purchased),
|
|
287
|
+
tag=tag,
|
|
279
288
|
retries=int(obj.get("retries") or 3),
|
|
280
289
|
backoff=float(obj.get("backoff") or 2.0),
|
|
281
290
|
)
|
|
@@ -299,11 +308,18 @@ def youtube_search_cmd(
|
|
|
299
308
|
|
|
300
309
|
@click.command("youtube-metadata")
|
|
301
310
|
@click.argument("video_id", required=False)
|
|
311
|
+
@click.option(
|
|
312
|
+
"--tag",
|
|
313
|
+
type=str,
|
|
314
|
+
default=None,
|
|
315
|
+
help="Optional label included in API response headers.",
|
|
316
|
+
)
|
|
302
317
|
@_batch_options
|
|
303
318
|
@click.pass_obj
|
|
304
319
|
def youtube_metadata_cmd(
|
|
305
320
|
obj: dict,
|
|
306
321
|
video_id: str | None,
|
|
322
|
+
tag: str | None,
|
|
307
323
|
**kwargs,
|
|
308
324
|
) -> None:
|
|
309
325
|
"""Fetch YouTube video metadata."""
|
|
@@ -340,6 +356,7 @@ def youtube_metadata_cmd(
|
|
|
340
356
|
async def api_call(client, vid):
|
|
341
357
|
return await client.youtube_metadata(
|
|
342
358
|
_extract_video_id(vid),
|
|
359
|
+
tag=tag,
|
|
343
360
|
retries=int(obj.get("retries") or 3),
|
|
344
361
|
backoff=float(obj.get("backoff") or 2.0),
|
|
345
362
|
)
|
|
@@ -373,6 +390,7 @@ def youtube_metadata_cmd(
|
|
|
373
390
|
async with Client(key, BASE_URL) as client:
|
|
374
391
|
data, headers, status_code = await client.youtube_metadata(
|
|
375
392
|
_extract_video_id(video_id),
|
|
393
|
+
tag=tag,
|
|
376
394
|
retries=int(obj.get("retries") or 3),
|
|
377
395
|
backoff=float(obj.get("backoff") or 2.0),
|
|
378
396
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
@@ -97,7 +97,7 @@ scrapingbee [command] [arguments] [options]
|
|
|
97
97
|
| `scrape [url]` | Scrape a URL (HTML, JS, screenshot, extract) |
|
|
98
98
|
| `crawl` | Crawl sites following links, with AI extraction and save-pattern filtering |
|
|
99
99
|
| `google` / `fast-search` | Search SERP APIs |
|
|
100
|
-
| `amazon-product` / `amazon-search` | Amazon product and search |
|
|
100
|
+
| `amazon-product` / `amazon-pricing` / `amazon-search` | Amazon product, pricing and search |
|
|
101
101
|
| `walmart-search` / `walmart-product` | Walmart search and product |
|
|
102
102
|
| `youtube-search` / `youtube-metadata` | YouTube search and video metadata |
|
|
103
103
|
| `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.3}/src/scrapingbee_cli.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|