scrapingbee-cli 1.4.1__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scrapingbee_cli-1.4.1/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.2}/PKG-INFO +1 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/pyproject.toml +1 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/__init__.py +2 -2
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/cli_utils.py +3 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/client.py +23 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/amazon.py +18 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/chatgpt.py +9 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/crawl.py +11 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/fast_search.py +9 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/google.py +21 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/scrape.py +8 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/walmart.py +18 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/youtube.py +18 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2/src/scrapingbee_cli.egg-info}/PKG-INFO +1 -1
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/LICENSE +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/README.md +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/setup.cfg +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/audit.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/batch.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/cli.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/__init__.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/auth.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/export.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/schedule.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/tutorial.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/unsafe.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/commands/usage.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/config.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/crawl.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/credits.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/exec_gate.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/extract.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/__init__.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/runner.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli/tutorial/steps.py +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/SOURCES.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
- {scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "scrapingbee-cli"
|
|
7
|
-
version = "1.4.1"
|
|
7
|
+
version = "1.4.2"
|
|
8
8
|
description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import platform
|
|
4
4
|
import sys
|
|
5
5
|
|
|
6
|
-
__version__ = "1.4.1"
|
|
6
|
+
__version__ = "1.4.2"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def user_agent_headers() -> dict[str, str]:
|
|
@@ -12,7 +12,7 @@ def user_agent_headers() -> dict[str, str]:
|
|
|
12
12
|
Returns a dict of headers:
|
|
13
13
|
User-Agent: ScrapingBee/CLI
|
|
14
14
|
User-Agent-Client: scrapingbee-cli
|
|
15
|
-
User-Agent-Client-Version: 1.4.1
|
|
15
|
+
User-Agent-Client-Version: 1.4.2
|
|
16
16
|
User-Agent-Environment: python
|
|
17
17
|
User-Agent-Environment-Version: 3.14.2
|
|
18
18
|
User-Agent-OS: Darwin arm64
|
|
@@ -1304,6 +1304,7 @@ def build_scrape_kwargs(
|
|
|
1304
1304
|
device: str | None = None,
|
|
1305
1305
|
custom_google: str | None = None,
|
|
1306
1306
|
transparent_status_code: str | None = None,
|
|
1307
|
+
tag: str | None = None,
|
|
1307
1308
|
body: str | None = None,
|
|
1308
1309
|
scraping_config: str | None = None,
|
|
1309
1310
|
) -> dict[str, Any]:
|
|
@@ -1344,6 +1345,7 @@ def build_scrape_kwargs(
|
|
|
1344
1345
|
"device": device,
|
|
1345
1346
|
"custom_google": parse_bool(custom_google),
|
|
1346
1347
|
"transparent_status_code": parse_bool(transparent_status_code),
|
|
1348
|
+
"tag": tag,
|
|
1347
1349
|
"body": body,
|
|
1348
1350
|
"scraping_config": scraping_config,
|
|
1349
1351
|
}
|
|
@@ -1564,6 +1566,7 @@ def write_output(
|
|
|
1564
1566
|
("spb-cost", "Credit Cost"),
|
|
1565
1567
|
("spb-resolved-url", "Resolved URL"),
|
|
1566
1568
|
("spb-initial-status-code", "Initial Status Code"),
|
|
1569
|
+
("tag", "Tag"),
|
|
1567
1570
|
]:
|
|
1568
1571
|
if key in headers_lower:
|
|
1569
1572
|
_, val = headers_lower[key]
|
|
@@ -176,6 +176,7 @@ class Client:
|
|
|
176
176
|
device: str | None = None,
|
|
177
177
|
custom_google: bool | None = None,
|
|
178
178
|
transparent_status_code: bool | None = None,
|
|
179
|
+
tag: str | None = None,
|
|
179
180
|
body: str | None = None,
|
|
180
181
|
scraping_config: str | None = None,
|
|
181
182
|
retries: int = 3,
|
|
@@ -218,6 +219,7 @@ class Client:
|
|
|
218
219
|
("device", device),
|
|
219
220
|
("custom_google", self._bool(custom_google)),
|
|
220
221
|
("transparent_status_code", self._bool(transparent_status_code)),
|
|
222
|
+
("tag", tag),
|
|
221
223
|
("scraping_config", scraping_config),
|
|
222
224
|
]:
|
|
223
225
|
if v is not None:
|
|
@@ -290,6 +292,8 @@ class Client:
|
|
|
290
292
|
extra_params: str | None = None,
|
|
291
293
|
add_html: bool | None = None,
|
|
292
294
|
light_request: bool | None = None,
|
|
295
|
+
tag: str | None = None,
|
|
296
|
+
date_range: str | None = None,
|
|
293
297
|
retries: int = 3,
|
|
294
298
|
backoff: float = 2.0,
|
|
295
299
|
) -> tuple[bytes, dict, int]:
|
|
@@ -304,6 +308,8 @@ class Client:
|
|
|
304
308
|
"extra_params": extra_params,
|
|
305
309
|
"add_html": self._bool(add_html),
|
|
306
310
|
"light_request": self._bool(light_request),
|
|
311
|
+
"tag": tag,
|
|
312
|
+
"date_range": date_range,
|
|
307
313
|
}
|
|
308
314
|
return await self._get_with_retry(
|
|
309
315
|
"/google",
|
|
@@ -318,6 +324,7 @@ class Client:
|
|
|
318
324
|
page: int | None = None,
|
|
319
325
|
country_code: str | None = None,
|
|
320
326
|
language: str | None = None,
|
|
327
|
+
tag: str | None = None,
|
|
321
328
|
retries: int = 3,
|
|
322
329
|
backoff: float = 2.0,
|
|
323
330
|
) -> tuple[bytes, dict, int]:
|
|
@@ -326,6 +333,7 @@ class Client:
|
|
|
326
333
|
"page": page if page is not None else None,
|
|
327
334
|
"country_code": country_code,
|
|
328
335
|
"language": language,
|
|
336
|
+
"tag": tag,
|
|
329
337
|
}
|
|
330
338
|
return await self._get_with_retry(
|
|
331
339
|
"/fast_search",
|
|
@@ -346,6 +354,7 @@ class Client:
|
|
|
346
354
|
add_html: bool | None = None,
|
|
347
355
|
light_request: bool | None = None,
|
|
348
356
|
screenshot: bool | None = None,
|
|
357
|
+
tag: str | None = None,
|
|
349
358
|
retries: int = 3,
|
|
350
359
|
backoff: float = 2.0,
|
|
351
360
|
) -> tuple[bytes, dict, int]:
|
|
@@ -360,6 +369,7 @@ class Client:
|
|
|
360
369
|
"add_html": self._bool(add_html),
|
|
361
370
|
"light_request": self._bool(light_request),
|
|
362
371
|
"screenshot": self._bool(screenshot),
|
|
372
|
+
"tag": tag,
|
|
363
373
|
}
|
|
364
374
|
return await self._get_with_retry(
|
|
365
375
|
"/amazon/product",
|
|
@@ -386,6 +396,7 @@ class Client:
|
|
|
386
396
|
add_html: bool | None = None,
|
|
387
397
|
light_request: bool | None = None,
|
|
388
398
|
screenshot: bool | None = None,
|
|
399
|
+
tag: str | None = None,
|
|
389
400
|
retries: int = 3,
|
|
390
401
|
backoff: float = 2.0,
|
|
391
402
|
) -> tuple[bytes, dict, int]:
|
|
@@ -406,6 +417,7 @@ class Client:
|
|
|
406
417
|
"add_html": self._bool(add_html),
|
|
407
418
|
"light_request": self._bool(light_request),
|
|
408
419
|
"screenshot": self._bool(screenshot),
|
|
420
|
+
"tag": tag,
|
|
409
421
|
}
|
|
410
422
|
return await self._get_with_retry(
|
|
411
423
|
"/amazon/search",
|
|
@@ -430,6 +442,7 @@ class Client:
|
|
|
430
442
|
add_html: bool | None = None,
|
|
431
443
|
light_request: bool | None = None,
|
|
432
444
|
screenshot: bool | None = None,
|
|
445
|
+
tag: str | None = None,
|
|
433
446
|
retries: int = 3,
|
|
434
447
|
backoff: float = 2.0,
|
|
435
448
|
) -> tuple[bytes, dict, int]:
|
|
@@ -448,6 +461,7 @@ class Client:
|
|
|
448
461
|
"add_html": self._bool(add_html),
|
|
449
462
|
"light_request": self._bool(light_request),
|
|
450
463
|
"screenshot": self._bool(screenshot),
|
|
464
|
+
"tag": tag,
|
|
451
465
|
}
|
|
452
466
|
return await self._get_with_retry(
|
|
453
467
|
"/walmart/search",
|
|
@@ -466,6 +480,7 @@ class Client:
|
|
|
466
480
|
add_html: bool | None = None,
|
|
467
481
|
light_request: bool | None = None,
|
|
468
482
|
screenshot: bool | None = None,
|
|
483
|
+
tag: str | None = None,
|
|
469
484
|
retries: int = 3,
|
|
470
485
|
backoff: float = 2.0,
|
|
471
486
|
) -> tuple[bytes, dict, int]:
|
|
@@ -478,6 +493,7 @@ class Client:
|
|
|
478
493
|
"add_html": self._bool(add_html),
|
|
479
494
|
"light_request": self._bool(light_request),
|
|
480
495
|
"screenshot": self._bool(screenshot),
|
|
496
|
+
"tag": tag,
|
|
481
497
|
}
|
|
482
498
|
return await self._get_with_retry(
|
|
483
499
|
"/walmart/product",
|
|
@@ -504,6 +520,7 @@ class Client:
|
|
|
504
520
|
location: bool | None = None,
|
|
505
521
|
vr180: bool | None = None,
|
|
506
522
|
purchased: bool | None = None,
|
|
523
|
+
tag: str | None = None,
|
|
507
524
|
retries: int = 3,
|
|
508
525
|
backoff: float = 2.0,
|
|
509
526
|
) -> tuple[bytes, dict, int]:
|
|
@@ -524,6 +541,7 @@ class Client:
|
|
|
524
541
|
"location": self._bool(location),
|
|
525
542
|
"vr180": self._bool(vr180),
|
|
526
543
|
"purchased": self._bool(purchased),
|
|
544
|
+
"tag": tag,
|
|
527
545
|
}
|
|
528
546
|
return await self._get_with_retry(
|
|
529
547
|
"/youtube/search",
|
|
@@ -535,12 +553,13 @@ class Client:
|
|
|
535
553
|
async def youtube_metadata(
|
|
536
554
|
self,
|
|
537
555
|
video_id: str,
|
|
556
|
+
tag: str | None = None,
|
|
538
557
|
retries: int = 3,
|
|
539
558
|
backoff: float = 2.0,
|
|
540
559
|
) -> tuple[bytes, dict, int]:
|
|
541
560
|
return await self._get_with_retry(
|
|
542
561
|
"/youtube/metadata",
|
|
543
|
-
{"video_id": video_id},
|
|
562
|
+
{"video_id": video_id, "tag": tag},
|
|
544
563
|
retries=retries,
|
|
545
564
|
backoff=backoff,
|
|
546
565
|
)
|
|
@@ -551,6 +570,7 @@ class Client:
|
|
|
551
570
|
search: bool | None = None,
|
|
552
571
|
add_html: bool | None = None,
|
|
553
572
|
country_code: str | None = None,
|
|
573
|
+
tag: str | None = None,
|
|
554
574
|
retries: int = 3,
|
|
555
575
|
backoff: float = 2.0,
|
|
556
576
|
) -> tuple[bytes, dict, int]:
|
|
@@ -561,6 +581,8 @@ class Client:
|
|
|
561
581
|
params["add_html"] = str(add_html).lower()
|
|
562
582
|
if country_code is not None:
|
|
563
583
|
params["country_code"] = country_code
|
|
584
|
+
if tag is not None:
|
|
585
|
+
params["tag"] = tag
|
|
564
586
|
return await self._get_with_retry(
|
|
565
587
|
"/chatgpt",
|
|
566
588
|
params,
|
|
@@ -66,6 +66,12 @@ AMAZON_SORT_BY = [
|
|
|
66
66
|
)
|
|
67
67
|
@optgroup.option("--light-request", type=str, default=None, help="Light request mode (true/false).")
|
|
68
68
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
69
|
+
@optgroup.option(
|
|
70
|
+
"--tag",
|
|
71
|
+
type=str,
|
|
72
|
+
default=None,
|
|
73
|
+
help="Optional label included in API response headers.",
|
|
74
|
+
)
|
|
69
75
|
@_batch_options
|
|
70
76
|
@click.pass_obj
|
|
71
77
|
def amazon_product_cmd(
|
|
@@ -80,6 +86,7 @@ def amazon_product_cmd(
|
|
|
80
86
|
add_html: str | None,
|
|
81
87
|
light_request: str | None,
|
|
82
88
|
screenshot: str | None,
|
|
89
|
+
tag: str | None,
|
|
83
90
|
**kwargs,
|
|
84
91
|
) -> None:
|
|
85
92
|
"""Fetch Amazon product details by ASIN."""
|
|
@@ -125,6 +132,7 @@ def amazon_product_cmd(
|
|
|
125
132
|
add_html=parse_bool(add_html),
|
|
126
133
|
light_request=parse_bool(light_request),
|
|
127
134
|
screenshot=parse_bool(screenshot),
|
|
135
|
+
tag=tag,
|
|
128
136
|
retries=int(obj.get("retries") or 3),
|
|
129
137
|
backoff=float(obj.get("backoff") or 2.0),
|
|
130
138
|
)
|
|
@@ -167,6 +175,7 @@ def amazon_product_cmd(
|
|
|
167
175
|
add_html=parse_bool(add_html),
|
|
168
176
|
light_request=parse_bool(light_request),
|
|
169
177
|
screenshot=parse_bool(screenshot),
|
|
178
|
+
tag=tag,
|
|
170
179
|
retries=int(obj.get("retries") or 3),
|
|
171
180
|
backoff=float(obj.get("backoff") or 2.0),
|
|
172
181
|
)
|
|
@@ -224,6 +233,12 @@ def amazon_product_cmd(
|
|
|
224
233
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
225
234
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
226
235
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
236
|
+
@optgroup.option(
|
|
237
|
+
"--tag",
|
|
238
|
+
type=str,
|
|
239
|
+
default=None,
|
|
240
|
+
help="Optional label included in API response headers.",
|
|
241
|
+
)
|
|
227
242
|
@_batch_options
|
|
228
243
|
@click.pass_obj
|
|
229
244
|
def amazon_search_cmd(
|
|
@@ -244,6 +259,7 @@ def amazon_search_cmd(
|
|
|
244
259
|
add_html: str | None,
|
|
245
260
|
light_request: str | None,
|
|
246
261
|
screenshot: str | None,
|
|
262
|
+
tag: str | None,
|
|
247
263
|
**kwargs,
|
|
248
264
|
) -> None:
|
|
249
265
|
"""Search Amazon products."""
|
|
@@ -297,6 +313,7 @@ def amazon_search_cmd(
|
|
|
297
313
|
add_html=parse_bool(add_html),
|
|
298
314
|
light_request=parse_bool(light_request),
|
|
299
315
|
screenshot=parse_bool(screenshot),
|
|
316
|
+
tag=tag,
|
|
300
317
|
retries=int(obj.get("retries") or 3),
|
|
301
318
|
backoff=float(obj.get("backoff") or 2.0),
|
|
302
319
|
)
|
|
@@ -345,6 +362,7 @@ def amazon_search_cmd(
|
|
|
345
362
|
add_html=parse_bool(add_html),
|
|
346
363
|
light_request=parse_bool(light_request),
|
|
347
364
|
screenshot=parse_bool(screenshot),
|
|
365
|
+
tag=tag,
|
|
348
366
|
retries=int(obj.get("retries") or 3),
|
|
349
367
|
backoff=float(obj.get("backoff") or 2.0),
|
|
350
368
|
)
|
|
@@ -46,6 +46,12 @@ from ..config import BASE_URL, get_api_key
|
|
|
46
46
|
default=None,
|
|
47
47
|
help="Country code for geolocation (ISO 3166-1).",
|
|
48
48
|
)
|
|
49
|
+
@click.option(
|
|
50
|
+
"--tag",
|
|
51
|
+
type=str,
|
|
52
|
+
default=None,
|
|
53
|
+
help="Optional label included in API response headers.",
|
|
54
|
+
)
|
|
49
55
|
@_batch_options # must be after command-specific options
|
|
50
56
|
@click.pass_obj
|
|
51
57
|
def chatgpt_cmd(
|
|
@@ -54,6 +60,7 @@ def chatgpt_cmd(
|
|
|
54
60
|
search: str | None,
|
|
55
61
|
add_html: str | None,
|
|
56
62
|
country_code: str | None,
|
|
63
|
+
tag: str | None,
|
|
57
64
|
**kwargs,
|
|
58
65
|
) -> None:
|
|
59
66
|
"""Send a prompt to the ChatGPT API."""
|
|
@@ -93,6 +100,7 @@ def chatgpt_cmd(
|
|
|
93
100
|
search=parse_bool(search),
|
|
94
101
|
add_html=parse_bool(add_html),
|
|
95
102
|
country_code=country_code,
|
|
103
|
+
tag=tag,
|
|
96
104
|
retries=int(obj.get("retries") or 3),
|
|
97
105
|
backoff=float(obj.get("backoff") or 2.0),
|
|
98
106
|
)
|
|
@@ -131,6 +139,7 @@ def chatgpt_cmd(
|
|
|
131
139
|
search=parse_bool(search),
|
|
132
140
|
add_html=parse_bool(add_html),
|
|
133
141
|
country_code=country_code,
|
|
142
|
+
tag=tag,
|
|
134
143
|
retries=int(obj.get("retries") or 3),
|
|
135
144
|
backoff=float(obj.get("backoff") or 2.0),
|
|
136
145
|
)
|
|
@@ -60,6 +60,7 @@ def _crawl_build_params(
|
|
|
60
60
|
device: str | None,
|
|
61
61
|
custom_google: str | None,
|
|
62
62
|
transparent_status_code: str | None,
|
|
63
|
+
tag: str | None = None,
|
|
63
64
|
scraping_config: str | None = None,
|
|
64
65
|
) -> dict[str, str]:
|
|
65
66
|
"""Build ScrapingBee API params dict from crawl options (quick-crawl URL mode)."""
|
|
@@ -98,6 +99,7 @@ def _crawl_build_params(
|
|
|
98
99
|
device=device,
|
|
99
100
|
custom_google=custom_google,
|
|
100
101
|
transparent_status_code=transparent_status_code,
|
|
102
|
+
tag=tag,
|
|
101
103
|
body=None,
|
|
102
104
|
scraping_config=scraping_config,
|
|
103
105
|
)
|
|
@@ -248,6 +250,12 @@ def _crawl_build_params(
|
|
|
248
250
|
default=None,
|
|
249
251
|
help="Return target status as-is (true/false).",
|
|
250
252
|
)
|
|
253
|
+
@optgroup.option(
|
|
254
|
+
"--tag",
|
|
255
|
+
type=str,
|
|
256
|
+
default=None,
|
|
257
|
+
help="Optional label included in API response headers.",
|
|
258
|
+
)
|
|
251
259
|
@optgroup.group("Crawl", help="Quick-crawl: depth, pages, output, throttling")
|
|
252
260
|
@optgroup.option(
|
|
253
261
|
"--max-depth",
|
|
@@ -372,6 +380,7 @@ def crawl_cmd(
|
|
|
372
380
|
device: str | None,
|
|
373
381
|
custom_google: str | None,
|
|
374
382
|
transparent_status_code: str | None,
|
|
383
|
+
tag: str | None,
|
|
375
384
|
max_depth: int,
|
|
376
385
|
max_pages: int,
|
|
377
386
|
allowed_domains: str | None,
|
|
@@ -500,6 +509,7 @@ def crawl_cmd(
|
|
|
500
509
|
device=device,
|
|
501
510
|
custom_google=custom_google,
|
|
502
511
|
transparent_status_code=transparent_status_code,
|
|
512
|
+
tag=tag,
|
|
503
513
|
scraping_config=scraping_config,
|
|
504
514
|
)
|
|
505
515
|
except ValueError as e:
|
|
@@ -602,6 +612,7 @@ def crawl_cmd(
|
|
|
602
612
|
"--device": device,
|
|
603
613
|
"--custom-google": custom_google,
|
|
604
614
|
"--transparent-status-code": transparent_status_code,
|
|
615
|
+
"--tag": tag,
|
|
605
616
|
}
|
|
606
617
|
used = [flag for flag, val in api_flags.items() if val is not None]
|
|
607
618
|
if headers:
|
|
@@ -38,6 +38,12 @@ from ..config import BASE_URL, get_api_key
|
|
|
38
38
|
help="Country code for results (ISO 3166-1, e.g. us, fr).",
|
|
39
39
|
)
|
|
40
40
|
@optgroup.option("--language", type=str, default=None, help="Language code (e.g. en, fr).")
|
|
41
|
+
@optgroup.option(
|
|
42
|
+
"--tag",
|
|
43
|
+
type=str,
|
|
44
|
+
default=None,
|
|
45
|
+
help="Optional label included in API response headers.",
|
|
46
|
+
)
|
|
41
47
|
@_batch_options
|
|
42
48
|
@click.pass_obj
|
|
43
49
|
def fast_search_cmd(
|
|
@@ -46,6 +52,7 @@ def fast_search_cmd(
|
|
|
46
52
|
page: int | None,
|
|
47
53
|
country_code: str | None,
|
|
48
54
|
language: str | None,
|
|
55
|
+
tag: str | None,
|
|
49
56
|
**kwargs,
|
|
50
57
|
) -> None:
|
|
51
58
|
"""Search using the Fast Search API (sub-second results)."""
|
|
@@ -86,6 +93,7 @@ def fast_search_cmd(
|
|
|
86
93
|
page=page,
|
|
87
94
|
country_code=country_code,
|
|
88
95
|
language=language,
|
|
96
|
+
tag=tag,
|
|
89
97
|
retries=int(obj.get("retries") or 3),
|
|
90
98
|
backoff=float(obj.get("backoff") or 2.0),
|
|
91
99
|
)
|
|
@@ -122,6 +130,7 @@ def fast_search_cmd(
|
|
|
122
130
|
page=page,
|
|
123
131
|
country_code=country_code,
|
|
124
132
|
language=language,
|
|
133
|
+
tag=tag,
|
|
125
134
|
retries=int(obj.get("retries") or 3),
|
|
126
135
|
backoff=float(obj.get("backoff") or 2.0),
|
|
127
136
|
)
|
|
@@ -83,6 +83,15 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
|
|
|
83
83
|
default=None,
|
|
84
84
|
help="Language code for results (e.g. en, fr, de). Default: en.",
|
|
85
85
|
)
|
|
86
|
+
@optgroup.option(
|
|
87
|
+
"--date-range",
|
|
88
|
+
type=NormalizedChoice(
|
|
89
|
+
["past-hour", "past-day", "past-week", "past-month", "past-year"],
|
|
90
|
+
case_sensitive=False,
|
|
91
|
+
),
|
|
92
|
+
default=None,
|
|
93
|
+
help="Restrict results to the past hour/day/week/month/year.",
|
|
94
|
+
)
|
|
86
95
|
@optgroup.group("Filters", help="Autocorrection, extra params, and response format")
|
|
87
96
|
@optgroup.option("--nfpr", type=str, default=None, help="Disable autocorrection (true/false).")
|
|
88
97
|
@optgroup.option(
|
|
@@ -97,6 +106,12 @@ def _warn_empty_organic(data: bytes, search_type: str | None) -> None:
|
|
|
97
106
|
default=None,
|
|
98
107
|
help="Light request mode, 10 credits (true/false). Fewer data than regular.",
|
|
99
108
|
)
|
|
109
|
+
@optgroup.option(
|
|
110
|
+
"--tag",
|
|
111
|
+
type=str,
|
|
112
|
+
default=None,
|
|
113
|
+
help="Optional label included in API response headers.",
|
|
114
|
+
)
|
|
100
115
|
@_batch_options
|
|
101
116
|
@click.pass_obj
|
|
102
117
|
def google_cmd(
|
|
@@ -111,6 +126,8 @@ def google_cmd(
|
|
|
111
126
|
extra_params: str | None,
|
|
112
127
|
add_html: str | None,
|
|
113
128
|
light_request: str | None,
|
|
129
|
+
tag: str | None,
|
|
130
|
+
date_range: str | None,
|
|
114
131
|
**kwargs,
|
|
115
132
|
) -> None:
|
|
116
133
|
"""Search Google using the Google Search API."""
|
|
@@ -157,6 +174,8 @@ def google_cmd(
|
|
|
157
174
|
extra_params=extra_params,
|
|
158
175
|
add_html=parse_bool(add_html),
|
|
159
176
|
light_request=parse_bool(light_request),
|
|
177
|
+
tag=tag,
|
|
178
|
+
date_range=norm_val(date_range),
|
|
160
179
|
retries=int(obj.get("retries") or 3),
|
|
161
180
|
backoff=float(obj.get("backoff") or 2.0),
|
|
162
181
|
)
|
|
@@ -199,6 +218,8 @@ def google_cmd(
|
|
|
199
218
|
extra_params=extra_params,
|
|
200
219
|
add_html=parse_bool(add_html),
|
|
201
220
|
light_request=parse_bool(light_request),
|
|
221
|
+
tag=tag,
|
|
222
|
+
date_range=norm_val(date_range),
|
|
202
223
|
retries=int(obj.get("retries") or 3),
|
|
203
224
|
backoff=float(obj.get("backoff") or 2.0),
|
|
204
225
|
)
|
|
@@ -293,6 +293,12 @@ SCRAPE_PRESETS = (
|
|
|
293
293
|
default=None,
|
|
294
294
|
help="Return target status/body as-is (true/false). No retry on 500.",
|
|
295
295
|
)
|
|
296
|
+
@optgroup.option(
|
|
297
|
+
"--tag",
|
|
298
|
+
type=str,
|
|
299
|
+
default=None,
|
|
300
|
+
help="Optional label included in API response headers.",
|
|
301
|
+
)
|
|
296
302
|
@optgroup.option(
|
|
297
303
|
"-X",
|
|
298
304
|
"--method",
|
|
@@ -351,6 +357,7 @@ def scrape_cmd(
|
|
|
351
357
|
device: str | None,
|
|
352
358
|
custom_google: str | None,
|
|
353
359
|
transparent_status_code: str | None,
|
|
360
|
+
tag: str | None,
|
|
354
361
|
method: str,
|
|
355
362
|
body: str | None,
|
|
356
363
|
escalate_proxy: bool,
|
|
@@ -476,6 +483,7 @@ def scrape_cmd(
|
|
|
476
483
|
device=device,
|
|
477
484
|
custom_google=custom_google,
|
|
478
485
|
transparent_status_code=transparent_status_code,
|
|
486
|
+
tag=tag,
|
|
479
487
|
body=body,
|
|
480
488
|
scraping_config=scraping_config,
|
|
481
489
|
)
|
|
@@ -72,6 +72,12 @@ WALMART_SORT_BY = ["best-match", "price-low", "price-high", "best-seller"]
|
|
|
72
72
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
73
73
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
74
74
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
75
|
+
@optgroup.option(
|
|
76
|
+
"--tag",
|
|
77
|
+
type=str,
|
|
78
|
+
default=None,
|
|
79
|
+
help="Optional label included in API response headers.",
|
|
80
|
+
)
|
|
75
81
|
@_batch_options
|
|
76
82
|
@click.pass_obj
|
|
77
83
|
def walmart_search_cmd(
|
|
@@ -90,6 +96,7 @@ def walmart_search_cmd(
|
|
|
90
96
|
add_html: str | None,
|
|
91
97
|
light_request: str | None,
|
|
92
98
|
screenshot: str | None,
|
|
99
|
+
tag: str | None,
|
|
93
100
|
**kwargs,
|
|
94
101
|
) -> None:
|
|
95
102
|
"""Search Walmart products."""
|
|
@@ -141,6 +148,7 @@ def walmart_search_cmd(
|
|
|
141
148
|
add_html=parse_bool(add_html),
|
|
142
149
|
light_request=parse_bool(light_request),
|
|
143
150
|
screenshot=parse_bool(screenshot),
|
|
151
|
+
tag=tag,
|
|
144
152
|
retries=int(obj.get("retries") or 3),
|
|
145
153
|
backoff=float(obj.get("backoff") or 2.0),
|
|
146
154
|
)
|
|
@@ -187,6 +195,7 @@ def walmart_search_cmd(
|
|
|
187
195
|
add_html=parse_bool(add_html),
|
|
188
196
|
light_request=parse_bool(light_request),
|
|
189
197
|
screenshot=parse_bool(screenshot),
|
|
198
|
+
tag=tag,
|
|
190
199
|
retries=int(obj.get("retries") or 3),
|
|
191
200
|
backoff=float(obj.get("backoff") or 2.0),
|
|
192
201
|
)
|
|
@@ -225,6 +234,12 @@ def walmart_search_cmd(
|
|
|
225
234
|
@optgroup.option("--add-html", type=str, default=None, help="Include full HTML (true/false).")
|
|
226
235
|
@optgroup.option("--light-request", type=str, default=None, help="Light request (true/false).")
|
|
227
236
|
@optgroup.option("--screenshot", type=str, default=None, help="Take screenshot (true/false).")
|
|
237
|
+
@optgroup.option(
|
|
238
|
+
"--tag",
|
|
239
|
+
type=str,
|
|
240
|
+
default=None,
|
|
241
|
+
help="Optional label included in API response headers.",
|
|
242
|
+
)
|
|
228
243
|
@_batch_options
|
|
229
244
|
@click.pass_obj
|
|
230
245
|
def walmart_product_cmd(
|
|
@@ -237,6 +252,7 @@ def walmart_product_cmd(
|
|
|
237
252
|
add_html: str | None,
|
|
238
253
|
light_request: str | None,
|
|
239
254
|
screenshot: str | None,
|
|
255
|
+
tag: str | None,
|
|
240
256
|
**kwargs,
|
|
241
257
|
) -> None:
|
|
242
258
|
"""Fetch Walmart product details by product ID."""
|
|
@@ -280,6 +296,7 @@ def walmart_product_cmd(
|
|
|
280
296
|
add_html=parse_bool(add_html),
|
|
281
297
|
light_request=parse_bool(light_request),
|
|
282
298
|
screenshot=parse_bool(screenshot),
|
|
299
|
+
tag=tag,
|
|
283
300
|
retries=int(obj.get("retries") or 3),
|
|
284
301
|
backoff=float(obj.get("backoff") or 2.0),
|
|
285
302
|
)
|
|
@@ -320,6 +337,7 @@ def walmart_product_cmd(
|
|
|
320
337
|
add_html=parse_bool(add_html),
|
|
321
338
|
light_request=parse_bool(light_request),
|
|
322
339
|
screenshot=parse_bool(screenshot),
|
|
340
|
+
tag=tag,
|
|
323
341
|
retries=int(obj.get("retries") or 3),
|
|
324
342
|
backoff=float(obj.get("backoff") or 2.0),
|
|
325
343
|
)
|
|
@@ -155,6 +155,12 @@ YOUTUBE_SORT_BY = ["relevance", "rating", "view-count", "upload-date"]
|
|
|
155
155
|
@optgroup.option("--location", type=str, default=None, help="With location (true/false).")
|
|
156
156
|
@optgroup.option("--vr180", type=str, default=None, help="VR180 only (true/false).")
|
|
157
157
|
@optgroup.option("--purchased", type=str, default=None, help="Purchased only (true/false).")
|
|
158
|
+
@optgroup.option(
|
|
159
|
+
"--tag",
|
|
160
|
+
type=str,
|
|
161
|
+
default=None,
|
|
162
|
+
help="Optional label included in API response headers.",
|
|
163
|
+
)
|
|
158
164
|
@_batch_options
|
|
159
165
|
@click.pass_obj
|
|
160
166
|
def youtube_search_cmd(
|
|
@@ -175,6 +181,7 @@ def youtube_search_cmd(
|
|
|
175
181
|
location: str | None,
|
|
176
182
|
vr180: str | None,
|
|
177
183
|
purchased: str | None,
|
|
184
|
+
tag: str | None,
|
|
178
185
|
**kwargs,
|
|
179
186
|
) -> None:
|
|
180
187
|
"""Search YouTube videos."""
|
|
@@ -227,6 +234,7 @@ def youtube_search_cmd(
|
|
|
227
234
|
location=parse_bool(location),
|
|
228
235
|
vr180=parse_bool(vr180),
|
|
229
236
|
purchased=parse_bool(purchased),
|
|
237
|
+
tag=tag,
|
|
230
238
|
retries=int(obj.get("retries") or 3),
|
|
231
239
|
backoff=float(obj.get("backoff") or 2.0),
|
|
232
240
|
)
|
|
@@ -276,6 +284,7 @@ def youtube_search_cmd(
|
|
|
276
284
|
location=parse_bool(location),
|
|
277
285
|
vr180=parse_bool(vr180),
|
|
278
286
|
purchased=parse_bool(purchased),
|
|
287
|
+
tag=tag,
|
|
279
288
|
retries=int(obj.get("retries") or 3),
|
|
280
289
|
backoff=float(obj.get("backoff") or 2.0),
|
|
281
290
|
)
|
|
@@ -299,11 +308,18 @@ def youtube_search_cmd(
|
|
|
299
308
|
|
|
300
309
|
@click.command("youtube-metadata")
|
|
301
310
|
@click.argument("video_id", required=False)
|
|
311
|
+
@click.option(
|
|
312
|
+
"--tag",
|
|
313
|
+
type=str,
|
|
314
|
+
default=None,
|
|
315
|
+
help="Optional label included in API response headers.",
|
|
316
|
+
)
|
|
302
317
|
@_batch_options
|
|
303
318
|
@click.pass_obj
|
|
304
319
|
def youtube_metadata_cmd(
|
|
305
320
|
obj: dict,
|
|
306
321
|
video_id: str | None,
|
|
322
|
+
tag: str | None,
|
|
307
323
|
**kwargs,
|
|
308
324
|
) -> None:
|
|
309
325
|
"""Fetch YouTube video metadata."""
|
|
@@ -340,6 +356,7 @@ def youtube_metadata_cmd(
|
|
|
340
356
|
async def api_call(client, vid):
|
|
341
357
|
return await client.youtube_metadata(
|
|
342
358
|
_extract_video_id(vid),
|
|
359
|
+
tag=tag,
|
|
343
360
|
retries=int(obj.get("retries") or 3),
|
|
344
361
|
backoff=float(obj.get("backoff") or 2.0),
|
|
345
362
|
)
|
|
@@ -373,6 +390,7 @@ def youtube_metadata_cmd(
|
|
|
373
390
|
async with Client(key, BASE_URL) as client:
|
|
374
391
|
data, headers, status_code = await client.youtube_metadata(
|
|
375
392
|
_extract_video_id(video_id),
|
|
393
|
+
tag=tag,
|
|
376
394
|
retries=int(obj.get("retries") or 3),
|
|
377
395
|
backoff=float(obj.get("backoff") or 2.0),
|
|
378
396
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrapingbee-cli
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
|
|
5
5
|
Author: ScrapingBee
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{scrapingbee_cli-1.4.1 → scrapingbee_cli-1.4.2}/src/scrapingbee_cli.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|