scrapingbee-cli 1.3.1__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {scrapingbee_cli-1.3.1/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.0}/PKG-INFO +14 -7
  2. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/README.md +13 -6
  3. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/pyproject.toml +5 -1
  4. scrapingbee_cli-1.4.0/src/scrapingbee_cli/__init__.py +29 -0
  5. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/audit.py +41 -4
  6. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/batch.py +389 -62
  7. scrapingbee_cli-1.4.0/src/scrapingbee_cli/cli.py +199 -0
  8. scrapingbee_cli-1.4.0/src/scrapingbee_cli/cli_utils.py +1606 -0
  9. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/client.py +2 -2
  10. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/__init__.py +2 -0
  11. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/amazon.py +22 -14
  12. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/auth.py +88 -15
  13. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/chatgpt.py +9 -5
  14. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/crawl.py +93 -4
  15. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/export.py +112 -20
  16. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/fast_search.py +11 -7
  17. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/google.py +11 -7
  18. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/schedule.py +36 -4
  19. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/scrape.py +133 -40
  20. scrapingbee_cli-1.4.0/src/scrapingbee_cli/commands/tutorial.py +135 -0
  21. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/unsafe.py +52 -2
  22. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/usage.py +6 -3
  23. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/walmart.py +22 -14
  24. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/youtube.py +22 -14
  25. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/crawl.py +57 -6
  26. scrapingbee_cli-1.4.0/src/scrapingbee_cli/extract.py +482 -0
  27. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/__init__.py +1 -0
  28. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/runner.py +925 -0
  29. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/steps.py +712 -0
  30. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info}/PKG-INFO +14 -7
  31. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/SOURCES.txt +6 -1
  32. scrapingbee_cli-1.3.1/src/scrapingbee_cli/__init__.py +0 -16
  33. scrapingbee_cli-1.3.1/src/scrapingbee_cli/cli.py +0 -101
  34. scrapingbee_cli-1.3.1/src/scrapingbee_cli/cli_utils.py +0 -693
  35. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/LICENSE +0 -0
  36. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/setup.cfg +0 -0
  37. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/config.py +0 -0
  38. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/credits.py +0 -0
  39. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/exec_gate.py +0 -0
  40. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
  41. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
  42. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
  43. {scrapingbee_cli-1.3.1 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.3.1
3
+ Version: 1.4.0
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
@@ -81,7 +81,7 @@ scrapingbee [command] [arguments] [options]
81
81
  - **`scrapingbee --help`** – List all commands.
82
82
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
83
83
 
84
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
84
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--overwrite`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--smart-extract`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
85
85
 
86
86
  **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
87
87
 
@@ -101,8 +101,9 @@ scrapingbee [command] [arguments] [options]
101
101
  | `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
102
102
  | `export` | Merge batch/crawl output to ndjson, txt, or csv (with --flatten, --columns) |
103
103
  | `schedule` | Schedule commands via cron (--name, --list, --stop) |
104
+ | `tutorial` | Interactive step-by-step guide to CLI features (`--chapter N`, `--reset`, `--list`, `--output-dir`) |
104
105
 
105
- **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format` to choose between `files` (default), `csv`, or `ndjson` streaming. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption.
106
+ **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format csv` or `--output-format ndjson` to stream all results to a single file (or stdout) instead of individual files. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption. Run bare `scrapingbee --resume` to discover incomplete batches in the current directory.
106
107
 
107
108
  **Parameters and options:** Use space-separated values (e.g. `--render-js false`), not `--option=value`. For full parameter lists, response formats, and credit costs, see **`scrapingbee [command] --help`** and the [ScrapingBee API documentation](https://www.scrapingbee.com/documentation/).
108
109
 
@@ -110,16 +111,17 @@ scrapingbee [command] [arguments] [options]
110
111
 
111
112
  - **AI extraction:** `--ai-extract-rules '{"price": "product price", "title": "product name"}'` pulls structured data from any page using natural language — no CSS selectors needed. Works with `scrape`, `crawl`, and batch mode.
112
113
  - **CSS/XPath extraction:** `--extract-rules '{"title": "h1", "price": ".price"}'` for consistent, cheaper production scraping. Find selectors in browser DevTools.
113
- - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`.
114
+ - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`. Use `--fields` to filter JSON output keys; supports dot notation (e.g. `--fields product.title,product.price`).
115
+ - **Smart Extract:** `--smart-extract` extracts data from any format (JSON, HTML, XML, CSV, Markdown) using a path expression. Auto-detects format. Supports slicing, regex filtering, and JSON schema output.
114
116
  - **Update CSV:** `--update-csv` fetches fresh data and updates the input CSV in-place. Ideal for daily price tracking, inventory monitoring, or any dataset that needs periodic refresh.
115
117
  - **Crawl with filtering:** `--include-pattern`, `--exclude-pattern` control which links to follow. `--save-pattern` only saves pages matching a regex (others are visited for link discovery but not saved).
116
- - **Output formats:** `--output-format ndjson` streams results as JSON lines; `--output-format csv` writes a single CSV. Default `files` writes individual files.
118
+ - **Output formats:** `--output-format` accepts `ndjson` (streams results as JSON lines) or `csv` (writes a single CSV) — these are the only valid values. Default (no flag) writes individual files per item into `--output-dir`.
117
119
  - **CSV input:** `--input-file products.csv --input-column url` reads URLs from a CSV column.
118
120
  - **Export:** `scrapingbee export --input-dir batch/ --format csv --flatten --columns "title,price"` merges batch output with nested JSON flattening and column selection.
119
121
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
120
122
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
121
123
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
122
- - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/).
124
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/). Running `scrapingbee --scraping-config NAME` (without a subcommand) auto-routes to `scrape`.
123
125
 
124
126
  ### Examples
125
127
 
@@ -134,6 +136,11 @@ scrapingbee export --input-dir products --format csv --flatten --columns "name,p
134
136
  scrapingbee scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "current price"}'
135
137
  scrapingbee schedule --every 1d --name price-tracker scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "price"}'
136
138
  scrapingbee schedule --list
139
+
140
+ # Smart Extract — pull fields from any format with a path expression
141
+ scrapingbee google "pizza new york" --smart-extract 'organic_results[0:3].title'
142
+ scrapingbee scrape "https://example.com" --smart-extract '...a[href=/mailto/].text'
143
+ scrapingbee scrape "https://example.com" --smart-extract '{"titles": "...h1", "links": "...href[0:5]"}'
137
144
  ```
138
145
 
139
146
  ## Security
@@ -149,7 +156,7 @@ For advanced features setup, see the Security section in our [CLI documentation]
149
156
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
150
157
  - **[Advanced usage examples](docs/advanced-usage.md)** – Shell piping, command chaining, batch workflows, monitoring scripts, NDJSON streaming, screenshots, Google search patterns, LLM chunking, and more.
151
158
  - **[ScrapingBee API documentation](https://www.scrapingbee.com/documentation/)** – Parameters, response formats, credit costs, and best practices.
152
- - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/skills/scrapingbee-cli) and [Claude Plugin](.claude-plugin/) for agent use with file-based output and security rules.
159
+ - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/plugins/scrapingbee-cli/skills/scrapingbee-cli) and [Claude Plugin](plugins/scrapingbee-cli/.claude-plugin/) for agent use with file-based output and security rules.
153
160
 
154
161
  ## Testing
155
162
 
@@ -44,7 +44,7 @@ scrapingbee [command] [arguments] [options]
44
44
  - **`scrapingbee --help`** – List all commands.
45
45
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
46
46
 
47
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
47
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--overwrite`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--smart-extract`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
48
48
 
49
49
  **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
50
50
 
@@ -64,8 +64,9 @@ scrapingbee [command] [arguments] [options]
64
64
  | `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
65
65
  | `export` | Merge batch/crawl output to ndjson, txt, or csv (with --flatten, --columns) |
66
66
  | `schedule` | Schedule commands via cron (--name, --list, --stop) |
67
+ | `tutorial` | Interactive step-by-step guide to CLI features (`--chapter N`, `--reset`, `--list`, `--output-dir`) |
67
68
 
68
- **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format` to choose between `files` (default), `csv`, or `ndjson` streaming. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption.
69
+ **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format csv` or `--output-format ndjson` to stream all results to a single file (or stdout) instead of individual files. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption. Run bare `scrapingbee --resume` to discover incomplete batches in the current directory.
69
70
 
70
71
  **Parameters and options:** Use space-separated values (e.g. `--render-js false`), not `--option=value`. For full parameter lists, response formats, and credit costs, see **`scrapingbee [command] --help`** and the [ScrapingBee API documentation](https://www.scrapingbee.com/documentation/).
71
72
 
@@ -73,16 +74,17 @@ scrapingbee [command] [arguments] [options]
73
74
 
74
75
  - **AI extraction:** `--ai-extract-rules '{"price": "product price", "title": "product name"}'` pulls structured data from any page using natural language — no CSS selectors needed. Works with `scrape`, `crawl`, and batch mode.
75
76
  - **CSS/XPath extraction:** `--extract-rules '{"title": "h1", "price": ".price"}'` for consistent, cheaper production scraping. Find selectors in browser DevTools.
76
- - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`.
77
+ - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`. Use `--fields` to filter JSON output keys; supports dot notation (e.g. `--fields product.title,product.price`).
78
+ - **Smart Extract:** `--smart-extract` extracts data from any format (JSON, HTML, XML, CSV, Markdown) using a path expression. Auto-detects format. Supports slicing, regex filtering, and JSON schema output.
77
79
  - **Update CSV:** `--update-csv` fetches fresh data and updates the input CSV in-place. Ideal for daily price tracking, inventory monitoring, or any dataset that needs periodic refresh.
78
80
  - **Crawl with filtering:** `--include-pattern`, `--exclude-pattern` control which links to follow. `--save-pattern` only saves pages matching a regex (others are visited for link discovery but not saved).
79
- - **Output formats:** `--output-format ndjson` streams results as JSON lines; `--output-format csv` writes a single CSV. Default `files` writes individual files.
81
+ - **Output formats:** `--output-format` accepts `ndjson` (streams results as JSON lines) or `csv` (writes a single CSV) — these are the only valid values. Default (no flag) writes individual files per item into `--output-dir`.
80
82
  - **CSV input:** `--input-file products.csv --input-column url` reads URLs from a CSV column.
81
83
  - **Export:** `scrapingbee export --input-dir batch/ --format csv --flatten --columns "title,price"` merges batch output with nested JSON flattening and column selection.
82
84
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
83
85
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
84
86
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
85
- - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/).
87
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/). Running `scrapingbee --scraping-config NAME` (without a subcommand) auto-routes to `scrape`.
86
88
 
87
89
  ### Examples
88
90
 
@@ -97,6 +99,11 @@ scrapingbee export --input-dir products --format csv --flatten --columns "name,p
97
99
  scrapingbee scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "current price"}'
98
100
  scrapingbee schedule --every 1d --name price-tracker scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "price"}'
99
101
  scrapingbee schedule --list
102
+
103
+ # Smart Extract — pull fields from any format with a path expression
104
+ scrapingbee google "pizza new york" --smart-extract 'organic_results[0:3].title'
105
+ scrapingbee scrape "https://example.com" --smart-extract '...a[href=/mailto/].text'
106
+ scrapingbee scrape "https://example.com" --smart-extract '{"titles": "...h1", "links": "...href[0:5]"}'
100
107
  ```
101
108
 
102
109
  ## Security
@@ -112,7 +119,7 @@ For advanced features setup, see the Security section in our [CLI documentation]
112
119
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
113
120
  - **[Advanced usage examples](docs/advanced-usage.md)** – Shell piping, command chaining, batch workflows, monitoring scripts, NDJSON streaming, screenshots, Google search patterns, LLM chunking, and more.
114
121
  - **[ScrapingBee API documentation](https://www.scrapingbee.com/documentation/)** – Parameters, response formats, credit costs, and best practices.
115
- - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/skills/scrapingbee-cli) and [Claude Plugin](.claude-plugin/) for agent use with file-based output and security rules.
122
+ - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/plugins/scrapingbee-cli/skills/scrapingbee-cli) and [Claude Plugin](plugins/scrapingbee-cli/.claude-plugin/) for agent use with file-based output and security rules.
116
123
 
117
124
  ## Testing
118
125
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scrapingbee-cli"
7
- version = "1.3.1"
7
+ version = "1.4.0"
8
8
  description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -90,3 +90,7 @@ markers = [
90
90
  "integration: marks tests that call the live API (deselect with '-m \"not integration\"')",
91
91
  ]
92
92
  addopts = "-v --tb=short"
93
+ filterwarnings = [
94
+ "ignore::RuntimeWarning:cssselect",
95
+ "ignore:coroutine.*was never awaited:RuntimeWarning",
96
+ ]
@@ -0,0 +1,29 @@
1
+ """ScrapingBee CLI - Command-line client for the ScrapingBee API."""
2
+
3
+ import platform
4
+ import sys
5
+
6
+ __version__ = "1.4.0"
7
+
8
+
9
+ def user_agent_headers() -> dict[str, str]:
10
+ """Build structured User-Agent headers for API requests.
11
+
12
+ Returns a dict of headers:
13
+ User-Agent: ScrapingBee/CLI
14
+ User-Agent-Client: scrapingbee-cli
15
+ User-Agent-Client-Version: 1.4.0
16
+ User-Agent-Environment: python
17
+ User-Agent-Environment-Version: 3.14.2
18
+ User-Agent-OS: Darwin arm64
19
+ """
20
+ py = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
21
+ os_info = f"{platform.system()} {platform.machine()}"
22
+ return {
23
+ "User-Agent": "ScrapingBee/CLI",
24
+ "User-Agent-Client": "scrapingbee-cli",
25
+ "User-Agent-Client-Version": __version__,
26
+ "User-Agent-Environment": "python",
27
+ "User-Agent-Environment-Version": py,
28
+ "User-Agent-OS": os_info,
29
+ }
@@ -35,18 +35,55 @@ def log_exec(
35
35
  pass
36
36
 
37
37
 
38
- def read_audit_log(n: int = 50) -> str:
39
- """Read the last N lines of the audit log."""
38
+ def _parse_timestamp(line: str) -> datetime | None:
39
+ """Extract the ISO timestamp from the start of an audit log line."""
40
+ parts = line.split(" | ", 1)
41
+ if not parts:
42
+ return None
43
+ try:
44
+ return datetime.fromisoformat(parts[0].strip())
45
+ except (ValueError, IndexError):
46
+ return None
47
+
48
+
49
+ def read_audit_log(
50
+ n: int = 50,
51
+ since: datetime | None = None,
52
+ until: datetime | None = None,
53
+ ) -> str:
54
+ """Read audit log entries.
55
+
56
+ Args:
57
+ n: Maximum number of lines to return (from the end). Ignored if since/until is set.
58
+ since: Only return entries at or after this time.
59
+ until: Only return entries at or before this time.
60
+ """
40
61
  if not AUDIT_LOG_PATH.is_file():
41
62
  return "No audit log found."
42
63
  try:
43
64
  with open(AUDIT_LOG_PATH, encoding="utf-8") as f:
44
65
  lines = f.readlines()
45
- recent = lines[-n:] if len(lines) > n else lines
46
- return "".join(recent)
47
66
  except OSError:
48
67
  return "Could not read audit log."
49
68
 
69
+ if since or until:
70
+ filtered = []
71
+ for line in lines:
72
+ ts = _parse_timestamp(line)
73
+ if ts is None:
74
+ continue
75
+ if since and ts < since:
76
+ continue
77
+ if until and ts > until:
78
+ continue
79
+ filtered.append(line)
80
+ if not filtered:
81
+ return "No entries found in the specified time range."
82
+ return "".join(filtered)
83
+
84
+ recent = lines[-n:] if len(lines) > n else lines
85
+ return "".join(recent)
86
+
50
87
 
51
88
  def _rotate_if_needed() -> None:
52
89
  """Keep only the last MAX_LINES entries."""