scrapingbee-cli 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {scrapingbee_cli-1.3.0/src/scrapingbee_cli.egg-info → scrapingbee_cli-1.4.0}/PKG-INFO +16 -6
  2. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/README.md +15 -5
  3. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/pyproject.toml +5 -1
  4. scrapingbee_cli-1.4.0/src/scrapingbee_cli/__init__.py +29 -0
  5. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/audit.py +41 -4
  6. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/batch.py +389 -62
  7. scrapingbee_cli-1.4.0/src/scrapingbee_cli/cli.py +199 -0
  8. scrapingbee_cli-1.4.0/src/scrapingbee_cli/cli_utils.py +1606 -0
  9. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/client.py +10 -2
  10. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/__init__.py +2 -0
  11. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/amazon.py +24 -15
  12. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/auth.py +88 -15
  13. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/chatgpt.py +9 -5
  14. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/crawl.py +103 -4
  15. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/export.py +112 -20
  16. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/fast_search.py +11 -7
  17. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/google.py +13 -8
  18. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/schedule.py +36 -4
  19. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/scrape.py +140 -39
  20. scrapingbee_cli-1.4.0/src/scrapingbee_cli/commands/tutorial.py +135 -0
  21. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/unsafe.py +52 -2
  22. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/usage.py +6 -3
  23. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/walmart.py +41 -17
  24. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/commands/youtube.py +31 -18
  25. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/crawl.py +57 -6
  26. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/exec_gate.py +50 -5
  27. scrapingbee_cli-1.4.0/src/scrapingbee_cli/extract.py +482 -0
  28. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/__init__.py +1 -0
  29. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/runner.py +925 -0
  30. scrapingbee_cli-1.4.0/src/scrapingbee_cli/tutorial/steps.py +712 -0
  31. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0/src/scrapingbee_cli.egg-info}/PKG-INFO +16 -6
  32. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/SOURCES.txt +6 -1
  33. scrapingbee_cli-1.3.0/src/scrapingbee_cli/__init__.py +0 -16
  34. scrapingbee_cli-1.3.0/src/scrapingbee_cli/cli.py +0 -101
  35. scrapingbee_cli-1.3.0/src/scrapingbee_cli/cli_utils.py +0 -674
  36. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/LICENSE +0 -0
  37. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/setup.cfg +0 -0
  38. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/config.py +0 -0
  39. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli/credits.py +0 -0
  40. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/dependency_links.txt +0 -0
  41. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/entry_points.txt +0 -0
  42. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/requires.txt +0 -0
  43. {scrapingbee_cli-1.3.0 → scrapingbee_cli-1.4.0}/src/scrapingbee_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapingbee-cli
3
- Version: 1.3.0
3
+ Version: 1.4.0
4
4
  Summary: Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal.
5
5
  Author: ScrapingBee
6
6
  License-Expression: MIT
@@ -81,7 +81,9 @@ scrapingbee [command] [arguments] [options]
81
81
  - **`scrapingbee --help`** – List all commands.
82
82
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
83
83
 
84
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
84
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--overwrite`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--smart-extract`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
85
+
86
+ **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
85
87
 
86
88
  ### Commands
87
89
 
@@ -99,8 +101,9 @@ scrapingbee [command] [arguments] [options]
99
101
  | `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
100
102
  | `export` | Merge batch/crawl output to ndjson, txt, or csv (with --flatten, --columns) |
101
103
  | `schedule` | Schedule commands via cron (--name, --list, --stop) |
104
+ | `tutorial` | Interactive step-by-step guide to CLI features (`--chapter N`, `--reset`, `--list`, `--output-dir`) |
102
105
 
103
- **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format` to choose between `files` (default), `csv`, or `ndjson` streaming. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption.
106
+ **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format csv` or `--output-format ndjson` to stream all results to a single file (or stdout) instead of individual files. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption. Run bare `scrapingbee --resume` to discover incomplete batches in the current directory.
104
107
 
105
108
  **Parameters and options:** Use space-separated values (e.g. `--render-js false`), not `--option=value`. For full parameter lists, response formats, and credit costs, see **`scrapingbee [command] --help`** and the [ScrapingBee API documentation](https://www.scrapingbee.com/documentation/).
106
109
 
@@ -108,15 +111,17 @@ scrapingbee [command] [arguments] [options]
108
111
 
109
112
  - **AI extraction:** `--ai-extract-rules '{"price": "product price", "title": "product name"}'` pulls structured data from any page using natural language — no CSS selectors needed. Works with `scrape`, `crawl`, and batch mode.
110
113
  - **CSS/XPath extraction:** `--extract-rules '{"title": "h1", "price": ".price"}'` for consistent, cheaper production scraping. Find selectors in browser DevTools.
111
- - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`.
114
+ - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`. Use `--fields` to filter JSON output keys; supports dot notation (e.g. `--fields product.title,product.price`).
115
+ - **Smart Extract:** `--smart-extract` extracts data from any format (JSON, HTML, XML, CSV, Markdown) using a path expression. Auto-detects format. Supports slicing, regex filtering, and JSON schema output.
112
116
  - **Update CSV:** `--update-csv` fetches fresh data and updates the input CSV in-place. Ideal for daily price tracking, inventory monitoring, or any dataset that needs periodic refresh.
113
117
  - **Crawl with filtering:** `--include-pattern`, `--exclude-pattern` control which links to follow. `--save-pattern` only saves pages matching a regex (others are visited for link discovery but not saved).
114
- - **Output formats:** `--output-format ndjson` streams results as JSON lines; `--output-format csv` writes a single CSV. Default `files` writes individual files.
118
+ - **Output formats:** `--output-format` accepts `ndjson` (streams results as JSON lines) or `csv` (writes a single CSV) — these are the only valid values. Default (no flag) writes individual files per item into `--output-dir`.
115
119
  - **CSV input:** `--input-file products.csv --input-column url` reads URLs from a CSV column.
116
120
  - **Export:** `scrapingbee export --input-dir batch/ --format csv --flatten --columns "title,price"` merges batch output with nested JSON flattening and column selection.
117
121
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
118
122
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
119
123
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
124
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/). Running `scrapingbee --scraping-config NAME` (without a subcommand) auto-routes to `scrape`.
120
125
 
121
126
  ### Examples
122
127
 
@@ -131,6 +136,11 @@ scrapingbee export --input-dir products --format csv --flatten --columns "name,p
131
136
  scrapingbee scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "current price"}'
132
137
  scrapingbee schedule --every 1d --name price-tracker scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "price"}'
133
138
  scrapingbee schedule --list
139
+
140
+ # Smart Extract — pull fields from any format with a path expression
141
+ scrapingbee google "pizza new york" --smart-extract 'organic_results[0:3].title'
142
+ scrapingbee scrape "https://example.com" --smart-extract '...a[href=/mailto/].text'
143
+ scrapingbee scrape "https://example.com" --smart-extract '{"titles": "...h1", "links": "...href[0:5]"}'
134
144
  ```
135
145
 
136
146
  ## Security
@@ -146,7 +156,7 @@ For advanced features setup, see the Security section in our [CLI documentation]
146
156
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
147
157
  - **[Advanced usage examples](docs/advanced-usage.md)** – Shell piping, command chaining, batch workflows, monitoring scripts, NDJSON streaming, screenshots, Google search patterns, LLM chunking, and more.
148
158
  - **[ScrapingBee API documentation](https://www.scrapingbee.com/documentation/)** – Parameters, response formats, credit costs, and best practices.
149
- - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/skills/scrapingbee-cli) and [Claude Plugin](.claude-plugin/) for agent use with file-based output and security rules.
159
+ - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/plugins/scrapingbee-cli/skills/scrapingbee-cli) and [Claude Plugin](plugins/scrapingbee-cli/.claude-plugin/) for agent use with file-based output and security rules.
150
160
 
151
161
  ## Testing
152
162
 
@@ -44,7 +44,9 @@ scrapingbee [command] [arguments] [options]
44
44
  - **`scrapingbee --help`** – List all commands.
45
45
  - **`scrapingbee [command] --help`** – Options and parameters for that command.
46
46
 
47
- **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
47
+ **Options are per-command.** Each command has its own set of options — run `scrapingbee [command] --help` to see them. Common options across batch-capable commands include `--output-file`, `--output-dir`, `--input-file`, `--input-column`, `--concurrency`, `--output-format`, `--overwrite`, `--retries`, `--backoff`, `--resume`, `--update-csv`, `--no-progress`, `--extract-field`, `--fields`, `--smart-extract`, `--deduplicate`, `--sample`, `--post-process`, `--on-complete`, `--scraping-config`, and `--verbose`. For details, see the [documentation](https://www.scrapingbee.com/documentation/).
48
+
49
+ **Parameter values:** Choice parameters accept both hyphens and underscores interchangeably (e.g. `--sort-by price-low` and `--sort-by price_low` both work).
48
50
 
49
51
  ### Commands
50
52
 
@@ -62,8 +64,9 @@ scrapingbee [command] [arguments] [options]
62
64
  | `chatgpt` | ChatGPT API (`--search true` for web-enhanced responses) |
63
65
  | `export` | Merge batch/crawl output to ndjson, txt, or csv (with --flatten, --columns) |
64
66
  | `schedule` | Schedule commands via cron (--name, --list, --stop) |
67
+ | `tutorial` | Interactive step-by-step guide to CLI features (`--chapter N`, `--reset`, `--list`, `--output-dir`) |
65
68
 
66
- **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format` to choose between `files` (default), `csv`, or `ndjson` streaming. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption.
69
+ **Batch mode:** Commands that take a single input support `--input-file` (one line per input, or `.csv` with `--input-column`) and `--output-dir`. Use `--output-format csv` or `--output-format ndjson` to stream all results to a single file (or stdout) instead of individual files. Add `--deduplicate` to remove duplicate URLs, `--sample N` to test on a subset, or `--post-process 'jq .title'` to transform each result. Use `--resume` to skip already-completed items after interruption. Run bare `scrapingbee --resume` to discover incomplete batches in the current directory.
67
70
 
68
71
  **Parameters and options:** Use space-separated values (e.g. `--render-js false`), not `--option=value`. For full parameter lists, response formats, and credit costs, see **`scrapingbee [command] --help`** and the [ScrapingBee API documentation](https://www.scrapingbee.com/documentation/).
69
72
 
@@ -71,15 +74,17 @@ scrapingbee [command] [arguments] [options]
71
74
 
72
75
  - **AI extraction:** `--ai-extract-rules '{"price": "product price", "title": "product name"}'` pulls structured data from any page using natural language — no CSS selectors needed. Works with `scrape`, `crawl`, and batch mode.
73
76
  - **CSS/XPath extraction:** `--extract-rules '{"title": "h1", "price": ".price"}'` for consistent, cheaper production scraping. Find selectors in browser DevTools.
74
- - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`.
77
+ - **Pipelines:** Chain commands with `--extract-field` — e.g. `google QUERY --extract-field organic_results.url > urls.txt` then `scrape --input-file urls.txt`. Use `--fields` to filter JSON output keys; supports dot notation (e.g. `--fields product.title,product.price`).
78
+ - **Smart Extract:** `--smart-extract` extracts data from any format (JSON, HTML, XML, CSV, Markdown) using a path expression. Auto-detects format. Supports slicing, regex filtering, and JSON schema output.
75
79
  - **Update CSV:** `--update-csv` fetches fresh data and updates the input CSV in-place. Ideal for daily price tracking, inventory monitoring, or any dataset that needs periodic refresh.
76
80
  - **Crawl with filtering:** `--include-pattern`, `--exclude-pattern` control which links to follow. `--save-pattern` only saves pages matching a regex (others are visited for link discovery but not saved).
77
- - **Output formats:** `--output-format ndjson` streams results as JSON lines; `--output-format csv` writes a single CSV. Default `files` writes individual files.
81
+ - **Output formats:** `--output-format` accepts `ndjson` (streams results as JSON lines) or `csv` (writes a single CSV) — these are the only valid values. Default (no flag) writes individual files per item into `--output-dir`.
78
82
  - **CSV input:** `--input-file products.csv --input-column url` reads URLs from a CSV column.
79
83
  - **Export:** `scrapingbee export --input-dir batch/ --format csv --flatten --columns "title,price"` merges batch output with nested JSON flattening and column selection.
80
84
  - **Scheduling:** `scrapingbee schedule --every 1d --name prices scrape --input-file products.csv --update-csv` registers a cron job. Use `--list`, `--stop NAME`, or `--stop all`.
81
85
  - **Deduplication & sampling:** `--deduplicate` removes duplicate URLs; `--sample 100` processes only 100 random items.
82
86
  - **RAG chunking:** `scrape --chunk-size 500 --chunk-overlap 50 --return-page-markdown true` outputs NDJSON chunks ready for vector DB ingestion.
87
+ - **Scraping configurations:** `--scraping-config "My-Config"` applies a pre-saved configuration from your ScrapingBee dashboard. Inline options override config settings. Create configurations in the [request builder](https://app.scrapingbee.com/). Running `scrapingbee --scraping-config NAME` (without a subcommand) auto-routes to `scrape`.
83
88
 
84
89
  ### Examples
85
90
 
@@ -94,6 +99,11 @@ scrapingbee export --input-dir products --format csv --flatten --columns "name,p
94
99
  scrapingbee scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "current price"}'
95
100
  scrapingbee schedule --every 1d --name price-tracker scrape --input-file products.csv --input-column url --update-csv --ai-extract-rules '{"price": "price"}'
96
101
  scrapingbee schedule --list
102
+
103
+ # Smart Extract — pull fields from any format with a path expression
104
+ scrapingbee google "pizza new york" --smart-extract 'organic_results[0:3].title'
105
+ scrapingbee scrape "https://example.com" --smart-extract '...a[href=/mailto/].text'
106
+ scrapingbee scrape "https://example.com" --smart-extract '{"titles": "...h1", "links": "...href[0:5]"}'
97
107
  ```
98
108
 
99
109
  ## Security
@@ -109,7 +119,7 @@ For advanced features setup, see the Security section in our [CLI documentation]
109
119
  - **[CLI Documentation](https://www.scrapingbee.com/documentation/cli/)** – Full CLI reference with pipelines, parameters, and examples.
110
120
  - **[Advanced usage examples](docs/advanced-usage.md)** – Shell piping, command chaining, batch workflows, monitoring scripts, NDJSON streaming, screenshots, Google search patterns, LLM chunking, and more.
111
121
  - **[ScrapingBee API documentation](https://www.scrapingbee.com/documentation/)** – Parameters, response formats, credit costs, and best practices.
112
- - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/skills/scrapingbee-cli) and [Claude Plugin](.claude-plugin/) for agent use with file-based output and security rules.
122
+ - **Claude / AI agents:** This repo includes a [Claude Skill](https://github.com/ScrapingBee/scrapingbee-cli/tree/main/plugins/scrapingbee-cli/skills/scrapingbee-cli) and [Claude Plugin](plugins/scrapingbee-cli/.claude-plugin/) for agent use with file-based output and security rules.
113
123
 
114
124
  ## Testing
115
125
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scrapingbee-cli"
7
- version = "1.3.0"
7
+ version = "1.4.0"
8
8
  description = "Command-line client for the ScrapingBee API: scrape pages (single or batch), crawl sites, check usage/credits, and use Google Search, Fast Search, Amazon, Walmart, YouTube, and ChatGPT from the terminal."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -90,3 +90,7 @@ markers = [
90
90
  "integration: marks tests that call the live API (deselect with '-m \"not integration\"')",
91
91
  ]
92
92
  addopts = "-v --tb=short"
93
+ filterwarnings = [
94
+ "ignore::RuntimeWarning:cssselect",
95
+ "ignore:coroutine.*was never awaited:RuntimeWarning",
96
+ ]
@@ -0,0 +1,29 @@
1
+ """ScrapingBee CLI - Command-line client for the ScrapingBee API."""
2
+
3
+ import platform
4
+ import sys
5
+
6
+ __version__ = "1.4.0"
7
+
8
+
9
+ def user_agent_headers() -> dict[str, str]:
10
+ """Build structured User-Agent headers for API requests.
11
+
12
+ Returns a dict of headers:
13
+ User-Agent: ScrapingBee/CLI
14
+ User-Agent-Client: scrapingbee-cli
15
+ User-Agent-Client-Version: 1.4.0
16
+ User-Agent-Environment: python
17
+ User-Agent-Environment-Version: 3.14.2
18
+ User-Agent-OS: Darwin arm64
19
+ """
20
+ py = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
21
+ os_info = f"{platform.system()} {platform.machine()}"
22
+ return {
23
+ "User-Agent": "ScrapingBee/CLI",
24
+ "User-Agent-Client": "scrapingbee-cli",
25
+ "User-Agent-Client-Version": __version__,
26
+ "User-Agent-Environment": "python",
27
+ "User-Agent-Environment-Version": py,
28
+ "User-Agent-OS": os_info,
29
+ }
@@ -35,18 +35,55 @@ def log_exec(
35
35
  pass
36
36
 
37
37
 
38
- def read_audit_log(n: int = 50) -> str:
39
- """Read the last N lines of the audit log."""
38
+ def _parse_timestamp(line: str) -> datetime | None:
39
+ """Extract the ISO timestamp from the start of an audit log line."""
40
+ parts = line.split(" | ", 1)
41
+ if not parts:
42
+ return None
43
+ try:
44
+ return datetime.fromisoformat(parts[0].strip())
45
+ except (ValueError, IndexError):
46
+ return None
47
+
48
+
49
+ def read_audit_log(
50
+ n: int = 50,
51
+ since: datetime | None = None,
52
+ until: datetime | None = None,
53
+ ) -> str:
54
+ """Read audit log entries.
55
+
56
+ Args:
57
+ n: Maximum number of lines to return (from the end). Ignored if since/until is set.
58
+ since: Only return entries at or after this time.
59
+ until: Only return entries at or before this time.
60
+ """
40
61
  if not AUDIT_LOG_PATH.is_file():
41
62
  return "No audit log found."
42
63
  try:
43
64
  with open(AUDIT_LOG_PATH, encoding="utf-8") as f:
44
65
  lines = f.readlines()
45
- recent = lines[-n:] if len(lines) > n else lines
46
- return "".join(recent)
47
66
  except OSError:
48
67
  return "Could not read audit log."
49
68
 
69
+ if since or until:
70
+ filtered = []
71
+ for line in lines:
72
+ ts = _parse_timestamp(line)
73
+ if ts is None:
74
+ continue
75
+ if since and ts < since:
76
+ continue
77
+ if until and ts > until:
78
+ continue
79
+ filtered.append(line)
80
+ if not filtered:
81
+ return "No entries found in the specified time range."
82
+ return "".join(filtered)
83
+
84
+ recent = lines[-n:] if len(lines) > n else lines
85
+ return "".join(recent)
86
+
50
87
 
51
88
  def _rotate_if_needed() -> None:
52
89
  """Keep only the last MAX_LINES entries."""