recurl-cli 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recurl/__init__.py +60 -0
- recurl/bin/LICENSE +21 -0
- recurl/bin/README.md +225 -0
- recurl/bin/bin/recurl +0 -0
- recurl/bin/bin/recurld +0 -0
- recurl/bin/install.sh +229 -0
- recurl/cli.py +20 -0
- recurl_cli-0.1.2.dist-info/METADATA +226 -0
- recurl_cli-0.1.2.dist-info/RECORD +12 -0
- recurl_cli-0.1.2.dist-info/WHEEL +5 -0
- recurl_cli-0.1.2.dist-info/entry_points.txt +3 -0
- recurl_cli-0.1.2.dist-info/top_level.txt +1 -0
recurl/__init__.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
recurl - Drop-in curl replacement with automatic anti-bot bypass
|
|
3
|
+
|
|
4
|
+
This is a thin Python wrapper that delegates to the platform-specific
|
|
5
|
+
binary downloaded at install time.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import platform
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _find_binary(name: str) -> str:
|
|
16
|
+
"""Locate the downloaded recurl binary."""
|
|
17
|
+
# 1. Check alongside this package
|
|
18
|
+
package_dir = Path(__file__).parent
|
|
19
|
+
bin_path = package_dir / "bin" / name
|
|
20
|
+
if bin_path.exists():
|
|
21
|
+
return str(bin_path)
|
|
22
|
+
|
|
23
|
+
# 2. Check in PATH
|
|
24
|
+
found = shutil.which(name)
|
|
25
|
+
if found:
|
|
26
|
+
return found
|
|
27
|
+
|
|
28
|
+
raise FileNotFoundError(
|
|
29
|
+
f"Could not find {name} binary. "
|
|
30
|
+
"Try reinstalling: pip install --force-reinstall recurl-cli"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def run(args: "list[str] | None" = None) -> int:
    """Run the ``recurl`` binary and return its exit code.

    The child process inherits this process's standard streams; nothing is
    captured.

    Args:
        args: Arguments to pass to the binary
            (e.g., ``["-s", "https://example.com"]``).
            If None, uses ``sys.argv[1:]``.

    Returns:
        Exit code from the recurl process.

    Raises:
        FileNotFoundError: If the ``recurl`` binary cannot be located.
    """
    # The annotation above is a string so it is not evaluated when the
    # function is defined; a bare ``list[str]`` raises TypeError on
    # Python 3.8, which the package metadata declares as supported.
    binary = _find_binary("recurl")
    forwarded = sys.argv[1:] if args is None else args
    # Unpacking accepts any sequence of strings, not only a list.
    completed = subprocess.run([binary, *forwarded])
    return completed.returncode
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def run_daemon(args: "list[str] | None" = None) -> int:
    """Run the ``recurld`` binary and return its exit code.

    The child process inherits this process's standard streams; nothing is
    captured.

    Args:
        args: Arguments to pass to the binary. If None, uses
            ``sys.argv[1:]``.

    Returns:
        Exit code from the recurld process.

    Raises:
        FileNotFoundError: If the ``recurld`` binary cannot be located.
    """
    # The annotation above is a string so it is not evaluated when the
    # function is defined; a bare ``list[str]`` raises TypeError on
    # Python 3.8, which the package metadata declares as supported.
    binary = _find_binary("recurld")
    forwarded = sys.argv[1:] if args is None else args
    # Unpacking accepts any sequence of strings, not only a list.
    completed = subprocess.run([binary, *forwarded])
    return completed.returncode
|
recurl/bin/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Neul Labs
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
recurl/bin/README.md
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# recurl
|
|
2
|
+
|
|
3
|
+
[crates.io](https://crates.io/crates/recurl)
|
|
4
|
+
[Documentation](https://docs.neullabs.com/recurl)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[CI](https://github.com/neul-labs/recurl/actions)
|
|
7
|
+
|
|
8
|
+
**curl that just works.** Drop-in replacement with automatic anti-bot bypass.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Why recurl?
|
|
13
|
+
|
|
14
|
+
You're scraping a website. It works in your browser but `curl` gets blocked. You try different headers, user agents, maybe even `curl-impersonate`. Still blocked. Now you're writing Puppeteer scripts...
|
|
15
|
+
|
|
16
|
+
**recurl fixes this.** It runs real curl, detects when you're blocked, and automatically escalates through impersonation and headless browser rendering. Same curl syntax you know. No code changes.
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# This just works, even on Cloudflare-protected sites
|
|
20
|
+
recurl https://protected-site.com/api/data
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# npm / npx
|
|
27
|
+
npm install -g recurl-cli
|
|
28
|
+
|
|
29
|
+
# PyPI
|
|
30
|
+
pip install recurl-cli
|
|
31
|
+
|
|
32
|
+
# Homebrew (macOS / Linux)
|
|
33
|
+
brew tap neul-labs/tap
|
|
34
|
+
brew install recurl
|
|
35
|
+
|
|
36
|
+
# Cargo (Rust)
|
|
37
|
+
cargo install recurl
|
|
38
|
+
|
|
39
|
+
# From source
|
|
40
|
+
git clone https://github.com/neul-labs/recurl
|
|
41
|
+
cd recurl && cargo build --release
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Use it exactly like curl
|
|
48
|
+
recurl https://example.com
|
|
49
|
+
|
|
50
|
+
# Or alias it as curl for seamless usage
|
|
51
|
+
alias curl=recurl
|
|
52
|
+
curl https://api.example.com/data
|
|
53
|
+
|
|
54
|
+
# Force JS rendering for heavy protection
|
|
55
|
+
recurl --recurl-js https://heavily-protected-site.com
|
|
56
|
+
|
|
57
|
+
# Debug mode to see what's happening
|
|
58
|
+
recurl --recurl-debug https://example.com
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## How It Works
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
curl (alias) --> recurl (shim)
|
|
65
|
+
|
|
|
66
|
+
+--> curl_engine --> success? done
|
|
67
|
+
|
|
|
68
|
+
+--> blocked? (403, 429, captcha, etc.)
|
|
69
|
+
|
|
|
70
|
+
+--> retry with impersonation (TLS fingerprint spoofing)
|
|
71
|
+
|
|
|
72
|
+
+--> retry with JS preflight (headless Chromium)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
1. **First try**: Runs real curl (fast, low overhead)
|
|
76
|
+
2. **If blocked**: Retries with TLS fingerprint impersonation
|
|
77
|
+
3. **Still blocked**: Renders page in headless Chromium, captures cookies/tokens, replays request
|
|
78
|
+
|
|
79
|
+
Chromium auto-downloads on first use. A background daemon (`recurld`) keeps it warm for fast subsequent requests.
|
|
80
|
+
|
|
81
|
+
## Bypass Coverage
|
|
82
|
+
|
|
83
|
+
recurl automatically handles:
|
|
84
|
+
|
|
85
|
+
| Provider | Detection Method |
|
|
86
|
+
|----------|------------------|
|
|
87
|
+
| Cloudflare | Bot Management, Turnstile, JS Challenge |
|
|
88
|
+
| Akamai | Bot Manager |
|
|
89
|
+
| PerimeterX | HUMAN Security |
|
|
90
|
+
| DataDome | Bot Protection |
|
|
91
|
+
| Imperva | Incapsula |
|
|
92
|
+
| Kasada | Bot Mitigation |
|
|
93
|
+
| AWS WAF | Bot Control |
|
|
94
|
+
| Shape/F5 | Bot Defense |
|
|
95
|
+
| Arkose Labs | FunCaptcha |
|
|
96
|
+
| hCaptcha | Challenge |
|
|
97
|
+
| reCAPTCHA | Challenge |
|
|
98
|
+
|
|
99
|
+
## CLI Reference
|
|
100
|
+
|
|
101
|
+
### recurl-specific flags
|
|
102
|
+
|
|
103
|
+
| Flag | Description |
|
|
104
|
+
|------|-------------|
|
|
105
|
+
| `--recurl-strict` | Disable fallback, pure curl passthrough |
|
|
106
|
+
| `--recurl-impersonate <profile>` | Force specific TLS fingerprint profile |
|
|
107
|
+
| `--recurl-js` | Force JS preflight (skip to Chromium) |
|
|
108
|
+
| `--recurl-js-rendered` | Return rendered DOM instead of raw response |
|
|
109
|
+
| `--recurl-js-wait <selector>` | Wait for CSS selector before capturing |
|
|
110
|
+
| `--recurl-js-timeout <ms>` | JS preflight timeout (default: 30000) |
|
|
111
|
+
| `--recurl-daemon on\|off` | Control background daemon usage |
|
|
112
|
+
| `--recurl-debug` | Show diagnostic output |
|
|
113
|
+
|
|
114
|
+
All standard curl flags work as expected.
|
|
115
|
+
|
|
116
|
+
### Environment Variables
|
|
117
|
+
|
|
118
|
+
| Variable | Description |
|
|
119
|
+
|----------|-------------|
|
|
120
|
+
| `RECURL_STRICT=1` | Same as `--recurl-strict` |
|
|
121
|
+
| `RECURL_DAEMON_IDLE_MS` | Daemon idle timeout (default: 60000) |
|
|
122
|
+
|
|
123
|
+
## Platform Support
|
|
124
|
+
|
|
125
|
+
| Platform | Arch | Impersonation | JS Preflight | Chromium Auto-Download |
|
|
126
|
+
|----------|------|:-------------:|:------------:|:----------------------:|
|
|
127
|
+
| Linux | x86_64 | Yes | Yes | Yes |
|
|
128
|
+
| Linux | aarch64 | Yes | Yes | Manual |
|
|
129
|
+
| macOS | Apple Silicon | Yes | Yes | Yes |
|
|
130
|
+
| macOS | Intel | Yes | Yes | Yes |
|
|
131
|
+
| Windows | x86_64 | No | Yes | Yes |
|
|
132
|
+
|
|
133
|
+
*Impersonation requires curl-impersonate (Linux/macOS only). Windows skips directly to JS preflight.*
|
|
134
|
+
|
|
135
|
+
## For Developers
|
|
136
|
+
|
|
137
|
+
### Building from Source
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Debug build
|
|
141
|
+
cargo build
|
|
142
|
+
|
|
143
|
+
# Release build (optimized)
|
|
144
|
+
cargo build --release
|
|
145
|
+
|
|
146
|
+
# Build with daemon support
|
|
147
|
+
cargo build --release --features daemon
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Architecture
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
src/
|
|
154
|
+
main.rs # CLI entry point, argument parsing
|
|
155
|
+
engine.rs # curl_engine execution layer
|
|
156
|
+
detection/ # Anti-bot pattern detection
|
|
157
|
+
mod.rs
|
|
158
|
+
patterns.rs
|
|
159
|
+
status.rs
|
|
160
|
+
impersonation/ # TLS fingerprint impersonation
|
|
161
|
+
mod.rs
|
|
162
|
+
js_preflight/ # Headless Chromium rendering
|
|
163
|
+
mod.rs
|
|
164
|
+
browser.rs
|
|
165
|
+
browser_config.rs
|
|
166
|
+
chromium.rs
|
|
167
|
+
cookies.rs
|
|
168
|
+
preflight_state.rs
|
|
169
|
+
stealth.rs
|
|
170
|
+
escalation.rs # EscalationEngine state machine
|
|
171
|
+
daemon/
|
|
172
|
+
main.rs # recurld daemon entry point
|
|
173
|
+
lifecycle.rs # DaemonLifecycle state machine
|
|
174
|
+
browser_state.rs # BrowserState state machine
|
|
175
|
+
pool.rs # Browser instance pooling
|
|
176
|
+
ipc.rs # IPC transport
|
|
177
|
+
protocol.rs # IPC message protocol
|
|
178
|
+
config.rs # Configuration & defaults
|
|
179
|
+
daemon_client.rs # Daemon client interface
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Running Tests
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Unit tests
|
|
186
|
+
cargo test
|
|
187
|
+
|
|
188
|
+
# All tests including integration
|
|
189
|
+
cargo test --all-features
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Documentation
|
|
193
|
+
|
|
194
|
+
- [Installation Guide](docs/installation.md) - Platform-specific setup
|
|
195
|
+
- [Architecture](docs/architecture.md) - System design deep-dive
|
|
196
|
+
- [CLI Reference](docs/cli.md) - Complete flag documentation
|
|
197
|
+
- [Escalation Layers](docs/layers.md) - How bypass works
|
|
198
|
+
- [Daemon](docs/daemon.md) - recurld configuration
|
|
199
|
+
- [Compliance Testing](docs/compliance.md) - curl compatibility
|
|
200
|
+
|
|
201
|
+
## Contributing
|
|
202
|
+
|
|
203
|
+
Contributions welcome! Please read the architecture docs first to understand the codebase structure.
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
# Fork and clone
|
|
207
|
+
git clone https://github.com/YOUR_USERNAME/recurl
|
|
208
|
+
cd recurl
|
|
209
|
+
|
|
210
|
+
# Create a branch
|
|
211
|
+
git checkout -b feature/your-feature
|
|
212
|
+
|
|
213
|
+
# Make changes, then test
|
|
214
|
+
cargo test
|
|
215
|
+
|
|
216
|
+
# Submit a PR
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
Built by [Neul Labs](https://github.com/neul-labs)
|
recurl/bin/bin/recurl
ADDED
|
Binary file
|
recurl/bin/bin/recurld
ADDED
|
Binary file
|
recurl/bin/install.sh
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# recurl installer for Linux and macOS
|
|
5
|
+
# Usage: curl -fsSL https://recurl.dev/install.sh | bash
|
|
6
|
+
|
|
7
|
+
VERSION="${RECURL_VERSION:-latest}"
|
|
8
|
+
INSTALL_DIR="${RECURL_INSTALL_DIR:-}"
|
|
9
|
+
GITHUB_REPO="neul-labs/recurl"
|
|
10
|
+
BASE_URL="https://github.com/${GITHUB_REPO}/releases"
|
|
11
|
+
|
|
12
|
+
# Colors
|
|
13
|
+
RED='\033[0;31m'
|
|
14
|
+
GREEN='\033[0;32m'
|
|
15
|
+
YELLOW='\033[0;33m'
|
|
16
|
+
BLUE='\033[0;34m'
|
|
17
|
+
NC='\033[0m' # No Color
|
|
18
|
+
|
|
19
|
+
# Logging helpers. Each prints a colored tag followed by the message.
# The message is printed with %s, so backslashes in it (e.g. in paths)
# are shown literally instead of being interpreted as escapes.
info() { printf '%b[info]%b %s\n' "$BLUE" "$NC" "$1"; }
success() { printf '%b[success]%b %s\n' "$GREEN" "$NC" "$1"; }
warn() { printf '%b[warn]%b %s\n' "$YELLOW" "$NC" "$1"; }
# error reports on stderr and exits with status 1. detect_os and detect_arch
# call it inside $(...); on stdout the message would be captured into the
# caller's variable and never shown to the user.
error() { printf '%b[error]%b %s\n' "$RED" "$NC" "$1" >&2; exit 1; }
|
|
23
|
+
|
|
24
|
+
# Detect OS
|
|
25
|
+
# Print the OS identifier used in release archive names ("linux" or
# "darwin"), based on `uname -s`. Any other kernel name is reported through
# error, which terminates the shell it runs in.
detect_os() {
    local kernel
    # A failing uname leaves the name empty, which lands in the
    # unsupported branch below.
    kernel="$(uname -s)" || true

    if [[ "$kernel" == Linux* ]]; then
        echo "linux"
    elif [[ "$kernel" == Darwin* ]]; then
        echo "darwin"
    else
        error "Unsupported OS: ${kernel}. Use Windows installer for Windows."
    fi
}
|
|
32
|
+
|
|
33
|
+
# Detect architecture
|
|
34
|
+
# Print the CPU architecture identifier used in release archive names,
# based on `uname -m`: x86_64/amd64 -> "x86_64", arm64/aarch64 -> "aarch64".
# Any other machine name is reported through error.
detect_arch() {
    local machine
    # A failing uname leaves the name empty, which lands in the
    # unsupported branch below.
    machine="$(uname -m)" || true

    if [[ "$machine" == "x86_64" || "$machine" == "amd64" ]]; then
        echo "x86_64"
    elif [[ "$machine" == "arm64" || "$machine" == "aarch64" ]]; then
        echo "aarch64"
    else
        error "Unsupported architecture: ${machine}"
    fi
}
|
|
41
|
+
|
|
42
|
+
# Detect shell config file
|
|
43
|
+
# Print the path of the startup file for the user's login shell, chosen by
# the base name of $SHELL:
#   bash -> ~/.bashrc, or ~/.bash_profile when only that file exists
#   zsh  -> ~/.zshrc
#   fish -> ~/.config/fish/config.fish
#   anything else (including an unset or empty SHELL) -> ~/.profile
detect_shell_config() {
    # ${SHELL:-} keeps `set -u` from aborting the script when SHELL is unset.
    local shell_name="${SHELL:-}"
    shell_name="${shell_name##*/}"

    case "$shell_name" in
        bash)
            # Prefer .bashrc; use .bash_profile only when it is the one
            # that exists. With neither present, default to .bashrc.
            if [[ ! -f "$HOME/.bashrc" && -f "$HOME/.bash_profile" ]]; then
                echo "$HOME/.bash_profile"
            else
                echo "$HOME/.bashrc"
            fi
            ;;
        zsh)
            echo "$HOME/.zshrc"
            ;;
        fish)
            echo "$HOME/.config/fish/config.fish"
            ;;
        *)
            echo "$HOME/.profile"
            ;;
    esac
}
|
|
68
|
+
|
|
69
|
+
# Check if command exists
|
|
70
|
+
# Succeed (status 0) when "$1" resolves to a command in the current shell;
# all output of the lookup is discarded.
has_command() {
    local candidate="$1"
    command -v "$candidate" > /dev/null 2>&1
}
|
|
73
|
+
|
|
74
|
+
# Download file
|
|
75
|
+
# Fetch URL "$1" into file "$2", using curl when available and wget
# otherwise. The function's status is that of the tool that ran; when
# neither tool is installed it reports through error.
download() {
    local src="$1"
    local dest="$2"

    if has_command curl; then
        curl -fsSL "$src" -o "$dest"
        return
    fi

    if has_command wget; then
        wget -q "$src" -O "$dest"
        return
    fi

    error "Neither curl nor wget found. Please install one of them."
}
|
|
87
|
+
|
|
88
|
+
# Get latest version from GitHub
|
|
89
|
+
# Print the tag name of the latest GitHub release by following the redirect
# from ${BASE_URL}/latest and taking the last path segment of the target.
# Prints nothing when the tag cannot be determined (or when neither curl
# nor wget is installed), so the caller can report a clear error.
get_latest_version() {
    local url="${BASE_URL}/latest"
    local effective

    if has_command curl; then
        # On failure leave the value empty rather than aborting here; the
        # caller checks for an empty version and prints an explicit error.
        effective=$(curl -fsSL -o /dev/null -w '%{url_effective}' "$url") || effective=""
        printf '%s\n' "${effective##*/}"
    elif has_command wget; then
        # sed instead of `grep -oP`: -P is a GNU extension that the grep
        # shipped with macOS does not support. tail keeps the final redirect
        # if more than one Location header is printed.
        wget -q -O /dev/null --server-response "$url" 2>&1 \
            | sed -n 's#.*[Ll]ocation: .*/tag/\([^/[:space:]]*\).*#\1#p' \
            | tail -n 1
    fi
}
|
|
97
|
+
|
|
98
|
+
# Main installation
|
|
99
|
+
# Install recurl: detect the platform, resolve the version, download and
# unpack the release archive into the install directory, verify the binary,
# optionally add a `curl` alias to the user's shell config, and print a
# summary. Reads VERSION and INSTALL_DIR (set from RECURL_VERSION and
# RECURL_INSTALL_DIR at the top of the script).
main() {
    echo ""
    echo -e "${GREEN}╔═══════════════════════════════════════╗${NC}"
    echo -e "${GREEN}║ recurl installer ║${NC}"
    echo -e "${GREEN}╚═══════════════════════════════════════╝${NC}"
    echo ""

    # Detect platform
    local os arch
    os=$(detect_os)
    arch=$(detect_arch)
    info "Detected platform: ${os}-${arch}"

    # Determine version
    if [[ "$VERSION" == "latest" ]]; then
        info "Fetching latest version..."
        VERSION=$(get_latest_version)
        if [[ -z "$VERSION" ]]; then
            error "Failed to fetch latest version. Set RECURL_VERSION explicitly."
        fi
    fi
    info "Installing version: ${VERSION}"

    # Determine install directory: system-wide when /usr/local is writable,
    # otherwise under the user's home.
    if [[ -z "$INSTALL_DIR" ]]; then
        if [[ -w "/usr/local" ]]; then
            INSTALL_DIR="/usr/local/recurl"
        else
            INSTALL_DIR="$HOME/.local/recurl"
        fi
    fi
    info "Install directory: ${INSTALL_DIR}"

    # Create temp directory
    local tmp_dir
    tmp_dir=$(mktemp -d)
    # The path is expanded now (tmp_dir is local and gone by the time the
    # EXIT trap runs) and shell-quoted so spaces or glob characters in
    # TMPDIR cannot change what gets removed.
    trap "rm -rf -- $(printf '%q' "$tmp_dir")" EXIT

    # Download archive
    local archive_name="recurl-${os}-${arch}.tar.gz"
    local download_url="${BASE_URL}/download/${VERSION}/${archive_name}"
    info "Downloading ${archive_name}..."
    download "$download_url" "${tmp_dir}/${archive_name}"

    # Extract
    info "Extracting..."
    mkdir -p "$INSTALL_DIR"
    tar -xzf "${tmp_dir}/${archive_name}" -C "$INSTALL_DIR" --strip-components=1

    # Make binaries executable
    chmod +x "$INSTALL_DIR/recurl"
    chmod +x "$INSTALL_DIR/recurld"
    chmod +x "$INSTALL_DIR/bin/"*

    success "recurl installed to ${INSTALL_DIR}"
    echo ""

    # Verify installation
    info "Verifying installation..."
    if "$INSTALL_DIR/recurl" --recurl-debug --version &> /dev/null; then
        success "recurl binary works correctly"
    else
        warn "recurl binary may have issues. Check ${INSTALL_DIR}/recurl"
    fi
    echo ""

    # Ask about shell alias
    echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${YELLOW}Shell configuration${NC}"
    echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo ""
    echo "To use recurl as a drop-in curl replacement, you can:"
    echo ""
    echo " 1. Use recurl directly: recurl https://example.com"
    echo " 2. Create a shell alias: alias curl='${INSTALL_DIR}/recurl'"
    echo ""

    local shell_config
    shell_config=$(detect_shell_config)

    # When the script is piped into bash (`curl ... | bash`), stdin is the
    # script itself and already at EOF here; a plain `read` would return
    # non-zero and `set -e` would abort before the summary. Prompt on the
    # controlling terminal when stdin is not one, and treat "no terminal"
    # or a failed read as the default answer (no).
    local reply=""
    local prompt="Add curl alias to ${shell_config}? [y/N] "
    if [[ -t 0 ]]; then
        read -r -n 1 -p "$prompt" reply || reply=""
    elif (: < /dev/tty) 2> /dev/null; then
        read -r -n 1 -p "$prompt" reply < /dev/tty || reply=""
    fi
    echo ""

    if [[ $reply =~ ^[Yy]$ ]]; then
        # Check if alias already exists
        if grep -q "alias curl=.*recurl" "$shell_config" 2>/dev/null; then
            warn "Alias already exists in ${shell_config}"
        else
            echo "" >> "$shell_config"
            echo "# recurl - drop-in curl replacement with anti-bot bypass" >> "$shell_config"
            echo "alias curl='${INSTALL_DIR}/recurl'" >> "$shell_config"
            success "Alias added to ${shell_config}"
        fi

        echo ""
        info "Run this to apply changes now:"
        echo ""
        echo " source ${shell_config}"
        echo ""
    else
        info "Skipping alias configuration."
        echo ""
        echo "To use recurl, either:"
        echo ""
        echo " 1. Call recurl directly:"
        echo " ${INSTALL_DIR}/recurl https://example.com"
        echo ""
        echo " 2. Add to your shell config manually:"
        echo " echo \"alias curl='${INSTALL_DIR}/recurl'\" >> ${shell_config}"
        echo ""
        echo " 3. Add to PATH:"
        echo " export PATH=\"${INSTALL_DIR}:\$PATH\""
        echo ""
    fi

    # Final summary
    echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${GREEN}Installation complete!${NC}"
    echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo ""
    echo "Installed files:"
    echo " ${INSTALL_DIR}/recurl - main binary"
    echo " ${INSTALL_DIR}/recurld - daemon"
    echo " ${INSTALL_DIR}/bin/ - curl engines"
    echo ""
    echo "Documentation: https://github.com/${GITHUB_REPO}#readme"
    echo ""
}
|
|
227
|
+
|
|
228
|
+
# Run main
|
|
229
|
+
main "$@"
|
recurl/cli.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CLI entry points for the recurl Python wrapper.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from recurl import run, run_daemon
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main() -> None:
    """Run recurl with the process's command-line arguments and exit with its status."""
    exit_code = run()
    sys.exit(exit_code)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main_daemon() -> None:
    """Run recurld with the process's command-line arguments and exit with its status."""
    exit_code = run_daemon()
    sys.exit(exit_code)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
if __name__ == "__main__":
|
|
20
|
+
main()
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: recurl-cli
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Drop-in curl replacement with automatic anti-bot bypass
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/neul-labs/recurl
|
|
7
|
+
Project-URL: Repository, https://github.com/neul-labs/recurl
|
|
8
|
+
Project-URL: Documentation, https://docs.neullabs.com/recurl
|
|
9
|
+
Project-URL: Issues, https://github.com/neul-labs/recurl/issues
|
|
10
|
+
Keywords: curl,http,anti-bot,scraping,cli
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# recurl-cli
|
|
28
|
+
|
|
29
|
+
**Python's missing curl.** Drop-in HTTP client with automatic anti-bot bypass for Python developers, data scientists, and web scrapers.
|
|
30
|
+
|
|
31
|
+
[PyPI](https://pypi.org/project/recurl-cli/)
|
|
32
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
33
|
+
[Python versions](https://pypi.org/project/recurl-cli/)
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## What is recurl?
|
|
38
|
+
|
|
39
|
+
recurl is a smart drop-in replacement for `curl` that transparently handles bot detection and anti-bot protections used by modern websites. It runs real curl under the hood, detects when a request is blocked (403, captcha, Cloudflare challenge), and automatically escalates through browser impersonation and headless Chromium rendering to get the response you need.
|
|
40
|
+
|
|
41
|
+
**Same curl syntax. No code changes. It just works.**
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# Works even on Cloudflare-protected sites
|
|
45
|
+
python -m recurl.cli https://protected-site.com/api/data
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Why Python developers need recurl
|
|
49
|
+
|
|
50
|
+
If you've ever written Python scripts for web scraping or API access, you've hit these walls:
|
|
51
|
+
|
|
52
|
+
- `requests.get()` returns **403 Forbidden** on protected sites
|
|
53
|
+
- `urllib` gets blocked by TLS fingerprinting
|
|
54
|
+
- You end up installing Selenium, Playwright, or Puppeteer just to fetch a single page
|
|
55
|
+
- Headless browser setup is heavy, slow, and overkill for simple requests
|
|
56
|
+
|
|
57
|
+
recurl solves this by being a **curl replacement with built-in escalation**:
|
|
58
|
+
|
|
59
|
+
1. **First attempt**: Standard curl request (fast, low overhead)
|
|
60
|
+
2. **If blocked**: Retries with browser TLS fingerprint impersonation
|
|
61
|
+
3. **Still blocked**: Launches headless Chromium, solves JS challenges, extracts cookies, replays the request
|
|
62
|
+
|
|
63
|
+
No Python dependencies for browser automation. No heavy browser setup. Just install and use.
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
### pip (recommended)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install recurl-cli
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Other package managers
|
|
74
|
+
|
|
75
|
+
| Platform | Command |
|
|
76
|
+
|----------|---------|
|
|
77
|
+
| **npm** | `npm install -g recurl-cli` |
|
|
78
|
+
| **Homebrew** | `brew tap neul-labs/tap && brew install recurl` |
|
|
79
|
+
| **Cargo** | `cargo install recurl` |
|
|
80
|
+
| **Scoop** | `scoop install recurl` |
|
|
81
|
+
|
|
82
|
+
See the [full installation guide](https://github.com/neul-labs/recurl#installation) for platform-specific instructions.
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# Use as a Python module
|
|
88
|
+
python -m recurl.cli https://api.example.com/data
|
|
89
|
+
|
|
90
|
+
# Pass through all curl flags
|
|
91
|
+
python -m recurl.cli -X POST -H "Content-Type: application/json" -d '{"key":"value"}' https://api.example.com
|
|
92
|
+
|
|
93
|
+
# Force JS rendering for heavily protected sites
|
|
94
|
+
python -m recurl.cli --recurl-js https://cloudflare-protected-site.com
|
|
95
|
+
|
|
96
|
+
# Debug mode to see escalation steps
|
|
97
|
+
python -m recurl.cli --recurl-debug https://example.com
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Python API (coming soon)
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from recurl import fetch
|
|
104
|
+
|
|
105
|
+
# Simple fetch that handles anti-bot protections automatically
|
|
106
|
+
response = fetch("https://protected-site.com")
|
|
107
|
+
print(response.text)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Supported Anti-Bot Services
|
|
111
|
+
|
|
112
|
+
recurl automatically detects and bypasses protection from:
|
|
113
|
+
|
|
114
|
+
| Service | Detection | Bypass |
|
|
115
|
+
|---------|-----------|--------|
|
|
116
|
+
| Cloudflare | Bot Management, Turnstile, JS Challenge | ✓ |
|
|
117
|
+
| Akamai Bot Manager | Behavioral analysis | ✓ |
|
|
118
|
+
| PerimeterX / HUMAN | Client-side fingerprinting | ✓ |
|
|
119
|
+
| DataDome | Bot Protection | ✓ |
|
|
120
|
+
| Imperva / Incapsula | Challenge pages | ✓ |
|
|
121
|
+
| Kasada | Bot Mitigation | ✓ |
|
|
122
|
+
| AWS WAF Bot Control | Request analysis | ✓ |
|
|
123
|
+
| Shape / F5 | Bot Defense | ✓ |
|
|
124
|
+
| hCaptcha | Challenge widget | ✓ |
|
|
125
|
+
| reCAPTCHA | Challenge widget | ✓ |
|
|
126
|
+
|
|
127
|
+
## Platform Support
|
|
128
|
+
|
|
129
|
+
| Platform | Architecture | Impersonation | JS Preflight |
|
|
130
|
+
|----------|-------------|:-------------:|:------------:|
|
|
131
|
+
| Linux | x86_64 | ✓ | ✓ |
|
|
132
|
+
| Linux | aarch64 | ✓ | ✓ |
|
|
133
|
+
| macOS | Apple Silicon | ✓ | ✓ |
|
|
134
|
+
| macOS | Intel | ✓ | ✓ |
|
|
135
|
+
| Windows | x86_64 | — | ✓ |
|
|
136
|
+
|
|
137
|
+
## CLI Reference
|
|
138
|
+
|
|
139
|
+
### recurl-specific flags
|
|
140
|
+
|
|
141
|
+
| Flag | Description |
|
|
142
|
+
|------|-------------|
|
|
143
|
+
| `--recurl-strict` | Disable fallback, pure curl passthrough |
|
|
144
|
+
| `--recurl-impersonate <profile>` | Force TLS fingerprint profile (chrome, firefox, safari) |
|
|
145
|
+
| `--recurl-js` | Force JS preflight (skip straight to Chromium) |
|
|
146
|
+
| `--recurl-js-rendered` | Return rendered DOM instead of raw response |
|
|
147
|
+
| `--recurl-js-wait <selector>` | Wait for CSS selector before capturing |
|
|
148
|
+
| `--recurl-js-timeout <ms>` | JS preflight timeout (default: 30000) |
|
|
149
|
+
| `--recurl-debug` | Show diagnostic output and escalation steps |
|
|
150
|
+
|
|
151
|
+
All standard curl flags work as expected.
|
|
152
|
+
|
|
153
|
+
## Use Cases for Python Developers
|
|
154
|
+
|
|
155
|
+
- **Web scraping** - Extract data from protected sites without Selenium/Playwright overhead
|
|
156
|
+
- **Data pipelines** - Reliable HTTP requests in Airflow, Luigi, or cron jobs
|
|
157
|
+
- **API integration** - Test and call APIs behind bot protection
|
|
158
|
+
- **Research & analytics** - Fetch pricing, inventory, or public datasets
|
|
159
|
+
- **CI/CD** - Reliable HTTP calls in GitHub Actions, GitLab CI, Jenkins
|
|
160
|
+
- **Shell scripting from Python** - Use `subprocess.run(["recurl", ...])` for guaranteed delivery
|
|
161
|
+
|
|
162
|
+
## How It Works
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
recurl receives request
|
|
166
|
+
|
|
|
167
|
+
+---> curl_engine (real curl binary)
|
|
168
|
+
| |
|
|
169
|
+
| +---> Success? Return response immediately
|
|
170
|
+
| |
|
|
171
|
+
| +---> Blocked? (403, 429, captcha, challenge page)
|
|
172
|
+
| |
|
|
173
|
+
| +---> Retry with impersonation (browser TLS fingerprint)
|
|
174
|
+
| | |
|
|
175
|
+
| | +---> Success? Return response
|
|
176
|
+
| | |
|
|
177
|
+
| | +---> Still blocked?
|
|
178
|
+
| | |
|
|
179
|
+
| | +---> JS preflight (headless Chromium)
|
|
180
|
+
| | |
|
|
181
|
+
| | +---> Solve challenge, extract cookies
|
|
182
|
+
| | |
|
|
183
|
+
| | +---> Replay request with cookies
|
|
184
|
+
| | |
|
|
185
|
+
| | +---> Return final response
|
|
186
|
+
|
|
|
187
|
+
+---> Return result to user
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
The user sees only the final successful response.
|
|
191
|
+
|
|
192
|
+
## Configuration
|
|
193
|
+
|
|
194
|
+
### Environment Variables
|
|
195
|
+
|
|
196
|
+
| Variable | Description |
|
|
197
|
+
|----------|-------------|
|
|
198
|
+
| `RECURL_STRICT=1` | Same as `--recurl-strict` |
|
|
199
|
+
| `RECURL_DEBUG=1` | Enable debug output |
|
|
200
|
+
| `RECURL_DAEMON_IDLE_MS` | Daemon idle timeout (default: 60000) |
|
|
201
|
+
|
|
202
|
+
### Daemon Mode
|
|
203
|
+
|
|
204
|
+
The optional `recurld` daemon keeps Chromium warm for sub-second responses:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Start daemon
|
|
208
|
+
recurld start
|
|
209
|
+
|
|
210
|
+
# Check status
|
|
211
|
+
recurld status
|
|
212
|
+
|
|
213
|
+
# Stop daemon
|
|
214
|
+
recurld stop
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Links
|
|
218
|
+
|
|
219
|
+
- **Main Repository**: [github.com/neul-labs/recurl](https://github.com/neul-labs/recurl)
|
|
220
|
+
- **Documentation**: [docs.neullabs.com/recurl](https://docs.neullabs.com/recurl)
|
|
221
|
+
- **Issues**: [github.com/neul-labs/recurl/issues](https://github.com/neul-labs/recurl/issues)
|
|
222
|
+
- **License**: MIT
|
|
223
|
+
|
|
224
|
+
## Keywords
|
|
225
|
+
|
|
226
|
+
Python HTTP client, curl replacement, web scraping Python, anti-bot bypass, Cloudflare bypass Python, headless browser Python, TLS fingerprint spoofing, bot detection evasion, requests alternative, urllib replacement, Python CLI tool, data extraction, API client Python, web crawler Python, Chromium automation Python
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
recurl/__init__.py,sha256=jVoICXp__Jl2C3ENf3PXoP1fEFY-2TiXu44Unvc3Wjc,1509
|
|
2
|
+
recurl/cli.py,sha256=wfzSrwka_v0xagoXtfWud05qOchlIsqA-o4dJgc4dGw,265
|
|
3
|
+
recurl/bin/LICENSE,sha256=yCrr3BbMKoyfUwl7I6QfYNnRO-xKIMriv6WsCn38hY4,1066
|
|
4
|
+
recurl/bin/README.md,sha256=hg4XZivfDWN6ngZNGZFHrto28Gpbatvaf3-piXLUNDo,6453
|
|
5
|
+
recurl/bin/install.sh,sha256=5bGGCfnk9Ilv3cLDNccsj0SObdGobLqOfoEnfydtnkM,7139
|
|
6
|
+
recurl/bin/bin/recurl,sha256=wb6eY2wvmVauXJPFzQpeFEe9555ZZaAriNfluSzKEgU,4038160
|
|
7
|
+
recurl/bin/bin/recurld,sha256=U5o8gDgzIRNBhNYfWydGhVu98pKtmDvkrFCA-_TfsHA,3454448
|
|
8
|
+
recurl_cli-0.1.2.dist-info/METADATA,sha256=ln47sMsSse0kN7x0GS3kh04b_FvwtasxuMVtQNK0T3U,8420
|
|
9
|
+
recurl_cli-0.1.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
recurl_cli-0.1.2.dist-info/entry_points.txt,sha256=q07Bzz_IkTNWVAGQ3nwVO4uByDordrCv3aZ3aheA4tE,76
|
|
11
|
+
recurl_cli-0.1.2.dist-info/top_level.txt,sha256=T2lvMOhUoHC2yKVtcxhGyGY24su4WZMjId2kyHY9l-I,7
|
|
12
|
+
recurl_cli-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recurl
|