recurl-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
recurl/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """
2
+ recurl - Drop-in curl replacement with automatic anti-bot bypass
3
+
4
+ This is a thin Python wrapper that delegates to the platform-specific
5
+ binary downloaded at install time.
6
+ """
7
+
8
+ import os
9
+ import platform
10
+ import subprocess
11
+ import sys
12
+ from pathlib import Path
13
+
14
+
15
+ def _find_binary(name: str) -> str:
16
+ """Locate the downloaded recurl binary."""
17
+ # 1. Check alongside this package
18
+ package_dir = Path(__file__).parent
19
+ bin_path = package_dir / "bin" / name
20
+ if bin_path.exists():
21
+ return str(bin_path)
22
+
23
+ # 2. Check in PATH
24
+ found = shutil.which(name)
25
+ if found:
26
+ return found
27
+
28
+ raise FileNotFoundError(
29
+ f"Could not find {name} binary. "
30
+ "Try reinstalling: pip install --force-reinstall recurl-cli"
31
+ )
32
+
33
+
34
def run(args: "list[str] | None" = None) -> int:
    """
    Run recurl with the given CLI arguments.

    Args:
        args: List of arguments (e.g., ["-s", "https://example.com"]).
            If None, uses sys.argv[1:].

    Returns:
        Exit code from the recurl process.
    """
    # NOTE: the annotation is a string on purpose — the package declares
    # Requires-Python >=3.8, and a bare `list[str]` (or `list[str] | None`)
    # is evaluated at def time and fails before Python 3.9/3.10.
    binary = _find_binary("recurl")
    cmd = [binary] + (args if args is not None else sys.argv[1:])
    # Inherit stdin/stdout/stderr so recurl behaves like a normal CLI tool.
    result = subprocess.run(cmd)
    return result.returncode
51
+
52
+
53
def run_daemon(args: "list[str] | None" = None) -> int:
    """Run recurld (the background daemon) with the given CLI arguments.

    Args:
        args: List of arguments for recurld. If None, uses sys.argv[1:].

    Returns:
        Exit code from the recurld process.
    """
    # String annotation keeps Python 3.8 compatibility (Requires-Python >=3.8);
    # a bare `list[str]` annotation raises TypeError at import time on 3.8.
    binary = _find_binary("recurld")
    cmd = [binary] + (args if args is not None else sys.argv[1:])
    # Inherit the parent's stdio so daemon subcommand output reaches the user.
    result = subprocess.run(cmd)
    return result.returncode
recurl/bin/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Neul Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
recurl/bin/README.md ADDED
@@ -0,0 +1,225 @@
1
+ # recurl
2
+
3
+ [![Crates.io](https://img.shields.io/crates/v/recurl.svg)](https://crates.io/crates/recurl)
4
+ [![Documentation](https://img.shields.io/badge/docs-docs.neullabs.com-blue)](https://docs.neullabs.com/recurl)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/neul-labs/recurl/ci.yml?branch=main)](https://github.com/neul-labs/recurl/actions)
7
+
8
+ **curl that just works.** Drop-in replacement with automatic anti-bot bypass.
9
+
10
+ ---
11
+
12
+ ## Why recurl?
13
+
14
+ You're scraping a website. It works in your browser but `curl` gets blocked. You try different headers, user agents, maybe even `curl-impersonate`. Still blocked. Now you're writing Puppeteer scripts...
15
+
16
+ **recurl fixes this.** It runs real curl, detects when you're blocked, and automatically escalates through impersonation and headless browser rendering. Same curl syntax you know. No code changes.
17
+
18
+ ```bash
19
+ # This just works, even on Cloudflare-protected sites
20
+ recurl https://protected-site.com/api/data
21
+ ```
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ # npm / npx
27
+ npm install -g recurl-cli
28
+
29
+ # PyPI
30
+ pip install recurl-cli
31
+
32
+ # Homebrew (macOS / Linux)
33
+ brew tap neul-labs/tap
34
+ brew install recurl
35
+
36
+ # Cargo (Rust)
37
+ cargo install recurl
38
+
39
+ # From source
40
+ git clone https://github.com/neul-labs/recurl
41
+ cd recurl && cargo build --release
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ```bash
47
+ # Use it exactly like curl
48
+ recurl https://example.com
49
+
50
+ # Or alias it as curl for seamless usage
51
+ alias curl=recurl
52
+ curl https://api.example.com/data
53
+
54
+ # Force JS rendering for heavy protection
55
+ recurl --recurl-js https://heavily-protected-site.com
56
+
57
+ # Debug mode to see what's happening
58
+ recurl --recurl-debug https://example.com
59
+ ```
60
+
61
+ ## How It Works
62
+
63
+ ```
64
+ curl (alias) --> recurl (shim)
65
+ |
66
+ +--> curl_engine --> success? done
67
+ |
68
+ +--> blocked? (403, 429, captcha, etc.)
69
+ |
70
+ +--> retry with impersonation (TLS fingerprint spoofing)
71
+ |
72
+ +--> retry with JS preflight (headless Chromium)
73
+ ```
74
+
75
+ 1. **First try**: Runs real curl (fast, low overhead)
76
+ 2. **If blocked**: Retries with TLS fingerprint impersonation
77
+ 3. **Still blocked**: Renders page in headless Chromium, captures cookies/tokens, replays request
78
+
79
+ Chromium auto-downloads on first use. A background daemon (`recurld`) keeps it warm for fast subsequent requests.
80
+
81
+ ## Bypass Coverage
82
+
83
+ recurl automatically handles:
84
+
85
+ | Provider | Detection Method |
86
+ |----------|------------------|
87
+ | Cloudflare | Bot Management, Turnstile, JS Challenge |
88
+ | Akamai | Bot Manager |
89
+ | PerimeterX | HUMAN Security |
90
+ | DataDome | Bot Protection |
91
+ | Imperva | Incapsula |
92
+ | Kasada | Bot Mitigation |
93
+ | AWS WAF | Bot Control |
94
+ | Shape/F5 | Bot Defense |
95
+ | Arkose Labs | FunCaptcha |
96
+ | hCaptcha | Challenge |
97
+ | reCAPTCHA | Challenge |
98
+
99
+ ## CLI Reference
100
+
101
+ ### recurl-specific flags
102
+
103
+ | Flag | Description |
104
+ |------|-------------|
105
+ | `--recurl-strict` | Disable fallback, pure curl passthrough |
106
+ | `--recurl-impersonate <profile>` | Force specific TLS fingerprint profile |
107
+ | `--recurl-js` | Force JS preflight (skip to Chromium) |
108
+ | `--recurl-js-rendered` | Return rendered DOM instead of raw response |
109
+ | `--recurl-js-wait <selector>` | Wait for CSS selector before capturing |
110
+ | `--recurl-js-timeout <ms>` | JS preflight timeout (default: 30000) |
111
+ | `--recurl-daemon on\|off` | Control background daemon usage |
112
+ | `--recurl-debug` | Show diagnostic output |
113
+
114
+ All standard curl flags work as expected.
115
+
116
+ ### Environment Variables
117
+
118
+ | Variable | Description |
119
+ |----------|-------------|
120
+ | `RECURL_STRICT=1` | Same as `--recurl-strict` |
121
+ | `RECURL_DAEMON_IDLE_MS` | Daemon idle timeout (default: 60000) |
122
+
123
+ ## Platform Support
124
+
125
+ | Platform | Arch | Impersonation | JS Preflight | Chromium Auto-Download |
126
+ |----------|------|:-------------:|:------------:|:----------------------:|
127
+ | Linux | x86_64 | Yes | Yes | Yes |
128
+ | Linux | aarch64 | Yes | Yes | Manual |
129
+ | macOS | Apple Silicon | Yes | Yes | Yes |
130
+ | macOS | Intel | Yes | Yes | Yes |
131
+ | Windows | x86_64 | No | Yes | Yes |
132
+
133
+ *Impersonation requires curl-impersonate (Linux/macOS only). Windows skips directly to JS preflight.*
134
+
135
+ ## For Developers
136
+
137
+ ### Building from Source
138
+
139
+ ```bash
140
+ # Debug build
141
+ cargo build
142
+
143
+ # Release build (optimized)
144
+ cargo build --release
145
+
146
+ # Build with daemon support
147
+ cargo build --release --features daemon
148
+ ```
149
+
150
+ ### Architecture
151
+
152
+ ```
153
+ src/
154
+ main.rs # CLI entry point, argument parsing
155
+ engine.rs # curl_engine execution layer
156
+ detection/ # Anti-bot pattern detection
157
+ mod.rs
158
+ patterns.rs
159
+ status.rs
160
+ impersonation/ # TLS fingerprint impersonation
161
+ mod.rs
162
+ js_preflight/ # Headless Chromium rendering
163
+ mod.rs
164
+ browser.rs
165
+ browser_config.rs
166
+ chromium.rs
167
+ cookies.rs
168
+ preflight_state.rs
169
+ stealth.rs
170
+ escalation.rs # EscalationEngine state machine
171
+ daemon/
172
+ main.rs # recurld daemon entry point
173
+ lifecycle.rs # DaemonLifecycle state machine
174
+ browser_state.rs # BrowserState state machine
175
+ pool.rs # Browser instance pooling
176
+ ipc.rs # IPC transport
177
+ protocol.rs # IPC message protocol
178
+ config.rs # Configuration & defaults
179
+ daemon_client.rs # Daemon client interface
180
+ ```
181
+
182
+ ### Running Tests
183
+
184
+ ```bash
185
+ # Unit tests
186
+ cargo test
187
+
188
+ # All tests including integration
189
+ cargo test --all-features
190
+ ```
191
+
192
+ ### Documentation
193
+
194
+ - [Installation Guide](docs/installation.md) - Platform-specific setup
195
+ - [Architecture](docs/architecture.md) - System design deep-dive
196
+ - [CLI Reference](docs/cli.md) - Complete flag documentation
197
+ - [Escalation Layers](docs/layers.md) - How bypass works
198
+ - [Daemon](docs/daemon.md) - recurld configuration
199
+ - [Compliance Testing](docs/compliance.md) - curl compatibility
200
+
201
+ ## Contributing
202
+
203
+ Contributions welcome! Please read the architecture docs first to understand the codebase structure.
204
+
205
+ ```bash
206
+ # Fork and clone
207
+ git clone https://github.com/YOUR_USERNAME/recurl
208
+ cd recurl
209
+
210
+ # Create a branch
211
+ git checkout -b feature/your-feature
212
+
213
+ # Make changes, then test
214
+ cargo test
215
+
216
+ # Submit a PR
217
+ ```
218
+
219
+ ## License
220
+
221
+ MIT License - see [LICENSE](LICENSE) for details.
222
+
223
+ ---
224
+
225
+ Built by [Neul Labs](https://github.com/neul-labs)
recurl/bin/bin/recurl ADDED
Binary file
recurl/bin/bin/recurld ADDED
Binary file
recurl/bin/install.sh ADDED
@@ -0,0 +1,229 @@
1
#!/usr/bin/env bash
set -euo pipefail

# recurl installer for Linux and macOS
# Usage: curl -fsSL https://recurl.dev/install.sh | bash

# Release selection and install layout (overridable via environment).
VERSION="${RECURL_VERSION:-latest}"
INSTALL_DIR="${RECURL_INSTALL_DIR:-}"
GITHUB_REPO="neul-labs/recurl"
BASE_URL="https://github.com/${GITHUB_REPO}/releases"

# ANSI color codes used by the log helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Leveled log helpers; error() also terminates the script.
info()    { echo -e "${BLUE}[info]${NC} $1"; }
success() { echo -e "${GREEN}[success]${NC} $1"; }
warn()    { echo -e "${YELLOW}[warn]${NC} $1"; }
error()   { echo -e "${RED}[error]${NC} $1"; exit 1; }
23
+
24
+ # Detect OS
25
# Print the normalized OS name ("linux" or "darwin"); abort on anything else.
detect_os() {
    local kernel
    kernel="$(uname -s)"
    case "$kernel" in
        Linux*)  echo "linux" ;;
        Darwin*) echo "darwin" ;;
        *)       error "Unsupported OS: ${kernel}. Use Windows installer for Windows." ;;
    esac
}
32
+
33
+ # Detect architecture
34
# Print the normalized CPU architecture ("x86_64" or "aarch64"); abort otherwise.
detect_arch() {
    local machine
    machine="$(uname -m)"
    case "$machine" in
        x86_64|amd64)  echo "x86_64" ;;
        arm64|aarch64) echo "aarch64" ;;
        *)             error "Unsupported architecture: ${machine}" ;;
    esac
}
41
+
42
+ # Detect shell config file
43
# Print the path of the startup file for the user's login shell.
# Falls back to ~/.profile for shells we don't specifically know about.
detect_shell_config() {
    local shell_name
    shell_name=$(basename "$SHELL")

    case "$shell_name" in
        bash)
            # Prefer .bashrc; fall back to .bash_profile only when .bashrc
            # is absent but .bash_profile exists.
            if [[ -f "$HOME/.bashrc" ]]; then
                echo "$HOME/.bashrc"
            elif [[ -f "$HOME/.bash_profile" ]]; then
                echo "$HOME/.bash_profile"
            else
                echo "$HOME/.bashrc"
            fi
            ;;
        zsh)  echo "$HOME/.zshrc" ;;
        fish) echo "$HOME/.config/fish/config.fish" ;;
        *)    echo "$HOME/.profile" ;;
    esac
}
68
+
69
+ # Check if command exists
70
# Return success when the command named $1 is available on PATH.
has_command() {
    command -v "$1" > /dev/null 2>&1
}
73
+
74
+ # Download file
75
# Fetch URL $1 and write it to local path $2, preferring curl over wget.
download() {
    local url="$1" output="$2"

    if has_command curl; then
        curl -fsSL "$url" -o "$output"
    elif has_command wget; then
        wget -q "$url" -O "$output"
    else
        error "Neither curl nor wget found. Please install one of them."
    fi
}
87
+
88
+ # Get latest version from GitHub
89
# Resolve the "latest" release tag by following GitHub's /releases/latest
# redirect and extracting the final path segment (the tag name).
get_latest_version() {
    local url="${BASE_URL}/latest"
    if has_command curl; then
        # -w '%{url_effective}' prints the post-redirect URL; the tag is its
        # last path component.
        curl -fsSL -o /dev/null -w '%{url_effective}' "$url" | rev | cut -d'/' -f1 | rev
    elif has_command wget; then
        # BUGFIX: the original used `grep -oP`, which requires GNU grep with
        # PCRE support and fails on BSD/macOS grep. Use portable sed instead.
        wget -q -O /dev/null --server-response "$url" 2>&1 \
            | sed -n 's#.*Location: .*/tag/\([^/ ]*\).*#\1#p' | tail -n 1
    fi
}
97
+
98
+ # Main installation
99
# Main installation routine: detect the platform, resolve the version,
# download and extract the release archive, verify the binary, and
# optionally configure a `curl` alias in the user's shell config.
main() {
    echo ""
    echo -e "${GREEN}╔═══════════════════════════════════════╗${NC}"
    echo -e "${GREEN}║ recurl installer ║${NC}"
    echo -e "${GREEN}╚═══════════════════════════════════════╝${NC}"
    echo ""

    # Detect platform
    local os arch
    os=$(detect_os)
    arch=$(detect_arch)
    info "Detected platform: ${os}-${arch}"

    # Resolve "latest" to a concrete release tag
    if [[ "$VERSION" == "latest" ]]; then
        info "Fetching latest version..."
        VERSION=$(get_latest_version)
        if [[ -z "$VERSION" ]]; then
            error "Failed to fetch latest version. Set RECURL_VERSION explicitly."
        fi
    fi
    info "Installing version: ${VERSION}"

    # Determine install directory: system-wide when writable, else per-user
    if [[ -z "$INSTALL_DIR" ]]; then
        if [[ -w "/usr/local" ]]; then
            INSTALL_DIR="/usr/local/recurl"
        else
            INSTALL_DIR="$HOME/.local/recurl"
        fi
    fi
    info "Install directory: ${INSTALL_DIR}"

    # Create temp directory. BUGFIX: the path is single-quoted inside the
    # trap so cleanup survives paths containing spaces (it was unquoted).
    local tmp_dir
    tmp_dir=$(mktemp -d)
    trap "rm -rf '$tmp_dir'" EXIT

    # Download archive
    local archive_name="recurl-${os}-${arch}.tar.gz"
    local download_url="${BASE_URL}/download/${VERSION}/${archive_name}"
    info "Downloading ${archive_name}..."
    download "$download_url" "${tmp_dir}/${archive_name}"

    # Extract
    info "Extracting..."
    mkdir -p "$INSTALL_DIR"
    tar -xzf "${tmp_dir}/${archive_name}" -C "$INSTALL_DIR" --strip-components=1

    # Make binaries executable. BUGFIX: guard the engines glob — if bin/
    # is absent from the archive an unmatched glob made chmod fail and
    # `set -e` aborted the whole installer.
    chmod +x "$INSTALL_DIR/recurl"
    chmod +x "$INSTALL_DIR/recurld"
    if compgen -G "$INSTALL_DIR/bin/*" > /dev/null; then
        chmod +x "$INSTALL_DIR/bin/"*
    fi

    success "recurl installed to ${INSTALL_DIR}"
    echo ""

    # Verify installation
    info "Verifying installation..."
    if "$INSTALL_DIR/recurl" --recurl-debug --version &> /dev/null; then
        success "recurl binary works correctly"
    else
        warn "recurl binary may have issues. Check ${INSTALL_DIR}/recurl"
    fi
    echo ""

    # Ask about shell alias
    echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${YELLOW}Shell configuration${NC}"
    echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo ""
    echo "To use recurl as a drop-in curl replacement, you can:"
    echo ""
    echo " 1. Use recurl directly: recurl https://example.com"
    echo " 2. Create a shell alias: alias curl='${INSTALL_DIR}/recurl'"
    echo ""

    local shell_config
    shell_config=$(detect_shell_config)

    # BUGFIX: under the documented `curl ... | bash` usage, stdin is the
    # script itself, so a plain `read` would consume script text instead of
    # user input. Read from the controlling terminal and default to "no"
    # when no terminal is available (CI, piped non-interactive runs).
    REPLY=""
    if [[ -r /dev/tty ]]; then
        read -p "Add curl alias to ${shell_config}? [y/N] " -n 1 -r < /dev/tty || REPLY=""
    fi
    echo ""

    if [[ $REPLY =~ ^[Yy]$ ]]; then
        # Check if alias already exists
        if grep -q "alias curl=.*recurl" "$shell_config" 2>/dev/null; then
            warn "Alias already exists in ${shell_config}"
        else
            echo "" >> "$shell_config"
            echo "# recurl - drop-in curl replacement with anti-bot bypass" >> "$shell_config"
            echo "alias curl='${INSTALL_DIR}/recurl'" >> "$shell_config"
            success "Alias added to ${shell_config}"
        fi

        echo ""
        info "Run this to apply changes now:"
        echo ""
        echo " source ${shell_config}"
        echo ""
    else
        info "Skipping alias configuration."
        echo ""
        echo "To use recurl, either:"
        echo ""
        echo " 1. Call recurl directly:"
        echo " ${INSTALL_DIR}/recurl https://example.com"
        echo ""
        echo " 2. Add to your shell config manually:"
        echo " echo \"alias curl='${INSTALL_DIR}/recurl'\" >> ${shell_config}"
        echo ""
        echo " 3. Add to PATH:"
        echo " export PATH=\"${INSTALL_DIR}:\$PATH\""
        echo ""
    fi

    # Final summary
    echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${GREEN}Installation complete!${NC}"
    echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo ""
    echo "Installed files:"
    echo " ${INSTALL_DIR}/recurl - main binary"
    echo " ${INSTALL_DIR}/recurld - daemon"
    echo " ${INSTALL_DIR}/bin/ - curl engines"
    echo ""
    echo "Documentation: https://github.com/${GITHUB_REPO}#readme"
    echo ""
}

# Run main
main "$@"
recurl/cli.py ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI entry points for the recurl Python wrapper.
4
+ """
5
+
6
+ import sys
7
+
8
+ from recurl import run, run_daemon
9
+
10
+
11
def main() -> None:
    """Console-script entry point for ``recurl``."""
    # Equivalent to sys.exit(run()): sys.exit simply raises SystemExit.
    raise SystemExit(run())


def main_daemon() -> None:
    """Console-script entry point for ``recurld``."""
    raise SystemExit(run_daemon())


if __name__ == "__main__":
    main()
@@ -0,0 +1,226 @@
1
+ Metadata-Version: 2.4
2
+ Name: recurl-cli
3
+ Version: 0.1.2
4
+ Summary: Drop-in curl replacement with automatic anti-bot bypass
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/neul-labs/recurl
7
+ Project-URL: Repository, https://github.com/neul-labs/recurl
8
+ Project-URL: Documentation, https://docs.neullabs.com/recurl
9
+ Project-URL: Issues, https://github.com/neul-labs/recurl/issues
10
+ Keywords: curl,http,anti-bot,scraping,cli
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Internet :: WWW/HTTP
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+
27
+ # recurl-cli
28
+
29
+ **Python's missing curl.** Drop-in HTTP client with automatic anti-bot bypass for Python developers, data scientists, and web scrapers.
30
+
31
+ [![PyPI version](https://img.shields.io/pypi/v/recurl-cli.svg)](https://pypi.org/project/recurl-cli/)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
33
+ [![Python Version](https://img.shields.io/pypi/pyversions/recurl-cli.svg)](https://pypi.org/project/recurl-cli/)
34
+
35
+ ---
36
+
37
+ ## What is recurl?
38
+
39
+ recurl is a smart drop-in replacement for `curl` that transparently handles bot detection and anti-bot protections used by modern websites. It runs real curl under the hood, detects when a request is blocked (403, captcha, Cloudflare challenge), and automatically escalates through browser impersonation and headless Chromium rendering to get the response you need.
40
+
41
+ **Same curl syntax. No code changes. It just works.**
42
+
43
+ ```bash
44
+ # Works even on Cloudflare-protected sites
45
+ python -m recurl https://protected-site.com/api/data
46
+ ```
47
+
48
+ ## Why Python developers need recurl
49
+
50
+ If you've ever written Python scripts for web scraping or API access, you've hit these walls:
51
+
52
+ - `requests.get()` returns **403 Forbidden** on protected sites
53
+ - `urllib` gets blocked by TLS fingerprinting
54
+ - You end up installing Selenium, Playwright, or Puppeteer just to fetch a single page
55
+ - Headless browser setup is heavy, slow, and overkill for simple requests
56
+
57
+ recurl solves this by being a **curl replacement with built-in escalation**:
58
+
59
+ 1. **First attempt**: Standard curl request (fast, low overhead)
60
+ 2. **If blocked**: Retries with browser TLS fingerprint impersonation
61
+ 3. **Still blocked**: Launches headless Chromium, solves JS challenges, extracts cookies, replays the request
62
+
63
+ No Python dependencies for browser automation. No heavy browser setup. Just install and use.
64
+
65
+ ## Installation
66
+
67
+ ### pip (recommended)
68
+
69
+ ```bash
70
+ pip install recurl-cli
71
+ ```
72
+
73
+ ### Other package managers
74
+
75
+ | Platform | Command |
76
+ |----------|---------|
77
+ | **npm** | `npm install -g recurl-cli` |
78
+ | **Homebrew** | `brew tap neul-labs/tap && brew install recurl` |
79
+ | **Cargo** | `cargo install recurl` |
80
+ | **Scoop** | `scoop install recurl` |
81
+
82
+ See the [full installation guide](https://github.com/neul-labs/recurl#installation) for platform-specific instructions.
83
+
84
+ ## Quick Start
85
+
86
+ ```bash
87
+ # Use as a Python module
88
+ python -m recurl https://api.example.com/data
89
+
90
+ # Pass through all curl flags
91
+ python -m recurl -X POST -H "Content-Type: application/json" -d '{"key":"value"}' https://api.example.com
92
+
93
+ # Force JS rendering for heavily protected sites
94
+ python -m recurl --recurl-js https://cloudflare-protected-site.com
95
+
96
+ # Debug mode to see escalation steps
97
+ python -m recurl --recurl-debug https://example.com
98
+ ```
99
+
100
+ ### Python API (coming soon)
101
+
102
+ ```python
103
+ from recurl import fetch
104
+
105
+ # Simple fetch that handles anti-bot protections automatically
106
+ response = fetch("https://protected-site.com")
107
+ print(response.text)
108
+ ```
109
+
110
+ ## Supported Anti-Bot Services
111
+
112
+ recurl automatically detects and bypasses protection from:
113
+
114
+ | Service | Detection | Bypass |
115
+ |---------|-----------|--------|
116
+ | Cloudflare | Bot Management, Turnstile, JS Challenge | ✓ |
117
+ | Akamai Bot Manager | Behavioral analysis | ✓ |
118
+ | PerimeterX / HUMAN | Client-side fingerprinting | ✓ |
119
+ | DataDome | Bot Protection | ✓ |
120
+ | Imperva / Incapsula | Challenge pages | ✓ |
121
+ | Kasada | Bot Mitigation | ✓ |
122
+ | AWS WAF Bot Control | Request analysis | ✓ |
123
+ | Shape / F5 | Bot Defense | ✓ |
124
+ | hCaptcha | Challenge widget | ✓ |
125
+ | reCAPTCHA | Challenge widget | ✓ |
126
+
127
+ ## Platform Support
128
+
129
+ | Platform | Architecture | Impersonation | JS Preflight |
130
+ |----------|-------------|:-------------:|:------------:|
131
+ | Linux | x86_64 | ✓ | ✓ |
132
+ | Linux | aarch64 | ✓ | ✓ |
133
+ | macOS | Apple Silicon | ✓ | ✓ |
134
+ | macOS | Intel | ✓ | ✓ |
135
+ | Windows | x86_64 | — | ✓ |
136
+
137
+ ## CLI Reference
138
+
139
+ ### recurl-specific flags
140
+
141
+ | Flag | Description |
142
+ |------|-------------|
143
+ | `--recurl-strict` | Disable fallback, pure curl passthrough |
144
+ | `--recurl-impersonate <profile>` | Force TLS fingerprint profile (chrome, firefox, safari) |
145
+ | `--recurl-js` | Force JS preflight (skip straight to Chromium) |
146
+ | `--recurl-js-rendered` | Return rendered DOM instead of raw response |
147
+ | `--recurl-js-wait <selector>` | Wait for CSS selector before capturing |
148
+ | `--recurl-js-timeout <ms>` | JS preflight timeout (default: 30000) |
149
+ | `--recurl-debug` | Show diagnostic output and escalation steps |
150
+
151
+ All standard curl flags work as expected.
152
+
153
+ ## Use Cases for Python Developers
154
+
155
+ - **Web scraping** - Extract data from protected sites without Selenium/Playwright overhead
156
+ - **Data pipelines** - Reliable HTTP requests in Airflow, Luigi, or cron jobs
157
+ - **API integration** - Test and call APIs behind bot protection
158
+ - **Research & analytics** - Fetch pricing, inventory, or public datasets
159
+ - **CI/CD** - Reliable HTTP calls in GitHub Actions, GitLab CI, Jenkins
160
+ - **Shell scripting from Python** - Use `subprocess.run(["recurl", ...])` for guaranteed delivery
161
+
162
+ ## How It Works
163
+
164
+ ```
165
+ recurl receives request
166
+ |
167
+ +---> curl_engine (real curl binary)
168
+ | |
169
+ | +---> Success? Return response immediately
170
+ | |
171
+ | +---> Blocked? (403, 429, captcha, challenge page)
172
+ | |
173
+ | +---> Retry with impersonation (browser TLS fingerprint)
174
+ | | |
175
+ | | +---> Success? Return response
176
+ | | |
177
+ | | +---> Still blocked?
178
+ | | |
179
+ | | +---> JS preflight (headless Chromium)
180
+ | | |
181
+ | | +---> Solve challenge, extract cookies
182
+ | | |
183
+ | | +---> Replay request with cookies
184
+ | | |
185
+ | | +---> Return final response
186
+ |
187
+ +---> Return result to user
188
+ ```
189
+
190
+ The user sees only the final successful response.
191
+
192
+ ## Configuration
193
+
194
+ ### Environment Variables
195
+
196
+ | Variable | Description |
197
+ |----------|-------------|
198
+ | `RECURL_STRICT=1` | Same as `--recurl-strict` |
199
+ | `RECURL_DEBUG=1` | Enable debug output |
200
+ | `RECURL_DAEMON_IDLE_MS` | Daemon idle timeout (default: 60000) |
201
+
202
+ ### Daemon Mode
203
+
204
+ The optional `recurld` daemon keeps Chromium warm for sub-second responses:
205
+
206
+ ```bash
207
+ # Start daemon
208
+ recurld start
209
+
210
+ # Check status
211
+ recurld status
212
+
213
+ # Stop daemon
214
+ recurld stop
215
+ ```
216
+
217
+ ## Links
218
+
219
+ - **Main Repository**: [github.com/neul-labs/recurl](https://github.com/neul-labs/recurl)
220
+ - **Documentation**: [docs.neullabs.com/recurl](https://docs.neullabs.com/recurl)
221
+ - **Issues**: [github.com/neul-labs/recurl/issues](https://github.com/neul-labs/recurl/issues)
222
+ - **License**: MIT
223
+
224
+ ## Keywords
225
+
226
+ Python HTTP client, curl replacement, web scraping Python, anti-bot bypass, Cloudflare bypass Python, headless browser Python, TLS fingerprint spoofing, bot detection evasion, requests alternative, urllib replacement, Python CLI tool, data extraction, API client Python, web crawler Python, Chromium automation Python
@@ -0,0 +1,12 @@
1
+ recurl/__init__.py,sha256=jVoICXp__Jl2C3ENf3PXoP1fEFY-2TiXu44Unvc3Wjc,1509
2
+ recurl/cli.py,sha256=wfzSrwka_v0xagoXtfWud05qOchlIsqA-o4dJgc4dGw,265
3
+ recurl/bin/LICENSE,sha256=yCrr3BbMKoyfUwl7I6QfYNnRO-xKIMriv6WsCn38hY4,1066
4
+ recurl/bin/README.md,sha256=hg4XZivfDWN6ngZNGZFHrto28Gpbatvaf3-piXLUNDo,6453
5
+ recurl/bin/install.sh,sha256=5bGGCfnk9Ilv3cLDNccsj0SObdGobLqOfoEnfydtnkM,7139
6
+ recurl/bin/bin/recurl,sha256=wb6eY2wvmVauXJPFzQpeFEe9555ZZaAriNfluSzKEgU,4038160
7
+ recurl/bin/bin/recurld,sha256=U5o8gDgzIRNBhNYfWydGhVu98pKtmDvkrFCA-_TfsHA,3454448
8
+ recurl_cli-0.1.2.dist-info/METADATA,sha256=ln47sMsSse0kN7x0GS3kh04b_FvwtasxuMVtQNK0T3U,8420
9
+ recurl_cli-0.1.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ recurl_cli-0.1.2.dist-info/entry_points.txt,sha256=q07Bzz_IkTNWVAGQ3nwVO4uByDordrCv3aZ3aheA4tE,76
11
+ recurl_cli-0.1.2.dist-info/top_level.txt,sha256=T2lvMOhUoHC2yKVtcxhGyGY24su4WZMjId2kyHY9l-I,7
12
+ recurl_cli-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ recurl = recurl.cli:main
3
+ recurld = recurl.cli:main_daemon
@@ -0,0 +1 @@
1
+ recurl