zenrows 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +8 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +28 -0
- data/CLAUDE.md +63 -0
- data/LICENSE.txt +21 -0
- data/README.md +174 -0
- data/Rakefile +15 -0
- data/lib/zenrows/backends/base.rb +95 -0
- data/lib/zenrows/backends/http_rb.rb +59 -0
- data/lib/zenrows/client.rb +159 -0
- data/lib/zenrows/configuration.rb +136 -0
- data/lib/zenrows/errors.rb +74 -0
- data/lib/zenrows/js_instructions.rb +267 -0
- data/lib/zenrows/proxy.rb +226 -0
- data/lib/zenrows/railtie.rb +25 -0
- data/lib/zenrows/version.rb +5 -0
- data/lib/zenrows.rb +67 -0
- data/plan.md +430 -0
- data/sig/zenrows.rbs +4 -0
- data/test/test_helper.rb +7 -0
- data/test/zenrows/client_test.rb +83 -0
- data/test/zenrows/js_instructions_test.rb +140 -0
- data/test/zenrows/proxy_test.rb +114 -0
- data/test/zenrows_test.rb +43 -0
- metadata +99 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: af4d111e269c080e71f047c74acec3a319f05e0b6c800cb7067b4d05a23cc209
|
|
4
|
+
data.tar.gz: 373c8b95c4f311141767d750a509e88ef74dc2e18c947a703b9868c5cce97ad4
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 81b92b61f13e82a6b2462e790df7f009f88d75f7d9cac822dfd5e685fab0c85b85898f50f3ce67b241622609ff02ebf2b98896e0b9a2ed870f41d1ea2231138c
|
|
7
|
+
data.tar.gz: 9740ef08c21c4d3701795786886df16183d188a0cf6d5fc7d04bc0a2a46b8f00693ec98fecfd4925bf9a2cd69ab9dca93add9e4c54f2631c630f2698444f912a
|
data/.standard.yml
ADDED
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2025-12-25
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Initial release
|
|
15
|
+
- Multi-backend architecture with http.rb as primary adapter
|
|
16
|
+
- Proxy mode support (ZenRows superproxy)
|
|
17
|
+
- JavaScript rendering support
|
|
18
|
+
- Premium proxy and geolocation options
|
|
19
|
+
- JavaScript Instructions DSL for browser automation
|
|
20
|
+
- Screenshot support (full page and element)
|
|
21
|
+
- Session persistence
|
|
22
|
+
- Block resources option
|
|
23
|
+
- Wait and wait_for parameters
|
|
24
|
+
- Custom headers support
|
|
25
|
+
- YARD documentation
|
|
26
|
+
- Minitest test suite
|
|
27
|
+
- Standard RB linting
|
|
28
|
+
- Optional Rails integration with ActiveSupport::Duration support
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
Ruby gem for ZenRows web scraping proxy. Multi-backend HTTP client (http.rb primary).
|
|
4
|
+
|
|
5
|
+
## Context7 MCP
|
|
6
|
+
|
|
7
|
+
Use Context7 for docs lookup:
|
|
8
|
+
- `/websites/guides_rubygems` - RubyGems packaging
|
|
9
|
+
- `/docs.zenrows.com-4bed007/llmstxt` - ZenRows API
|
|
10
|
+
- `/websites/code_claude_en` - Claude Code CLI
|
|
11
|
+
|
|
12
|
+
## Commands
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
bundle install # Install dependencies
|
|
16
|
+
bundle exec rake test # Run tests
|
|
17
|
+
bundle exec standardrb # Lint code
|
|
18
|
+
bundle exec yard doc # Generate docs
|
|
19
|
+
bundle exec rake build # Build gem
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Architecture
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
lib/zenrows/
|
|
26
|
+
├── version.rb # Gem version
|
|
27
|
+
├── configuration.rb # Global config (api_key, host, port)
|
|
28
|
+
├── client.rb # Main client, returns HTTP instances
|
|
29
|
+
├── proxy.rb # Proxy URL builder (options in username)
|
|
30
|
+
├── js_instructions.rb # DSL for browser automation
|
|
31
|
+
├── errors.rb # Custom exceptions
|
|
32
|
+
├── railtie.rb # Optional Rails integration
|
|
33
|
+
└── backends/
|
|
34
|
+
├── base.rb # Backend interface
|
|
35
|
+
└── http_rb.rb # http.rb adapter (primary)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
Zenrows.configure do |c|
|
|
42
|
+
c.api_key = 'YOUR_KEY'
|
|
43
|
+
c.host = 'superproxy.zenrows.com'
|
|
44
|
+
c.port = 1337
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
client = Zenrows::Client.new
|
|
48
|
+
http = client.http(js_render: true, premium_proxy: true)
|
|
49
|
+
response = http.get('https://example.com', ssl_context: client.ssl_context)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Key Options
|
|
53
|
+
|
|
54
|
+
| Option | Type | Description |
|
|
55
|
+
|--------|------|-------------|
|
|
56
|
+
| `js_render` | Boolean | Enable headless browser |
|
|
57
|
+
| `premium_proxy` | Boolean | Use residential IPs |
|
|
58
|
+
| `proxy_country` | String | Country code (us, gb, de) |
|
|
59
|
+
| `wait` | Integer | Wait time in ms |
|
|
60
|
+
| `wait_for` | String | CSS selector to wait for |
|
|
61
|
+
| `js_instructions` | Array/JSON | Browser automation |
|
|
62
|
+
| `session_id` | Bool/String | Sticky session |
|
|
63
|
+
| `screenshot` | Boolean | Take screenshot |
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Fourthwall
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Zenrows
|
|
2
|
+
|
|
3
|
+
Ruby client for [ZenRows](https://www.zenrows.com/) web scraping proxy. Multi-backend HTTP client with http.rb as primary adapter.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add to your Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'zenrows'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Then run:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Configuration
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
Zenrows.configure do |config|
|
|
23
|
+
config.api_key = 'YOUR_API_KEY'
|
|
24
|
+
config.host = 'superproxy.zenrows.com' # default
|
|
25
|
+
config.port = 1337 # default
|
|
26
|
+
config.connect_timeout = 5 # seconds
|
|
27
|
+
config.read_timeout = 180 # seconds
|
|
28
|
+
end
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
### Basic Request
|
|
34
|
+
|
|
35
|
+
```ruby
|
|
36
|
+
client = Zenrows::Client.new
|
|
37
|
+
http = client.http(js_render: true, premium_proxy: true)
|
|
38
|
+
response = http.get('https://example.com', ssl_context: client.ssl_context)
|
|
39
|
+
|
|
40
|
+
puts response.body
|
|
41
|
+
puts response.status
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### With Options
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
http = client.http(
|
|
48
|
+
js_render: true, # Enable headless browser
|
|
49
|
+
premium_proxy: true, # Use residential IPs
|
|
50
|
+
proxy_country: 'us', # Geolocation
|
|
51
|
+
wait: 5000, # Wait 5 seconds after load
|
|
52
|
+
wait_for: '.content', # Wait for CSS selector
|
|
53
|
+
session_id: true # Sticky session
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### JavaScript Instructions
|
|
58
|
+
|
|
59
|
+
Automate browser interactions:
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
instructions = Zenrows::JsInstructions.build do
|
|
63
|
+
wait_for '.login-form'
|
|
64
|
+
fill '#email', 'user@example.com'
|
|
65
|
+
fill '#password', 'secret123'
|
|
66
|
+
click '#submit'
|
|
67
|
+
wait 2000
|
|
68
|
+
scroll_to :bottom
|
|
69
|
+
wait_for '.results'
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
http = client.http(js_render: true, js_instructions: instructions)
|
|
73
|
+
response = http.get(url, ssl_context: client.ssl_context)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Available instructions:
|
|
77
|
+
- `click(selector)` - Click element
|
|
78
|
+
- `wait(ms)` - Wait duration
|
|
79
|
+
- `wait_for(selector)` - Wait for element
|
|
80
|
+
- `wait_event(event)` - networkidle, load, domcontentloaded
|
|
81
|
+
- `fill(selector, value)` - Fill input
|
|
82
|
+
- `check(selector)` / `uncheck(selector)` - Checkboxes
|
|
83
|
+
- `select_option(selector, value)` - Dropdowns
|
|
84
|
+
- `scroll_y(pixels)` / `scroll_x(pixels)` - Scroll
|
|
85
|
+
- `scroll_to(:bottom)` / `scroll_to(:top)` - Scroll to position
|
|
86
|
+
- `evaluate(js_code)` - Execute JavaScript
|
|
87
|
+
- `frame_*` variants for iframe interactions
|
|
88
|
+
|
|
89
|
+
### Screenshots
|
|
90
|
+
|
|
91
|
+
```ruby
|
|
92
|
+
http = client.http(
|
|
93
|
+
js_render: true,
|
|
94
|
+
screenshot: true, # Take screenshot
|
|
95
|
+
screenshot_fullpage: true, # Full page
|
|
96
|
+
json_response: true # Get JSON with screenshot data
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Block Resources
|
|
101
|
+
|
|
102
|
+
Speed up requests by blocking unnecessary resources:
|
|
103
|
+
|
|
104
|
+
```ruby
|
|
105
|
+
http = client.http(
|
|
106
|
+
js_render: true,
|
|
107
|
+
block_resources: 'image,media,font'
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Options Reference
|
|
112
|
+
|
|
113
|
+
| Option | Type | Description |
|
|
114
|
+
|--------|------|-------------|
|
|
115
|
+
| `js_render` | Boolean | Enable JavaScript rendering |
|
|
116
|
+
| `premium_proxy` | Boolean | Use residential proxies |
|
|
117
|
+
| `proxy_country` | String | Country code (us, gb, de, etc.) |
|
|
118
|
+
| `wait` | Integer/Boolean | Wait time in ms (true = 15000) |
|
|
119
|
+
| `wait_for` | String | CSS selector to wait for |
|
|
120
|
+
| `session_id` | Boolean/String | Session persistence |
|
|
121
|
+
| `window_height` | Integer | Browser window height |
|
|
122
|
+
| `window_width` | Integer | Browser window width |
|
|
123
|
+
| `js_instructions` | Array/String | Browser automation |
|
|
124
|
+
| `json_response` | Boolean | Return JSON instead of HTML |
|
|
125
|
+
| `screenshot` | Boolean | Take screenshot |
|
|
126
|
+
| `screenshot_fullpage` | Boolean | Full page screenshot |
|
|
127
|
+
| `screenshot_selector` | String | Screenshot specific element |
|
|
128
|
+
| `block_resources` | String | Block resources (image,media,font) |
|
|
129
|
+
| `headers` | Hash | Custom HTTP headers |
|
|
130
|
+
|
|
131
|
+
## Error Handling
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
begin
|
|
135
|
+
response = http.get(url, ssl_context: client.ssl_context)
|
|
136
|
+
rescue Zenrows::ConfigurationError => e
|
|
137
|
+
# Missing or invalid configuration
|
|
138
|
+
rescue Zenrows::RateLimitError => e
|
|
139
|
+
sleep(e.retry_after || 60)
|
|
140
|
+
retry
|
|
141
|
+
rescue Zenrows::BotDetectedError => e
|
|
142
|
+
# Try with premium proxy
|
|
143
|
+
http = client.http(premium_proxy: true, proxy_country: 'us')
|
|
144
|
+
retry
|
|
145
|
+
rescue Zenrows::WaitTimeError => e
|
|
146
|
+
# Wait time exceeded 3 minutes
|
|
147
|
+
rescue Zenrows::TimeoutError => e
|
|
148
|
+
# Request timed out
|
|
149
|
+
end
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Rails Integration
|
|
153
|
+
|
|
154
|
+
The gem automatically integrates with Rails when detected:
|
|
155
|
+
- Uses Rails.logger by default
|
|
156
|
+
- Supports ActiveSupport::Duration for wait times
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
# In Rails, you can use duration objects
|
|
160
|
+
http = client.http(wait: 5.seconds)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
bundle install
|
|
167
|
+
bundle exec rake test # Run tests
|
|
168
|
+
bundle exec standardrb # Lint code
|
|
169
|
+
bundle exec yard doc # Generate docs
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
MIT License. See [LICENSE.txt](LICENSE.txt).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "minitest/test_task"
|
|
5
|
+
|
|
6
|
+
Minitest::TestTask.create(:test) do |t|
|
|
7
|
+
t.libs << "test"
|
|
8
|
+
t.libs << "lib"
|
|
9
|
+
t.warning = false
|
|
10
|
+
t.test_globs = ["test/**/*_test.rb"]
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
require "standard/rake"
|
|
14
|
+
|
|
15
|
+
task default: %i[test standard]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Zenrows
  module Backends
    # Abstract base class for HTTP backends
    #
    # Backends are responsible for building configured HTTP clients
    # that route through the ZenRows proxy. This class stores the proxy
    # builder and configuration, and provides the shared SSL context and
    # timeout calculation used by concrete backends.
    #
    # @abstract Subclass and override {#build_client} to implement
    # @author Ernest Bursa
    # @since 0.1.0
    # @api public
    class Base
      # @return [Zenrows::Proxy] Proxy configuration builder
      attr_reader :proxy

      # @return [Zenrows::Configuration] Configuration instance
      attr_reader :config

      # @param proxy [Zenrows::Proxy] Proxy configuration builder
      # @param config [Zenrows::Configuration] Configuration instance
      def initialize(proxy:, config:)
        @proxy = proxy
        @config = config
      end

      # Build a configured HTTP client
      #
      # @param options [Hash] Request options
      # @return [Object] Configured HTTP client
      # @raise [NotImplementedError] if not implemented by subclass
      def build_client(options = {})
        raise NotImplementedError, "#{self.class}#build_client must be implemented"
      end

      # Build SSL context for proxy connections
      #
      # A new context is created on every call.
      #
      # @return [OpenSSL::SSL::SSLContext] SSL context with verification disabled
      def ssl_context
        # Loaded lazily so OpenSSL is only required when a context is requested.
        require "openssl"
        ctx = OpenSSL::SSL::SSLContext.new
        ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
        ctx
      end

      # Calculate appropriate timeout based on options
      #
      # Starts from the configured connect/read timeouts and extends the read
      # timeout (in seconds) for every option that makes the request slower.
      #
      # @param options [Hash] Request options
      # @option options [Boolean] :js_render Adds 15 seconds
      # @option options [Boolean, Integer, Object] :wait Adds the wait time
      #   (rounded up to whole seconds) plus a 20 second buffer
      # @option options [Boolean] :screenshot Adds 10 seconds
      # @option options [Boolean] :screenshot_fullpage Adds 10 seconds
      # @option options [String] :screenshot_selector Adds 10 seconds
      # @option options [Array, String] :js_instructions Adds 5 seconds per
      #   Array element, or 5 seconds for any other value
      # @return [Hash] Timeout configuration with :connect and :read
      def calculate_timeouts(options = {})
        connect = config.connect_timeout
        read = config.read_timeout

        # Add time for JS rendering
        read += 15 if options[:js_render]

        # Add buffer for wait time
        if options[:wait]
          wait_seconds = normalize_wait_seconds(options[:wait])
          read += wait_seconds + 20
        end

        # Add time for screenshots. Any of the three screenshot options counts;
        # the buffer is applied once, not once per option.
        screenshot_requested = options[:screenshot] ||
          options[:screenshot_fullpage] ||
          options[:screenshot_selector]
        read += 10 if screenshot_requested

        # Add time for JS instructions
        if options[:js_instructions]
          instructions = options[:js_instructions]
          # Only an Array can be counted; anything else is treated as one step.
          count = instructions.is_a?(Array) ? instructions.size : 1
          read += count * 5
        end

        {connect: connect, read: read}
      end

      private

      # Normalize wait value to seconds
      #
      # Millisecond values are rounded UP to whole seconds so the timeout
      # buffer is never shorter than the requested wait.
      #
      # @param wait [Boolean, Integer, Object] Wait value: +true+, milliseconds,
      #   or a duration-like object responding to +to_i+ and +parts+
      # @return [Integer] Wait time in seconds
      def normalize_wait_seconds(wait)
        case wait
        when true then 15
        when Integer then (wait / 1000.0).ceil
        when ->(w) { w.respond_to?(:to_i) && w.respond_to?(:parts) }
          # Duration-like objects (e.g. ActiveSupport::Duration) are already in seconds.
          wait.to_i
        else
          (wait.to_i / 1000.0).ceil
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "http"
|
|
4
|
+
|
|
5
|
+
module Zenrows
  module Backends
    # Backend adapter built on the http.rb gem
    #
    # Produces http.rb clients that are pre-configured with timeouts,
    # custom headers and the ZenRows proxy credentials.
    #
    # @example Basic usage
    #   backend = Zenrows::Backends::HttpRb.new(proxy: proxy, config: config)
    #   http = backend.build_client(js_render: true)
    #   response = http.get(url, ssl_context: backend.ssl_context)
    #
    # @author Ernest Bursa
    # @since 0.1.0
    # @api public
    class HttpRb < Base
      # Build an http.rb client routed through the ZenRows proxy
      #
      # The caller's options hash is not modified; a copy is taken first.
      #
      # @param options [Hash] Request options
      # @option options [Boolean] :js_render Enable JavaScript rendering
      # @option options [Boolean] :premium_proxy Use residential proxies
      # @option options [String] :proxy_country Country code
      # @option options [Boolean, Integer] :wait Wait time
      # @option options [String] :wait_for CSS selector to wait for
      # @option options [Hash] :headers Custom HTTP headers
      # @return [HTTP::Client] Configured HTTP client
      def build_client(options = {})
        request_opts = options.dup

        # :headers is sent on the request itself; the proxy only receives a
        # :custom_headers flag when at least one header was supplied.
        custom_headers = request_opts.delete(:headers) || {}
        request_opts[:custom_headers] = true unless custom_headers.none?

        upstream = proxy.build(request_opts)
        limits = calculate_timeouts(request_opts)

        HTTP
          .timeout(connect: limits[:connect], read: limits[:read])
          .headers(custom_headers)
          .via(*upstream.values_at(:host, :port, :username, :password))
      end
    end
  end
end
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Zenrows
  # Entry point for talking to the ZenRows proxy
  #
  # A client resolves its configuration (global settings plus per-instance
  # overrides), creates a proxy builder and a backend, and hands out HTTP
  # clients that route through the ZenRows proxy with the requested options.
  #
  # @example Basic usage
  #   Zenrows.configure do |c|
  #     c.api_key = 'YOUR_API_KEY'
  #   end
  #
  #   client = Zenrows::Client.new
  #   http = client.http(js_render: true)
  #   response = http.get('https://example.com', ssl_context: client.ssl_context)
  #
  # @example With custom configuration
  #   client = Zenrows::Client.new(api_key: 'KEY', host: 'proxy.zenrows.com')
  #   http = client.http(premium_proxy: true, proxy_country: 'us')
  #
  # @author Ernest Bursa
  # @since 0.1.0
  # @api public
  class Client
    # @return [Configuration] Configuration resolved for this client
    attr_reader :config

    # @return [Proxy] Proxy builder instance
    attr_reader :proxy

    # @return [Backends::Base] HTTP backend instance
    attr_reader :backend

    # Create a client, optionally overriding parts of the global configuration
    #
    # @param api_key [String, nil] Override API key from global config
    # @param host [String, nil] Override proxy host
    # @param port [Integer, nil] Override proxy port
    # @param backend [Symbol, nil] Backend to use (:http_rb)
    # @raise [ConfigurationError] if api_key is not configured
    def initialize(api_key: nil, host: nil, port: nil, backend: nil)
      @config = build_config(api_key: api_key, host: host, port: port, backend: backend)
        .tap(&:validate!)
      @proxy = Proxy.new(api_key: @config.api_key, host: @config.host, port: @config.port)
      @backend = build_backend
    end

    # Build a configured HTTP client
    #
    # @param options [Hash] Request options
    # @option options [Boolean] :js_render Enable JavaScript rendering
    # @option options [Boolean] :premium_proxy Use residential proxies
    # @option options [String] :proxy_country Country code (us, gb, de, etc.)
    # @option options [Boolean, Integer] :wait Wait time (true=15s, Integer=ms)
    # @option options [String] :wait_for CSS selector to wait for
    # @option options [Boolean, String, Integer] :session_id Session persistence
    # @option options [Integer] :window_height Browser window height
    # @option options [Integer] :window_width Browser window width
    # @option options [Array, String] :js_instructions JavaScript instructions
    # @option options [Boolean] :json_response Return JSON instead of HTML
    # @option options [Boolean] :screenshot Take screenshot
    # @option options [Boolean] :screenshot_fullpage Full page screenshot
    # @option options [String] :screenshot_selector Screenshot specific element
    # @option options [Hash] :headers Custom HTTP headers
    # @option options [String] :block_resources Block resources (image,media,font)
    # @return [HTTP::Client] Configured HTTP client ready for requests
    #
    # @example Basic request
    #   http = client.http(js_render: true)
    #   response = http.get(url, ssl_context: client.ssl_context)
    #
    # @example With premium proxy and country
    #   http = client.http(premium_proxy: true, proxy_country: 'us')
    #   response = http.get(url, ssl_context: client.ssl_context)
    def http(options = {})
      backend.build_client(options)
    end

    # SSL context to pass along with each request
    #
    # ZenRows proxy requires SSL verification to be disabled.
    #
    # @return [OpenSSL::SSL::SSLContext] SSL context
    #
    # @example
    #   http = client.http(js_render: true)
    #   response = http.get(url, ssl_context: client.ssl_context)
    def ssl_context
      backend.ssl_context
    end

    # Proxy connection details for the given options
    #
    # @param options [Hash] Proxy options
    # @return [Hash] Proxy configuration with :host, :port, :username, :password
    def proxy_config(options = {})
      proxy.build(options)
    end

    # Proxy URL for the given options
    #
    # @param options [Hash] Proxy options
    # @return [String] Proxy URL
    def proxy_url(options = {})
      proxy.build_url(options)
    end

    private

    # Resolve the configuration for this client
    #
    # Copies every setting from the global configuration into a fresh
    # Configuration, then applies the non-nil per-client overrides on top.
    #
    # @param api_key [String, nil] Override API key
    # @param host [String, nil] Override host
    # @param port [Integer, nil] Override port
    # @param backend [Symbol, nil] Override backend
    # @return [Configuration] Configuration instance
    def build_config(api_key:, host:, port:, backend:)
      resolved = Configuration.new
      defaults = Zenrows.configuration

      # Global values first ...
      %i[api_key host port connect_timeout read_timeout backend logger].each do |setting|
        resolved.public_send("#{setting}=", defaults.public_send(setting))
      end

      # ... then whichever overrides were actually supplied.
      {api_key: api_key, host: host, port: port, backend: backend}.each do |setting, value|
        resolved.public_send("#{setting}=", value) if value
      end

      resolved
    end

    # Instantiate the backend named by the configuration
    #
    # @return [Backends::Base] Backend instance
    # @raise [ConfigurationError] if backend is not supported
    def build_backend
      requested = config.backend
      unless requested == :http_rb
        raise ConfigurationError, "Unsupported backend: #{requested}. Use :http_rb"
      end

      Backends::HttpRb.new(proxy: proxy, config: config)
    end
  end
end
|