zenrows 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +8 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +28 -0
- data/CLAUDE.md +63 -0
- data/LICENSE.txt +21 -0
- data/README.md +174 -0
- data/Rakefile +15 -0
- data/lib/zenrows/backends/base.rb +95 -0
- data/lib/zenrows/backends/http_rb.rb +59 -0
- data/lib/zenrows/client.rb +159 -0
- data/lib/zenrows/configuration.rb +136 -0
- data/lib/zenrows/errors.rb +74 -0
- data/lib/zenrows/js_instructions.rb +267 -0
- data/lib/zenrows/proxy.rb +226 -0
- data/lib/zenrows/railtie.rb +25 -0
- data/lib/zenrows/version.rb +5 -0
- data/lib/zenrows.rb +67 -0
- data/plan.md +430 -0
- data/sig/zenrows.rbs +4 -0
- data/test/test_helper.rb +7 -0
- data/test/zenrows/client_test.rb +83 -0
- data/test/zenrows/js_instructions_test.rb +140 -0
- data/test/zenrows/proxy_test.rb +114 -0
- data/test/zenrows_test.rb +43 -0
- metadata +99 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "monitor"
|
|
4
|
+
|
|
5
|
+
module Zenrows
|
|
6
|
+
# Global configuration for Zenrows client
|
|
7
|
+
#
|
|
8
|
+
# @example Configure with block
|
|
9
|
+
# Zenrows.configure do |config|
|
|
10
|
+
# config.api_key = 'YOUR_API_KEY'
|
|
11
|
+
# config.host = 'superproxy.zenrows.com'
|
|
12
|
+
# config.port = 1337
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# @example Configure directly
|
|
16
|
+
# Zenrows.configuration.api_key = 'YOUR_API_KEY'
|
|
17
|
+
#
|
|
18
|
+
# @author Ernest Bursa
|
|
19
|
+
# @since 0.1.0
|
|
20
|
+
# @api public
|
|
21
|
+
class Configuration
  include MonitorMixin

  # Values applied by #reset! (and therefore by #initialize).
  DEFAULTS = {
    host: "superproxy.zenrows.com",
    port: 1337,
    connect_timeout: 5,
    read_timeout: 180,
    backend: :http_rb
  }.freeze

  # @return [String, nil] ZenRows API key (required)
  attr_accessor :api_key

  # @return [String] ZenRows proxy host
  attr_accessor :host

  # @return [Integer] ZenRows proxy port
  attr_accessor :port

  # @return [Integer] Default connection timeout in seconds
  attr_accessor :connect_timeout

  # @return [Integer] Default read timeout in seconds
  attr_accessor :read_timeout

  # @return [Symbol] HTTP backend to use (:http_rb, :faraday, :net_http)
  attr_accessor :backend

  # @return [Logger, nil] Logger instance for debug output
  attr_accessor :logger

  # Builds a configuration populated with DEFAULTS and no API key.
  def initialize
    super # MonitorMixin's initializer must run before #synchronize is usable
    reset!
  end

  # Reset configuration to defaults.
  #
  # Clears +api_key+ and +logger+ and restores every DEFAULTS entry. The
  # assignments run inside the monitor; the plain attribute accessors do not.
  #
  # @return [void]
  def reset!
    synchronize do
      @api_key = nil
      @host = DEFAULTS[:host]
      @port = DEFAULTS[:port]
      @connect_timeout = DEFAULTS[:connect_timeout]
      @read_timeout = DEFAULTS[:read_timeout]
      @backend = DEFAULTS[:backend]
      @logger = nil
    end
  end

  # Validate that required configuration is present.
  #
  # @raise [ConfigurationError] if api_key is nil, empty or whitespace-only
  # @return [true] if configuration is valid
  def validate!
    raise ConfigurationError, "api_key is required" unless api_key_present?

    true
  end

  # Check if configuration is valid without raising.
  #
  # @return [Boolean] true if configuration is valid
  def valid?
    api_key_present?
  end

  # Convert configuration to hash.
  #
  # The logger is not part of the returned hash.
  #
  # @return [Hash] configuration as hash
  def to_h
    {
      api_key: api_key,
      host: host,
      port: port,
      connect_timeout: connect_timeout,
      read_timeout: read_timeout,
      backend: backend
    }
  end

  private

  # Single source of truth for #validate! and #valid?.
  #
  # +to_s+ keeps a non-String key from raising NoMethodError, and +strip+
  # rejects keys made only of whitespace.
  #
  # @return [Boolean] true when a usable API key is set
  def api_key_present?
    !api_key.to_s.strip.empty?
  end
end
|
|
108
|
+
|
|
109
|
+
class << self
  # Global configuration, created on first access and memoized afterwards.
  #
  # @return [Configuration] Global configuration instance
  def configuration
    return @configuration if @configuration

    @configuration = Configuration.new
  end

  # Configure Zenrows with a block.
  #
  # The block is optional; without one the current configuration is simply
  # returned.
  #
  # @example
  #   Zenrows.configure do |config|
  #     config.api_key = 'YOUR_API_KEY'
  #   end
  #
  # @yield [Configuration] configuration instance
  # @return [Configuration] configuration instance
  def configure
    if block_given?
      yield configuration
    end

    # Looked up again on purpose: the block may have replaced the instance.
    configuration
  end

  # Replace the global configuration with a brand-new instance.
  #
  # @return [Configuration] the fresh instance (the value of the assignment)
  def reset_configuration!
    fresh = Configuration.new
    @configuration = fresh
  end
end
|
|
136
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Zenrows
|
|
4
|
+
# Base error class for all Zenrows errors
|
|
5
|
+
#
|
|
6
|
+
# @author Ernest Bursa
|
|
7
|
+
# @since 0.1.0
|
|
8
|
+
# @api public
|
|
9
|
+
# Root of the gem's exception hierarchy: every other error class in this file
# inherits from it, so `rescue Zenrows::Error` catches them all.
class Error < StandardError; end
|
|
10
|
+
|
|
11
|
+
# Raised when configuration is invalid or missing
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# raise Zenrows::ConfigurationError, "API key is required"
|
|
15
|
+
# Marker subclass with no extra state. Configuration#validate! raises it with
# the message "api_key is required" when no API key is set.
class ConfigurationError < Error; end
|
|
16
|
+
|
|
17
|
+
# Raised when API authentication fails (invalid API key)
|
|
18
|
+
#
|
|
19
|
+
# @example
|
|
20
|
+
# raise Zenrows::AuthenticationError, "Invalid API key"
|
|
21
|
+
# Marker subclass with no extra state.
# NOTE(review): no raise site is visible in this file — confirm which layer maps
# an authentication failure to this class.
class AuthenticationError < Error; end
|
|
22
|
+
|
|
23
|
+
# Raised when rate limited (HTTP 429)
|
|
24
|
+
#
|
|
25
|
+
# @example Handling rate limits
|
|
26
|
+
# begin
|
|
27
|
+
# response = http.get(url)
|
|
28
|
+
# rescue Zenrows::RateLimitError => e
|
|
29
|
+
# sleep(e.retry_after || 60)
|
|
30
|
+
# retry
|
|
31
|
+
# end
|
|
32
|
+
class RateLimitError < Error
  # Delay hint supplied by whoever raised the error.
  #
  # @return [Integer, nil] Seconds to wait before retrying
  attr_reader :retry_after

  # @param message [String] Error message
  # @param retry_after [Integer, nil] Seconds to wait before retrying
  def initialize(message = "Rate limited", retry_after: nil)
    super(message)
    @retry_after = retry_after
  end
end
|
|
43
|
+
|
|
44
|
+
# Raised when bot detection triggers (HTTP 400/403/422)
|
|
45
|
+
#
|
|
46
|
+
# @example
|
|
47
|
+
# begin
|
|
48
|
+
# response = http.get(url)
|
|
49
|
+
# rescue Zenrows::BotDetectedError => e
|
|
50
|
+
# # Retry with premium proxy
|
|
51
|
+
# http = client.http(premium_proxy: true, proxy_country: 'us')
|
|
52
|
+
# response = http.get(url)
|
|
53
|
+
# end
|
|
54
|
+
class BotDetectedError < Error
  # Optional hint describing how the request could be retried successfully.
  #
  # @return [String, nil] Suggested fix command
  attr_reader :suggestion

  # @param message [String] Error message
  # @param suggestion [String, nil] Suggested fix
  def initialize(message = "Bot detected", suggestion: nil)
    super(message)
    @suggestion = suggestion
  end
end
|
|
65
|
+
|
|
66
|
+
# Raised when request times out
|
|
67
|
+
# Marker subclass with no extra state. It descends from Zenrows::Error, not from
# Ruby's Timeout::Error, so rescuing one does not catch the other.
class TimeoutError < Error; end
|
|
68
|
+
|
|
69
|
+
# Raised when proxy connection fails
|
|
70
|
+
# Marker subclass with no extra state.
# NOTE(review): no raise site is visible in this file — confirm which backend
# raises it on a failed proxy connection.
class ProxyError < Error; end
|
|
71
|
+
|
|
72
|
+
# Raised when wait time exceeds maximum (180 seconds)
|
|
73
|
+
# Marker subclass with no extra state. Proxy raises it while building proxy
# parameters when the :wait option exceeds Proxy::MAX_WAIT_MS.
class WaitTimeError < Error; end
|
|
74
|
+
end
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Zenrows
|
|
6
|
+
# DSL for building JavaScript instructions
|
|
7
|
+
#
|
|
8
|
+
# JavaScript instructions enable dynamic interaction with web pages
|
|
9
|
+
# by automating user actions like clicking, filling forms, scrolling,
|
|
10
|
+
# and executing custom JavaScript.
|
|
11
|
+
#
|
|
12
|
+
# @example Building instructions with DSL
|
|
13
|
+
# instructions = Zenrows::JsInstructions.build do
|
|
14
|
+
# click '.load-more'
|
|
15
|
+
# wait 2000
|
|
16
|
+
# fill 'input#email', 'test@example.com'
|
|
17
|
+
# scroll_to :bottom
|
|
18
|
+
# wait_for '.results'
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# @example Using with client
|
|
22
|
+
# client = Zenrows::Client.new
|
|
23
|
+
# http = client.http(
|
|
24
|
+
# js_render: true,
|
|
25
|
+
# js_instructions: instructions
|
|
26
|
+
# )
|
|
27
|
+
#
|
|
28
|
+
# @author Ernest Bursa
|
|
29
|
+
# @since 0.1.0
|
|
30
|
+
# @api public
|
|
31
|
+
class JsInstructions
  # The live, mutable list the builder appends to (use #to_a for a copy).
  #
  # @return [Array<Hash>] List of instructions
  attr_reader :instructions

  # Creates an empty builder.
  def initialize
    @instructions = []
  end

  # Build instructions using DSL block.
  #
  # The block is evaluated with +instance_eval+, so inside it +self+ is the
  # builder and the DSL methods can be called without a receiver.
  #
  # @yield [JsInstructions] Builder instance
  # @return [JsInstructions] Built instructions
  #
  # @example
  #   Zenrows::JsInstructions.build do
  #     click '.button'
  #     wait 1000
  #   end
  def self.build(&block)
    builder = new
    builder.instance_eval(&block) if block
    builder
  end

  # Click an element.
  #
  # @param selector [String] CSS selector
  # @return [self]
  #
  # @example
  #   click '.submit-button'
  #   click '#load-more'
  def click(selector)
    append_instruction(:click, selector)
  end

  # Wait for a duration in milliseconds.
  #
  # @param duration [Integer] Milliseconds to wait (documented maximum 10000;
  #   the value is not checked here)
  # @return [self]
  #
  # @example
  #   wait 2000  # Wait 2 seconds
  def wait(duration)
    append_instruction(:wait, duration)
  end

  # Wait for an element to appear.
  #
  # @param selector [String] CSS selector
  # @return [self]
  #
  # @example
  #   wait_for '.dynamic-content'
  #   wait_for '#results-loaded'
  def wait_for(selector)
    append_instruction(:wait_for, selector)
  end

  # Wait for a browser event.
  #
  # @param event [String, Symbol] Event name: networkidle, load, domcontentloaded
  #   (stored as a String)
  # @return [self]
  #
  # @example
  #   wait_event :networkidle
  def wait_event(event)
    append_instruction(:wait_event, event.to_s)
  end

  # Fill an input field.
  #
  # @param selector [String] CSS selector for input
  # @param value [String] Value to fill
  # @return [self]
  #
  # @example
  #   fill 'input#email', 'user@example.com'
  #   fill 'input[name="password"]', 'secret123'
  def fill(selector, value)
    append_instruction(:fill, [selector, value])
  end

  # Check a checkbox.
  #
  # @param selector [String] CSS selector
  # @return [self]
  def check(selector)
    append_instruction(:check, selector)
  end

  # Uncheck a checkbox.
  #
  # @param selector [String] CSS selector
  # @return [self]
  def uncheck(selector)
    append_instruction(:uncheck, selector)
  end

  # Select an option from dropdown.
  #
  # @param selector [String] CSS selector for select element
  # @param value [String] Option value to select
  # @return [self]
  #
  # @example
  #   select_option '#country', 'US'
  def select_option(selector, value)
    append_instruction(:select_option, [selector, value])
  end

  # Scroll vertically.
  #
  # @param pixels [Integer] Pixels to scroll (negative = up)
  # @return [self]
  #
  # @example
  #   scroll_y 1500   # Scroll down
  #   scroll_y -500   # Scroll up
  def scroll_y(pixels)
    append_instruction(:scroll_y, pixels)
  end

  # Scroll horizontally.
  #
  # @param pixels [Integer] Pixels to scroll (negative = left)
  # @return [self]
  def scroll_x(pixels)
    append_instruction(:scroll_x, pixels)
  end

  # Scroll to position.
  #
  # @param position [String, Symbol] :bottom or :top (stored as a String)
  # @return [self]
  #
  # @example
  #   scroll_to :bottom
  #   scroll_to :top
  def scroll_to(position)
    append_instruction(:scroll_to, position.to_s)
  end

  # Execute custom JavaScript.
  #
  # @param code [String] JavaScript code to execute
  # @return [self]
  #
  # @example
  #   evaluate "window.scrollTo(0, document.body.scrollHeight)"
  #   evaluate "document.querySelector('.modal').remove()"
  def evaluate(code)
    append_instruction(:evaluate, code)
  end

  # Click element inside iframe.
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param element_selector [String] CSS selector for element inside iframe
  # @return [self]
  def frame_click(iframe_selector, element_selector)
    append_instruction(:frame_click, [iframe_selector, element_selector])
  end

  # Wait for element inside iframe.
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param element_selector [String] CSS selector for element
  # @return [self]
  def frame_wait_for(iframe_selector, element_selector)
    append_instruction(:frame_wait_for, [iframe_selector, element_selector])
  end

  # Fill input inside iframe.
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param input_selector [String] CSS selector for input
  # @param value [String] Value to fill
  # @return [self]
  def frame_fill(iframe_selector, input_selector, value)
    append_instruction(:frame_fill, [iframe_selector, input_selector, value])
  end

  # Execute JavaScript inside iframe.
  #
  # @param iframe_name [String] Iframe name or URL
  # @param code [String] JavaScript code
  # @return [self]
  def frame_evaluate(iframe_name, code)
    append_instruction(:frame_evaluate, [iframe_name, code])
  end

  # Convert to JSON string for API.
  #
  # Any generator state or options are forwarded to Array#to_json, so
  # formatting options are honoured instead of being silently dropped.
  #
  # @param args [Array] Optional JSON generator state/options
  # @return [String] JSON-encoded instructions
  def to_json(*args)
    @instructions.to_json(*args)
  end

  # Convert to array.
  #
  # @return [Array<Hash>] Shallow copy of the instructions array
  def to_a
    @instructions.dup
  end

  # Check if there are any instructions.
  #
  # @return [Boolean]
  def empty?
    @instructions.empty?
  end

  # Number of instructions.
  #
  # @return [Integer]
  def size
    @instructions.size
  end

  private

  # Appends a single-key instruction hash and returns the builder for chaining.
  #
  # @param action [Symbol] Instruction name, used as the hash key
  # @param payload [Object] Instruction argument(s)
  # @return [self]
  def append_instruction(action, payload)
    @instructions << {action => payload}
    self
  end
end
|
|
267
|
+
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
require "json"
|
|
5
|
+
require "securerandom"
|
|
6
|
+
|
|
7
|
+
module Zenrows
|
|
8
|
+
# Builds ZenRows proxy configuration
|
|
9
|
+
#
|
|
10
|
+
# ZenRows proxy encodes options in the password field of the proxy URL.
|
|
11
|
+
# Format: http://API_KEY:opt1=val1&opt2=val2@host:port
|
|
12
|
+
#
|
|
13
|
+
# @example Basic proxy configuration
|
|
14
|
+
# proxy = Zenrows::Proxy.new(api_key: 'key', host: 'proxy.zenrows.com', port: 1337)
|
|
15
|
+
# proxy.build(js_render: true, premium_proxy: true)
|
|
16
|
+
# # => { host: 'proxy.zenrows.com', port: 1337, username: 'key', password: 'js_render=true&premium_proxy=true' }
|
|
17
|
+
#
|
|
18
|
+
# @author Ernest Bursa
|
|
19
|
+
# @since 0.1.0
|
|
20
|
+
# @api public
|
|
21
|
+
class Proxy
  # Maximum wait time in milliseconds (3 minutes)
  MAX_WAIT_MS = 180_000

  # Valid sticky TTL values for HTTP proxy
  VALID_STICKY_TTL = %w[30s 5m 30m 1h 1d].freeze

  # Valid region codes for HTTP proxy
  VALID_REGIONS = {
    "africa" => "af",
    "af" => "af",
    "asia pacific" => "ap",
    "ap" => "ap",
    "europe" => "eu",
    "eu" => "eu",
    "middle east" => "me",
    "me" => "me",
    "north america" => "na",
    "na" => "na",
    "south america" => "sa",
    "sa" => "sa",
    "global" => nil
  }.freeze

  # Parameters whose presence switches js_render on automatically.
  JS_RENDER_TRIGGERS = %i[
    wait
    wait_for
    json_response
    window_height
    window_width
    js_instructions
    screenshot
    screenshot_fullpage
    screenshot_selector
  ].freeze
  private_constant :JS_RENDER_TRIGGERS

  # @return [String] ZenRows API key
  attr_reader :api_key

  # @return [String] Proxy host
  attr_reader :host

  # @return [Integer] Proxy port
  attr_reader :port

  # @param api_key [String] ZenRows API key
  # @param host [String] Proxy host
  # @param port [Integer] Proxy port
  def initialize(api_key:, host:, port:)
    @api_key = api_key
    @host = host
    @port = port
  end

  # Build proxy configuration hash for HTTP client.
  #
  # The API key becomes the username; the options are joined as
  # +key=value+ pairs with "&" and become the password. Only the
  # js_instructions value is URL-escaped; other values are interpolated as-is.
  #
  # @param options [Hash] Proxy options (not mutated)
  # @option options [Boolean] :js_render Enable JavaScript rendering
  # @option options [Boolean] :premium_proxy Use residential proxies
  # @option options [String] :proxy_country Country code (us, gb, de, etc.)
  # @option options [Boolean, Integer] :wait Wait time (true=15s, Integer=ms)
  # @option options [String] :wait_for CSS selector to wait for
  # @option options [Boolean, String, Integer] :session_id Session persistence
  # @option options [Integer] :window_height Browser window height
  # @option options [Integer] :window_width Browser window width
  # @option options [Array, String, #to_json] :js_instructions JavaScript
  #   instructions; a String is sent as given, anything else is JSON-encoded
  # @option options [Boolean] :json_response Return JSON instead of HTML
  # @option options [Boolean] :original_status Return original HTTP status
  # @option options [Boolean] :screenshot Take screenshot
  # @option options [Boolean] :screenshot_fullpage Full page screenshot
  # @option options [String] :screenshot_selector Screenshot specific element
  # @option options [Boolean] :custom_headers Enable custom headers passthrough
  # @option options [String] :block_resources Block resources (image,media,font)
  # @return [Hash] Proxy configuration with :host, :port, :username, :password
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build(options = {})
    proxy_params = build_params(options.dup)

    {
      host: host,
      port: port,
      username: api_key,
      password: proxy_params.map { |key, value| "#{key}=#{value}" }.join("&")
    }
  end

  # Build proxy URL string.
  #
  # With no options the password part is empty ("http://KEY:@host:port").
  #
  # @param options [Hash] Proxy options (same as #build)
  # @return [String] Proxy URL
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build_url(options = {})
    config = build(options)
    "http://#{config[:username]}:#{config[:password]}@#{config[:host]}:#{config[:port]}"
  end

  # Build proxy configuration as array [host, port, username, password].
  #
  # @param options [Hash] Proxy options (same as #build)
  # @return [Array<String, Integer, String, String>] Proxy array
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build_array(options = {})
    build(options).values_at(:host, :port, :username, :password)
  end

  private

  # Build proxy parameters hash from options.
  #
  # Insertion order matters: it is the order of the pairs in the password.
  #
  # @param opts [Hash] Options hash
  # @return [Hash] Parameters for proxy password
  # @raise [WaitTimeError] if wait time exceeds MAX_WAIT_MS
  def build_params(opts)
    params = {}

    # Custom headers must be enabled first if we'll use headers
    params[:custom_headers] = true if opts[:custom_headers]
    params[:js_render] = true if opts[:js_render]
    params[:premium_proxy] = true if opts[:premium_proxy]

    if opts[:wait]
      wait_ms = normalize_wait(opts[:wait])
      raise WaitTimeError, "Wait time cannot exceed 3 minutes (#{MAX_WAIT_MS}ms), got #{wait_ms}ms" if wait_ms > MAX_WAIT_MS

      params[:wait] = wait_ms
    end

    params[:wait_for] = opts[:wait_for] if opts[:wait_for]
    params[:json_response] = true if opts[:json_response]
    params[:original_status] = true if opts[:original_status]

    # `true` asks for a generated id (random integer in 0...1000); any other
    # truthy value is passed through untouched.
    if opts[:session_id] == true
      params[:session_id] = SecureRandom.random_number(1_000)
    elsif opts[:session_id]
      params[:session_id] = opts[:session_id]
    end

    params[:window_height] = opts[:window_height].to_i if opts[:window_height]
    params[:window_width] = opts[:window_width].to_i if opts[:window_width]

    # Proxy country (auto-enables premium_proxy)
    if opts[:proxy_country]
      params[:proxy_country] = opts[:proxy_country].to_s.downcase
      params[:premium_proxy] = true
    end

    if opts[:js_instructions]
      params[:js_instructions] = CGI.escape(serialize_instructions(opts[:js_instructions]))
    end

    # Either screenshot refinement implies the base screenshot flag.
    params[:screenshot] = true if opts[:screenshot]
    if opts[:screenshot_fullpage]
      params[:screenshot] = true
      params[:screenshot_fullpage] = true
    end
    if opts[:screenshot_selector]
      params[:screenshot] = true
      params[:screenshot_selector] = opts[:screenshot_selector]
    end

    params[:block_resources] = opts[:block_resources] if opts[:block_resources]

    # Auto-enable js_render if needed
    params[:js_render] = true if requires_js_render?(params)

    params
  end

  # Turn the :js_instructions option into the JSON text that gets escaped.
  #
  # Strings are assumed to be JSON already. Everything else (Array, Hash,
  # a builder object exposing #to_json) is serialized, so such values no
  # longer reach CGI.escape as non-Strings and raise TypeError.
  #
  # @param instructions [String, #to_json] Raw option value
  # @return [String] JSON text
  def serialize_instructions(instructions)
    instructions.is_a?(String) ? instructions : instructions.to_json
  end

  # Normalize wait time to milliseconds.
  #
  # @param wait [Boolean, Integer, Object] Wait value
  # @return [Integer] Wait time in milliseconds
  def normalize_wait(wait)
    case wait
    when true then 15_000
    when Integer then wait
    when ->(w) { w.respond_to?(:to_i) && w.respond_to?(:parts) }
      # ActiveSupport::Duration - convert seconds to ms
      wait.to_i * 1000
    else
      wait.to_i
    end
  end

  # Check if JS rendering is required based on params.
  #
  # @param params [Hash] Current parameters
  # @return [Boolean] true if js_render should be enabled
  def requires_js_render?(params)
    JS_RENDER_TRIGGERS.any? { |key| params[key] }
  end
end
|
|
226
|
+
end
|