zenrows 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "monitor"
4
+
5
+ module Zenrows
# Global configuration for the Zenrows client.
#
# Stores the API key, proxy endpoint, timeouts, HTTP backend and logger.
# Every setting except +api_key+ and +logger+ starts at the value listed
# in {DEFAULTS}; those two start as +nil+.
#
# MonitorMixin is included, but only {#reset!} takes the monitor; the
# plain attribute readers and writers are not synchronized.
#
# @example Configure with block
#   Zenrows.configure do |config|
#     config.api_key = 'YOUR_API_KEY'
#     config.host = 'superproxy.zenrows.com'
#     config.port = 1337
#   end
#
# @example Configure directly
#   Zenrows.configuration.api_key = 'YOUR_API_KEY'
#
# @author Ernest Bursa
# @since 0.1.0
# @api public
class Configuration
  include MonitorMixin

  # Default configuration values
  DEFAULTS = {
    host: "superproxy.zenrows.com",
    port: 1337,
    connect_timeout: 5,
    read_timeout: 180,
    backend: :http_rb
  }.freeze

  # @return [String, nil] ZenRows API key (required)
  attr_accessor :api_key

  # @return [String] ZenRows proxy host
  attr_accessor :host

  # @return [Integer] ZenRows proxy port
  attr_accessor :port

  # @return [Integer] Default connection timeout in seconds
  attr_accessor :connect_timeout

  # @return [Integer] Default read timeout in seconds
  attr_accessor :read_timeout

  # @return [Symbol] HTTP backend to use (:http_rb, :faraday, :net_http)
  attr_accessor :backend

  # @return [Logger, nil] Logger instance for debug output
  attr_accessor :logger

  # Create a configuration populated with {DEFAULTS}.
  def initialize
    super() # Initialize MonitorMixin before reset! uses the monitor
    reset!
  end

  # Reset configuration to defaults
  #
  # Clears +api_key+ and +logger+ and restores every other setting from
  # {DEFAULTS}, all inside the monitor.
  #
  # @return [void]
  def reset!
    synchronize do
      @api_key = nil
      @host = DEFAULTS[:host]
      @port = DEFAULTS[:port]
      @connect_timeout = DEFAULTS[:connect_timeout]
      @read_timeout = DEFAULTS[:read_timeout]
      @backend = DEFAULTS[:backend]
      @logger = nil
    end
  end

  # Validate that required configuration is present
  #
  # @raise [ConfigurationError] if api_key is nil, empty or only whitespace
  # @return [true] if configuration is valid
  def validate!
    raise ConfigurationError, "api_key is required" if api_key_missing?

    true
  end

  # Check if configuration is valid
  #
  # Same check as {#validate!}, without raising.
  #
  # @return [Boolean] true if configuration is valid
  def valid?
    !api_key_missing?
  end

  # Convert configuration to hash
  #
  # The logger is not part of the returned hash.
  #
  # @return [Hash] configuration as hash
  def to_h
    {
      api_key: api_key,
      host: host,
      port: port,
      connect_timeout: connect_timeout,
      read_timeout: read_timeout,
      backend: backend
    }
  end

  private

  # Whether no usable API key has been set.
  #
  # Going through +to_s+ keeps the check from raising on non-String
  # values and lets nil, "" and whitespace-only keys all count as missing.
  #
  # @return [Boolean]
  def api_key_missing?
    api_key.to_s.strip.empty?
  end
end
108
+
class << self
  # Global configuration, created on first access and memoized afterwards.
  #
  # @return [Configuration] Global configuration instance
  def configuration
    return @configuration if @configuration

    @configuration = Configuration.new
  end

  # Configure Zenrows with a block
  #
  # The block is optional; without one the current configuration is
  # simply returned.
  #
  # @example
  #   Zenrows.configure do |config|
  #     config.api_key = 'YOUR_API_KEY'
  #   end
  #
  # @yield [Configuration] configuration instance
  # @return [Configuration] configuration instance
  def configure
    return configuration unless block_given?

    yield configuration
    # Re-read rather than reuse the yielded object, so a block that swaps
    # the global configuration still gets the current instance back.
    configuration
  end

  # Replace the global configuration with a brand-new instance.
  #
  # @return [void]
  def reset_configuration!
    fresh = Configuration.new
    @configuration = fresh
  end
end
136
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
module Zenrows
  # Root of the Zenrows error hierarchy; every error class in this module
  # inherits from it.
  #
  # @author Ernest Bursa
  # @since 0.1.0
  # @api public
  class Error < StandardError
  end

  # Raised when configuration is invalid or missing
  #
  # @example
  #   raise Zenrows::ConfigurationError, "API key is required"
  class ConfigurationError < Error
  end

  # Raised when API authentication fails (invalid API key)
  #
  # @example
  #   raise Zenrows::AuthenticationError, "Invalid API key"
  class AuthenticationError < Error
  end

  # Raised when rate limited (HTTP 429)
  #
  # Carries an optional retry delay alongside the message.
  #
  # @example Handling rate limits
  #   begin
  #     response = http.get(url)
  #   rescue Zenrows::RateLimitError => e
  #     sleep(e.retry_after || 60)
  #     retry
  #   end
  class RateLimitError < Error
    # @return [Integer, nil] Seconds to wait before retrying
    attr_reader :retry_after

    # @param message [String] Error message
    # @param retry_after [Integer, nil] Seconds to wait before retrying
    def initialize(message = "Rate limited", retry_after: nil)
      super(message)
      @retry_after = retry_after
    end
  end

  # Raised when bot detection triggers (HTTP 400/403/422)
  #
  # Carries an optional suggestion alongside the message.
  #
  # @example
  #   begin
  #     response = http.get(url)
  #   rescue Zenrows::BotDetectedError => e
  #     # Retry with premium proxy
  #     http = client.http(premium_proxy: true, proxy_country: 'us')
  #     response = http.get(url)
  #   end
  class BotDetectedError < Error
    # @return [String, nil] Suggested fix command
    attr_reader :suggestion

    # @param message [String] Error message
    # @param suggestion [String, nil] Suggested fix
    def initialize(message = "Bot detected", suggestion: nil)
      super(message)
      @suggestion = suggestion
    end
  end

  # Raised when request times out
  class TimeoutError < Error
  end

  # Raised when proxy connection fails
  class ProxyError < Error
  end

  # Raised when wait time exceeds maximum (180 seconds)
  class WaitTimeError < Error
  end
end
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Zenrows
# DSL for building JavaScript instructions
#
# JavaScript instructions enable dynamic interaction with web pages
# by automating user actions like clicking, filling forms, scrolling,
# and executing custom JavaScript.
#
# Each DSL method appends one single-key Hash to {#instructions} and
# returns the builder, so calls can be chained or written inside a
# {.build} block. No argument validation is performed here.
#
# @example Building instructions with DSL
#   instructions = Zenrows::JsInstructions.build do
#     click '.load-more'
#     wait 2000
#     fill 'input#email', 'test@example.com'
#     scroll_to :bottom
#     wait_for '.results'
#   end
#
# @example Using with client
#   client = Zenrows::Client.new
#   http = client.http(
#     js_render: true,
#     js_instructions: instructions
#   )
#
# @author Ernest Bursa
# @since 0.1.0
# @api public
class JsInstructions
  # @return [Array<Hash>] List of instructions
  attr_reader :instructions

  # Create an empty builder.
  def initialize
    @instructions = []
  end

  # Build instructions using DSL block
  #
  # The block is run with +instance_eval+, so DSL methods are called
  # without a receiver. Without a block an empty builder is returned.
  #
  # @yield [JsInstructions] Builder instance
  # @return [JsInstructions] Built instructions
  #
  # @example
  #   Zenrows::JsInstructions.build do
  #     click '.button'
  #     wait 1000
  #   end
  def self.build(&block)
    builder = new
    builder.instance_eval(&block) if block
    builder
  end

  # Click an element
  #
  # @param selector [String] CSS selector
  # @return [self]
  #
  # @example
  #   click '.submit-button'
  #   click '#load-more'
  def click(selector)
    add_instruction(:click, selector)
  end

  # Wait for a duration in milliseconds
  #
  # @param duration [Integer] Milliseconds to wait (max 10000; the limit
  #   is not enforced by this method)
  # @return [self]
  #
  # @example
  #   wait 2000  # Wait 2 seconds
  def wait(duration)
    add_instruction(:wait, duration)
  end

  # Wait for an element to appear
  #
  # @param selector [String] CSS selector
  # @return [self]
  #
  # @example
  #   wait_for '.dynamic-content'
  #   wait_for '#results-loaded'
  def wait_for(selector)
    add_instruction(:wait_for, selector)
  end

  # Wait for a browser event
  #
  # @param event [String, Symbol] Event name: networkidle, load, domcontentloaded
  #   (stored as a String)
  # @return [self]
  #
  # @example
  #   wait_event :networkidle
  def wait_event(event)
    add_instruction(:wait_event, event.to_s)
  end

  # Fill an input field
  #
  # @param selector [String] CSS selector for input
  # @param value [String] Value to fill
  # @return [self]
  #
  # @example
  #   fill 'input#email', 'user@example.com'
  #   fill 'input[name="password"]', 'secret123'
  def fill(selector, value)
    add_instruction(:fill, [selector, value])
  end

  # Check a checkbox
  #
  # @param selector [String] CSS selector
  # @return [self]
  def check(selector)
    add_instruction(:check, selector)
  end

  # Uncheck a checkbox
  #
  # @param selector [String] CSS selector
  # @return [self]
  def uncheck(selector)
    add_instruction(:uncheck, selector)
  end

  # Select an option from dropdown
  #
  # @param selector [String] CSS selector for select element
  # @param value [String] Option value to select
  # @return [self]
  #
  # @example
  #   select_option '#country', 'US'
  def select_option(selector, value)
    add_instruction(:select_option, [selector, value])
  end

  # Scroll vertically
  #
  # @param pixels [Integer] Pixels to scroll (negative = up)
  # @return [self]
  #
  # @example
  #   scroll_y 1500   # Scroll down
  #   scroll_y -500   # Scroll up
  def scroll_y(pixels)
    add_instruction(:scroll_y, pixels)
  end

  # Scroll horizontally
  #
  # @param pixels [Integer] Pixels to scroll (negative = left)
  # @return [self]
  def scroll_x(pixels)
    add_instruction(:scroll_x, pixels)
  end

  # Scroll to position
  #
  # @param position [String, Symbol] :bottom or :top (stored as a String)
  # @return [self]
  #
  # @example
  #   scroll_to :bottom
  #   scroll_to :top
  def scroll_to(position)
    add_instruction(:scroll_to, position.to_s)
  end

  # Execute custom JavaScript
  #
  # @param code [String] JavaScript code to execute
  # @return [self]
  #
  # @example
  #   evaluate "window.scrollTo(0, document.body.scrollHeight)"
  #   evaluate "document.querySelector('.modal').remove()"
  def evaluate(code)
    add_instruction(:evaluate, code)
  end

  # Click element inside iframe
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param element_selector [String] CSS selector for element inside iframe
  # @return [self]
  def frame_click(iframe_selector, element_selector)
    add_instruction(:frame_click, [iframe_selector, element_selector])
  end

  # Wait for element inside iframe
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param element_selector [String] CSS selector for element
  # @return [self]
  def frame_wait_for(iframe_selector, element_selector)
    add_instruction(:frame_wait_for, [iframe_selector, element_selector])
  end

  # Fill input inside iframe
  #
  # @param iframe_selector [String] CSS selector for iframe
  # @param input_selector [String] CSS selector for input
  # @param value [String] Value to fill
  # @return [self]
  def frame_fill(iframe_selector, input_selector, value)
    add_instruction(:frame_fill, [iframe_selector, input_selector, value])
  end

  # Execute JavaScript inside iframe
  #
  # @param iframe_name [String] Iframe name or URL
  # @param code [String] JavaScript code
  # @return [self]
  def frame_evaluate(iframe_name, code)
    add_instruction(:frame_evaluate, [iframe_name, code])
  end

  # Convert to JSON string for API
  #
  # Arguments are forwarded to +Array#to_json+ so that a generator state
  # handed in by +JSON.generate+ / +JSON.pretty_generate+ (for example
  # when the builder is nested inside another structure) is honoured
  # instead of being dropped.
  #
  # @param args [Array] Optional generator state/options
  # @return [String] JSON-encoded instructions
  def to_json(*args)
    @instructions.to_json(*args)
  end

  # Convert to array
  #
  # The returned array is a shallow copy: adding to or removing from it
  # does not affect the builder, but the instruction hashes are shared.
  #
  # @return [Array<Hash>] Instructions array
  def to_a
    @instructions.dup
  end

  # Check if there are any instructions
  #
  # @return [Boolean]
  def empty?
    @instructions.empty?
  end

  # Number of instructions
  #
  # @return [Integer]
  def size
    @instructions.size
  end

  private

  # Append one instruction and return the builder for chaining.
  #
  # @param action [Symbol] Instruction name, used as the hash key
  # @param payload [Object] Instruction argument(s)
  # @return [self]
  def add_instruction(action, payload)
    @instructions << {action => payload}
    self
  end
end
267
+ end
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+ require "json"
5
+ require "securerandom"
6
+
7
+ module Zenrows
# Builds ZenRows proxy configuration
#
# ZenRows proxy encodes options in the password field of the proxy URL.
# Format produced by {#build_url}: http://API_KEY:opt1=val1&opt2=val2@host:port
#
# @example Basic proxy configuration
#   proxy = Zenrows::Proxy.new(api_key: 'key', host: 'proxy.zenrows.com', port: 1337)
#   proxy.build(js_render: true, premium_proxy: true)
#   # => { host: 'proxy.zenrows.com', port: 1337, username: 'key', password: 'js_render=true&premium_proxy=true' }
#
# @author Ernest Bursa
# @since 0.1.0
# @api public
class Proxy
  # Maximum wait time in milliseconds (3 minutes)
  MAX_WAIT_MS = 180_000

  # Valid sticky TTL values for HTTP proxy
  VALID_STICKY_TTL = %w[30s 5m 30m 1h 1d].freeze

  # Valid region codes for HTTP proxy
  VALID_REGIONS = {
    "africa" => "af",
    "af" => "af",
    "asia pacific" => "ap",
    "ap" => "ap",
    "europe" => "eu",
    "eu" => "eu",
    "middle east" => "me",
    "me" => "me",
    "north america" => "na",
    "na" => "na",
    "south america" => "sa",
    "sa" => "sa",
    "global" => nil
  }.freeze

  # Parameters whose presence switches js_render on automatically
  JS_RENDER_TRIGGERS = %i[
    wait
    wait_for
    json_response
    window_height
    window_width
    js_instructions
    screenshot
    screenshot_fullpage
    screenshot_selector
  ].freeze
  private_constant :JS_RENDER_TRIGGERS

  # @return [String] ZenRows API key
  attr_reader :api_key

  # @return [String] Proxy host
  attr_reader :host

  # @return [Integer] Proxy port
  attr_reader :port

  # @param api_key [String] ZenRows API key
  # @param host [String] Proxy host
  # @param port [Integer] Proxy port
  def initialize(api_key:, host:, port:)
    @api_key = api_key
    @host = host
    @port = port
  end

  # Build proxy configuration hash for HTTP client
  #
  # The API key becomes the username; the options are joined as
  # +key=value+ pairs with "&" to form the password. Only the
  # js_instructions value is URL-escaped; other values are emitted as given.
  #
  # @param options [Hash] Proxy options
  # @option options [Boolean] :js_render Enable JavaScript rendering
  # @option options [Boolean] :premium_proxy Use residential proxies
  # @option options [String] :proxy_country Country code (us, gb, de, etc.)
  # @option options [Boolean, Integer] :wait Wait time (true=15s, Integer=ms)
  # @option options [String] :wait_for CSS selector to wait for
  # @option options [Boolean, String, Integer] :session_id Session persistence
  # @option options [Integer] :window_height Browser window height
  # @option options [Integer] :window_width Browser window width
  # @option options [Array, String, #to_json] :js_instructions JavaScript instructions
  # @option options [Boolean] :json_response Return JSON instead of HTML
  # @option options [Boolean] :original_status Return original HTTP status
  # @option options [Boolean] :screenshot Take screenshot
  # @option options [Boolean] :screenshot_fullpage Full page screenshot
  # @option options [String] :screenshot_selector Screenshot specific element
  # @option options [Boolean] :custom_headers Enable custom headers passthrough
  # @option options [String] :block_resources Block resources (image,media,font)
  # @return [Hash] Proxy configuration with :host, :port, :username, :password
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build(options = {})
    pairs = build_params(options).map { |key, value| "#{key}=#{value}" }

    {
      host: host,
      port: port,
      username: api_key,
      password: pairs.join("&")
    }
  end

  # Build proxy URL string
  #
  # With no options the password part is empty, giving
  # +http://API_KEY:@host:port+.
  #
  # @param options [Hash] Proxy options (same as #build)
  # @return [String] Proxy URL
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build_url(options = {})
    config = build(options)
    "http://#{config[:username]}:#{config[:password]}@#{config[:host]}:#{config[:port]}"
  end

  # Build proxy configuration as array [host, port, username, password]
  #
  # @param options [Hash] Proxy options (same as #build)
  # @return [Array<String, Integer, String, String>] Proxy array
  # @raise [WaitTimeError] if wait time exceeds 3 minutes
  def build_array(options = {})
    build(options).values_at(:host, :port, :username, :password)
  end

  private

  # Build proxy parameters hash from options
  #
  # Insertion order is significant: it is the order in which the pairs
  # appear in the proxy password. The options hash is only read.
  #
  # @param opts [Hash] Options hash
  # @return [Hash] Parameters for proxy password
  # @raise [WaitTimeError] if the normalized wait exceeds MAX_WAIT_MS
  def build_params(opts)
    params = {}

    # Custom headers must be enabled first if we'll use headers
    params[:custom_headers] = true if opts[:custom_headers]

    # JavaScript rendering
    params[:js_render] = true if opts[:js_render]

    # Premium proxy
    params[:premium_proxy] = true if opts[:premium_proxy]

    # Wait time handling
    if opts[:wait]
      wait_ms = normalize_wait(opts[:wait])
      if wait_ms > MAX_WAIT_MS
        raise WaitTimeError, "Wait time cannot exceed 3 minutes (#{MAX_WAIT_MS}ms), got #{wait_ms}ms"
      end

      params[:wait] = wait_ms
    end

    # Wait for selector
    params[:wait_for] = opts[:wait_for] if opts[:wait_for]

    # JSON response
    params[:json_response] = true if opts[:json_response]

    # Original status
    params[:original_status] = true if opts[:original_status]

    # Session ID: `true` asks for a generated id, anything else truthy is
    # passed through unchanged.
    # NOTE(review): the generated id is in 0..999 — confirm 0 is accepted.
    if opts[:session_id] == true
      params[:session_id] = SecureRandom.random_number(1_000)
    elsif opts[:session_id]
      params[:session_id] = opts[:session_id]
    end

    # Window dimensions
    params[:window_height] = opts[:window_height].to_i if opts[:window_height]
    params[:window_width] = opts[:window_width].to_i if opts[:window_width]

    # Proxy country (auto-enables premium_proxy)
    if opts[:proxy_country]
      params[:proxy_country] = opts[:proxy_country].to_s.downcase
      params[:premium_proxy] = true
    end

    # JavaScript instructions
    if opts[:js_instructions]
      params[:js_instructions] = encode_js_instructions(opts[:js_instructions])
    end

    # Screenshots (fullpage/selector imply the plain screenshot flag)
    params[:screenshot] = true if opts[:screenshot]
    if opts[:screenshot_fullpage]
      params[:screenshot] = true
      params[:screenshot_fullpage] = true
    end
    if opts[:screenshot_selector]
      params[:screenshot] = true
      params[:screenshot_selector] = opts[:screenshot_selector]
    end

    # Block resources
    params[:block_resources] = opts[:block_resources] if opts[:block_resources]

    # Auto-enable js_render if needed
    params[:js_render] = true if requires_js_render?(params)

    params
  end

  # Serialize and URL-escape JavaScript instructions.
  #
  # Strings are taken to be JSON already. Everything else (an Array of
  # hashes, a builder object exposing +to_json+, ...) is serialized first;
  # previously only Arrays were, so builder objects reached CGI.escape
  # unserialized and raised TypeError.
  #
  # @param instructions [String, Array, #to_json] Instructions to encode
  # @return [String] URL-escaped JSON
  def encode_js_instructions(instructions)
    json = instructions.is_a?(String) ? instructions : instructions.to_json
    CGI.escape(json)
  end

  # Normalize wait time to milliseconds
  #
  # - +true+ means 15_000 ms
  # - an Integer is taken as milliseconds
  # - an object responding to both +to_i+ and +parts+ (duck-typed
  #   ActiveSupport::Duration) is taken as seconds
  # - anything else is converted with +to_i+
  #
  # @param wait [Boolean, Integer, Object] Wait value
  # @return [Integer] Wait time in milliseconds
  def normalize_wait(wait)
    case wait
    when true then 15_000
    when Integer then wait
    when ->(w) { w.respond_to?(:to_i) && w.respond_to?(:parts) }
      # ActiveSupport::Duration - convert seconds to ms
      wait.to_i * 1000
    else
      wait.to_i
    end
  end

  # Check if JS rendering is required based on params
  #
  # @param params [Hash] Current parameters
  # @return [Boolean] true if js_render should be enabled
  def requires_js_render?(params)
    JS_RENDER_TRIGGERS.any? { |key| params[key] }
  end
end
226
+ end