zenrows 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mcp.json +10 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +45 -0
- data/CLAUDE.md +1 -1
- data/Makefile +19 -0
- data/README.md +140 -19
- data/lib/zenrows/api_client.rb +243 -0
- data/lib/zenrows/api_response.rb +185 -0
- data/lib/zenrows/backends/base.rb +31 -1
- data/lib/zenrows/backends/http_rb.rb +17 -10
- data/lib/zenrows/backends/net_http.rb +149 -0
- data/lib/zenrows/client.rb +120 -11
- data/lib/zenrows/configuration.rb +117 -0
- data/lib/zenrows/css_extractor.rb +111 -0
- data/lib/zenrows/hooks/context.rb +142 -0
- data/lib/zenrows/hooks/log_subscriber.rb +124 -0
- data/lib/zenrows/hooks.rb +213 -0
- data/lib/zenrows/instrumented_client.rb +187 -0
- data/lib/zenrows/proxy.rb +19 -0
- data/lib/zenrows/version.rb +1 -1
- data/lib/zenrows.rb +14 -2
- data/sig/manifest.yaml +5 -0
- data/sig/zenrows/api_client.rbs +18 -0
- data/sig/zenrows/api_response.rbs +28 -0
- data/sig/zenrows/backends/base.rbs +12 -0
- data/sig/zenrows/backends/http_rb.rbs +3 -0
- data/sig/zenrows/backends/net_http.rbs +28 -0
- data/sig/zenrows/backends.rbs +2 -0
- data/sig/zenrows/client.rbs +12 -0
- data/sig/zenrows/configuration.rbs +29 -0
- data/sig/zenrows/css_extractor.rbs +14 -0
- data/sig/zenrows/errors.rbs +27 -0
- data/sig/zenrows/hook_configurator.rbs +9 -0
- data/sig/zenrows/hooks/context.rbs +6 -0
- data/sig/zenrows/hooks/log_subscriber.rbs +15 -0
- data/sig/zenrows/hooks.rbs +23 -0
- data/sig/zenrows/instrumented_client.rbs +22 -0
- data/sig/zenrows/js_instructions.rbs +28 -0
- data/sig/zenrows/proxy.rbs +14 -0
- data/sig/zenrows.rbs +4 -1
- data/test/test_helper.rb +42 -0
- data/test/zenrows/api_client_test.rb +161 -0
- data/test/zenrows/api_response_test.rb +142 -0
- data/test/zenrows/client_hooks_test.rb +105 -0
- data/test/zenrows/configuration_hooks_test.rb +101 -0
- data/test/zenrows/css_extractor_test.rb +84 -0
- data/test/zenrows/hooks/context_test.rb +150 -0
- data/test/zenrows/hooks/log_subscriber_test.rb +105 -0
- data/test/zenrows/hooks_test.rb +215 -0
- data/test/zenrows/instrumented_client_test.rb +153 -0
- data/test/zenrows/js_instructions_test.rb +2 -1
- data/test/zenrows/proxy_test.rb +39 -0
- metadata +42 -4
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Zenrows
|
|
6
|
+
# Response wrapper for ZenRows API responses
|
|
7
|
+
#
|
|
8
|
+
# Provides convenient accessors for different response types based on
|
|
9
|
+
# the options used in the request.
|
|
10
|
+
#
|
|
11
|
+
# @example HTML response
|
|
12
|
+
# response = api.get(url)
|
|
13
|
+
# response.html # => "<html>..."
|
|
14
|
+
#
|
|
15
|
+
# @example JSON response with XHR data
|
|
16
|
+
# response = api.get(url, json_response: true)
|
|
17
|
+
# response.data # => { "html" => "...", "xhr" => [...] }
|
|
18
|
+
# response.html # => "<html>..."
|
|
19
|
+
# response.xhr # => [...]
|
|
20
|
+
#
|
|
21
|
+
# @example Autoparse response
|
|
22
|
+
# response = api.get(url, autoparse: true)
|
|
23
|
+
# response.parsed # => { "title" => "...", "price" => "..." }
|
|
24
|
+
#
|
|
25
|
+
# @example CSS extraction
|
|
26
|
+
# response = api.get(url, css_extractor: { title: 'h1' })
|
|
27
|
+
# response.extracted # => { "title" => "Page Title" }
|
|
28
|
+
#
|
|
29
|
+
# @example Markdown response
|
|
30
|
+
# response = api.get(url, response_type: 'markdown')
|
|
31
|
+
# response.markdown # => "# Page Title\n\n..."
|
|
32
|
+
#
|
|
33
|
+
# @author Ernest Bursa
|
|
34
|
+
# @since 0.2.0
|
|
35
|
+
# @api public
|
|
36
|
+
class ApiResponse
  # @return [Object] Raw response object handed over by the HTTP backend
  attr_reader :raw

  # @return [Integer] HTTP status code
  attr_reader :status

  # @return [Hash] Request options used
  attr_reader :options

  # @return [String] Raw response body
  attr_reader :body

  # Initialize response wrapper
  #
  # The response only needs to respond to +status+, +body+ and +headers+.
  # +status+ may be an object exposing +code+ or a plain Integer.
  #
  # @param http_response [Object] Raw HTTP response
  # @param options [Hash] Request options
  def initialize(http_response, options = {})
    @raw = http_response
    @status = extract_status(http_response.status)
    @options = options
    @body = http_response.body.to_s
    @parsed_json = nil
    # Separate flag so a legitimately parsed nil/false is not parsed again.
    @body_parsed = false
  end

  # Parsed data (for JSON responses)
  #
  # The body is parsed at most once; later calls return the cached value.
  #
  # @return [Hash, Array, String, nil] Parsed response data, or the raw
  #   body when it is not JSON
  def data
    return @parsed_json if @body_parsed

    @body_parsed = true
    @parsed_json = parse_body
  end

  # HTML content
  #
  # Returns the "html" entry of the parsed data when a JSON-producing
  # option was used, otherwise the raw body.
  #
  # @return [String, nil] HTML content
  def html
    return @body unless json_response?

    data.is_a?(Hash) ? data["html"] : data
  end

  # Markdown content (when response_type: 'markdown')
  #
  # @return [String] Markdown content (the raw body)
  def markdown
    @body
  end

  # Parsed/extracted data (for autoparse or css_extractor)
  #
  # @return [Hash] Structured data
  def parsed
    data
  end

  # Alias for parsed data when using css_extractor
  #
  # @return [Hash] Extracted data
  def extracted
    data
  end

  # XHR/fetch request data (when json_response: true)
  #
  # @return [Array, nil] XHR request data
  def xhr
    data.is_a?(Hash) ? data["xhr"] : nil
  end

  # JS instructions execution report (when json_response: true)
  #
  # @return [Hash, nil] Instructions report
  def js_instructions_report
    data.is_a?(Hash) ? data["js_instructions_report"] : nil
  end

  # Screenshot data (when screenshot options used with json_response)
  #
  # @return [String, nil] Base64 encoded screenshot
  def screenshot
    data.is_a?(Hash) ? data["screenshot"] : nil
  end

  # Response headers
  #
  # @return [Hash] Response headers
  def headers
    @raw.headers.to_h
  end

  # Concurrency limit from headers
  #
  # @return [Integer, nil] Max concurrent requests
  def concurrency_limit
    header("Concurrency-Limit")&.to_i
  end

  # Remaining concurrency from headers
  #
  # @return [Integer, nil] Available concurrent request slots
  def concurrency_remaining
    header("Concurrency-Remaining")&.to_i
  end

  # Request cost from headers
  #
  # @return [Float, nil] Credit cost of request
  def request_cost
    header("X-Request-Cost")&.to_f
  end

  # Final URL after redirects
  #
  # @return [String, nil] Final URL
  def final_url
    header("Zr-Final-Url")
  end

  # Check if response is successful
  #
  # @return [Boolean] true for any 2xx status
  def success?
    status >= 200 && status < 300
  end

  private

  # Normalize a backend status value to an Integer.
  #
  # @param raw_status [#code, #to_i] Status object or plain Integer
  # @return [Integer]
  def extract_status(raw_status)
    code = raw_status.respond_to?(:code) ? raw_status.code : raw_status
    code.to_i
  end

  # Case-insensitive header lookup.
  #
  # Header key casing differs between backends, and a repeated header may
  # arrive as an Array; the first value is used in that case.
  #
  # @param name [String] Header name in any casing
  # @return [String, nil]
  def header(name)
    wanted = name.downcase
    _, value = headers.find { |key, _| key.to_s.downcase == wanted }
    value.is_a?(Array) ? value.first : value
  end

  # Whether the request options imply a JSON body.
  #
  # @return [Boolean]
  def json_response?
    %i[json_response autoparse css_extractor outputs].any? { |key| options[key] }
  end

  # Parse the body as JSON when expected or plausible, falling back to
  # the raw string if it is not valid JSON.
  #
  # @return [Hash, Array, String, nil]
  def parse_body
    return @body unless json_response? || looks_like_json?

    JSON.parse(@body)
  rescue JSON::ParserError
    @body
  end

  # Cheap sniff for a JSON object/array body.
  #
  # @return [Boolean]
  def looks_like_json?
    @body.start_with?("{", "[")
  end
end
|
|
185
|
+
end
|
|
@@ -18,11 +18,16 @@ module Zenrows
|
|
|
18
18
|
# @return [Zenrows::Configuration] Configuration instance
|
|
19
19
|
attr_reader :config
|
|
20
20
|
|
|
21
|
+
# @return [Zenrows::Hooks] Hook registry for this backend
|
|
22
|
+
attr_reader :hooks
|
|
23
|
+
|
|
21
24
|
# @param proxy [Zenrows::Proxy] Proxy configuration builder
|
|
22
25
|
# @param config [Zenrows::Configuration] Configuration instance
|
|
23
|
-
|
|
26
|
+
# @param hooks [Zenrows::Hooks, nil] Optional hook registry (defaults to config.hooks)
|
|
27
|
+
def initialize(proxy:, config:, hooks: nil)
  @proxy, @config = proxy, config

  # Precedence: explicit registry, then a copy of the configuration's
  # hooks (when it has any), then a fresh empty registry.
  @hooks = hooks
  @hooks ||= config.hooks&.dup
  @hooks ||= Hooks.new
end
|
|
27
32
|
|
|
28
33
|
# Build a configured HTTP client
|
|
@@ -74,6 +79,31 @@ module Zenrows
|
|
|
74
79
|
{connect: connect, read: read}
|
|
75
80
|
end
|
|
76
81
|
|
|
82
|
+
# Wrap HTTP client with instrumentation if hooks are registered
|
|
83
|
+
#
|
|
84
|
+
# @param client [Object] The underlying HTTP client
|
|
85
|
+
# @param options [Hash] Request options used for this client
|
|
86
|
+
# @return [Object] Instrumented client or original if no hooks
|
|
87
|
+
def wrap_client(client, options)
  if hooks.empty?
    # Nothing registered: hand back the bare client.
    client
  else
    instrumentation_context = {options: options, backend: backend_name}
    InstrumentedClient.new(client, hooks: hooks, context_base: instrumentation_context)
  end
end
|
|
99
|
+
|
|
100
|
+
# Get the backend name for context
|
|
101
|
+
#
|
|
102
|
+
# @return [Symbol] Backend identifier
|
|
103
|
+
def backend_name
  # Generic identifier; concrete backends in this diff define their own.
  :base
end
|
|
106
|
+
|
|
77
107
|
private
|
|
78
108
|
|
|
79
109
|
# Normalize wait value to seconds
|
|
@@ -12,7 +12,7 @@ module Zenrows
|
|
|
12
12
|
# @example Basic usage
|
|
13
13
|
# backend = Zenrows::Backends::HttpRb.new(proxy: proxy, config: config)
|
|
14
14
|
# http = backend.build_client(js_render: true)
|
|
15
|
-
# response = http.get(url
|
|
15
|
+
# response = http.get(url) # SSL context is auto-configured
|
|
16
16
|
#
|
|
17
17
|
# @author Ernest Bursa
|
|
18
18
|
# @since 0.1.0
|
|
@@ -27,7 +27,7 @@ module Zenrows
|
|
|
27
27
|
# @option options [Boolean, Integer] :wait Wait time
|
|
28
28
|
# @option options [String] :wait_for CSS selector to wait for
|
|
29
29
|
# @option options [Hash] :headers Custom HTTP headers
|
|
30
|
-
# @return [HTTP::Client] Configured HTTP client
|
|
30
|
+
# @return [HTTP::Client, InstrumentedClient] Configured HTTP client (instrumented if hooks registered)
|
|
31
31
|
def build_client(options = {})
|
|
32
32
|
opts = options.dup
|
|
33
33
|
headers = opts.delete(:headers) || {}
|
|
@@ -41,18 +41,25 @@ module Zenrows
|
|
|
41
41
|
# Calculate timeouts
|
|
42
42
|
timeouts = calculate_timeouts(opts)
|
|
43
43
|
|
|
44
|
-
# Build HTTP client
|
|
44
|
+
# Build HTTP client with SSL context and proxy
|
|
45
45
|
client = HTTP
|
|
46
46
|
.timeout(connect: timeouts[:connect], read: timeouts[:read])
|
|
47
47
|
.headers(headers)
|
|
48
|
+
.via(
|
|
49
|
+
proxy_config[:host],
|
|
50
|
+
proxy_config[:port],
|
|
51
|
+
proxy_config[:username],
|
|
52
|
+
proxy_config[:password],
|
|
53
|
+
ssl_context: ssl_context
|
|
54
|
+
)
|
|
48
55
|
|
|
49
|
-
#
|
|
50
|
-
client
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
# Wrap with instrumentation if hooks registered
|
|
57
|
+
wrap_client(client, opts)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# @return [Symbol] Backend identifier
|
|
61
|
+
def backend_name
  # Identifier for the http.rb-based backend.
  :http_rb
end
|
|
57
64
|
end
|
|
58
65
|
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "openssl"
|
|
6
|
+
|
|
7
|
+
module Zenrows
|
|
8
|
+
module Backends
|
|
9
|
+
# Net::HTTP backend adapter (stdlib fallback)
|
|
10
|
+
#
|
|
11
|
+
# Uses Ruby's built-in Net::HTTP when http.rb is not available.
|
|
12
|
+
# Provides basic proxy support with SSL verification disabled.
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage
|
|
15
|
+
# backend = Zenrows::Backends::NetHttp.new(proxy: proxy, config: config)
|
|
16
|
+
# http = backend.build_client(js_render: true)
|
|
17
|
+
# response = http.get(url)
|
|
18
|
+
#
|
|
19
|
+
# @author Ernest Bursa
|
|
20
|
+
# @since 0.2.1
|
|
21
|
+
# @api public
|
|
22
|
+
class NetHttp < Base
  # Build a Net::HTTP-backed client for the given request options.
  #
  # The +:headers+ entry is removed from the options and passed to the
  # client; when it is non-empty, +custom_headers: true+ is added to the
  # options before they are used to build the proxy configuration.
  #
  # @param options [Hash] Request options
  # @return [NetHttpClient, InstrumentedClient] Configured client wrapper (instrumented if hooks registered)
  def build_client(options = {})
    request_opts = options.dup
    extra_headers = request_opts.delete(:headers) || {}
    request_opts[:custom_headers] = true if extra_headers.any?

    wrap_client(
      NetHttpClient.new(
        proxy_config: proxy.build(request_opts),
        headers: extra_headers,
        timeouts: calculate_timeouts(request_opts),
        ssl_context: ssl_context
      ),
      request_opts
    )
  end

  # @return [Symbol] Backend identifier
  def backend_name
    :net_http
  end
end
|
|
51
|
+
|
|
52
|
+
# Wrapper around Net::HTTP that mimics http.rb interface
|
|
53
|
+
#
|
|
54
|
+
# @api private
|
|
55
|
+
class NetHttpClient
  # @param proxy_config [Hash] Proxy configuration (:host, :port, :username, :password)
  # @param headers [Hash] Default headers sent with every request
  # @param timeouts [Hash] Timeout configuration (:connect, :read)
  # @param ssl_context [OpenSSL::SSL::SSLContext] SSL context
  def initialize(proxy_config:, headers:, timeouts:, ssl_context:)
    @proxy_config = proxy_config
    @headers = headers
    @timeouts = timeouts
    @ssl_context = ssl_context
  end

  # Make GET request
  #
  # @param url [String, URI::Generic] Target URL
  # @param options [Hash] Request options
  # @option options [Hash] :headers Extra headers for this request only
  # @option options [OpenSSL::SSL::SSLContext] :ssl_context Overrides the default SSL context
  # @return [NetHttpResponse] Response wrapper
  def get(url, **options)
    uri = coerce_uri(url)
    request(uri, Net::HTTP::Get.new(uri), options)
  end

  # Make POST request
  #
  # @param url [String, URI::Generic] Target URL
  # @param body [String, nil] Request body
  # @param options [Hash] Request options (see #get)
  # @return [NetHttpResponse] Response wrapper
  def post(url, body: nil, **options)
    uri = coerce_uri(url)
    req = Net::HTTP::Post.new(uri)
    req.body = body if body
    request(uri, req, options)
  end

  private

  # Accept already-parsed URIs as well as strings.
  #
  # @param url [String, URI::Generic]
  # @return [URI::Generic]
  def coerce_uri(url)
    url.is_a?(URI::Generic) ? url : URI.parse(url)
  end

  # Send the request through the configured proxy.
  #
  # @param uri [URI::Generic] Target URI
  # @param req [Net::HTTPRequest] Prepared request
  # @param options [Hash] Per-request options
  # @return [NetHttpResponse]
  def request(uri, req, options)
    # Per-request headers win over the client's defaults.
    @headers.merge(options[:headers] || {}).each { |name, value| req[name] = value }

    http = Net::HTTP.new(
      uri.host,
      uri.port,
      @proxy_config[:host],
      @proxy_config[:port],
      @proxy_config[:username],
      @proxy_config[:password]
    )

    http.use_ssl = uri.scheme == "https"
    http.open_timeout = @timeouts[:connect]
    http.read_timeout = @timeouts[:read]

    # Only the verify mode is taken from the SSL context.
    ctx = options[:ssl_context] || @ssl_context
    http.verify_mode = ctx.verify_mode if ctx

    NetHttpResponse.new(http.request(req))
  end
end
|
|
116
|
+
|
|
117
|
+
# Response wrapper that mimics http.rb response interface
|
|
118
|
+
#
|
|
119
|
+
# @api private
|
|
120
|
+
class NetHttpResponse
  # @return [Net::HTTPResponse] Raw response
  attr_reader :raw

  # @param response [Net::HTTPResponse] Response to wrap
  def initialize(response)
    @raw = response
  end

  # @return [String, nil] Response body exactly as the raw response reports it
  def body
    @raw.body
  end

  # @return [Integer] HTTP status code
  def status
    @raw.code.to_i
  end

  # Response headers, one value per header.
  #
  # Keys are stored as the raw response's +to_hash+ provides them; only
  # the first value of each header is kept. A miss on +[]+ is retried
  # with the lower-cased name, so +headers["Content-Type"]+ and
  # +headers["content-type"]+ find the same entry when keys are
  # lower-case. +key?+ and +fetch+ remain exact-match.
  #
  # @return [Hash] Response headers
  def headers
    flattened = @raw.to_hash.transform_values(&:first)
    # fetch with an explicit default never re-enters the default proc.
    flattened.default_proc = ->(hash, name) { hash.fetch(name.to_s.downcase, nil) }
    flattened
  end

  # Body as a String (http.rb compatibility).
  #
  # @return [String] Body, or "" when the response has none
  def to_s
    body.to_s
  end
end
|
|
148
|
+
end
|
|
149
|
+
end
|
data/lib/zenrows/client.rb
CHANGED
|
@@ -13,12 +13,17 @@ module Zenrows
|
|
|
13
13
|
#
|
|
14
14
|
# client = Zenrows::Client.new
|
|
15
15
|
# http = client.http(js_render: true)
|
|
16
|
-
# response = http.get('https://example.com'
|
|
16
|
+
# response = http.get('https://example.com')
|
|
17
17
|
#
|
|
18
18
|
# @example With custom configuration
|
|
19
19
|
# client = Zenrows::Client.new(api_key: 'KEY', host: 'proxy.zenrows.com')
|
|
20
20
|
# http = client.http(premium_proxy: true, proxy_country: 'us')
|
|
21
21
|
#
|
|
22
|
+
# @example With per-client hooks
|
|
23
|
+
# client = Zenrows::Client.new do |c|
|
|
24
|
+
# c.on_response { |resp, ctx| puts "#{ctx[:host]} -> #{resp.status}" }
|
|
25
|
+
# end
|
|
26
|
+
#
|
|
22
27
|
# @author Ernest Bursa
|
|
23
28
|
# @since 0.1.0
|
|
24
29
|
# @api public
|
|
@@ -32,17 +37,30 @@ module Zenrows
|
|
|
32
37
|
# @return [Backends::Base] HTTP backend instance
|
|
33
38
|
attr_reader :backend
|
|
34
39
|
|
|
40
|
+
# @return [Hooks] Hook registry for this client
|
|
41
|
+
attr_reader :hooks
|
|
42
|
+
|
|
35
43
|
# Initialize a new client
|
|
36
44
|
#
|
|
37
45
|
# @param api_key [String, nil] Override API key from global config
|
|
38
46
|
# @param host [String, nil] Override proxy host
|
|
39
47
|
# @param port [Integer, nil] Override proxy port
|
|
40
48
|
# @param backend [Symbol] Backend to use (:http_rb or :net_http)
|
|
49
|
+
# @yield [config] Optional block for per-client configuration (hooks, etc.)
|
|
50
|
+
# @yieldparam config [Configuration] Client configuration for hook registration
|
|
41
51
|
# @raise [ConfigurationError] if api_key is not configured
|
|
42
|
-
|
|
52
|
+
#
|
|
53
|
+
# @example With per-client hooks
|
|
54
|
+
# client = Zenrows::Client.new do |c|
|
|
55
|
+
# c.on_response { |resp, ctx| puts resp.status }
|
|
56
|
+
# end
|
|
57
|
+
def initialize(api_key: nil, host: nil, port: nil, backend: nil, &block)
|
|
43
58
|
@config = build_config(api_key: api_key, host: host, port: port, backend: backend)
|
|
44
59
|
@config.validate!
|
|
45
60
|
|
|
61
|
+
# Build hooks: start with global, allow per-client additions
|
|
62
|
+
@hooks = block ? build_hooks(&block) : Zenrows.configuration.hooks.dup
|
|
63
|
+
|
|
46
64
|
@proxy = Proxy.new(
|
|
47
65
|
api_key: @config.api_key,
|
|
48
66
|
host: @config.host,
|
|
@@ -74,11 +92,11 @@ module Zenrows
|
|
|
74
92
|
#
|
|
75
93
|
# @example Basic request
|
|
76
94
|
# http = client.http(js_render: true)
|
|
77
|
-
# response = http.get(url
|
|
95
|
+
# response = http.get(url)
|
|
78
96
|
#
|
|
79
97
|
# @example With premium proxy and country
|
|
80
98
|
# http = client.http(premium_proxy: true, proxy_country: 'us')
|
|
81
|
-
# response = http.get(url
|
|
99
|
+
# response = http.get(url)
|
|
82
100
|
def http(options = {})
  # Client construction is entirely the backend's job.
  client_builder = backend
  client_builder.build_client(options)
end
|
|
@@ -86,12 +104,10 @@ module Zenrows
|
|
|
86
104
|
# Get SSL context for proxy connections
|
|
87
105
|
#
|
|
88
106
|
# ZenRows proxy requires SSL verification to be disabled.
|
|
107
|
+
# This is automatically applied when using #http, but exposed
|
|
108
|
+
# for advanced use cases.
|
|
89
109
|
#
|
|
90
110
|
# @return [OpenSSL::SSL::SSLContext] SSL context
|
|
91
|
-
#
|
|
92
|
-
# @example
|
|
93
|
-
# http = client.http(js_render: true)
|
|
94
|
-
# response = http.get(url, ssl_context: client.ssl_context)
|
|
95
111
|
def ssl_context
  # Delegates to the backend's SSL context.
  context_source = backend
  context_source.ssl_context
end
|
|
@@ -148,12 +164,105 @@ module Zenrows
|
|
|
148
164
|
# @return [Backends::Base] Backend instance
|
|
149
165
|
# @raise [ConfigurationError] if backend is not supported
|
|
150
166
|
def build_backend
  selected = resolve_backend

  # Pick the class first; every supported backend takes the same arguments.
  backend_class =
    case selected
    when :http_rb then Backends::HttpRb
    when :net_http then Backends::NetHttp
    else
      raise ConfigurationError, "Unsupported backend: #{selected}. Use :http_rb or :net_http"
    end

  backend_class.new(proxy: proxy, config: config, hooks: hooks)
end
|
|
177
|
+
|
|
178
|
+
# Build hooks registry for this client
|
|
179
|
+
#
|
|
180
|
+
# Starts with global hooks, then applies per-client hooks from block.
|
|
181
|
+
#
|
|
182
|
+
# @yield [config] Block for registering per-client hooks
|
|
183
|
+
# @return [Hooks] Combined hooks registry
|
|
184
|
+
def build_hooks
  # Work on a copy of the global registry and expose it to the caller's
  # block through the registration DSL.
  Zenrows.configuration.hooks.dup.tap do |registry|
    yield(HookConfigurator.new(registry))
  end
end
|
|
194
|
+
|
|
195
|
+
# Resolve which backend to use
|
|
196
|
+
#
|
|
197
|
+
# @return [Symbol] Backend name
|
|
198
|
+
def resolve_backend
  requested = config.backend

  case requested
  when :net_http
    :net_http
  when :http_rb
    # Preferred backend; fall back to net_http when it cannot be loaded.
    http_rb_available? ? :http_rb : :net_http
  else
    # Anything else is returned as-is.
    requested
  end
end
|
|
209
|
+
|
|
210
|
+
# Check if http.rb gem is available
|
|
211
|
+
#
|
|
212
|
+
# @return [Boolean]
|
|
213
|
+
def http_rb_available?
  begin
    # Loaded on demand so a missing gem can be detected at runtime.
    require "http"
  rescue LoadError
    return false
  end

  true
end
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
# Helper class for per-client hook configuration
|
|
222
|
+
#
|
|
223
|
+
# Provides the same hook registration DSL as Configuration.
|
|
224
|
+
#
|
|
225
|
+
# @api private
|
|
226
|
+
class HookConfigurator
  # @param hooks [Hooks] Hook registry to configure
  def initialize(hooks)
    @hooks = hooks
  end

  # Register a before_request callback
  #
  # @return [self] for chaining
  def before_request(callable = nil, &block)
    register_hook(:before_request, callable, block)
  end

  # Register an after_request callback
  #
  # @return [self] for chaining
  def after_request(callable = nil, &block)
    register_hook(:after_request, callable, block)
  end

  # Register an on_response callback
  #
  # @return [self] for chaining
  def on_response(callable = nil, &block)
    register_hook(:on_response, callable, block)
  end

  # Register an on_error callback
  #
  # @return [self] for chaining
  def on_error(callable = nil, &block)
    register_hook(:on_error, callable, block)
  end

  # Register an around_request callback
  #
  # @return [self] for chaining
  def around_request(callable = nil, &block)
    register_hook(:around_request, callable, block)
  end

  # Add a subscriber object
  #
  # @return [self] for chaining
  def add_subscriber(subscriber)
    @hooks.add_subscriber(subscriber)
    self
  end

  private

  # Forward a registration to the wrapped registry and return self so
  # the public methods can be chained.
  def register_hook(event, callable, block)
    @hooks.register(event, callable, &block)
    self
  end
end
|
|
159
268
|
end
|