zenrows 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mcp.json +10 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +28 -0
- data/CLAUDE.md +1 -1
- data/Makefile +19 -0
- data/README.md +100 -19
- data/lib/zenrows/api_client.rb +180 -0
- data/lib/zenrows/api_response.rb +185 -0
- data/lib/zenrows/backends/http_rb.rb +10 -11
- data/lib/zenrows/backends/net_http.rb +141 -0
- data/lib/zenrows/client.rb +35 -9
- data/lib/zenrows/configuration.rb +6 -0
- data/lib/zenrows/css_extractor.rb +111 -0
- data/lib/zenrows/proxy.rb +19 -0
- data/lib/zenrows/version.rb +1 -1
- data/lib/zenrows.rb +10 -2
- data/sig/manifest.yaml +5 -0
- data/sig/zenrows/api_client.rbs +15 -0
- data/sig/zenrows/api_response.rbs +28 -0
- data/sig/zenrows/backends/base.rbs +9 -0
- data/sig/zenrows/backends/http_rb.rbs +3 -0
- data/sig/zenrows/backends/net_http.rbs +28 -0
- data/sig/zenrows/backends.rbs +2 -0
- data/sig/zenrows/client.rbs +11 -0
- data/sig/zenrows/configuration.rbs +20 -0
- data/sig/zenrows/css_extractor.rbs +14 -0
- data/sig/zenrows/errors.rbs +27 -0
- data/sig/zenrows/js_instructions.rbs +28 -0
- data/sig/zenrows/proxy.rbs +14 -0
- data/sig/zenrows.rbs +4 -1
- data/test/zenrows/api_client_test.rb +161 -0
- data/test/zenrows/api_response_test.rb +142 -0
- data/test/zenrows/css_extractor_test.rb +84 -0
- data/test/zenrows/js_instructions_test.rb +2 -1
- data/test/zenrows/proxy_test.rb +39 -0
- metadata +25 -2
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "openssl"
|
|
6
|
+
|
|
7
|
+
module Zenrows
|
|
8
|
+
module Backends
|
|
9
|
+
# Net::HTTP backend adapter (stdlib fallback)
|
|
10
|
+
#
|
|
11
|
+
# Uses Ruby's built-in Net::HTTP when http.rb is not available.
|
|
12
|
+
# Provides basic proxy support with SSL verification disabled.
|
|
13
|
+
#
|
|
14
|
+
# @example Basic usage
|
|
15
|
+
# backend = Zenrows::Backends::NetHttp.new(proxy: proxy, config: config)
|
|
16
|
+
# http = backend.build_client(js_render: true)
|
|
17
|
+
# response = http.get(url)
|
|
18
|
+
#
|
|
19
|
+
# @author Ernest Bursa
|
|
20
|
+
# @since 0.2.1
|
|
21
|
+
# @api public
|
|
22
|
+
class NetHttp < Base
  # Assemble a Net::HTTP-backed client wrapper for the given request options
  #
  # The +:headers+ entry is split off and handed to the wrapper as its
  # default headers. When any headers are present, +:custom_headers+ is
  # switched on in the options that are passed to the proxy builder.
  #
  # @param options [Hash] Request options (a copy is modified, not the argument)
  # @return [NetHttpClient] Configured client wrapper
  def build_client(options = {})
    request_opts = options.dup
    default_headers = request_opts.delete(:headers) || {}
    request_opts[:custom_headers] = true if default_headers.any?

    NetHttpClient.new(
      proxy_config: proxy.build(request_opts),
      headers: default_headers,
      timeouts: calculate_timeouts(request_opts),
      ssl_context: ssl_context
    )
  end
end
|
|
43
|
+
|
|
44
|
+
# Wrapper around Net::HTTP that mimics http.rb interface
|
|
45
|
+
#
|
|
46
|
+
# @api private
|
|
47
|
+
class NetHttpClient
  # @param proxy_config [Hash] Proxy settings; :host, :port, :username and :password are read
  # @param headers [Hash] Default headers applied to every request
  # @param timeouts [Hash] Timeout settings; :connect and :read are read
  # @param ssl_context [OpenSSL::SSL::SSLContext, nil] Default SSL context
  def initialize(proxy_config:, headers:, timeouts:, ssl_context:)
    @proxy_config = proxy_config
    @headers = headers
    @timeouts = timeouts
    @ssl_context = ssl_context
  end

  # Make GET request
  #
  # @param url [String] Target URL
  # @param options [Hash] Request options
  # @option options [Hash] :params Query parameters appended to the URL
  # @option options [Hash] :headers Per-request headers (override the defaults)
  # @option options [OpenSSL::SSL::SSLContext] :ssl_context Overrides the default SSL context
  # @return [NetHttpResponse] Response wrapper
  def get(url, **options)
    uri = build_uri(url, options[:params])
    request(uri, Net::HTTP::Get.new(uri), options)
  end

  # Make POST request
  #
  # @param url [String] Target URL
  # @param body [String, nil] Request body
  # @param options [Hash] Request options (same keys as #get)
  # @return [NetHttpResponse] Response wrapper
  def post(url, body: nil, **options)
    uri = build_uri(url, options[:params])
    req = Net::HTTP::Post.new(uri)
    req.body = body if body
    request(uri, req, options)
  end

  private

  # Parse the URL and append any extra query parameters to its query string
  #
  # @param url [String] Target URL
  # @param params [Hash, nil] Extra query parameters
  # @return [URI::Generic] Parsed URI including the extra parameters
  def build_uri(url, params)
    uri = URI.parse(url)
    return uri if params.nil? || params.empty?

    # Keep whatever query the caller already put in the URL and add to it.
    pieces = [uri.query, URI.encode_www_form(params)].compact.reject(&:empty?)
    uri.query = pieces.join("&")
    uri
  end

  # Send the prepared request through the configured proxy
  #
  # @param uri [URI::Generic] Parsed target URI
  # @param req [Net::HTTPRequest] Request object to send
  # @param options [Hash] Request options (:headers and :ssl_context are read here)
  # @return [NetHttpResponse] Response wrapper
  def request(uri, req, options)
    # Defaults first, per-request headers second, so the latter win on conflict.
    [@headers, options[:headers]].compact.each do |header_set|
      header_set.each { |name, value| req[name] = value }
    end

    http = Net::HTTP.new(
      uri.host,
      uri.port,
      @proxy_config[:host],
      @proxy_config[:port],
      @proxy_config[:username],
      @proxy_config[:password]
    )

    http.use_ssl = uri.scheme == "https"
    http.open_timeout = @timeouts[:connect]
    http.read_timeout = @timeouts[:read]

    # Only the verify mode of the SSL context is carried over to Net::HTTP.
    ctx = options[:ssl_context] || @ssl_context
    http.verify_mode = ctx.verify_mode if ctx

    NetHttpResponse.new(http.request(req))
  end
end
|
|
108
|
+
|
|
109
|
+
# Response wrapper that mimics http.rb response interface
|
|
110
|
+
#
|
|
111
|
+
# @api private
|
|
112
|
+
class NetHttpResponse
  # @return [Net::HTTPResponse] Raw response
  attr_reader :raw

  # @param response [Net::HTTPResponse] Response object to wrap
  def initialize(response)
    @raw = response
  end

  # @return [String, nil] Response body as reported by the raw response
  def body
    @raw.body
  end

  # @return [Integer] HTTP status code
  def status
    @raw.code.to_i
  end

  # Response headers as a flat hash
  #
  # A header that occurs several times keeps all of its values, joined
  # with ", ", instead of being reduced to the first occurrence.
  #
  # @return [Hash{String => String}] Response headers
  def headers
    @raw.to_hash.transform_values { |values| values.join(", ") }
  end

  # String form of the response (http.rb compatibility)
  #
  # Always returns a String, so interpolating a response with an empty
  # (nil) body yields "" rather than the default object inspection.
  #
  # @return [String] Response body, or "" when there is none
  def to_s
    body.to_s
  end
end
|
|
140
|
+
end
|
|
141
|
+
end
|
data/lib/zenrows/client.rb
CHANGED
|
@@ -13,7 +13,7 @@ module Zenrows
|
|
|
13
13
|
#
|
|
14
14
|
# client = Zenrows::Client.new
|
|
15
15
|
# http = client.http(js_render: true)
|
|
16
|
-
# response = http.get('https://example.com'
|
|
16
|
+
# response = http.get('https://example.com')
|
|
17
17
|
#
|
|
18
18
|
# @example With custom configuration
|
|
19
19
|
# client = Zenrows::Client.new(api_key: 'KEY', host: 'proxy.zenrows.com')
|
|
@@ -74,11 +74,11 @@ module Zenrows
|
|
|
74
74
|
#
|
|
75
75
|
# @example Basic request
|
|
76
76
|
# http = client.http(js_render: true)
|
|
77
|
-
# response = http.get(url
|
|
77
|
+
# response = http.get(url)
|
|
78
78
|
#
|
|
79
79
|
# @example With premium proxy and country
|
|
80
80
|
# http = client.http(premium_proxy: true, proxy_country: 'us')
|
|
81
|
-
# response = http.get(url
|
|
81
|
+
# response = http.get(url)
|
|
82
82
|
def http(options = {})
|
|
83
83
|
backend.build_client(options)
|
|
84
84
|
end
|
|
@@ -86,12 +86,10 @@ module Zenrows
|
|
|
86
86
|
# Get SSL context for proxy connections
|
|
87
87
|
#
|
|
88
88
|
# ZenRows proxy requires SSL verification to be disabled.
|
|
89
|
+
# This is automatically applied when using #http, but exposed
|
|
90
|
+
# for advanced use cases.
|
|
89
91
|
#
|
|
90
92
|
# @return [OpenSSL::SSL::SSLContext] SSL context
|
|
91
|
-
#
|
|
92
|
-
# @example
|
|
93
|
-
# http = client.http(js_render: true)
|
|
94
|
-
# response = http.get(url, ssl_context: client.ssl_context)
|
|
95
93
|
def ssl_context
|
|
96
94
|
backend.ssl_context
|
|
97
95
|
end
|
|
@@ -148,12 +146,40 @@ module Zenrows
|
|
|
148
146
|
# @return [Backends::Base] Backend instance
|
|
149
147
|
# @raise [ConfigurationError] if backend is not supported
|
|
150
148
|
def build_backend
|
|
151
|
-
|
|
149
|
+
backend_name = resolve_backend
|
|
150
|
+
case backend_name
|
|
152
151
|
when :http_rb
|
|
153
152
|
Backends::HttpRb.new(proxy: proxy, config: config)
|
|
153
|
+
when :net_http
|
|
154
|
+
Backends::NetHttp.new(proxy: proxy, config: config)
|
|
154
155
|
else
|
|
155
|
-
raise ConfigurationError, "Unsupported backend: #{
|
|
156
|
+
raise ConfigurationError, "Unsupported backend: #{backend_name}. Use :http_rb or :net_http"
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Resolve which backend to use
|
|
161
|
+
#
|
|
162
|
+
# @return [Symbol] Backend name
|
|
163
|
+
def resolve_backend
  requested = config.backend

  # Anything other than :http_rb (including :net_http and unknown names)
  # is handed back unchanged.
  return requested unless requested == :http_rb

  # :http_rb is preferred, but falls back to :net_http when the gem is missing.
  http_rb_available? ? :http_rb : :net_http
end
|
|
174
|
+
|
|
175
|
+
# Check if http.rb gem is available
|
|
176
|
+
#
|
|
177
|
+
# @return [Boolean]
|
|
178
|
+
def http_rb_available?
  # Probe by loading the gem; a LoadError means it is not installed.
  begin
    require "http"
  rescue LoadError
    return false
  end

  true
end
|
|
158
184
|
end
|
|
159
185
|
end
|
|
@@ -42,10 +42,14 @@ module Zenrows
|
|
|
42
42
|
# @return [Logger, nil] Logger instance for debug output
|
|
43
43
|
attr_accessor :logger
|
|
44
44
|
|
|
45
|
+
# @return [String] ZenRows API endpoint for ApiClient
|
|
46
|
+
attr_accessor :api_endpoint
|
|
47
|
+
|
|
45
48
|
# Default configuration values
|
|
46
49
|
DEFAULTS = {
|
|
47
50
|
host: "superproxy.zenrows.com",
|
|
48
51
|
port: 1337,
|
|
52
|
+
api_endpoint: "https://api.zenrows.com/v1/",
|
|
49
53
|
connect_timeout: 5,
|
|
50
54
|
read_timeout: 180,
|
|
51
55
|
backend: :http_rb
|
|
@@ -64,6 +68,7 @@ module Zenrows
|
|
|
64
68
|
@api_key = nil
|
|
65
69
|
@host = DEFAULTS[:host]
|
|
66
70
|
@port = DEFAULTS[:port]
|
|
71
|
+
@api_endpoint = DEFAULTS[:api_endpoint]
|
|
67
72
|
@connect_timeout = DEFAULTS[:connect_timeout]
|
|
68
73
|
@read_timeout = DEFAULTS[:read_timeout]
|
|
69
74
|
@backend = DEFAULTS[:backend]
|
|
@@ -99,6 +104,7 @@ module Zenrows
|
|
|
99
104
|
api_key: api_key,
|
|
100
105
|
host: host,
|
|
101
106
|
port: port,
|
|
107
|
+
api_endpoint: api_endpoint,
|
|
102
108
|
connect_timeout: connect_timeout,
|
|
103
109
|
read_timeout: read_timeout,
|
|
104
110
|
backend: backend
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Zenrows
|
|
6
|
+
# DSL for building CSS extraction rules
|
|
7
|
+
#
|
|
8
|
+
# Provides a clean interface for defining CSS selectors to extract
|
|
9
|
+
# data from web pages using the ZenRows API.
|
|
10
|
+
#
|
|
11
|
+
# @example Basic extraction
|
|
12
|
+
# extractor = Zenrows::CssExtractor.build do
|
|
13
|
+
# extract :title, 'h1'
|
|
14
|
+
# extract :description, 'meta[name="description"]', attribute: 'content'
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
# @example With attribute extraction
|
|
18
|
+
# extractor = Zenrows::CssExtractor.build do
|
|
19
|
+
# extract :links, 'a.product-link', attribute: 'href'
|
|
20
|
+
# extract :images, 'img.product-image', attribute: 'src'
|
|
21
|
+
# end
|
|
22
|
+
#
|
|
23
|
+
# @example Using with ApiClient
|
|
24
|
+
# api = Zenrows::ApiClient.new
|
|
25
|
+
# response = api.get(url, css_extractor: extractor)
|
|
26
|
+
# response.extracted # => { "title" => "...", "links" => [...] }
|
|
27
|
+
#
|
|
28
|
+
# @author Ernest Bursa
|
|
29
|
+
# @since 0.2.0
|
|
30
|
+
# @api public
|
|
31
|
+
class CssExtractor
  # @return [Hash{Symbol => String}] Extraction rules
  attr_reader :rules

  # Build extractor using DSL block
  #
  # The block is evaluated with the new extractor as +self+ (and also
  # receives it as its argument). Calling without a block returns an
  # empty extractor.
  #
  # @yield [extractor] Block for defining extraction rules
  # @return [CssExtractor] Configured extractor
  def self.build(&block)
    extractor = new
    extractor.instance_eval(&block) if block
    extractor
  end

  # Initialize empty extractor
  def initialize
    @rules = {}
  end

  # Define extraction rule
  #
  # @param name [Symbol, String] Key for extracted data
  # @param selector [String] CSS selector
  # @param attribute [String, nil] Attribute to extract (nil or empty for text content)
  # @return [self] For chaining
  #
  # @example Extract text content
  #   extract :title, 'h1'
  #
  # @example Extract attribute
  #   extract :link, 'a.main', attribute: 'href'
  def extract(name, selector, attribute: nil)
    # An empty attribute name would otherwise yield the malformed rule "selector @".
    wants_attribute = attribute && !attribute.to_s.empty?
    @rules[name.to_sym] = wants_attribute ? "#{selector} @#{attribute}" : selector
    self
  end

  # Add rule for extracting href attributes
  #
  # @param name [Symbol, String] Key for extracted data
  # @param selector [String] CSS selector for anchor elements
  # @return [self] For chaining
  def links(name, selector)
    extract(name, selector, attribute: "href")
  end

  # Add rule for extracting src attributes
  #
  # @param name [Symbol, String] Key for extracted data
  # @param selector [String] CSS selector for elements with src
  # @return [self] For chaining
  def images(name, selector)
    extract(name, selector, attribute: "src")
  end

  # Convert to hash
  #
  # @return [Hash{Symbol => String}] Rules hash
  def to_h
    @rules
  end

  # Convert to JSON string for API
  #
  # Generator arguments are forwarded, so the extractor serializes
  # correctly when nested inside another structure (e.g. with
  # JSON.pretty_generate).
  #
  # @return [String] JSON representation with string keys
  def to_json(*args)
    @rules.transform_keys(&:to_s).to_json(*args)
  end

  # Check if extractor has rules
  #
  # @return [Boolean]
  def empty?
    @rules.empty?
  end

  # Number of extraction rules
  #
  # @return [Integer]
  def size
    @rules.size
  end
end
|
|
111
|
+
end
|
data/lib/zenrows/proxy.rb
CHANGED
|
@@ -79,8 +79,12 @@ module Zenrows
|
|
|
79
79
|
# @option options [String] :screenshot_selector Screenshot specific element
|
|
80
80
|
# @option options [Boolean] :custom_headers Enable custom headers passthrough
|
|
81
81
|
# @option options [String] :block_resources Block resources (image,media,font)
|
|
82
|
+
# @option options [String] :device Device emulation ('mobile' or 'desktop')
|
|
83
|
+
# @option options [Boolean] :antibot Enhanced antibot bypass mode
|
|
84
|
+
# @option options [String] :session_ttl Session duration ('30s', '5m', '30m', '1h', '1d')
|
|
82
85
|
# @return [Hash] Proxy configuration with :host, :port, :username, :password
|
|
83
86
|
# @raise [WaitTimeError] if wait time exceeds 3 minutes
|
|
87
|
+
# @raise [ArgumentError] if session_ttl is invalid
|
|
84
88
|
def build(options = {})
|
|
85
89
|
opts = options.dup
|
|
86
90
|
proxy_params = build_params(opts)
|
|
@@ -185,6 +189,21 @@ module Zenrows
|
|
|
185
189
|
# Block resources
|
|
186
190
|
params[:block_resources] = opts[:block_resources] if opts[:block_resources]
|
|
187
191
|
|
|
192
|
+
# Device emulation
|
|
193
|
+
params[:device] = opts[:device].to_s if opts[:device]
|
|
194
|
+
|
|
195
|
+
# Antibot bypass
|
|
196
|
+
params[:antibot] = true if opts[:antibot]
|
|
197
|
+
|
|
198
|
+
# Session TTL (duration)
|
|
199
|
+
if opts[:session_ttl]
|
|
200
|
+
ttl = opts[:session_ttl].to_s
|
|
201
|
+
unless VALID_STICKY_TTL.include?(ttl)
|
|
202
|
+
raise ArgumentError, "Invalid session_ttl: #{ttl}. Valid values: #{VALID_STICKY_TTL.join(", ")}"
|
|
203
|
+
end
|
|
204
|
+
params[:session_ttl] = ttl
|
|
205
|
+
end
|
|
206
|
+
|
|
188
207
|
# Auto-enable js_render if needed
|
|
189
208
|
params[:js_render] = true if requires_js_render?(params)
|
|
190
209
|
|
data/lib/zenrows/version.rb
CHANGED
data/lib/zenrows.rb
CHANGED
|
@@ -6,8 +6,16 @@ require_relative "zenrows/configuration"
|
|
|
6
6
|
require_relative "zenrows/proxy"
|
|
7
7
|
require_relative "zenrows/js_instructions"
|
|
8
8
|
require_relative "zenrows/backends/base"
|
|
9
|
-
require_relative "zenrows/backends/
|
|
9
|
+
require_relative "zenrows/backends/net_http"
|
|
10
|
+
begin
|
|
11
|
+
require_relative "zenrows/backends/http_rb"
|
|
12
|
+
rescue LoadError
|
|
13
|
+
# http.rb not available, will use net_http fallback
|
|
14
|
+
end
|
|
10
15
|
require_relative "zenrows/client"
|
|
16
|
+
require_relative "zenrows/css_extractor"
|
|
17
|
+
require_relative "zenrows/api_response"
|
|
18
|
+
require_relative "zenrows/api_client"
|
|
11
19
|
|
|
12
20
|
# ZenRows Ruby client for web scraping proxy
|
|
13
21
|
#
|
|
@@ -18,7 +26,7 @@ require_relative "zenrows/client"
|
|
|
18
26
|
#
|
|
19
27
|
# client = Zenrows::Client.new
|
|
20
28
|
# http = client.http(js_render: true, premium_proxy: true)
|
|
21
|
-
# response = http.get('https://example.com'
|
|
29
|
+
# response = http.get('https://example.com')
|
|
22
30
|
#
|
|
23
31
|
# @example With JavaScript instructions
|
|
24
32
|
# instructions = Zenrows::JsInstructions.build do
|
data/sig/manifest.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
class Zenrows::ApiClient
|
|
2
|
+
attr_reader api_key: String
|
|
3
|
+
attr_reader api_endpoint: String
|
|
4
|
+
attr_reader config: Zenrows::Configuration
|
|
5
|
+
|
|
6
|
+
def initialize: (?api_key: String?, ?api_endpoint: String?) -> void
|
|
7
|
+
def get: (String url, **untyped options) -> Zenrows::ApiResponse
|
|
8
|
+
def post: (String url, ?body: String?, **untyped options) -> Zenrows::ApiResponse
|
|
9
|
+
|
|
10
|
+
private
|
|
11
|
+
|
|
12
|
+
def build_http_client: () -> untyped
|
|
13
|
+
def build_params: (String url, Hash[Symbol, untyped] options) -> Hash[Symbol, untyped]
|
|
14
|
+
def handle_response: (untyped http_response, Hash[Symbol, untyped] options) -> Zenrows::ApiResponse
|
|
15
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
class Zenrows::ApiResponse
|
|
2
|
+
attr_reader raw: untyped
|
|
3
|
+
attr_reader status: Integer
|
|
4
|
+
attr_reader options: Hash[Symbol, untyped]
|
|
5
|
+
|
|
6
|
+
def initialize: (untyped http_response, ?Hash[Symbol, untyped] options) -> void
|
|
7
|
+
def body: () -> String
|
|
8
|
+
def data: () -> untyped
|
|
9
|
+
def html: () -> String
|
|
10
|
+
def markdown: () -> String
|
|
11
|
+
def parsed: () -> untyped
|
|
12
|
+
def extracted: () -> untyped
|
|
13
|
+
def xhr: () -> Array[untyped]?
|
|
14
|
+
def js_instructions_report: () -> Hash[String, untyped]?
|
|
15
|
+
def screenshot: () -> String?
|
|
16
|
+
def headers: () -> Hash[String, String]
|
|
17
|
+
def concurrency_limit: () -> Integer?
|
|
18
|
+
def concurrency_remaining: () -> Integer?
|
|
19
|
+
def request_cost: () -> Float?
|
|
20
|
+
def final_url: () -> String?
|
|
21
|
+
def success?: () -> bool
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def json_response?: () -> bool
|
|
26
|
+
def parse_body: () -> untyped
|
|
27
|
+
def looks_like_json?: () -> bool
|
|
28
|
+
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
class Zenrows::Backends::Base
|
|
2
|
+
attr_reader proxy: Zenrows::Proxy
|
|
3
|
+
attr_reader config: Zenrows::Configuration
|
|
4
|
+
|
|
5
|
+
def initialize: (proxy: Zenrows::Proxy, config: Zenrows::Configuration) -> void
|
|
6
|
+
def build_client: (?Hash[Symbol, untyped] options) -> untyped
|
|
7
|
+
def ssl_context: () -> OpenSSL::SSL::SSLContext
|
|
8
|
+
def calculate_timeouts: (?Hash[Symbol, untyped] options) -> Hash[Symbol, Integer]
|
|
9
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
class Zenrows::Backends::NetHttp < Zenrows::Backends::Base
|
|
2
|
+
def build_client: (?Hash[Symbol, untyped] options) -> Zenrows::Backends::NetHttpClient
|
|
3
|
+
end
|
|
4
|
+
|
|
5
|
+
class Zenrows::Backends::NetHttpClient
|
|
6
|
+
@proxy_config: Hash[Symbol, untyped]
|
|
7
|
+
@headers: Hash[String, String]
|
|
8
|
+
@timeouts: Hash[Symbol, Integer]
|
|
9
|
+
@ssl_context: OpenSSL::SSL::SSLContext
|
|
10
|
+
|
|
11
|
+
def initialize: (proxy_config: Hash[Symbol, untyped], headers: Hash[String, String], timeouts: Hash[Symbol, Integer], ssl_context: OpenSSL::SSL::SSLContext) -> void
|
|
12
|
+
def get: (String url, **untyped options) -> Zenrows::Backends::NetHttpResponse
|
|
13
|
+
def post: (String url, ?body: String?, **untyped options) -> Zenrows::Backends::NetHttpResponse
|
|
14
|
+
|
|
15
|
+
private
|
|
16
|
+
|
|
17
|
+
def request: (untyped uri, untyped req, Hash[Symbol, untyped] options) -> Zenrows::Backends::NetHttpResponse
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
class Zenrows::Backends::NetHttpResponse
|
|
21
|
+
attr_reader raw: untyped
|
|
22
|
+
|
|
23
|
+
def initialize: (untyped response) -> void
|
|
24
|
+
def body: () -> String?
|
|
25
|
+
def status: () -> Integer
|
|
26
|
+
def headers: () -> Hash[String, String]
|
|
27
|
+
def to_s: () -> String?
|
|
28
|
+
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
class Zenrows::Client
|
|
2
|
+
attr_reader config: Zenrows::Configuration
|
|
3
|
+
attr_reader proxy: Zenrows::Proxy
|
|
4
|
+
attr_reader backend: Zenrows::Backends::Base
|
|
5
|
+
|
|
6
|
+
def initialize: (?api_key: String?, ?host: String?, ?port: Integer?, ?backend: Symbol?) -> void
|
|
7
|
+
def http: (?Hash[Symbol, untyped] options) -> untyped
|
|
8
|
+
def ssl_context: () -> OpenSSL::SSL::SSLContext
|
|
9
|
+
def proxy_config: (?Hash[Symbol, untyped] options) -> Hash[Symbol, untyped]
|
|
10
|
+
def proxy_url: (?Hash[Symbol, untyped] options) -> String
|
|
11
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
class Zenrows::Configuration
|
|
2
|
+
include MonitorMixin
|
|
3
|
+
|
|
4
|
+
DEFAULTS: Hash[Symbol, untyped]
|
|
5
|
+
|
|
6
|
+
attr_accessor api_key: String?
|
|
7
|
+
attr_accessor host: String
|
|
8
|
+
attr_accessor port: Integer
|
|
9
|
+
attr_accessor api_endpoint: String
|
|
10
|
+
attr_accessor connect_timeout: Integer
|
|
11
|
+
attr_accessor read_timeout: Integer
|
|
12
|
+
attr_accessor backend: Symbol
|
|
13
|
+
attr_accessor logger: Logger?
|
|
14
|
+
|
|
15
|
+
def initialize: () -> void
|
|
16
|
+
def reset!: () -> void
|
|
17
|
+
def validate!: () -> true
|
|
18
|
+
def valid?: () -> bool
|
|
19
|
+
def to_h: () -> Hash[Symbol, untyped]
|
|
20
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
class Zenrows::CssExtractor
  # Extraction rules keyed by name; values are selector strings
  # (optionally suffixed with " @attribute").
  attr_reader rules: Hash[Symbol, String]

  # The block is run via instance_eval on the new extractor, so inside the
  # block `self` is the extractor; the self-type binding lets type checkers
  # resolve bare `extract`/`links`/`images` calls in the DSL block.
  def self.build: () { (Zenrows::CssExtractor) [self: Zenrows::CssExtractor] -> void } -> Zenrows::CssExtractor

  def initialize: () -> void

  # Rule builders return the receiver so calls can be chained.
  def extract: (Symbol | String name, String selector, ?attribute: String?) -> self
  def links: (Symbol | String name, String selector) -> self
  def images: (Symbol | String name, String selector) -> self

  def to_h: () -> Hash[Symbol, String]
  def to_json: (*untyped) -> String
  def empty?: () -> bool
  def size: () -> Integer
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
class Zenrows::Error < StandardError
|
|
2
|
+
end
|
|
3
|
+
|
|
4
|
+
class Zenrows::ConfigurationError < Zenrows::Error
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
class Zenrows::AuthenticationError < Zenrows::Error
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
class Zenrows::RateLimitError < Zenrows::Error
|
|
11
|
+
attr_reader retry_after: Integer?
|
|
12
|
+
def initialize: (?String message, ?retry_after: Integer?) -> void
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
class Zenrows::BotDetectedError < Zenrows::Error
|
|
16
|
+
attr_reader suggestion: String?
|
|
17
|
+
def initialize: (?String message, ?suggestion: String?) -> void
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
class Zenrows::TimeoutError < Zenrows::Error
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
class Zenrows::ProxyError < Zenrows::Error
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
class Zenrows::WaitTimeError < Zenrows::Error
|
|
27
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
class Zenrows::JsInstructions
|
|
2
|
+
attr_reader instructions: Array[Hash[Symbol, untyped]]
|
|
3
|
+
|
|
4
|
+
def initialize: () -> void
|
|
5
|
+
def self.build: () { (Zenrows::JsInstructions) -> void } -> Zenrows::JsInstructions
|
|
6
|
+
|
|
7
|
+
def click: (String selector) -> self
|
|
8
|
+
def wait: (Integer duration) -> self
|
|
9
|
+
def wait_for: (String selector) -> self
|
|
10
|
+
def wait_event: (String | Symbol event) -> self
|
|
11
|
+
def fill: (String selector, String value) -> self
|
|
12
|
+
def check: (String selector) -> self
|
|
13
|
+
def uncheck: (String selector) -> self
|
|
14
|
+
def select_option: (String selector, String value) -> self
|
|
15
|
+
def scroll_y: (Integer pixels) -> self
|
|
16
|
+
def scroll_x: (Integer pixels) -> self
|
|
17
|
+
def scroll_to: (String | Symbol position) -> self
|
|
18
|
+
def evaluate: (String code) -> self
|
|
19
|
+
def frame_click: (String iframe_selector, String element_selector) -> self
|
|
20
|
+
def frame_wait_for: (String iframe_selector, String element_selector) -> self
|
|
21
|
+
def frame_fill: (String iframe_selector, String input_selector, String value) -> self
|
|
22
|
+
def frame_evaluate: (String iframe_name, String code) -> self
|
|
23
|
+
|
|
24
|
+
def to_json: (*untyped) -> String
|
|
25
|
+
def to_a: () -> Array[Hash[Symbol, untyped]]
|
|
26
|
+
def empty?: () -> bool
|
|
27
|
+
def size: () -> Integer
|
|
28
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
class Zenrows::Proxy
|
|
2
|
+
MAX_WAIT_MS: Integer
|
|
3
|
+
VALID_STICKY_TTL: Array[String]
|
|
4
|
+
VALID_REGIONS: Hash[String, String]
|
|
5
|
+
|
|
6
|
+
attr_reader api_key: String
|
|
7
|
+
attr_reader host: String
|
|
8
|
+
attr_reader port: Integer
|
|
9
|
+
|
|
10
|
+
def initialize: (api_key: String, host: String, port: Integer) -> void
|
|
11
|
+
def build: (?Hash[Symbol, untyped] options) -> Hash[Symbol, untyped]
|
|
12
|
+
def build_url: (?Hash[Symbol, untyped] options) -> String
|
|
13
|
+
def build_array: (?Hash[Symbol, untyped] options) -> [String, Integer, String, String]
|
|
14
|
+
end
|
data/sig/zenrows.rbs
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
module Zenrows
|
|
2
2
|
VERSION: String
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
def self.configuration: () -> Configuration
|
|
5
|
+
def self.configure: () { (Configuration) -> void } -> Configuration
|
|
6
|
+
def self.reset_configuration!: () -> void
|
|
4
7
|
end
|