webscraping_ai 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE +21 -0
- data/README.md +110 -85
- data/lib/webscraping_ai/client.rb +130 -0
- data/lib/webscraping_ai/configuration.rb +10 -300
- data/lib/webscraping_ai/errors.rb +44 -0
- data/lib/webscraping_ai/query_encoder.rb +74 -0
- data/lib/webscraping_ai/version.rb +1 -13
- data/lib/webscraping_ai.rb +15 -39
- data/webscraping_ai.gemspec +33 -36
- metadata +23 -72
- data/Gemfile +0 -9
- data/Rakefile +0 -10
- data/docs/AIApi.md +0 -209
- data/docs/Account.md +0 -24
- data/docs/AccountApi.md +0 -76
- data/docs/Error.md +0 -24
- data/docs/HTMLApi.md +0 -109
- data/docs/SelectedHTMLApi.md +0 -209
- data/docs/TextApi.md +0 -109
- data/git_push.sh +0 -57
- data/lib/webscraping_ai/api/account_api.rb +0 -79
- data/lib/webscraping_ai/api/ai_api.rb +0 -295
- data/lib/webscraping_ai/api/html_api.rb +0 -160
- data/lib/webscraping_ai/api/selected_html_api.rb +0 -291
- data/lib/webscraping_ai/api/text_api.rb +0 -160
- data/lib/webscraping_ai/api_client.rb +0 -394
- data/lib/webscraping_ai/api_error.rb +0 -58
- data/lib/webscraping_ai/models/account.rb +0 -245
- data/lib/webscraping_ai/models/error.rb +0 -245
- data/spec/api/account_api_spec.rb +0 -46
- data/spec/api/ai_api_spec.rb +0 -86
- data/spec/api/html_api_spec.rb +0 -61
- data/spec/api/selected_html_api_spec.rb +0 -86
- data/spec/api/text_api_spec.rb +0 -61
- data/spec/models/account_spec.rb +0 -54
- data/spec/models/error_spec.rb +0 -54
- data/spec/spec_helper.rb +0 -111
|
@@ -1,245 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'date'
|
|
14
|
-
require 'time'
|
|
15
|
-
|
|
16
|
-
module WebScrapingAI
|
|
17
|
-
class Error
|
|
18
|
-
# Error description
|
|
19
|
-
attr_accessor :message
|
|
20
|
-
|
|
21
|
-
# Target page response HTTP status code (403, 500, etc)
|
|
22
|
-
attr_accessor :status_code
|
|
23
|
-
|
|
24
|
-
# Target page response HTTP status message
|
|
25
|
-
attr_accessor :status_message
|
|
26
|
-
|
|
27
|
-
# Target page response body
|
|
28
|
-
attr_accessor :body
|
|
29
|
-
|
|
30
|
-
# Attribute mapping from ruby-style variable name to JSON key.
|
|
31
|
-
def self.attribute_map
|
|
32
|
-
{
|
|
33
|
-
:'message' => :'message',
|
|
34
|
-
:'status_code' => :'status_code',
|
|
35
|
-
:'status_message' => :'status_message',
|
|
36
|
-
:'body' => :'body'
|
|
37
|
-
}
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Returns all the JSON keys this model knows about
|
|
41
|
-
def self.acceptable_attributes
|
|
42
|
-
attribute_map.values
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Attribute type mapping.
|
|
46
|
-
def self.openapi_types
|
|
47
|
-
{
|
|
48
|
-
:'message' => :'String',
|
|
49
|
-
:'status_code' => :'Integer',
|
|
50
|
-
:'status_message' => :'String',
|
|
51
|
-
:'body' => :'String'
|
|
52
|
-
}
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# List of attributes with nullable: true
|
|
56
|
-
def self.openapi_nullable
|
|
57
|
-
Set.new([
|
|
58
|
-
])
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Initializes the object
|
|
62
|
-
# @param [Hash] attributes Model attributes in the form of hash
|
|
63
|
-
def initialize(attributes = {})
|
|
64
|
-
if (!attributes.is_a?(Hash))
|
|
65
|
-
fail ArgumentError, "The input argument (attributes) must be a hash in `WebScrapingAI::Error` initialize method"
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# check to see if the attribute exists and convert string to symbol for hash key
|
|
69
|
-
attributes = attributes.each_with_object({}) { |(k, v), h|
|
|
70
|
-
if (!self.class.attribute_map.key?(k.to_sym))
|
|
71
|
-
fail ArgumentError, "`#{k}` is not a valid attribute in `WebScrapingAI::Error`. Please check the name to make sure it's valid. List of attributes: " + self.class.attribute_map.keys.inspect
|
|
72
|
-
end
|
|
73
|
-
h[k.to_sym] = v
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
if attributes.key?(:'message')
|
|
77
|
-
self.message = attributes[:'message']
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
if attributes.key?(:'status_code')
|
|
81
|
-
self.status_code = attributes[:'status_code']
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
if attributes.key?(:'status_message')
|
|
85
|
-
self.status_message = attributes[:'status_message']
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
if attributes.key?(:'body')
|
|
89
|
-
self.body = attributes[:'body']
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
# Show invalid properties with the reasons. Usually used together with valid?
|
|
94
|
-
# @return Array for valid properties with the reasons
|
|
95
|
-
def list_invalid_properties
|
|
96
|
-
warn '[DEPRECATED] the `list_invalid_properties` method is obsolete'
|
|
97
|
-
invalid_properties = Array.new
|
|
98
|
-
invalid_properties
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
# Check to see if the all the properties in the model are valid
|
|
102
|
-
# @return true if the model is valid
|
|
103
|
-
def valid?
|
|
104
|
-
warn '[DEPRECATED] the `valid?` method is obsolete'
|
|
105
|
-
true
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
# Checks equality by comparing each attribute.
|
|
109
|
-
# @param [Object] Object to be compared
|
|
110
|
-
def ==(o)
|
|
111
|
-
return true if self.equal?(o)
|
|
112
|
-
self.class == o.class &&
|
|
113
|
-
message == o.message &&
|
|
114
|
-
status_code == o.status_code &&
|
|
115
|
-
status_message == o.status_message &&
|
|
116
|
-
body == o.body
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
# @see the `==` method
|
|
120
|
-
# @param [Object] Object to be compared
|
|
121
|
-
def eql?(o)
|
|
122
|
-
self == o
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
# Calculates hash code according to all attributes.
|
|
126
|
-
# @return [Integer] Hash code
|
|
127
|
-
def hash
|
|
128
|
-
[message, status_code, status_message, body].hash
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
# Builds the object from hash
|
|
132
|
-
# @param [Hash] attributes Model attributes in the form of hash
|
|
133
|
-
# @return [Object] Returns the model itself
|
|
134
|
-
def self.build_from_hash(attributes)
|
|
135
|
-
return nil unless attributes.is_a?(Hash)
|
|
136
|
-
attributes = attributes.transform_keys(&:to_sym)
|
|
137
|
-
transformed_hash = {}
|
|
138
|
-
openapi_types.each_pair do |key, type|
|
|
139
|
-
if attributes.key?(attribute_map[key]) && attributes[attribute_map[key]].nil?
|
|
140
|
-
transformed_hash["#{key}"] = nil
|
|
141
|
-
elsif type =~ /\AArray<(.*)>/i
|
|
142
|
-
# check to ensure the input is an array given that the attribute
|
|
143
|
-
# is documented as an array but the input is not
|
|
144
|
-
if attributes[attribute_map[key]].is_a?(Array)
|
|
145
|
-
transformed_hash["#{key}"] = attributes[attribute_map[key]].map { |v| _deserialize($1, v) }
|
|
146
|
-
end
|
|
147
|
-
elsif !attributes[attribute_map[key]].nil?
|
|
148
|
-
transformed_hash["#{key}"] = _deserialize(type, attributes[attribute_map[key]])
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
new(transformed_hash)
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# Deserializes the data based on type
|
|
155
|
-
# @param string type Data type
|
|
156
|
-
# @param string value Value to be deserialized
|
|
157
|
-
# @return [Object] Deserialized data
|
|
158
|
-
def self._deserialize(type, value)
|
|
159
|
-
case type.to_sym
|
|
160
|
-
when :Time
|
|
161
|
-
Time.parse(value)
|
|
162
|
-
when :Date
|
|
163
|
-
Date.parse(value)
|
|
164
|
-
when :String
|
|
165
|
-
value.to_s
|
|
166
|
-
when :Integer
|
|
167
|
-
value.to_i
|
|
168
|
-
when :Float
|
|
169
|
-
value.to_f
|
|
170
|
-
when :Boolean
|
|
171
|
-
if value.to_s =~ /\A(true|t|yes|y|1)\z/i
|
|
172
|
-
true
|
|
173
|
-
else
|
|
174
|
-
false
|
|
175
|
-
end
|
|
176
|
-
when :Object
|
|
177
|
-
# generic object (usually a Hash), return directly
|
|
178
|
-
value
|
|
179
|
-
when /\AArray<(?<inner_type>.+)>\z/
|
|
180
|
-
inner_type = Regexp.last_match[:inner_type]
|
|
181
|
-
value.map { |v| _deserialize(inner_type, v) }
|
|
182
|
-
when /\AHash<(?<k_type>.+?), (?<v_type>.+)>\z/
|
|
183
|
-
k_type = Regexp.last_match[:k_type]
|
|
184
|
-
v_type = Regexp.last_match[:v_type]
|
|
185
|
-
{}.tap do |hash|
|
|
186
|
-
value.each do |k, v|
|
|
187
|
-
hash[_deserialize(k_type, k)] = _deserialize(v_type, v)
|
|
188
|
-
end
|
|
189
|
-
end
|
|
190
|
-
else # model
|
|
191
|
-
# models (e.g. Pet) or oneOf
|
|
192
|
-
klass = WebScrapingAI.const_get(type)
|
|
193
|
-
klass.respond_to?(:openapi_any_of) || klass.respond_to?(:openapi_one_of) ? klass.build(value) : klass.build_from_hash(value)
|
|
194
|
-
end
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
# Returns the string representation of the object
|
|
198
|
-
# @return [String] String presentation of the object
|
|
199
|
-
def to_s
|
|
200
|
-
to_hash.to_s
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
# to_body is an alias to to_hash (backward compatibility)
|
|
204
|
-
# @return [Hash] Returns the object in the form of hash
|
|
205
|
-
def to_body
|
|
206
|
-
to_hash
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
# Returns the object in the form of hash
|
|
210
|
-
# @return [Hash] Returns the object in the form of hash
|
|
211
|
-
def to_hash
|
|
212
|
-
hash = {}
|
|
213
|
-
self.class.attribute_map.each_pair do |attr, param|
|
|
214
|
-
value = self.send(attr)
|
|
215
|
-
if value.nil?
|
|
216
|
-
is_nullable = self.class.openapi_nullable.include?(attr)
|
|
217
|
-
next if !is_nullable || (is_nullable && !instance_variable_defined?(:"@#{attr}"))
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
hash[param] = _to_hash(value)
|
|
221
|
-
end
|
|
222
|
-
hash
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
# Outputs non-array value in the form of hash
|
|
226
|
-
# For object, use to_hash. Otherwise, just return the value
|
|
227
|
-
# @param [Object] value Any valid value
|
|
228
|
-
# @return [Hash] Returns the value in the form of hash
|
|
229
|
-
def _to_hash(value)
|
|
230
|
-
if value.is_a?(Array)
|
|
231
|
-
value.compact.map { |v| _to_hash(v) }
|
|
232
|
-
elsif value.is_a?(Hash)
|
|
233
|
-
{}.tap do |hash|
|
|
234
|
-
value.each { |k, v| hash[k] = _to_hash(v) }
|
|
235
|
-
end
|
|
236
|
-
elsif value.respond_to? :to_hash
|
|
237
|
-
value.to_hash
|
|
238
|
-
else
|
|
239
|
-
value
|
|
240
|
-
end
|
|
241
|
-
end
|
|
242
|
-
|
|
243
|
-
end
|
|
244
|
-
|
|
245
|
-
end
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::AccountApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'AccountApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::AccountApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of AccountApi' do
|
|
30
|
-
it 'should create an instance of AccountApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::AccountApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for account
|
|
36
|
-
# Information about your account calls quota
|
|
37
|
-
# Returns information about your account, including the remaining API credits quota, the next billing cycle start time, and the remaining concurrent requests. The response is in JSON format.
|
|
38
|
-
# @param [Hash] opts the optional parameters
|
|
39
|
-
# @return [Account]
|
|
40
|
-
describe 'account test' do
|
|
41
|
-
it 'should work' do
|
|
42
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
end
|
data/spec/api/ai_api_spec.rb
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::AIApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'AIApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::AIApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of AIApi' do
|
|
30
|
-
it 'should create an instance of AIApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::AIApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for get_fields
|
|
36
|
-
# Extract structured data fields from a web page
|
|
37
|
-
# Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
|
38
|
-
# @param url URL of the target page.
|
|
39
|
-
# @param fields Object describing fields to extract from the page and their descriptions
|
|
40
|
-
# @param [Hash] opts the optional parameters
|
|
41
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
42
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
43
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
44
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
45
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
46
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
47
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
48
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
49
|
-
# @option opts [String] :device Type of device emulation.
|
|
50
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
51
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
52
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
53
|
-
# @return [Hash<String, String>]
|
|
54
|
-
describe 'get_fields test' do
|
|
55
|
-
it 'should work' do
|
|
56
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# unit tests for get_question
|
|
61
|
-
# Get an answer to a question about a given web page
|
|
62
|
-
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
|
63
|
-
# @param url URL of the target page.
|
|
64
|
-
# @param [Hash] opts the optional parameters
|
|
65
|
-
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
|
66
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
67
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
68
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
69
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
70
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
71
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
72
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
73
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
74
|
-
# @option opts [String] :device Type of device emulation.
|
|
75
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
76
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
77
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
78
|
-
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
|
79
|
-
# @return [String]
|
|
80
|
-
describe 'get_question test' do
|
|
81
|
-
it 'should work' do
|
|
82
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
end
|
data/spec/api/html_api_spec.rb
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::HTMLApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'HTMLApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::HTMLApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of HTMLApi' do
|
|
30
|
-
it 'should create an instance of HTMLApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::HTMLApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for get_html
|
|
36
|
-
# Page HTML by URL
|
|
37
|
-
# Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
|
38
|
-
# @param url URL of the target page.
|
|
39
|
-
# @param [Hash] opts the optional parameters
|
|
40
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
41
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
42
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
43
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
44
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
45
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
46
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
47
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
48
|
-
# @option opts [String] :device Type of device emulation.
|
|
49
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
50
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
51
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
52
|
-
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).
|
|
53
|
-
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
|
54
|
-
# @return [String]
|
|
55
|
-
describe 'get_html test' do
|
|
56
|
-
it 'should work' do
|
|
57
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::SelectedHTMLApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'SelectedHTMLApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::SelectedHTMLApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of SelectedHTMLApi' do
|
|
30
|
-
it 'should create an instance of SelectedHTMLApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::SelectedHTMLApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for get_selected
|
|
36
|
-
# HTML of a selected page area by URL and CSS selector
|
|
37
|
-
# Returns HTML of a selected page area by URL and CSS selector. Useful if you don't want to do the HTML parsing on your side.
|
|
38
|
-
# @param url URL of the target page.
|
|
39
|
-
# @param [Hash] opts the optional parameters
|
|
40
|
-
# @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
|
|
41
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
42
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
43
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
44
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
45
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
46
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
47
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
48
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
49
|
-
# @option opts [String] :device Type of device emulation.
|
|
50
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
51
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
52
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
53
|
-
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
|
54
|
-
# @return [String]
|
|
55
|
-
describe 'get_selected test' do
|
|
56
|
-
it 'should work' do
|
|
57
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# unit tests for get_selected_multiple
|
|
62
|
-
# HTML of multiple page areas by URL and CSS selectors
|
|
63
|
-
# Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.
|
|
64
|
-
# @param url URL of the target page.
|
|
65
|
-
# @param [Hash] opts the optional parameters
|
|
66
|
-
# @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
|
|
67
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
68
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
69
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
70
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
71
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
72
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
73
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
74
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
75
|
-
# @option opts [String] :device Type of device emulation.
|
|
76
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
77
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
78
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
79
|
-
# @return [Array<String>]
|
|
80
|
-
describe 'get_selected_multiple test' do
|
|
81
|
-
it 'should work' do
|
|
82
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
end
|
data/spec/api/text_api_spec.rb
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::TextApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'TextApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::TextApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of TextApi' do
|
|
30
|
-
it 'should create an instance of TextApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::TextApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for get_text
|
|
36
|
-
# Page text by URL
|
|
37
|
-
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
|
38
|
-
# @param url URL of the target page.
|
|
39
|
-
# @param [Hash] opts the optional parameters
|
|
40
|
-
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys.
|
|
41
|
-
# @option opts [Boolean] :return_links [Works only with text_format=json] Return links from the page body text (false by default). Useful for building web crawlers.
|
|
42
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
43
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
44
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
45
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page.
|
|
46
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
47
|
-
# @option opts [String] :proxy Type of proxy. Use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
48
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
49
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
50
|
-
# @option opts [String] :device Type of device emulation.
|
|
51
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
52
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
53
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
54
|
-
# @return [String]
|
|
55
|
-
describe 'get_text test' do
|
|
56
|
-
it 'should work' do
|
|
57
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|
data/spec/models/account_spec.rb
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.0
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.11.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
require 'date'
|
|
16
|
-
|
|
17
|
-
# Unit tests for WebScrapingAI::Account
|
|
18
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
19
|
-
# Please update as you see appropriate
|
|
20
|
-
describe WebScrapingAI::Account do
|
|
21
|
-
let(:instance) { WebScrapingAI::Account.new }
|
|
22
|
-
|
|
23
|
-
describe 'test an instance of Account' do
|
|
24
|
-
it 'should create an instance of Account' do
|
|
25
|
-
# uncomment below to test the instance creation
|
|
26
|
-
#expect(instance).to be_instance_of(WebScrapingAI::Account)
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
describe 'test attribute "email"' do
|
|
31
|
-
it 'should work' do
|
|
32
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
describe 'test attribute "remaining_api_calls"' do
|
|
37
|
-
it 'should work' do
|
|
38
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
describe 'test attribute "resets_at"' do
|
|
43
|
-
it 'should work' do
|
|
44
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
describe 'test attribute "remaining_concurrency"' do
|
|
49
|
-
it 'should work' do
|
|
50
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|