webscraping_ai 2.0.2 → 3.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -20
- data/docs/AIApi.md +109 -0
- data/docs/Account.md +22 -0
- data/docs/AccountApi.md +76 -0
- data/docs/Error.md +14 -7
- data/docs/HTMLApi.md +45 -82
- data/docs/SelectedHTMLApi.md +92 -173
- data/docs/TextApi.md +105 -0
- data/git_push.sh +3 -4
- data/lib/webscraping_ai/api/account_api.rb +79 -0
- data/lib/webscraping_ai/api/ai_api.rb +164 -0
- data/lib/webscraping_ai/api/html_api.rb +54 -107
- data/lib/webscraping_ai/api/selected_html_api.rb +99 -217
- data/lib/webscraping_ai/api/text_api.rb +154 -0
- data/lib/webscraping_ai/api_client.rb +71 -65
- data/lib/webscraping_ai/api_error.rb +4 -3
- data/lib/webscraping_ai/configuration.rb +65 -15
- data/lib/webscraping_ai/models/{page_error.rb → account.rb} +60 -42
- data/lib/webscraping_ai/models/error.rb +66 -28
- data/lib/webscraping_ai/version.rb +4 -4
- data/lib/webscraping_ai.rb +7 -4
- data/spec/api/account_api_spec.rb +46 -0
- data/spec/api/ai_api_spec.rb +61 -0
- data/spec/api/html_api_spec.rb +17 -27
- data/spec/api/selected_html_api_spec.rb +29 -53
- data/spec/api/text_api_spec.rb +59 -0
- data/spec/models/account_spec.rb +48 -0
- data/spec/models/error_spec.rb +27 -14
- data/spec/spec_helper.rb +3 -3
- data/webscraping_ai.gemspec +7 -7
- metadata +22 -34
- data/docs/PageError.md +0 -19
- data/spec/api_client_spec.rb +0 -226
- data/spec/configuration_spec.rb +0 -42
- data/spec/models/page_error_spec.rb +0 -47
@@ -1,33 +1,54 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
13
13
|
require 'date'
|
14
|
+
require 'time'
|
14
15
|
|
15
16
|
module WebScrapingAI
|
16
17
|
class Error
|
17
18
|
# Error description
|
18
19
|
attr_accessor :message
|
19
20
|
|
21
|
+
# Target page response HTTP status code (403, 500, etc)
|
22
|
+
attr_accessor :status_code
|
23
|
+
|
24
|
+
# Target page response HTTP status message
|
25
|
+
attr_accessor :status_message
|
26
|
+
|
27
|
+
# Target page response body
|
28
|
+
attr_accessor :body
|
29
|
+
|
20
30
|
# Attribute mapping from ruby-style variable name to JSON key.
|
21
31
|
def self.attribute_map
|
22
32
|
{
|
23
|
-
:'message' => :'message'
|
33
|
+
:'message' => :'message',
|
34
|
+
:'status_code' => :'status_code',
|
35
|
+
:'status_message' => :'status_message',
|
36
|
+
:'body' => :'body'
|
24
37
|
}
|
25
38
|
end
|
26
39
|
|
40
|
+
# Returns all the JSON keys this model knows about
|
41
|
+
def self.acceptable_attributes
|
42
|
+
attribute_map.values
|
43
|
+
end
|
44
|
+
|
27
45
|
# Attribute type mapping.
|
28
46
|
def self.openapi_types
|
29
47
|
{
|
30
|
-
:'message' => :'String'
|
48
|
+
:'message' => :'String',
|
49
|
+
:'status_code' => :'Integer',
|
50
|
+
:'status_message' => :'String',
|
51
|
+
:'body' => :'String'
|
31
52
|
}
|
32
53
|
end
|
33
54
|
|
@@ -55,11 +76,24 @@ module WebScrapingAI
|
|
55
76
|
if attributes.key?(:'message')
|
56
77
|
self.message = attributes[:'message']
|
57
78
|
end
|
79
|
+
|
80
|
+
if attributes.key?(:'status_code')
|
81
|
+
self.status_code = attributes[:'status_code']
|
82
|
+
end
|
83
|
+
|
84
|
+
if attributes.key?(:'status_message')
|
85
|
+
self.status_message = attributes[:'status_message']
|
86
|
+
end
|
87
|
+
|
88
|
+
if attributes.key?(:'body')
|
89
|
+
self.body = attributes[:'body']
|
90
|
+
end
|
58
91
|
end
|
59
92
|
|
60
93
|
# Show invalid properties with the reasons. Usually used together with valid?
|
61
94
|
# @return Array for valid properties with the reasons
|
62
95
|
def list_invalid_properties
|
96
|
+
warn '[DEPRECATED] the `list_invalid_properties` method is obsolete'
|
63
97
|
invalid_properties = Array.new
|
64
98
|
invalid_properties
|
65
99
|
end
|
@@ -67,6 +101,7 @@ module WebScrapingAI
|
|
67
101
|
# Check to see if the all the properties in the model are valid
|
68
102
|
# @return true if the model is valid
|
69
103
|
def valid?
|
104
|
+
warn '[DEPRECATED] the `valid?` method is obsolete'
|
70
105
|
true
|
71
106
|
end
|
72
107
|
|
@@ -75,7 +110,10 @@ module WebScrapingAI
|
|
75
110
|
def ==(o)
|
76
111
|
return true if self.equal?(o)
|
77
112
|
self.class == o.class &&
|
78
|
-
message == o.message
|
113
|
+
message == o.message &&
|
114
|
+
status_code == o.status_code &&
|
115
|
+
status_message == o.status_message &&
|
116
|
+
body == o.body
|
79
117
|
end
|
80
118
|
|
81
119
|
# @see the `==` method
|
@@ -87,44 +125,40 @@ module WebScrapingAI
|
|
87
125
|
# Calculates hash code according to all attributes.
|
88
126
|
# @return [Integer] Hash code
|
89
127
|
def hash
|
90
|
-
[message].hash
|
128
|
+
[message, status_code, status_message, body].hash
|
91
129
|
end
|
92
130
|
|
93
131
|
# Builds the object from hash
|
94
132
|
# @param [Hash] attributes Model attributes in the form of hash
|
95
133
|
# @return [Object] Returns the model itself
|
96
134
|
def self.build_from_hash(attributes)
|
97
|
-
new.build_from_hash(attributes)
|
98
|
-
end
|
99
|
-
|
100
|
-
# Builds the object from hash
|
101
|
-
# @param [Hash] attributes Model attributes in the form of hash
|
102
|
-
# @return [Object] Returns the model itself
|
103
|
-
def build_from_hash(attributes)
|
104
135
|
return nil unless attributes.is_a?(Hash)
|
105
|
-
|
106
|
-
|
136
|
+
attributes = attributes.transform_keys(&:to_sym)
|
137
|
+
transformed_hash = {}
|
138
|
+
openapi_types.each_pair do |key, type|
|
139
|
+
if attributes.key?(attribute_map[key]) && attributes[attribute_map[key]].nil?
|
140
|
+
transformed_hash["#{key}"] = nil
|
141
|
+
elsif type =~ /\AArray<(.*)>/i
|
107
142
|
# check to ensure the input is an array given that the attribute
|
108
143
|
# is documented as an array but the input is not
|
109
|
-
if attributes[
|
110
|
-
|
144
|
+
if attributes[attribute_map[key]].is_a?(Array)
|
145
|
+
transformed_hash["#{key}"] = attributes[attribute_map[key]].map { |v| _deserialize($1, v) }
|
111
146
|
end
|
112
|
-
elsif !attributes[
|
113
|
-
|
114
|
-
end
|
147
|
+
elsif !attributes[attribute_map[key]].nil?
|
148
|
+
transformed_hash["#{key}"] = _deserialize(type, attributes[attribute_map[key]])
|
149
|
+
end
|
115
150
|
end
|
116
|
-
|
117
|
-
self
|
151
|
+
new(transformed_hash)
|
118
152
|
end
|
119
153
|
|
120
154
|
# Deserializes the data based on type
|
121
155
|
# @param string type Data type
|
122
156
|
# @param string value Value to be deserialized
|
123
157
|
# @return [Object] Deserialized data
|
124
|
-
def _deserialize(type, value)
|
158
|
+
def self._deserialize(type, value)
|
125
159
|
case type.to_sym
|
126
|
-
when :
|
127
|
-
|
160
|
+
when :Time
|
161
|
+
Time.parse(value)
|
128
162
|
when :Date
|
129
163
|
Date.parse(value)
|
130
164
|
when :String
|
@@ -154,7 +188,9 @@ module WebScrapingAI
|
|
154
188
|
end
|
155
189
|
end
|
156
190
|
else # model
|
157
|
-
|
191
|
+
# models (e.g. Pet) or oneOf
|
192
|
+
klass = WebScrapingAI.const_get(type)
|
193
|
+
klass.respond_to?(:openapi_any_of) || klass.respond_to?(:openapi_one_of) ? klass.build(value) : klass.build_from_hash(value)
|
158
194
|
end
|
159
195
|
end
|
160
196
|
|
@@ -180,7 +216,7 @@ module WebScrapingAI
|
|
180
216
|
is_nullable = self.class.openapi_nullable.include?(attr)
|
181
217
|
next if !is_nullable || (is_nullable && !instance_variable_defined?(:"@#{attr}"))
|
182
218
|
end
|
183
|
-
|
219
|
+
|
184
220
|
hash[param] = _to_hash(value)
|
185
221
|
end
|
186
222
|
hash
|
@@ -203,5 +239,7 @@ module WebScrapingAI
|
|
203
239
|
value
|
204
240
|
end
|
205
241
|
end
|
242
|
+
|
206
243
|
end
|
244
|
+
|
207
245
|
end
|
@@ -1,15 +1,15 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
13
13
|
module WebScrapingAI
|
14
|
-
VERSION = '
|
14
|
+
VERSION = '3.1.3'
|
15
15
|
end
|
data/lib/webscraping_ai.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -17,12 +17,15 @@ require 'webscraping_ai/version'
|
|
17
17
|
require 'webscraping_ai/configuration'
|
18
18
|
|
19
19
|
# Models
|
20
|
+
require 'webscraping_ai/models/account'
|
20
21
|
require 'webscraping_ai/models/error'
|
21
|
-
require 'webscraping_ai/models/page_error'
|
22
22
|
|
23
23
|
# APIs
|
24
|
+
require 'webscraping_ai/api/ai_api'
|
25
|
+
require 'webscraping_ai/api/account_api'
|
24
26
|
require 'webscraping_ai/api/html_api'
|
25
27
|
require 'webscraping_ai/api/selected_html_api'
|
28
|
+
require 'webscraping_ai/api/text_api'
|
26
29
|
|
27
30
|
module WebScrapingAI
|
28
31
|
class << self
|
@@ -0,0 +1,46 @@
|
|
1
|
+
=begin
|
2
|
+
#WebScraping.AI
|
3
|
+
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
|
+
Contact: support@webscraping.ai
|
8
|
+
Generated by: https://openapi-generator.tech
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
require 'spec_helper'
|
14
|
+
require 'json'
|
15
|
+
|
16
|
+
# Unit tests for WebScrapingAI::AccountApi
|
17
|
+
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
18
|
+
# Please update as you see appropriate
|
19
|
+
describe 'AccountApi' do
|
20
|
+
before do
|
21
|
+
# run before each test
|
22
|
+
@api_instance = WebScrapingAI::AccountApi.new
|
23
|
+
end
|
24
|
+
|
25
|
+
after do
|
26
|
+
# run after each test
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'test an instance of AccountApi' do
|
30
|
+
it 'should create an instance of AccountApi' do
|
31
|
+
expect(@api_instance).to be_instance_of(WebScrapingAI::AccountApi)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# unit tests for account
|
36
|
+
# Information about your account calls quota
|
37
|
+
# Returns information about your account, including the remaining API credits quota, the next billing cycle start time, and the remaining concurrent requests. The response is in JSON format.
|
38
|
+
# @param [Hash] opts the optional parameters
|
39
|
+
# @return [Account]
|
40
|
+
describe 'account test' do
|
41
|
+
it 'should work' do
|
42
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
=begin
|
2
|
+
#WebScraping.AI
|
3
|
+
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
|
+
Contact: support@webscraping.ai
|
8
|
+
Generated by: https://openapi-generator.tech
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
require 'spec_helper'
|
14
|
+
require 'json'
|
15
|
+
|
16
|
+
# Unit tests for WebScrapingAI::AIApi
|
17
|
+
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
18
|
+
# Please update as you see appropriate
|
19
|
+
describe 'AIApi' do
|
20
|
+
before do
|
21
|
+
# run before each test
|
22
|
+
@api_instance = WebScrapingAI::AIApi.new
|
23
|
+
end
|
24
|
+
|
25
|
+
after do
|
26
|
+
# run after each test
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'test an instance of AIApi' do
|
30
|
+
it 'should create an instance of AIApi' do
|
31
|
+
expect(@api_instance).to be_instance_of(WebScrapingAI::AIApi)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# unit tests for get_question
|
36
|
+
# Get an answer to a question about a given web page
|
37
|
+
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
38
|
+
# @param url URL of the target page.
|
39
|
+
# @param [Hash] opts the optional parameters
|
40
|
+
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
41
|
+
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default).
|
42
|
+
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit).
|
43
|
+
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long.
|
44
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
45
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
46
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
47
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
48
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
49
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans.
|
50
|
+
# @option opts [String] :device Type of device emulation.
|
51
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
52
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
53
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
54
|
+
# @return [String]
|
55
|
+
describe 'get_question test' do
|
56
|
+
it 'should work' do
|
57
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
data/spec/api/html_api_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -34,34 +34,24 @@ describe 'HTMLApi' do
|
|
34
34
|
|
35
35
|
# unit tests for get_html
|
36
36
|
# Page HTML by URL
|
37
|
-
# Returns
|
38
|
-
# @param url URL of the target page
|
37
|
+
# Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
38
|
+
# @param url URL of the target page.
|
39
39
|
# @param [Hash] opts the optional parameters
|
40
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
41
|
-
# @option opts [Integer] :timeout Maximum
|
42
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default)
|
43
|
-
# @option opts [
|
40
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
41
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
42
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
43
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
44
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
45
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans.
|
46
|
+
# @option opts [String] :device Type of device emulation.
|
47
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
48
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
49
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
50
|
+
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).
|
44
51
|
# @return [String]
|
45
52
|
describe 'get_html test' do
|
46
53
|
it 'should work' do
|
47
|
-
# assertion here. ref: https://
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
# unit tests for post_html
|
52
|
-
# Page HTML by URL with POST request to the target page
|
53
|
-
# Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
54
|
-
# @param url URL of the target page
|
55
|
-
# @param [Hash] opts the optional parameters
|
56
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
57
|
-
# @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
58
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
59
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
60
|
-
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
|
61
|
-
# @return [String]
|
62
|
-
describe 'post_html test' do
|
63
|
-
it 'should work' do
|
64
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
54
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
65
55
|
end
|
66
56
|
end
|
67
57
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -34,71 +34,47 @@ describe 'SelectedHTMLApi' do
|
|
34
34
|
|
35
35
|
# unit tests for get_selected
|
36
36
|
# HTML of a selected page area by URL and CSS selector
|
37
|
-
# Returns
|
38
|
-
# @param url URL of the target page
|
37
|
+
# Returns HTML of a selected page area by URL and CSS selector. Useful if you don't want to do the HTML parsing on your side.
|
38
|
+
# @param url URL of the target page.
|
39
39
|
# @param [Hash] opts the optional parameters
|
40
40
|
# @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
|
41
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
42
|
-
# @option opts [Integer] :timeout Maximum
|
43
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default)
|
44
|
-
# @option opts [
|
41
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
42
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
43
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
44
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
45
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
46
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans.
|
47
|
+
# @option opts [String] :device Type of device emulation.
|
48
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
49
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
50
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
45
51
|
# @return [String]
|
46
52
|
describe 'get_selected test' do
|
47
53
|
it 'should work' do
|
48
|
-
# assertion here. ref: https://
|
54
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
49
55
|
end
|
50
56
|
end
|
51
57
|
|
52
58
|
# unit tests for get_selected_multiple
|
53
59
|
# HTML of multiple page areas by URL and CSS selectors
|
54
|
-
#
|
55
|
-
# @param url URL of the target page
|
60
|
+
# Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.
|
61
|
+
# @param url URL of the target page.
|
56
62
|
# @param [Hash] opts the optional parameters
|
57
63
|
# @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
|
58
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
59
|
-
# @option opts [Integer] :timeout Maximum
|
60
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default)
|
61
|
-
# @option opts [
|
64
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
65
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
66
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
67
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
68
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
69
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans.
|
70
|
+
# @option opts [String] :device Type of device emulation.
|
71
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
72
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
73
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
62
74
|
# @return [Array<String>]
|
63
75
|
describe 'get_selected_multiple test' do
|
64
76
|
it 'should work' do
|
65
|
-
# assertion here. ref: https://
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
# unit tests for post_selected
|
70
|
-
# HTML of a selected page areas by URL and CSS selector, with POST request to the target page
|
71
|
-
# Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
72
|
-
# @param url URL of the target page
|
73
|
-
# @param [Hash] opts the optional parameters
|
74
|
-
# @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
|
75
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
76
|
-
# @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
77
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
78
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
79
|
-
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
|
80
|
-
# @return [String]
|
81
|
-
describe 'post_selected test' do
|
82
|
-
it 'should work' do
|
83
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
# unit tests for post_selected_multiple
|
88
|
-
# HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
|
89
|
-
# Always returns JSON. Request body will be passed to the target page.
|
90
|
-
# @param url URL of the target page
|
91
|
-
# @param [Hash] opts the optional parameters
|
92
|
-
# @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
|
93
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
94
|
-
# @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
95
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
96
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
97
|
-
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
|
98
|
-
# @return [Array<String>]
|
99
|
-
describe 'post_selected_multiple test' do
|
100
|
-
it 'should work' do
|
101
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
77
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
102
78
|
end
|
103
79
|
end
|
104
80
|
|
@@ -0,0 +1,59 @@
|
|
1
|
+
=begin
|
2
|
+
#WebScraping.AI
|
3
|
+
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
|
+
Contact: support@webscraping.ai
|
8
|
+
Generated by: https://openapi-generator.tech
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
require 'spec_helper'
|
14
|
+
require 'json'
|
15
|
+
|
16
|
+
# Unit tests for WebScrapingAI::TextApi
|
17
|
+
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
18
|
+
# Please update as you see appropriate
|
19
|
+
describe 'TextApi' do
|
20
|
+
before do
|
21
|
+
# run before each test
|
22
|
+
@api_instance = WebScrapingAI::TextApi.new
|
23
|
+
end
|
24
|
+
|
25
|
+
after do
|
26
|
+
# run after each test
|
27
|
+
end
|
28
|
+
|
29
|
+
describe 'test an instance of TextApi' do
|
30
|
+
it 'should create an instance of TextApi' do
|
31
|
+
expect(@api_instance).to be_instance_of(WebScrapingAI::TextApi)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# unit tests for get_text
|
36
|
+
# Page text by URL
|
37
|
+
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
38
|
+
# @param url URL of the target page.
|
39
|
+
# @param [Hash] opts the optional parameters
|
40
|
+
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys.
|
41
|
+
# @option opts [Boolean] :return_links [Works only with text_format=json] Return links from the page body text (false by default). Useful for building web crawlers.
|
42
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
43
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
44
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
45
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
46
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
47
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans.
|
48
|
+
# @option opts [String] :device Type of device emulation.
|
49
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
50
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
51
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
52
|
+
# @return [String]
|
53
|
+
describe 'get_text test' do
|
54
|
+
it 'should work' do
|
55
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
=begin
|
2
|
+
#WebScraping.AI
|
3
|
+
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 3.1.3
|
7
|
+
Contact: support@webscraping.ai
|
8
|
+
Generated by: https://openapi-generator.tech
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
require 'spec_helper'
|
14
|
+
require 'json'
|
15
|
+
require 'date'
|
16
|
+
|
17
|
+
# Unit tests for WebScrapingAI::Account
|
18
|
+
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
19
|
+
# Please update as you see appropriate
|
20
|
+
describe WebScrapingAI::Account do
|
21
|
+
let(:instance) { WebScrapingAI::Account.new }
|
22
|
+
|
23
|
+
describe 'test an instance of Account' do
|
24
|
+
it 'should create an instance of Account' do
|
25
|
+
# uncomment below to test the instance creation
|
26
|
+
#expect(instance).to be_instance_of(WebScrapingAI::Account)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe 'test attribute "remaining_api_calls"' do
|
31
|
+
it 'should work' do
|
32
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe 'test attribute "resets_at"' do
|
37
|
+
it 'should work' do
|
38
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe 'test attribute "remaining_concurrency"' do
|
43
|
+
it 'should work' do
|
44
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|