webscraping_ai 3.1.3 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -15
- data/docs/AIApi.md +110 -10
- data/docs/Account.md +2 -0
- data/docs/HTMLApi.md +10 -4
- data/docs/SelectedHTMLApi.md +17 -7
- data/docs/TextApi.md +8 -4
- data/lib/webscraping_ai/api/account_api.rb +4 -4
- data/lib/webscraping_ai/api/ai_api.rb +155 -24
- data/lib/webscraping_ai/api/html_api.rb +20 -7
- data/lib/webscraping_ai/api/selected_html_api.rb +30 -11
- data/lib/webscraping_ai/api/text_api.rb +15 -9
- data/lib/webscraping_ai/api_client.rb +5 -5
- data/lib/webscraping_ai/api_error.rb +3 -3
- data/lib/webscraping_ai/configuration.rb +13 -3
- data/lib/webscraping_ai/models/account.rb +14 -4
- data/lib/webscraping_ai/models/error.rb +3 -3
- data/lib/webscraping_ai/version.rb +4 -4
- data/lib/webscraping_ai.rb +3 -3
- data/spec/api/account_api_spec.rb +3 -3
- data/spec/api/ai_api_spec.rb +32 -7
- data/spec/api/html_api_spec.rb +7 -4
- data/spec/api/selected_html_api_spec.rb +10 -5
- data/spec/api/text_api_spec.rb +7 -5
- data/spec/models/account_spec.rb +9 -3
- data/spec/models/error_spec.rb +3 -3
- data/spec/spec_helper.rb +3 -3
- data/webscraping_ai.gemspec +4 -4
- metadata +4 -4
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -212,7 +212,7 @@ module WebScrapingAI
|
|
212
212
|
# @param [String] mime MIME
|
213
213
|
# @return [Boolean] True if the MIME is application/json
|
214
214
|
def json_mime?(mime)
|
215
|
-
(mime == '*/*') || !(mime =~
|
215
|
+
(mime == '*/*') || !(mime =~ /^Application\/.*json(?!p)(;.*)?/i).nil?
|
216
216
|
end
|
217
217
|
|
218
218
|
# Deserialize the response to the given return type.
|
@@ -291,7 +291,7 @@ module WebScrapingAI
|
|
291
291
|
# @param [String] filename the filename to be sanitized
|
292
292
|
# @return [String] the sanitized filename
|
293
293
|
def sanitize_filename(filename)
|
294
|
-
filename.
|
294
|
+
filename.split(/[\/\\]/).last
|
295
295
|
end
|
296
296
|
|
297
297
|
def build_request_url(path, opts = {})
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -79,6 +79,14 @@ module WebScrapingAI
|
|
79
79
|
# @return [true, false]
|
80
80
|
attr_accessor :debugging
|
81
81
|
|
82
|
+
# Set this to ignore operation servers for the API client. This is useful when you need to
|
83
|
+
# send requests to a different server than the one specified in the OpenAPI document.
|
84
|
+
# Will default to the base url defined in the spec but can be overridden by setting
|
85
|
+
# `scheme`, `host`, `base_path` directly.
|
86
|
+
# Default to false.
|
87
|
+
# @return [true, false]
|
88
|
+
attr_accessor :ignore_operation_servers
|
89
|
+
|
82
90
|
# Defines the logger used for debugging.
|
83
91
|
# Default to `Rails.logger` (when in Rails) or logging to STDOUT.
|
84
92
|
#
|
@@ -166,6 +174,7 @@ module WebScrapingAI
|
|
166
174
|
@timeout = 0
|
167
175
|
@params_encoding = nil
|
168
176
|
@debugging = false
|
177
|
+
@ignore_operation_servers = false
|
169
178
|
@inject_format = false
|
170
179
|
@force_ending_format = false
|
171
180
|
@logger = defined?(Rails) ? Rails.logger : Logger.new(STDOUT)
|
@@ -200,6 +209,7 @@ module WebScrapingAI
|
|
200
209
|
|
201
210
|
# Returns base URL for specified operation based on server settings
|
202
211
|
def base_url(operation = nil)
|
212
|
+
return "#{scheme}://#{[host, base_path].join('/').gsub(/\/+/, '/')}".sub(/\/+\z/, '') if ignore_operation_servers
|
203
213
|
if operation_server_settings.key?(operation) then
|
204
214
|
index = server_operation_index.fetch(operation, server_index)
|
205
215
|
server_url(index.nil? ? 0 : index, server_operation_variables.fetch(operation, server_variables), operation_server_settings[operation])
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -15,6 +15,9 @@ require 'time'
|
|
15
15
|
|
16
16
|
module WebScrapingAI
|
17
17
|
class Account
|
18
|
+
# Your account email
|
19
|
+
attr_accessor :email
|
20
|
+
|
18
21
|
# Remaining API credits quota
|
19
22
|
attr_accessor :remaining_api_calls
|
20
23
|
|
@@ -27,6 +30,7 @@ module WebScrapingAI
|
|
27
30
|
# Attribute mapping from ruby-style variable name to JSON key.
|
28
31
|
def self.attribute_map
|
29
32
|
{
|
33
|
+
:'email' => :'email',
|
30
34
|
:'remaining_api_calls' => :'remaining_api_calls',
|
31
35
|
:'resets_at' => :'resets_at',
|
32
36
|
:'remaining_concurrency' => :'remaining_concurrency'
|
@@ -41,6 +45,7 @@ module WebScrapingAI
|
|
41
45
|
# Attribute type mapping.
|
42
46
|
def self.openapi_types
|
43
47
|
{
|
48
|
+
:'email' => :'String',
|
44
49
|
:'remaining_api_calls' => :'Integer',
|
45
50
|
:'resets_at' => :'Integer',
|
46
51
|
:'remaining_concurrency' => :'Integer'
|
@@ -68,6 +73,10 @@ module WebScrapingAI
|
|
68
73
|
h[k.to_sym] = v
|
69
74
|
}
|
70
75
|
|
76
|
+
if attributes.key?(:'email')
|
77
|
+
self.email = attributes[:'email']
|
78
|
+
end
|
79
|
+
|
71
80
|
if attributes.key?(:'remaining_api_calls')
|
72
81
|
self.remaining_api_calls = attributes[:'remaining_api_calls']
|
73
82
|
end
|
@@ -101,6 +110,7 @@ module WebScrapingAI
|
|
101
110
|
def ==(o)
|
102
111
|
return true if self.equal?(o)
|
103
112
|
self.class == o.class &&
|
113
|
+
email == o.email &&
|
104
114
|
remaining_api_calls == o.remaining_api_calls &&
|
105
115
|
resets_at == o.resets_at &&
|
106
116
|
remaining_concurrency == o.remaining_concurrency
|
@@ -115,7 +125,7 @@ module WebScrapingAI
|
|
115
125
|
# Calculates hash code according to all attributes.
|
116
126
|
# @return [Integer] Hash code
|
117
127
|
def hash
|
118
|
-
[remaining_api_calls, resets_at, remaining_concurrency].hash
|
128
|
+
[email, remaining_api_calls, resets_at, remaining_concurrency].hash
|
119
129
|
end
|
120
130
|
|
121
131
|
# Builds the object from hash
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -1,15 +1,15 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
13
13
|
module WebScrapingAI
|
14
|
-
VERSION = '3.1.3'
|
14
|
+
VERSION = '3.2.0'
|
15
15
|
end
|
data/lib/webscraping_ai.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
data/spec/api/ai_api_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -32,25 +32,50 @@ describe 'AIApi' do
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
# unit tests for get_fields
|
36
|
+
# Extract structured data fields from a web page
|
37
|
+
# Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
38
|
+
# @param url URL of the target page.
|
39
|
+
# @param fields Object describing fields to extract from the page and their descriptions
|
40
|
+
# @param [Hash] opts the optional parameters
|
41
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
42
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
43
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
44
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
45
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
46
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
47
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
48
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
49
|
+
# @option opts [String] :device Type of device emulation.
|
50
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
51
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
52
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
53
|
+
# @return [Hash<String, String>]
|
54
|
+
describe 'get_fields test' do
|
55
|
+
it 'should work' do
|
56
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
35
60
|
# unit tests for get_question
|
36
61
|
# Get an answer to a question about a given web page
|
37
62
|
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
38
63
|
# @param url URL of the target page.
|
39
64
|
# @param [Hash] opts the optional parameters
|
40
65
|
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
41
|
-
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default).
|
42
|
-
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit).
|
43
|
-
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long.
|
44
66
|
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
45
67
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
46
68
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
47
69
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
70
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
48
71
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
49
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
72
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
73
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
50
74
|
# @option opts [String] :device Type of device emulation.
|
51
75
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
52
76
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
53
77
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
78
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
54
79
|
# @return [String]
|
55
80
|
describe 'get_question test' do
|
56
81
|
it 'should work' do
|
data/spec/api/html_api_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -41,13 +41,16 @@ describe 'HTMLApi' do
|
|
41
41
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
42
42
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
43
43
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
44
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
44
45
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
45
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
46
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
47
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
46
48
|
# @option opts [String] :device Type of device emulation.
|
47
49
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
48
50
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
49
51
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
50
52
|
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).
|
53
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
51
54
|
# @return [String]
|
52
55
|
describe 'get_html test' do
|
53
56
|
it 'should work' do
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -42,12 +42,15 @@ describe 'SelectedHTMLApi' do
|
|
42
42
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
43
43
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
44
44
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
45
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
45
46
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
46
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
47
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
48
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
47
49
|
# @option opts [String] :device Type of device emulation.
|
48
50
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
49
51
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
50
52
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
53
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response.
|
51
54
|
# @return [String]
|
52
55
|
describe 'get_selected test' do
|
53
56
|
it 'should work' do
|
@@ -65,8 +68,10 @@ describe 'SelectedHTMLApi' do
|
|
65
68
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
66
69
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
67
70
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
71
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
68
72
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
69
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
73
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
74
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
70
75
|
# @option opts [String] :device Type of device emulation.
|
71
76
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
72
77
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
data/spec/api/text_api_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -34,7 +34,7 @@ describe 'TextApi' do
|
|
34
34
|
|
35
35
|
# unit tests for get_text
|
36
36
|
# Page text by URL
|
37
|
-
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to
|
37
|
+
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
38
38
|
# @param url URL of the target page.
|
39
39
|
# @param [Hash] opts the optional parameters
|
40
40
|
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys.
|
@@ -43,8 +43,10 @@ describe 'TextApi' do
|
|
43
43
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
44
44
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
45
45
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
46
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
46
47
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
|
47
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
48
|
+
# @option opts [String] :country Country of the proxy to use (US by default).
|
49
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
48
50
|
# @option opts [String] :device Type of device emulation.
|
49
51
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
50
52
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
data/spec/models/account_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -27,6 +27,12 @@ describe WebScrapingAI::Account do
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
+
describe 'test attribute "email"' do
|
31
|
+
it 'should work' do
|
32
|
+
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
30
36
|
describe 'test attribute "remaining_api_calls"' do
|
31
37
|
it 'should work' do
|
32
38
|
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
data/spec/models/error_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
data/spec/spec_helper.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
data/webscraping_ai.gemspec
CHANGED
@@ -3,12 +3,12 @@
|
|
3
3
|
=begin
|
4
4
|
#WebScraping.AI
|
5
5
|
|
6
|
-
#WebScraping.AI scraping API provides
|
6
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
7
7
|
|
8
|
-
The version of the OpenAPI document: 3.
|
8
|
+
The version of the OpenAPI document: 3.2.0
|
9
9
|
Contact: support@webscraping.ai
|
10
10
|
Generated by: https://openapi-generator.tech
|
11
|
-
|
11
|
+
Generator version: 7.11.0
|
12
12
|
|
13
13
|
=end
|
14
14
|
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.email = ["hello@webscraping.ai"]
|
24
24
|
s.homepage = "https://webscraping.ai"
|
25
25
|
s.summary = "WebScraping.AI Ruby Gem"
|
26
|
-
s.description = "WebScraping.AI scraping API provides
|
26
|
+
s.description = "WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing."
|
27
27
|
s.license = "MIT"
|
28
28
|
s.required_ruby_version = ">= 2.7"
|
29
29
|
s.metadata = {}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webscraping_ai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.3
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- WebScraping.AI
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typhoeus
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
- - ">="
|
51
51
|
- !ruby/object:Gem::Version
|
52
52
|
version: 3.6.0
|
53
|
-
description: WebScraping.AI scraping API provides
|
53
|
+
description: WebScraping.AI scraping API provides LLM-powered tools with Chromium
|
54
54
|
JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
55
55
|
email:
|
56
56
|
- hello@webscraping.ai
|
@@ -109,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
|
-
rubygems_version: 3.
|
112
|
+
rubygems_version: 3.5.22
|
113
113
|
signing_key:
|
114
114
|
specification_version: 4
|
115
115
|
summary: WebScraping.AI Ruby Gem
|