webscraping_ai 2.0.2 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -20
- data/docs/AIApi.md +109 -0
- data/docs/Account.md +22 -0
- data/docs/AccountApi.md +76 -0
- data/docs/Error.md +14 -7
- data/docs/HTMLApi.md +45 -82
- data/docs/SelectedHTMLApi.md +92 -173
- data/docs/TextApi.md +105 -0
- data/git_push.sh +3 -4
- data/lib/webscraping_ai/api/account_api.rb +79 -0
- data/lib/webscraping_ai/api/ai_api.rb +164 -0
- data/lib/webscraping_ai/api/html_api.rb +54 -107
- data/lib/webscraping_ai/api/selected_html_api.rb +99 -217
- data/lib/webscraping_ai/api/text_api.rb +154 -0
- data/lib/webscraping_ai/api_client.rb +71 -65
- data/lib/webscraping_ai/api_error.rb +4 -3
- data/lib/webscraping_ai/configuration.rb +65 -15
- data/lib/webscraping_ai/models/{page_error.rb → account.rb} +60 -42
- data/lib/webscraping_ai/models/error.rb +66 -28
- data/lib/webscraping_ai/version.rb +4 -4
- data/lib/webscraping_ai.rb +7 -4
- data/spec/api/account_api_spec.rb +46 -0
- data/spec/api/ai_api_spec.rb +61 -0
- data/spec/api/html_api_spec.rb +17 -27
- data/spec/api/selected_html_api_spec.rb +29 -53
- data/spec/api/text_api_spec.rb +59 -0
- data/spec/models/account_spec.rb +48 -0
- data/spec/models/error_spec.rb +27 -14
- data/spec/spec_helper.rb +3 -3
- data/webscraping_ai.gemspec +7 -7
- metadata +22 -34
- data/docs/PageError.md +0 -19
- data/spec/api_client_spec.rb +0 -226
- data/spec/configuration_spec.rb +0 -42
- data/spec/models/page_error_spec.rb +0 -47
@@ -0,0 +1,164 @@
|
|
1
|
+
=begin
|
2
|
+
#WebScraping.AI
|
3
|
+
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
|
+
|
6
|
+
The version of the OpenAPI document: 3.1.2
|
7
|
+
Contact: support@webscraping.ai
|
8
|
+
Generated by: https://openapi-generator.tech
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
|
+
|
11
|
+
=end
|
12
|
+
|
13
|
+
require 'cgi'
|
14
|
+
|
15
|
+
module WebScrapingAI
|
16
|
+
class AIApi
|
17
|
+
attr_accessor :api_client
|
18
|
+
|
19
|
+
def initialize(api_client = ApiClient.default)
|
20
|
+
@api_client = api_client
|
21
|
+
end
|
22
|
+
# Get an answer to a question about a given web page
|
23
|
+
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
24
|
+
# @param url [String] URL of the target page.
|
25
|
+
# @param [Hash] opts the optional parameters
|
26
|
+
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
27
|
+
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 8000)
|
28
|
+
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
|
29
|
+
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'truncate')
|
30
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
|
31
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
32
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
33
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
34
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
35
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
|
36
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
37
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
38
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
39
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
40
|
+
# @return [String]
|
41
|
+
def get_question(url, opts = {})
|
42
|
+
data, _status_code, _headers = get_question_with_http_info(url, opts)
|
43
|
+
data
|
44
|
+
end
|
45
|
+
|
46
|
+
# Get an answer to a question about a given web page
|
47
|
+
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
48
|
+
# @param url [String] URL of the target page.
|
49
|
+
# @param [Hash] opts the optional parameters
|
50
|
+
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
51
|
+
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 8000)
|
52
|
+
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
|
53
|
+
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'truncate')
|
54
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
|
55
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
56
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
57
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
58
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
59
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
|
60
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
61
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
62
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
63
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
64
|
+
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
65
|
+
def get_question_with_http_info(url, opts = {})
|
66
|
+
if @api_client.config.debugging
|
67
|
+
@api_client.config.logger.debug 'Calling API: AIApi.get_question ...'
|
68
|
+
end
|
69
|
+
# verify the required parameter 'url' is set
|
70
|
+
if @api_client.config.client_side_validation && url.nil?
|
71
|
+
fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_question"
|
72
|
+
end
|
73
|
+
allowable_values = [4000, 8000, 16000]
|
74
|
+
if @api_client.config.client_side_validation && opts[:'context_limit'] && !allowable_values.include?(opts[:'context_limit'])
|
75
|
+
fail ArgumentError, "invalid value for \"context_limit\", must be one of #{allowable_values}"
|
76
|
+
end
|
77
|
+
allowable_values = ["truncate", "error"]
|
78
|
+
if @api_client.config.client_side_validation && opts[:'on_context_limit'] && !allowable_values.include?(opts[:'on_context_limit'])
|
79
|
+
fail ArgumentError, "invalid value for \"on_context_limit\", must be one of #{allowable_values}"
|
80
|
+
end
|
81
|
+
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
|
82
|
+
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_question, must be smaller than or equal to 30000.'
|
83
|
+
end
|
84
|
+
|
85
|
+
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
|
86
|
+
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_question, must be greater than or equal to 1.'
|
87
|
+
end
|
88
|
+
|
89
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
|
90
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_question, must be smaller than or equal to 20000.'
|
91
|
+
end
|
92
|
+
|
93
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
|
94
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_question, must be greater than or equal to 1.'
|
95
|
+
end
|
96
|
+
|
97
|
+
allowable_values = ["datacenter", "residential"]
|
98
|
+
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
99
|
+
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
100
|
+
end
|
101
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
102
|
+
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
103
|
+
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
104
|
+
end
|
105
|
+
allowable_values = ["desktop", "mobile", "tablet"]
|
106
|
+
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
107
|
+
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
108
|
+
end
|
109
|
+
# resource path
|
110
|
+
local_var_path = '/ai/question'
|
111
|
+
|
112
|
+
# query parameters
|
113
|
+
query_params = opts[:query_params] || {}
|
114
|
+
query_params[:'url'] = url
|
115
|
+
query_params[:'question'] = opts[:'question'] if !opts[:'question'].nil?
|
116
|
+
query_params[:'context_limit'] = opts[:'context_limit'] if !opts[:'context_limit'].nil?
|
117
|
+
query_params[:'response_tokens'] = opts[:'response_tokens'] if !opts[:'response_tokens'].nil?
|
118
|
+
query_params[:'on_context_limit'] = opts[:'on_context_limit'] if !opts[:'on_context_limit'].nil?
|
119
|
+
query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
|
120
|
+
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
121
|
+
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
122
|
+
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
123
|
+
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
124
|
+
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
125
|
+
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
126
|
+
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
127
|
+
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
128
|
+
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
129
|
+
|
130
|
+
# header parameters
|
131
|
+
header_params = opts[:header_params] || {}
|
132
|
+
# HTTP header 'Accept' (if needed)
|
133
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
134
|
+
|
135
|
+
# form parameters
|
136
|
+
form_params = opts[:form_params] || {}
|
137
|
+
|
138
|
+
# http body (model)
|
139
|
+
post_body = opts[:debug_body]
|
140
|
+
|
141
|
+
# return_type
|
142
|
+
return_type = opts[:debug_return_type] || 'String'
|
143
|
+
|
144
|
+
# auth_names
|
145
|
+
auth_names = opts[:debug_auth_names] || ['api_key']
|
146
|
+
|
147
|
+
new_options = opts.merge(
|
148
|
+
:operation => :"AIApi.get_question",
|
149
|
+
:header_params => header_params,
|
150
|
+
:query_params => query_params,
|
151
|
+
:form_params => form_params,
|
152
|
+
:body => post_body,
|
153
|
+
:auth_names => auth_names,
|
154
|
+
:return_type => return_type
|
155
|
+
)
|
156
|
+
|
157
|
+
data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
|
158
|
+
if @api_client.config.debugging
|
159
|
+
@api_client.config.logger.debug "API called: AIApi#get_question\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
160
|
+
end
|
161
|
+
return data, status_code, headers
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#
|
4
|
+
#WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document:
|
6
|
+
The version of the OpenAPI document: 3.1.2
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version:
|
9
|
+
OpenAPI Generator version: 7.2.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -20,13 +20,20 @@ module WebScrapingAI
|
|
20
20
|
@api_client = api_client
|
21
21
|
end
|
22
22
|
# Page HTML by URL
|
23
|
-
# Returns
|
24
|
-
# @param url [String] URL of the target page
|
23
|
+
# Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
24
|
+
# @param url [String] URL of the target page.
|
25
25
|
# @param [Hash] opts the optional parameters
|
26
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
27
|
-
# @option opts [Integer] :timeout Maximum
|
28
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default)
|
29
|
-
# @option opts [
|
26
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
|
27
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
28
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
29
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
30
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
31
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
|
32
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
33
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
34
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
35
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
36
|
+
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
|
30
37
|
# @return [String]
|
31
38
|
def get_html(url, opts = {})
|
32
39
|
data, _status_code, _headers = get_html_with_http_info(url, opts)
|
@@ -34,13 +41,20 @@ module WebScrapingAI
|
|
34
41
|
end
|
35
42
|
|
36
43
|
# Page HTML by URL
|
37
|
-
# Returns
|
38
|
-
# @param url [String] URL of the target page
|
44
|
+
# Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
45
|
+
# @param url [String] URL of the target page.
|
39
46
|
# @param [Hash] opts the optional parameters
|
40
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
41
|
-
# @option opts [Integer] :timeout Maximum
|
42
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default)
|
43
|
-
# @option opts [
|
47
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
|
48
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
49
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
50
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
51
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
52
|
+
# @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
|
53
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
54
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
55
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
56
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
57
|
+
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
|
44
58
|
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
45
59
|
def get_html_with_http_info(url, opts = {})
|
46
60
|
if @api_client.config.debugging
|
@@ -58,99 +72,26 @@ module WebScrapingAI
|
|
58
72
|
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling HTMLApi.get_html, must be greater than or equal to 1.'
|
59
73
|
end
|
60
74
|
|
61
|
-
|
62
|
-
|
63
|
-
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
75
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
|
76
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling HTMLApi.get_html, must be smaller than or equal to 20000.'
|
64
77
|
end
|
65
|
-
# resource path
|
66
|
-
local_var_path = '/html'
|
67
|
-
|
68
|
-
# query parameters
|
69
|
-
query_params = opts[:query_params] || {}
|
70
|
-
query_params[:'url'] = url
|
71
|
-
query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
|
72
|
-
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
73
|
-
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
74
|
-
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
75
|
-
|
76
|
-
# header parameters
|
77
|
-
header_params = opts[:header_params] || {}
|
78
|
-
# HTTP header 'Accept' (if needed)
|
79
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
80
|
-
|
81
|
-
# form parameters
|
82
|
-
form_params = opts[:form_params] || {}
|
83
|
-
|
84
|
-
# http body (model)
|
85
|
-
post_body = opts[:body]
|
86
|
-
|
87
|
-
# return_type
|
88
|
-
return_type = opts[:return_type] || 'String'
|
89
|
-
|
90
|
-
# auth_names
|
91
|
-
auth_names = opts[:auth_names] || ['api_key']
|
92
78
|
|
93
|
-
|
94
|
-
:
|
95
|
-
:query_params => query_params,
|
96
|
-
:form_params => form_params,
|
97
|
-
:body => post_body,
|
98
|
-
:auth_names => auth_names,
|
99
|
-
:return_type => return_type
|
100
|
-
)
|
101
|
-
|
102
|
-
data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
|
103
|
-
if @api_client.config.debugging
|
104
|
-
@api_client.config.logger.debug "API called: HTMLApi#get_html\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
105
|
-
end
|
106
|
-
return data, status_code, headers
|
107
|
-
end
|
108
|
-
|
109
|
-
# Page HTML by URL with POST request to the target page
|
110
|
-
# Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
111
|
-
# @param url [String] URL of the target page
|
112
|
-
# @param [Hash] opts the optional parameters
|
113
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
114
|
-
# @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000)
|
115
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true)
|
116
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter')
|
117
|
-
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
|
118
|
-
# @return [String]
|
119
|
-
def post_html(url, opts = {})
|
120
|
-
data, _status_code, _headers = post_html_with_http_info(url, opts)
|
121
|
-
data
|
122
|
-
end
|
123
|
-
|
124
|
-
# Page HTML by URL with POST request to the target page
|
125
|
-
# Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
126
|
-
# @param url [String] URL of the target page
|
127
|
-
# @param [Hash] opts the optional parameters
|
128
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
129
|
-
# @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
130
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
131
|
-
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
132
|
-
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
|
133
|
-
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
134
|
-
def post_html_with_http_info(url, opts = {})
|
135
|
-
if @api_client.config.debugging
|
136
|
-
@api_client.config.logger.debug 'Calling API: HTMLApi.post_html ...'
|
137
|
-
end
|
138
|
-
# verify the required parameter 'url' is set
|
139
|
-
if @api_client.config.client_side_validation && url.nil?
|
140
|
-
fail ArgumentError, "Missing the required parameter 'url' when calling HTMLApi.post_html"
|
141
|
-
end
|
142
|
-
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
|
143
|
-
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling HTMLApi.post_html, must be smaller than or equal to 30000.'
|
144
|
-
end
|
145
|
-
|
146
|
-
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
|
147
|
-
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling HTMLApi.post_html, must be greater than or equal to 1.'
|
79
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
|
80
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling HTMLApi.get_html, must be greater than or equal to 1.'
|
148
81
|
end
|
149
82
|
|
150
83
|
allowable_values = ["datacenter", "residential"]
|
151
84
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
152
85
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
153
86
|
end
|
87
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
88
|
+
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
89
|
+
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
90
|
+
end
|
91
|
+
allowable_values = ["desktop", "mobile", "tablet"]
|
92
|
+
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
93
|
+
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
94
|
+
end
|
154
95
|
# resource path
|
155
96
|
local_var_path = '/html'
|
156
97
|
|
@@ -160,28 +101,34 @@ module WebScrapingAI
|
|
160
101
|
query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
|
161
102
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
162
103
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
104
|
+
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
163
105
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
106
|
+
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
107
|
+
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
108
|
+
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
109
|
+
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
110
|
+
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
111
|
+
query_params[:'return_script_result'] = opts[:'return_script_result'] if !opts[:'return_script_result'].nil?
|
164
112
|
|
165
113
|
# header parameters
|
166
114
|
header_params = opts[:header_params] || {}
|
167
115
|
# HTTP header 'Accept' (if needed)
|
168
116
|
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
169
|
-
# HTTP header 'Content-Type'
|
170
|
-
header_params['Content-Type'] = @api_client.select_header_content_type(['application/json', 'application/x-www-form-urlencoded', 'application/xml', 'text/plain'])
|
171
117
|
|
172
118
|
# form parameters
|
173
119
|
form_params = opts[:form_params] || {}
|
174
120
|
|
175
121
|
# http body (model)
|
176
|
-
post_body = opts[:
|
122
|
+
post_body = opts[:debug_body]
|
177
123
|
|
178
124
|
# return_type
|
179
|
-
return_type = opts[:
|
125
|
+
return_type = opts[:debug_return_type] || 'String'
|
180
126
|
|
181
127
|
# auth_names
|
182
|
-
auth_names = opts[:
|
128
|
+
auth_names = opts[:debug_auth_names] || ['api_key']
|
183
129
|
|
184
130
|
new_options = opts.merge(
|
131
|
+
:operation => :"HTMLApi.get_html",
|
185
132
|
:header_params => header_params,
|
186
133
|
:query_params => query_params,
|
187
134
|
:form_params => form_params,
|
@@ -190,9 +137,9 @@ module WebScrapingAI
|
|
190
137
|
:return_type => return_type
|
191
138
|
)
|
192
139
|
|
193
|
-
data, status_code, headers = @api_client.call_api(:
|
140
|
+
data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
|
194
141
|
if @api_client.config.debugging
|
195
|
-
@api_client.config.logger.debug "API called: HTMLApi#
|
142
|
+
@api_client.config.logger.debug "API called: HTMLApi#get_html\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
196
143
|
end
|
197
144
|
return data, status_code, headers
|
198
145
|
end
|