webscraping_ai 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,295 +0,0 @@
1
- =begin
2
- #WebScraping.AI
3
-
4
- #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
-
6
- The version of the OpenAPI document: 3.2.0
7
- Contact: support@webscraping.ai
8
- Generated by: https://openapi-generator.tech
9
- Generator version: 7.11.0
10
-
11
- =end
12
-
13
- require 'cgi'
14
-
15
- module WebScrapingAI
16
- class AIApi
17
- attr_accessor :api_client
18
-
# Creates the AI endpoint wrapper.
#
# @param api_client [Object] client stored in +@api_client+ and used by the
#   request methods of this class; when omitted, +ApiClient.default+ is used.
def initialize(api_client = ApiClient.default)
  @api_client = api_client
end
# Extract structured data fields from a web page.
#
# Convenience wrapper around #get_fields_with_http_info that keeps only the
# first element of its result (the response data) and drops the status code
# and response headers.
#
# @param url [String] URL of the target page.
# @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
# @param [Hash] opts the optional parameters
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @return [Hash<String, String>]
def get_fields(url, fields, opts = {})
  payload, = get_fields_with_http_info(url, fields, opts)
  payload
end
44
-
# Extract structured data fields from a web page.
#
# Sends a GET request to '/ai/fields' through +@api_client.call_api+ and
# returns the response data together with the status code and headers.
# Only options that are non-nil are forwarded as query parameters; the
# defaults quoted below are the ones described by the API, this method does
# not fill them in itself.
#
# Caller-supplied +opts[:query_params]+ and +opts[:header_params]+ are copied
# before being extended, so the hashes passed in are never modified.
#
# @param url [String] URL of the target page.
# @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
# @param [Hash] opts the optional parameters
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms (validated here to be within 1..20000). Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @return [Array<(Hash<String, String>, Integer, Hash)>] Hash<String, String> data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and +url+ or
#   +fields+ is nil, +:timeout+ is outside 1..30000, +:js_timeout+ is outside
#   1..20000, or +:proxy+, +:country+ or +:device+ is not an allowed value.
def get_fields_with_http_info(url, fields, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: AIApi.get_fields ...' if config.debugging

  if config.client_side_validation
    # verify the required parameters are set
    fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_fields" if url.nil?
    fail ArgumentError, "Missing the required parameter 'fields' when calling AIApi.get_fields" if fields.nil?

    # numeric options: upper bound first, then the lower bound of 1
    { timeout: 30000, js_timeout: 20000 }.each do |name, maximum|
      value = opts[name]
      next if value.nil?

      if value > maximum
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling AIApi.get_fields, must be smaller than or equal to #{maximum}."
      end
      if value < 1
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling AIApi.get_fields, must be greater than or equal to 1."
      end
    end

    # enumerated options
    {
      'proxy' => ["datacenter", "residential"],
      'country' => ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"],
      'device' => ["desktop", "mobile", "tablet"]
    }.each do |name, allowable_values|
      value = opts[name.to_sym]
      next if !value || allowable_values.include?(value)

      fail ArgumentError, "invalid value for \"#{name}\", must be one of #{allowable_values}"
    end
  end

  # resource path
  local_var_path = '/ai/fields'

  # query parameters (copied so the caller's hash is neither mutated nor
  # required to be unfrozen)
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  query_params[:fields] = fields
  %i[headers timeout js js_timeout wait_for proxy country custom_proxy
     device error_on_404 error_on_redirect js_script].each do |key|
    # nil means "not given"; false must still be forwarded
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters (copied for the same reason as query_params)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] ||= @api_client.select_header_accept(['application/json'])

  new_options = opts.merge(
    :operation => :"AIApi.get_fields",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => opts[:form_params] || {},
    :body => opts[:debug_body],
    :auth_names => opts[:debug_auth_names] || ['api_key'],
    :return_type => opts[:debug_return_type] || 'Hash<String, String>'
  )

  data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
  if config.debugging
    config.logger.debug "API called: AIApi#get_fields\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
156
-
# Get an answer to a question about a given web page.
#
# Convenience wrapper around #get_question_with_http_info that keeps only the
# first element of its result (the response data) and drops the status code
# and response headers.
#
# @param url [String] URL of the target page.
# @param [Hash] opts the optional parameters
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @option opts [String] :format Format of the response: "json" returns a JSON object with the response, "text" returns a plain text/HTML response. NOTE(review): the upstream description states both "text by default" and "default to 'json'" - confirm the server default.
# @return [String]
def get_question(url, opts = {})
  answer, = get_question_with_http_info(url, opts)
  answer
end
180
-
# Get an answer to a question about a given web page.
#
# Sends a GET request to '/ai/question' through +@api_client.call_api+ and
# returns the response data together with the status code and headers.
# Only options that are non-nil are forwarded as query parameters; the
# defaults quoted below are the ones described by the API, this method does
# not fill them in itself.
#
# Caller-supplied +opts[:query_params]+ and +opts[:header_params]+ are copied
# before being extended, so the hashes passed in are never modified.
#
# @param url [String] URL of the target page.
# @param [Hash] opts the optional parameters
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms (validated here to be within 1..20000). Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @option opts [String] :format Format of the response: "json" returns a JSON object with the response, "text" returns a plain text/HTML response. NOTE(review): the upstream description states both "text by default" and "default to 'json'" - confirm the server default.
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and +url+ is
#   nil, +:timeout+ is outside 1..30000, +:js_timeout+ is outside 1..20000, or
#   +:proxy+, +:country+, +:device+ or +:format+ is not an allowed value.
def get_question_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: AIApi.get_question ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_question" if url.nil?

    # numeric options: upper bound first, then the lower bound of 1
    { timeout: 30000, js_timeout: 20000 }.each do |name, maximum|
      value = opts[name]
      next if value.nil?

      if value > maximum
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling AIApi.get_question, must be smaller than or equal to #{maximum}."
      end
      if value < 1
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling AIApi.get_question, must be greater than or equal to 1."
      end
    end

    # enumerated options
    {
      'proxy' => ["datacenter", "residential"],
      'country' => ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"],
      'device' => ["desktop", "mobile", "tablet"],
      'format' => ["json", "text"]
    }.each do |name, allowable_values|
      value = opts[name.to_sym]
      next if !value || allowable_values.include?(value)

      fail ArgumentError, "invalid value for \"#{name}\", must be one of #{allowable_values}"
    end
  end

  # resource path
  local_var_path = '/ai/question'

  # query parameters (copied so the caller's hash is neither mutated nor
  # required to be unfrozen)
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  %i[question headers timeout js js_timeout wait_for proxy country custom_proxy
     device error_on_404 error_on_redirect js_script format].each do |key|
    # nil means "not given"; false must still be forwarded
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters (copied for the same reason as query_params)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] ||= @api_client.select_header_accept(['application/json', 'text/html'])

  new_options = opts.merge(
    :operation => :"AIApi.get_question",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => opts[:form_params] || {},
    :body => opts[:debug_body],
    :auth_names => opts[:debug_auth_names] || ['api_key'],
    :return_type => opts[:debug_return_type] || 'String'
  )

  data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
  if config.debugging
    config.logger.debug "API called: AIApi#get_question\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
294
- end
295
- end
@@ -1,160 +0,0 @@
1
- =begin
2
- #WebScraping.AI
3
-
4
- #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
-
6
- The version of the OpenAPI document: 3.2.0
7
- Contact: support@webscraping.ai
8
- Generated by: https://openapi-generator.tech
9
- Generator version: 7.11.0
10
-
11
- =end
12
-
13
- require 'cgi'
14
-
15
- module WebScrapingAI
16
- class HTMLApi
17
- attr_accessor :api_client
18
-
# Creates the HTML endpoint wrapper.
#
# @param api_client [Object] client stored in +@api_client+ and used by the
#   request methods of this class; when omitted, +ApiClient.default+ is used.
def initialize(api_client = ApiClient.default)
  @api_client = api_client
end
# Page HTML by URL.
#
# Convenience wrapper around #get_html_with_http_info that keeps only the
# first element of its result (the response data) and drops the status code
# and response headers.
#
# @param url [String] URL of the target page.
# @param [Hash] opts the optional parameters
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).
# @option opts [String] :format Format of the response: "json" returns a JSON object with the response, "text" returns a plain text/HTML response. NOTE(review): the upstream description states both "text by default" and "default to 'json'" - confirm the server default.
# @return [String]
def get_html(url, opts = {})
  page, = get_html_with_http_info(url, opts)
  page
end
45
-
# Page HTML by URL.
#
# Sends a GET request to '/html' through +@api_client.call_api+ and returns
# the response data together with the status code and headers. Only options
# that are non-nil are forwarded as query parameters; the defaults quoted
# below are the ones described by the API, this method does not fill them in
# itself.
#
# Caller-supplied +opts[:query_params]+ and +opts[:header_params]+ are copied
# before being extended, so the hashes passed in are never modified.
#
# @param url [String] URL of the target page.
# @param [Hash] opts the optional parameters
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms (validated here to be within 1..20000). Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details.
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in "http://user:password@host:port" format (Smartproxy, https://webscraping.ai/proxies/smartproxy, for example).
# @option opts [String] :device Type of device emulation. (default to 'desktop')
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned).
# @option opts [String] :format Format of the response: "json" returns a JSON object with the response, "text" returns a plain text/HTML response. NOTE(review): the upstream description states both "text by default" and "default to 'json'" - confirm the server default.
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and +url+ is
#   nil, +:timeout+ is outside 1..30000, +:js_timeout+ is outside 1..20000, or
#   +:proxy+, +:country+, +:device+ or +:format+ is not an allowed value.
def get_html_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: HTMLApi.get_html ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    fail ArgumentError, "Missing the required parameter 'url' when calling HTMLApi.get_html" if url.nil?

    # numeric options: upper bound first, then the lower bound of 1
    { timeout: 30000, js_timeout: 20000 }.each do |name, maximum|
      value = opts[name]
      next if value.nil?

      if value > maximum
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling HTMLApi.get_html, must be smaller than or equal to #{maximum}."
      end
      if value < 1
        fail ArgumentError, "invalid value for \"opts[:\"#{name}\"]\" when calling HTMLApi.get_html, must be greater than or equal to 1."
      end
    end

    # enumerated options
    {
      'proxy' => ["datacenter", "residential"],
      'country' => ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"],
      'device' => ["desktop", "mobile", "tablet"],
      'format' => ["json", "text"]
    }.each do |name, allowable_values|
      value = opts[name.to_sym]
      next if !value || allowable_values.include?(value)

      fail ArgumentError, "invalid value for \"#{name}\", must be one of #{allowable_values}"
    end
  end

  # resource path
  local_var_path = '/html'

  # query parameters (copied so the caller's hash is neither mutated nor
  # required to be unfrozen)
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  %i[headers timeout js js_timeout wait_for proxy country custom_proxy device
     error_on_404 error_on_redirect js_script return_script_result format].each do |key|
    # nil means "not given"; false must still be forwarded
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters (copied for the same reason as query_params)
  header_params = (opts[:header_params] || {}).dup
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] ||= @api_client.select_header_accept(['application/json', 'text/html'])

  new_options = opts.merge(
    :operation => :"HTMLApi.get_html",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => opts[:form_params] || {},
    :body => opts[:debug_body],
    :auth_names => opts[:debug_auth_names] || ['api_key'],
    :return_type => opts[:debug_return_type] || 'String'
  )

  data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
  if config.debugging
    config.logger.debug "API called: HTMLApi#get_html\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
159
- end
160
- end