webscraping_ai 2.0.2 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #A client for https://webscraping.ai API. It provides a web scaping automation API with Chrome JS rendering, rotating proxies and builtin HTML parsing.
4
+ #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
5
 
6
- The version of the OpenAPI document: 2.0.2
6
+ The version of the OpenAPI document: 3.1.2
7
7
  Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 4.3.1
9
+ OpenAPI Generator version: 7.2.0
10
10
 
11
11
  =end
12
12
 
@@ -20,14 +20,20 @@ module WebScrapingAI
20
20
  @api_client = api_client
21
21
  end
22
22
  # HTML of a selected page area by URL and CSS selector
23
- # Returns just HTML on success, JSON on error
24
- # @param url [String] URL of the target page
23
+ # Returns HTML of a selected page area by URL and CSS selector. Useful if you don't want to do the HTML parsing on your side.
24
+ # @param url [String] URL of the target page.
25
25
  # @param [Hash] opts the optional parameters
26
26
  # @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
27
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
28
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000)
29
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true)
30
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter')
27
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
28
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
29
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
30
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
31
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
32
+ # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
33
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
34
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
35
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
36
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
31
37
  # @return [String]
32
38
  def get_selected(url, opts = {})
33
39
  data, _status_code, _headers = get_selected_with_http_info(url, opts)
@@ -35,14 +41,20 @@ module WebScrapingAI
35
41
  end
36
42
 
37
43
  # HTML of a selected page area by URL and CSS selector
38
- # Returns just HTML on success, JSON on error
39
- # @param url [String] URL of the target page
44
+ # Returns HTML of a selected page area by URL and CSS selector. Useful if you don't want to do the HTML parsing on your side.
45
+ # @param url [String] URL of the target page.
40
46
  # @param [Hash] opts the optional parameters
41
47
  # @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
42
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
43
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
44
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
45
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
48
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
49
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
50
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
51
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
52
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
53
+ # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
54
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
55
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
56
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
57
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
46
58
  # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
47
59
  def get_selected_with_http_info(url, opts = {})
48
60
  if @api_client.config.debugging
@@ -60,10 +72,26 @@ module WebScrapingAI
60
72
  fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected, must be greater than or equal to 1.'
61
73
  end
62
74
 
75
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
76
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling SelectedHTMLApi.get_selected, must be smaller than or equal to 20000.'
77
+ end
78
+
79
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
80
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling SelectedHTMLApi.get_selected, must be greater than or equal to 1.'
81
+ end
82
+
63
83
  allowable_values = ["datacenter", "residential"]
64
84
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
65
85
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
66
86
  end
87
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
88
+ if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
89
+ fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
90
+ end
91
+ allowable_values = ["desktop", "mobile", "tablet"]
92
+ if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
93
+ fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
94
+ end
67
95
  # resource path
68
96
  local_var_path = '/selected'
69
97
 
@@ -74,7 +102,13 @@ module WebScrapingAI
74
102
  query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
75
103
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
76
104
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
105
+ query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
77
106
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
107
+ query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
108
+ query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
109
+ query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
110
+ query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
111
+ query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
78
112
 
79
113
  # header parameters
80
114
  header_params = opts[:header_params] || {}
@@ -85,15 +119,16 @@ module WebScrapingAI
85
119
  form_params = opts[:form_params] || {}
86
120
 
87
121
  # http body (model)
88
- post_body = opts[:body]
122
+ post_body = opts[:debug_body]
89
123
 
90
124
  # return_type
91
- return_type = opts[:return_type] || 'String'
125
+ return_type = opts[:debug_return_type] || 'String'
92
126
 
93
127
  # auth_names
94
- auth_names = opts[:auth_names] || ['api_key']
128
+ auth_names = opts[:debug_auth_names] || ['api_key']
95
129
 
96
130
  new_options = opts.merge(
131
+ :operation => :"SelectedHTMLApi.get_selected",
97
132
  :header_params => header_params,
98
133
  :query_params => query_params,
99
134
  :form_params => form_params,
@@ -110,14 +145,20 @@ module WebScrapingAI
110
145
  end
111
146
 
112
147
  # HTML of multiple page areas by URL and CSS selectors
113
- # Always returns JSON
114
- # @param url [String] URL of the target page
148
+ # Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.
149
+ # @param url [String] URL of the target page.
115
150
  # @param [Hash] opts the optional parameters
116
151
  # @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
117
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
118
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000)
119
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true)
120
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter')
152
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
153
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
154
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
155
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
156
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
157
+ # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
158
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
159
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
160
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
161
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
121
162
  # @return [Array<String>]
122
163
  def get_selected_multiple(url, opts = {})
123
164
  data, _status_code, _headers = get_selected_multiple_with_http_info(url, opts)
@@ -125,14 +166,20 @@ module WebScrapingAI
125
166
  end
126
167
 
127
168
  # HTML of multiple page areas by URL and CSS selectors
128
- # Always returns JSON
129
- # @param url [String] URL of the target page
169
+ # Returns HTML of multiple page areas by URL and CSS selectors. Useful if you don't want to do the HTML parsing on your side.
170
+ # @param url [String] URL of the target page.
130
171
  # @param [Hash] opts the optional parameters
131
172
  # @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
132
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
133
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
134
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
135
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
173
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
174
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
175
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
176
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
177
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
178
+ # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
179
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
180
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
181
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
182
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
136
183
  # @return [Array<(Array<String>, Integer, Hash)>] Array<String> data, response status code and response headers
137
184
  def get_selected_multiple_with_http_info(url, opts = {})
138
185
  if @api_client.config.debugging
@@ -150,195 +197,25 @@ module WebScrapingAI
150
197
  fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected_multiple, must be greater than or equal to 1.'
151
198
  end
152
199
 
153
- allowable_values = ["datacenter", "residential"]
154
- if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
155
- fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
156
- end
157
- # resource path
158
- local_var_path = '/selected-multiple'
159
-
160
- # query parameters
161
- query_params = opts[:query_params] || {}
162
- query_params[:'url'] = url
163
- query_params[:'selectors'] = @api_client.build_collection_param(opts[:'selectors'], :multi) if !opts[:'selectors'].nil?
164
- query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
165
- query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
166
- query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
167
- query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
168
-
169
- # header parameters
170
- header_params = opts[:header_params] || {}
171
- # HTTP header 'Accept' (if needed)
172
- header_params['Accept'] = @api_client.select_header_accept(['application/json'])
173
-
174
- # form parameters
175
- form_params = opts[:form_params] || {}
176
-
177
- # http body (model)
178
- post_body = opts[:body]
179
-
180
- # return_type
181
- return_type = opts[:return_type] || 'Array<String>'
182
-
183
- # auth_names
184
- auth_names = opts[:auth_names] || ['api_key']
185
-
186
- new_options = opts.merge(
187
- :header_params => header_params,
188
- :query_params => query_params,
189
- :form_params => form_params,
190
- :body => post_body,
191
- :auth_names => auth_names,
192
- :return_type => return_type
193
- )
194
-
195
- data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
196
- if @api_client.config.debugging
197
- @api_client.config.logger.debug "API called: SelectedHTMLApi#get_selected_multiple\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
198
- end
199
- return data, status_code, headers
200
- end
201
-
202
- # HTML of a selected page areas by URL and CSS selector, with POST request to the target page
203
- # Returns just HTML on success, JSON on error. Request body will be passed to the target page.
204
- # @param url [String] URL of the target page
205
- # @param [Hash] opts the optional parameters
206
- # @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
207
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
208
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000)
209
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true)
210
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter')
211
- # @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
212
- # @return [String]
213
- def post_selected(url, opts = {})
214
- data, _status_code, _headers = post_selected_with_http_info(url, opts)
215
- data
216
- end
217
-
218
- # HTML of a selected page areas by URL and CSS selector, with POST request to the target page
219
- # Returns just HTML on success, JSON on error. Request body will be passed to the target page.
220
- # @param url [String] URL of the target page
221
- # @param [Hash] opts the optional parameters
222
- # @option opts [String] :selector CSS selector (null by default, returns whole page HTML)
223
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
224
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
225
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
226
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
227
- # @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
228
- # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
229
- def post_selected_with_http_info(url, opts = {})
230
- if @api_client.config.debugging
231
- @api_client.config.logger.debug 'Calling API: SelectedHTMLApi.post_selected ...'
232
- end
233
- # verify the required parameter 'url' is set
234
- if @api_client.config.client_side_validation && url.nil?
235
- fail ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.post_selected"
236
- end
237
- if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
238
- fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected, must be smaller than or equal to 30000.'
200
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
201
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling SelectedHTMLApi.get_selected_multiple, must be smaller than or equal to 20000.'
239
202
  end
240
203
 
241
- if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
242
- fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected, must be greater than or equal to 1.'
204
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
205
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling SelectedHTMLApi.get_selected_multiple, must be greater than or equal to 1.'
243
206
  end
244
207
 
245
208
  allowable_values = ["datacenter", "residential"]
246
209
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
247
210
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
248
211
  end
249
- # resource path
250
- local_var_path = '/selected'
251
-
252
- # query parameters
253
- query_params = opts[:query_params] || {}
254
- query_params[:'url'] = url
255
- query_params[:'selector'] = opts[:'selector'] if !opts[:'selector'].nil?
256
- query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
257
- query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
258
- query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
259
- query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
260
-
261
- # header parameters
262
- header_params = opts[:header_params] || {}
263
- # HTTP header 'Accept' (if needed)
264
- header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
265
- # HTTP header 'Content-Type'
266
- header_params['Content-Type'] = @api_client.select_header_content_type(['application/json', 'application/x-www-form-urlencoded', 'application/xml', 'text/plain'])
267
-
268
- # form parameters
269
- form_params = opts[:form_params] || {}
270
-
271
- # http body (model)
272
- post_body = opts[:body] || @api_client.object_to_http_body(opts[:'request_body'])
273
-
274
- # return_type
275
- return_type = opts[:return_type] || 'String'
276
-
277
- # auth_names
278
- auth_names = opts[:auth_names] || ['api_key']
279
-
280
- new_options = opts.merge(
281
- :header_params => header_params,
282
- :query_params => query_params,
283
- :form_params => form_params,
284
- :body => post_body,
285
- :auth_names => auth_names,
286
- :return_type => return_type
287
- )
288
-
289
- data, status_code, headers = @api_client.call_api(:POST, local_var_path, new_options)
290
- if @api_client.config.debugging
291
- @api_client.config.logger.debug "API called: SelectedHTMLApi#post_selected\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
292
- end
293
- return data, status_code, headers
294
- end
295
-
296
- # HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
297
- # Always returns JSON. Request body will be passed to the target page.
298
- # @param url [String] URL of the target page
299
- # @param [Hash] opts the optional parameters
300
- # @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
301
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
302
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000)
303
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true)
304
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter')
305
- # @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
306
- # @return [Array<String>]
307
- def post_selected_multiple(url, opts = {})
308
- data, _status_code, _headers = post_selected_multiple_with_http_info(url, opts)
309
- data
310
- end
311
-
312
- # HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
313
- # Always returns JSON. Request body will be passed to the target page.
314
- # @param url [String] URL of the target page
315
- # @param [Hash] opts the optional parameters
316
- # @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML)
317
- # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;})
318
- # @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
319
- # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
320
- # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
321
- # @option opts [Hash<String, Object>] :request_body Request body to pass to the target page
322
- # @return [Array<(Array<String>, Integer, Hash)>] Array<String> data, response status code and response headers
323
- def post_selected_multiple_with_http_info(url, opts = {})
324
- if @api_client.config.debugging
325
- @api_client.config.logger.debug 'Calling API: SelectedHTMLApi.post_selected_multiple ...'
326
- end
327
- # verify the required parameter 'url' is set
328
- if @api_client.config.client_side_validation && url.nil?
329
- fail ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.post_selected_multiple"
330
- end
331
- if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
332
- fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected_multiple, must be smaller than or equal to 30000.'
333
- end
334
-
335
- if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
336
- fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected_multiple, must be greater than or equal to 1.'
212
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
213
+ if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
214
+ fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
337
215
  end
338
-
339
- allowable_values = ["datacenter", "residential"]
340
- if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
341
- fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
216
+ allowable_values = ["desktop", "mobile", "tablet"]
217
+ if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
218
+ fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
342
219
  end
343
220
  # resource path
344
221
  local_var_path = '/selected-multiple'
@@ -350,28 +227,33 @@ module WebScrapingAI
350
227
  query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
351
228
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
352
229
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
230
+ query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
353
231
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
232
+ query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
233
+ query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
234
+ query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
235
+ query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
236
+ query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
354
237
 
355
238
  # header parameters
356
239
  header_params = opts[:header_params] || {}
357
240
  # HTTP header 'Accept' (if needed)
358
241
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
359
- # HTTP header 'Content-Type'
360
- header_params['Content-Type'] = @api_client.select_header_content_type(['application/json', 'application/x-www-form-urlencoded', 'application/xml', 'text/plain'])
361
242
 
362
243
  # form parameters
363
244
  form_params = opts[:form_params] || {}
364
245
 
365
246
  # http body (model)
366
- post_body = opts[:body] || @api_client.object_to_http_body(opts[:'request_body'])
247
+ post_body = opts[:debug_body]
367
248
 
368
249
  # return_type
369
- return_type = opts[:return_type] || 'Array<String>'
250
+ return_type = opts[:debug_return_type] || 'Array<String>'
370
251
 
371
252
  # auth_names
372
- auth_names = opts[:auth_names] || ['api_key']
253
+ auth_names = opts[:debug_auth_names] || ['api_key']
373
254
 
374
255
  new_options = opts.merge(
256
+ :operation => :"SelectedHTMLApi.get_selected_multiple",
375
257
  :header_params => header_params,
376
258
  :query_params => query_params,
377
259
  :form_params => form_params,
@@ -380,9 +262,9 @@ module WebScrapingAI
380
262
  :return_type => return_type
381
263
  )
382
264
 
383
- data, status_code, headers = @api_client.call_api(:POST, local_var_path, new_options)
265
+ data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
384
266
  if @api_client.config.debugging
385
- @api_client.config.logger.debug "API called: SelectedHTMLApi#post_selected_multiple\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
267
+ @api_client.config.logger.debug "API called: SelectedHTMLApi#get_selected_multiple\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
386
268
  end
387
269
  return data, status_code, headers
388
270
  end
@@ -0,0 +1,154 @@
1
+ =begin
2
+ #WebScraping.AI
3
+
4
+ #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
+
6
+ The version of the OpenAPI document: 3.1.2
7
+ Contact: support@webscraping.ai
8
+ Generated by: https://openapi-generator.tech
9
+ OpenAPI Generator version: 7.2.0
10
+
11
+ =end
12
+
13
+ require 'cgi'
14
+
15
module WebScrapingAI
  # Client for the `/text` endpoint: fetches the visible text of a web page
  # through the configured {#api_client}.
  class TextApi
    # Accepted values for the `:text_format` option.
    TEXT_FORMATS = %w[plain xml json].freeze
    # Accepted values for the `:proxy` option.
    PROXY_TYPES = %w[datacenter residential].freeze
    # Accepted values for the `:country` option.
    COUNTRIES = %w[us gb de it fr ca es ru jp kr].freeze
    # Accepted values for the `:device` option.
    DEVICES = %w[desktop mobile tablet].freeze
    # Upper bound (ms) enforced client-side for the `:timeout` option.
    MAX_TIMEOUT_MS = 30000
    # Upper bound (ms) enforced client-side for the `:js_timeout` option.
    MAX_JS_TIMEOUT_MS = 20000
    # Options forwarded as query parameters, in the order they are added to the query.
    QUERY_OPTION_KEYS = %i[
      text_format return_links headers timeout js js_timeout
      proxy country device error_on_404 error_on_redirect js_script
    ].freeze

    attr_accessor :api_client

    # @param api_client [ApiClient] client used to perform the HTTP calls
    def initialize(api_client = ApiClient.default)
      @api_client = api_client
    end

    # Page text by URL
    # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
    # Accepts the same options as {#get_text_with_http_info} and returns only the data element of its result.
    # @param url [String] URL of the target page.
    # @param [Hash] opts the optional parameters
    # @return [String]
    def get_text(url, opts = {})
      data, _status_code, _headers = get_text_with_http_info(url, opts)
      data
    end

    # Page text by URL
    # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
    #
    # Hashes passed as opts[:query_params] and opts[:header_params] are copied before
    # the request values are added, so the caller's hashes are never modified.
    # @param url [String] URL of the target page.
    # @param [Hash] opts the optional parameters
    # @option opts [String] :text_format Format of the text response (plain by default). "plain" will return only the page body text. "json" and "xml" will return a json/xml with "title", "description" and "content" keys. (default to 'plain')
    # @option opts [Boolean] :return_links [Works only with text_format=json] Return links from the page body text (false by default). Useful for building web crawlers. (default to false)
    # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={"One": "value1", "Another": "value2"}).
    # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
    # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
    # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
    # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
    # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
    # @option opts [String] :device Type of device emulation. (default to 'desktop')
    # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
    # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
    # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
    # @raise [ArgumentError] when client-side validation is enabled and `url` is nil or an option is outside its allowed values/range
    # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
    def get_text_with_http_info(url, opts = {})
      config = @api_client.config
      config.logger.debug 'Calling API: TextApi.get_text ...' if config.debugging
      validate_get_text_params(url, opts) if config.client_side_validation

      # resource path
      local_var_path = '/text'

      # query parameters (copied: the caller's hash must not pick up this request's values)
      query_params = (opts[:query_params] || {}).dup
      query_params[:url] = url
      QUERY_OPTION_KEYS.each do |key|
        # nil means "not supplied"; false is a real value and must be sent
        query_params[key] = opts[key] unless opts[key].nil?
      end

      # header parameters (copied for the same reason as the query parameters)
      header_params = (opts[:header_params] || {}).dup
      # HTTP header 'Accept' (if needed)
      header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml'])

      new_options = opts.merge(
        :operation => :"TextApi.get_text",
        :header_params => header_params,
        :query_params => query_params,
        :form_params => opts[:form_params] || {},
        :body => opts[:debug_body],
        :auth_names => opts[:debug_auth_names] || ['api_key'],
        :return_type => opts[:debug_return_type] || 'String'
      )

      data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
      if config.debugging
        config.logger.debug "API called: TextApi#get_text\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
      end
      [data, status_code, headers]
    end

    private

    # Runs every client-side check for get_text; the first violation raises ArgumentError.
    def validate_get_text_params(url, opts)
      raise ArgumentError, "Missing the required parameter 'url' when calling TextApi.get_text" if url.nil?

      ensure_allowed('text_format', opts[:text_format], TEXT_FORMATS)
      ensure_within_limit('timeout', opts[:timeout], MAX_TIMEOUT_MS)
      ensure_within_limit('js_timeout', opts[:js_timeout], MAX_JS_TIMEOUT_MS)
      ensure_allowed('proxy', opts[:proxy], PROXY_TYPES)
      ensure_allowed('country', opts[:country], COUNTRIES)
      ensure_allowed('device', opts[:device], DEVICES)
    end

    # Raises unless `value` is unset (nil/false) or one of `allowed`.
    def ensure_allowed(name, value, allowed)
      return if !value || allowed.include?(value)

      raise ArgumentError, "invalid value for \"#{name}\", must be one of #{allowed}"
    end

    # Raises unless `value` is nil or lies within 1..max; the upper bound is checked first.
    def ensure_within_limit(name, value, max)
      return if value.nil?

      prefix = "invalid value for \"opts[:\"#{name}\"]\" when calling TextApi.get_text, must be"
      raise ArgumentError, "#{prefix} smaller than or equal to #{max}." if value > max
      raise ArgumentError, "#{prefix} greater than or equal to 1." if value < 1
    end
  end
end