webscraping_ai 3.1.3 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
4
+ #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
5
 
6
- The version of the OpenAPI document: 3.1.3
6
+ The version of the OpenAPI document: 3.2.0
7
7
  Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 7.2.0
9
+ Generator version: 7.11.0
10
10
 
11
11
  =end
12
12
 
@@ -19,24 +19,159 @@ module WebScrapingAI
19
19
  def initialize(api_client = ApiClient.default)
20
20
  @api_client = api_client
21
21
  end
22
+ # Extract structured data fields from a web page
23
+ # Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
24
+ # @param url [String] URL of the target page.
25
+ # @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
26
+ # @param [Hash] opts the optional parameters
27
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;}).
28
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
29
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
30
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
31
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
32
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
33
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
34
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
35
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
36
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
37
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
38
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
39
+ # @return [Hash<String, String>]
40
+ def get_fields(url, fields, opts = {})
41
+ data, _status_code, _headers = get_fields_with_http_info(url, fields, opts)
42
+ data
43
+ end
44
+
45
+ # Extract structured data fields from a web page
46
+ # Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
47
+ # @param url [String] URL of the target page.
48
+ # @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
49
+ # @param [Hash] opts the optional parameters
50
+ # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;}).
51
+ # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
52
+ # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
53
+ # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
54
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
55
+ # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
56
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
57
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
58
+ # @option opts [String] :device Type of device emulation. (default to 'desktop')
59
+ # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
60
+ # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
61
+ # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
62
+ # @return [Array<(Hash<String, String>, Integer, Hash)>] Hash<String, String> data, response status code and response headers
63
+ def get_fields_with_http_info(url, fields, opts = {})
64
+ if @api_client.config.debugging
65
+ @api_client.config.logger.debug 'Calling API: AIApi.get_fields ...'
66
+ end
67
+ # verify the required parameter 'url' is set
68
+ if @api_client.config.client_side_validation && url.nil?
69
+ fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_fields"
70
+ end
71
+ # verify the required parameter 'fields' is set
72
+ if @api_client.config.client_side_validation && fields.nil?
73
+ fail ArgumentError, "Missing the required parameter 'fields' when calling AIApi.get_fields"
74
+ end
75
+ if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
76
+ fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_fields, must be smaller than or equal to 30000.'
77
+ end
78
+
79
+ if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
80
+ fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_fields, must be greater than or equal to 1.'
81
+ end
82
+
83
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
84
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_fields, must be smaller than or equal to 20000.'
85
+ end
86
+
87
+ if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
88
+ fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_fields, must be greater than or equal to 1.'
89
+ end
90
+
91
+ allowable_values = ["datacenter", "residential"]
92
+ if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
93
+ fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
94
+ end
95
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
96
+ if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
97
+ fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
98
+ end
99
+ allowable_values = ["desktop", "mobile", "tablet"]
100
+ if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
101
+ fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
102
+ end
103
+ # resource path
104
+ local_var_path = '/ai/fields'
105
+
106
+ # query parameters
107
+ query_params = opts[:query_params] || {}
108
+ query_params[:'url'] = url
109
+ query_params[:'fields'] = fields
110
+ query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
111
+ query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
112
+ query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
113
+ query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
114
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
115
+ query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
116
+ query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
117
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
118
+ query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
119
+ query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
120
+ query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
121
+ query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
122
+
123
+ # header parameters
124
+ header_params = opts[:header_params] || {}
125
+ # HTTP header 'Accept' (if needed)
126
+ header_params['Accept'] = @api_client.select_header_accept(['application/json']) unless header_params['Accept']
127
+
128
+ # form parameters
129
+ form_params = opts[:form_params] || {}
130
+
131
+ # http body (model)
132
+ post_body = opts[:debug_body]
133
+
134
+ # return_type
135
+ return_type = opts[:debug_return_type] || 'Hash<String, String>'
136
+
137
+ # auth_names
138
+ auth_names = opts[:debug_auth_names] || ['api_key']
139
+
140
+ new_options = opts.merge(
141
+ :operation => :"AIApi.get_fields",
142
+ :header_params => header_params,
143
+ :query_params => query_params,
144
+ :form_params => form_params,
145
+ :body => post_body,
146
+ :auth_names => auth_names,
147
+ :return_type => return_type
148
+ )
149
+
150
+ data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
151
+ if @api_client.config.debugging
152
+ @api_client.config.logger.debug "API called: AIApi#get_fields\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
153
+ end
154
+ return data, status_code, headers
155
+ end
156
+
22
157
  # Get an answer to a question about a given web page
23
158
  # Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
24
159
  # @param url [String] URL of the target page.
25
160
  # @param [Hash] opts the optional parameters
26
161
  # @option opts [String] :question Question or instructions to ask the LLM model about the target page.
27
- # @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 4000)
28
- # @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
29
- # @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'error')
30
162
  # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;}).
31
163
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
32
164
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
33
165
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
166
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
34
167
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
35
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
168
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
169
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
36
170
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
37
171
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
38
172
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
39
173
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
174
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
40
175
  # @return [String]
41
176
  def get_question(url, opts = {})
42
177
  data, _status_code, _headers = get_question_with_http_info(url, opts)
@@ -48,19 +183,19 @@ module WebScrapingAI
48
183
  # @param url [String] URL of the target page.
49
184
  # @param [Hash] opts the optional parameters
50
185
  # @option opts [String] :question Question or instructions to ask the LLM model about the target page.
51
- # @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 4000)
52
- # @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
53
- # @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'error')
54
186
  # @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&amp;headers[One]&#x3D;value1&amp;headers&#x3D;[Another]&#x3D;value2) or as a JSON encoded object (...&amp;headers&#x3D;{\&quot;One\&quot;: \&quot;value1\&quot;, \&quot;Another\&quot;: \&quot;value2\&quot;}).
55
187
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
56
188
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
57
189
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
190
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
58
191
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
59
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
192
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
193
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
60
194
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
61
195
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
62
196
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
63
197
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
198
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
64
199
  # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
65
200
  def get_question_with_http_info(url, opts = {})
66
201
  if @api_client.config.debugging
@@ -70,14 +205,6 @@ module WebScrapingAI
70
205
  if @api_client.config.client_side_validation && url.nil?
71
206
  fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_question"
72
207
  end
73
- allowable_values = [4000, 8000, 16000]
74
- if @api_client.config.client_side_validation && opts[:'context_limit'] && !allowable_values.include?(opts[:'context_limit'])
75
- fail ArgumentError, "invalid value for \"context_limit\", must be one of #{allowable_values}"
76
- end
77
- allowable_values = ["truncate", "error"]
78
- if @api_client.config.client_side_validation && opts[:'on_context_limit'] && !allowable_values.include?(opts[:'on_context_limit'])
79
- fail ArgumentError, "invalid value for \"on_context_limit\", must be one of #{allowable_values}"
80
- end
81
208
  if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
82
209
  fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_question, must be smaller than or equal to 30000.'
83
210
  end
@@ -98,7 +225,7 @@ module WebScrapingAI
98
225
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
99
226
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
100
227
  end
101
- allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
228
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
102
229
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
103
230
  fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
104
231
  end
@@ -106,6 +233,10 @@ module WebScrapingAI
106
233
  if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
107
234
  fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
108
235
  end
236
+ allowable_values = ["json", "text"]
237
+ if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
238
+ fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
239
+ end
109
240
  # resource path
110
241
  local_var_path = '/ai/question'
111
242
 
@@ -113,24 +244,24 @@ module WebScrapingAI
113
244
  query_params = opts[:query_params] || {}
114
245
  query_params[:'url'] = url
115
246
  query_params[:'question'] = opts[:'question'] if !opts[:'question'].nil?
116
- query_params[:'context_limit'] = opts[:'context_limit'] if !opts[:'context_limit'].nil?
117
- query_params[:'response_tokens'] = opts[:'response_tokens'] if !opts[:'response_tokens'].nil?
118
- query_params[:'on_context_limit'] = opts[:'on_context_limit'] if !opts[:'on_context_limit'].nil?
119
247
  query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
120
248
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
121
249
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
122
250
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
251
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
123
252
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
124
253
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
254
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
125
255
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
126
256
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
127
257
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
128
258
  query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
259
+ query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
129
260
 
130
261
  # header parameters
131
262
  header_params = opts[:header_params] || {}
132
263
  # HTTP header 'Accept' (if needed)
133
- header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
264
+ header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
134
265
 
135
266
  # form parameters
136
267
  form_params = opts[:form_params] || {}
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
4
+ #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
5
 
6
- The version of the OpenAPI document: 3.1.3
6
+ The version of the OpenAPI document: 3.2.0
7
7
  Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 7.2.0
9
+ Generator version: 7.11.0
10
10
 
11
11
  =end
12
12
 
@@ -27,13 +27,16 @@ module WebScrapingAI
27
27
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
28
28
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
29
29
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
30
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
30
31
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
31
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
32
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
33
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
32
34
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
33
35
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
34
36
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
35
37
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
36
38
  # @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
39
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
37
40
  # @return [String]
38
41
  def get_html(url, opts = {})
39
42
  data, _status_code, _headers = get_html_with_http_info(url, opts)
@@ -48,13 +51,16 @@ module WebScrapingAI
48
51
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
49
52
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
50
53
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page. (default to 2000)
54
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
51
55
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
52
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
56
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
57
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
53
58
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
54
59
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
55
60
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
56
61
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
57
62
  # @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
63
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
58
64
  # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
59
65
  def get_html_with_http_info(url, opts = {})
60
66
  if @api_client.config.debugging
@@ -84,7 +90,7 @@ module WebScrapingAI
84
90
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
85
91
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
86
92
  end
87
- allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
93
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
88
94
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
89
95
  fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
90
96
  end
@@ -92,6 +98,10 @@ module WebScrapingAI
92
98
  if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
93
99
  fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
94
100
  end
101
+ allowable_values = ["json", "text"]
102
+ if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
103
+ fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
104
+ end
95
105
  # resource path
96
106
  local_var_path = '/html'
97
107
 
@@ -102,18 +112,21 @@ module WebScrapingAI
102
112
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
103
113
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
104
114
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
115
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
105
116
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
106
117
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
118
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
107
119
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
108
120
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
109
121
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
110
122
  query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
111
123
  query_params[:'return_script_result'] = opts[:'return_script_result'] if !opts[:'return_script_result'].nil?
124
+ query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
112
125
 
113
126
  # header parameters
114
127
  header_params = opts[:header_params] || {}
115
128
  # HTTP header 'Accept' (if needed)
116
- header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
129
+ header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
117
130
 
118
131
  # form parameters
119
132
  form_params = opts[:form_params] || {}
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
4
+ #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
5
 
6
- The version of the OpenAPI document: 3.1.3
6
+ The version of the OpenAPI document: 3.2.0
7
7
  Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 7.2.0
9
+ Generator version: 7.11.0
10
10
 
11
11
  =end
12
12
 
@@ -28,12 +28,15 @@ module WebScrapingAI
28
28
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
29
29
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
30
30
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
31
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
31
32
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
32
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
33
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
34
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
33
35
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
34
36
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
35
37
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
36
38
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
39
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
37
40
  # @return [String]
38
41
  def get_selected(url, opts = {})
39
42
  data, _status_code, _headers = get_selected_with_http_info(url, opts)
@@ -49,12 +52,15 @@ module WebScrapingAI
49
52
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
50
53
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
51
54
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
55
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
52
56
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
53
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
57
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
58
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
54
59
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
55
60
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
56
61
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
57
62
  # @option opts [String] :js_script Custom JavaScript code to execute on the target page.
63
+ # @option opts [String] :format Format of the response (text by default). \&quot;json\&quot; will return a JSON object with the response, \&quot;text\&quot; will return a plain text/HTML response. (default to 'json')
58
64
  # @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
59
65
  def get_selected_with_http_info(url, opts = {})
60
66
  if @api_client.config.debugging
@@ -84,7 +90,7 @@ module WebScrapingAI
84
90
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
85
91
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
86
92
  end
87
- allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
93
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
88
94
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
89
95
  fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
90
96
  end
@@ -92,6 +98,10 @@ module WebScrapingAI
92
98
  if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
93
99
  fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
94
100
  end
101
+ allowable_values = ["json", "text"]
102
+ if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
103
+ fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
104
+ end
95
105
  # resource path
96
106
  local_var_path = '/selected'
97
107
 
@@ -103,17 +113,20 @@ module WebScrapingAI
103
113
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
104
114
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
105
115
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
116
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
106
117
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
107
118
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
119
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
108
120
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
109
121
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
110
122
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
111
123
  query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
124
+ query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
112
125
 
113
126
  # header parameters
114
127
  header_params = opts[:header_params] || {}
115
128
  # HTTP header 'Accept' (if needed)
116
- header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
129
+ header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
117
130
 
118
131
  # form parameters
119
132
  form_params = opts[:form_params] || {}
@@ -153,8 +166,10 @@ module WebScrapingAI
153
166
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
154
167
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
155
168
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
169
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
156
170
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
157
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
171
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
172
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
158
173
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
159
174
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
160
175
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
@@ -174,8 +189,10 @@ module WebScrapingAI
174
189
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
175
190
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
176
191
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
192
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
177
193
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
178
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
194
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
195
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
179
196
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
180
197
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
181
198
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
@@ -209,7 +226,7 @@ module WebScrapingAI
209
226
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
210
227
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
211
228
  end
212
- allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
229
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
213
230
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
214
231
  fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
215
232
  end
@@ -228,8 +245,10 @@ module WebScrapingAI
228
245
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
229
246
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
230
247
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
248
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
231
249
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
232
250
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
251
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
233
252
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
234
253
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
235
254
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
@@ -238,7 +257,7 @@ module WebScrapingAI
238
257
  # header parameters
239
258
  header_params = opts[:header_params] || {}
240
259
  # HTTP header 'Accept' (if needed)
241
- header_params['Accept'] = @api_client.select_header_accept(['application/json'])
260
+ header_params['Accept'] = @api_client.select_header_accept(['application/json']) unless header_params['Accept']
242
261
 
243
262
  # form parameters
244
263
  form_params = opts[:form_params] || {}
@@ -1,12 +1,12 @@
1
1
  =begin
2
2
  #WebScraping.AI
3
3
 
4
- #WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
4
+ #WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
5
5
 
6
- The version of the OpenAPI document: 3.1.3
6
+ The version of the OpenAPI document: 3.2.0
7
7
  Contact: support@webscraping.ai
8
8
  Generated by: https://openapi-generator.tech
9
- OpenAPI Generator version: 7.2.0
9
+ Generator version: 7.11.0
10
10
 
11
11
  =end
12
12
 
@@ -20,7 +20,7 @@ module WebScrapingAI
20
20
  @api_client = api_client
21
21
  end
22
22
  # Page text by URL
23
- # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
23
+ # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
24
24
  # @param url [String] URL of the target page.
25
25
  # @param [Hash] opts the optional parameters
26
26
  # @option opts [String] :text_format Format of the text response (plain by default). \&quot;plain\&quot; will return only the page body text. \&quot;json\&quot; and \&quot;xml\&quot; will return a json/xml with \&quot;title\&quot;, \&quot;description\&quot; and \&quot;content\&quot; keys. (default to 'plain')
@@ -29,8 +29,10 @@ module WebScrapingAI
29
29
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
30
30
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
31
31
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
32
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
32
33
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
33
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
34
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
35
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
34
36
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
35
37
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
36
38
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
@@ -42,7 +44,7 @@ module WebScrapingAI
42
44
  end
43
45
 
44
46
  # Page text by URL
45
- # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
47
+ # Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
46
48
  # @param url [String] URL of the target page.
47
49
  # @param [Hash] opts the optional parameters
48
50
  # @option opts [String] :text_format Format of the text response (plain by default). \&quot;plain\&quot; will return only the page body text. \&quot;json\&quot; and \&quot;xml\&quot; will return a json/xml with \&quot;title\&quot;, \&quot;description\&quot; and \&quot;content\&quot; keys. (default to 'plain')
@@ -51,8 +53,10 @@ module WebScrapingAI
51
53
  # @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
52
54
  # @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
53
55
  # @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
56
+ # @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
54
57
  # @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
55
- # @option opts [String] :country Country of the proxy to use (US by default). Only available on Startup and Custom plans. (default to 'us')
58
+ # @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
59
+ # @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \&quot;http://user:password@host:port\&quot; format (&lt;a target&#x3D;\&quot;_blank\&quot; href&#x3D;\&quot;https://webscraping.ai/proxies/smartproxy\&quot;&gt;Smartproxy&lt;/a&gt; for example).
56
60
  # @option opts [String] :device Type of device emulation. (default to 'desktop')
57
61
  # @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
58
62
  # @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
@@ -90,7 +94,7 @@ module WebScrapingAI
90
94
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
91
95
  fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
92
96
  end
93
- allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
97
+ allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
94
98
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
95
99
  fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
96
100
  end
@@ -110,8 +114,10 @@ module WebScrapingAI
110
114
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
111
115
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
112
116
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
117
+ query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
113
118
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
114
119
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
120
+ query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
115
121
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
116
122
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
117
123
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
@@ -120,7 +126,7 @@ module WebScrapingAI
120
126
  # header parameters
121
127
  header_params = opts[:header_params] || {}
122
128
  # HTTP header 'Accept' (if needed)
123
- header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml'])
129
+ header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml']) unless header_params['Accept']
124
130
 
125
131
  # form parameters
126
132
  form_params = opts[:form_params] || {}