webscraping_ai 3.1.3 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -15
- data/docs/AIApi.md +110 -10
- data/docs/Account.md +2 -0
- data/docs/HTMLApi.md +10 -4
- data/docs/SelectedHTMLApi.md +17 -7
- data/docs/TextApi.md +8 -4
- data/lib/webscraping_ai/api/account_api.rb +4 -4
- data/lib/webscraping_ai/api/ai_api.rb +155 -24
- data/lib/webscraping_ai/api/html_api.rb +20 -7
- data/lib/webscraping_ai/api/selected_html_api.rb +30 -11
- data/lib/webscraping_ai/api/text_api.rb +15 -9
- data/lib/webscraping_ai/api_client.rb +5 -5
- data/lib/webscraping_ai/api_error.rb +3 -3
- data/lib/webscraping_ai/configuration.rb +13 -3
- data/lib/webscraping_ai/models/account.rb +14 -4
- data/lib/webscraping_ai/models/error.rb +3 -3
- data/lib/webscraping_ai/version.rb +4 -4
- data/lib/webscraping_ai.rb +3 -3
- data/spec/api/account_api_spec.rb +3 -3
- data/spec/api/ai_api_spec.rb +32 -7
- data/spec/api/html_api_spec.rb +7 -4
- data/spec/api/selected_html_api_spec.rb +10 -5
- data/spec/api/text_api_spec.rb +7 -5
- data/spec/models/account_spec.rb +9 -3
- data/spec/models/error_spec.rb +3 -3
- data/spec/spec_helper.rb +3 -3
- data/webscraping_ai.gemspec +4 -4
- metadata +4 -4
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -19,24 +19,159 @@ module WebScrapingAI
|
|
19
19
|
def initialize(api_client = ApiClient.default)
|
20
20
|
@api_client = api_client
|
21
21
|
end
|
22
|
+
# Extract structured data fields from a web page
|
23
|
+
# Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
24
|
+
# @param url [String] URL of the target page.
|
25
|
+
# @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
|
26
|
+
# @param [Hash] opts the optional parameters
|
27
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
28
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
29
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
30
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
31
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
32
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
33
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
34
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
35
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
36
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
37
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
38
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
39
|
+
# @return [Hash<String, String>]
|
40
|
+
def get_fields(url, fields, opts = {})
|
41
|
+
data, _status_code, _headers = get_fields_with_http_info(url, fields, opts)
|
42
|
+
data
|
43
|
+
end
|
44
|
+
|
45
|
+
# Extract structured data fields from a web page
|
46
|
+
# Returns structured data fields extracted from the webpage using an LLM model. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
|
47
|
+
# @param url [String] URL of the target page.
|
48
|
+
# @param fields [Hash<String, String>] Object describing fields to extract from the page and their descriptions
|
49
|
+
# @param [Hash] opts the optional parameters
|
50
|
+
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
51
|
+
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
52
|
+
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
53
|
+
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
54
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
55
|
+
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
56
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
57
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
58
|
+
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
59
|
+
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
60
|
+
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
61
|
+
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
62
|
+
# @return [Array<(Hash<String, String>, Integer, Hash)>] Hash<String, String> data, response status code and response headers
|
63
|
+
def get_fields_with_http_info(url, fields, opts = {})
|
64
|
+
if @api_client.config.debugging
|
65
|
+
@api_client.config.logger.debug 'Calling API: AIApi.get_fields ...'
|
66
|
+
end
|
67
|
+
# verify the required parameter 'url' is set
|
68
|
+
if @api_client.config.client_side_validation && url.nil?
|
69
|
+
fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_fields"
|
70
|
+
end
|
71
|
+
# verify the required parameter 'fields' is set
|
72
|
+
if @api_client.config.client_side_validation && fields.nil?
|
73
|
+
fail ArgumentError, "Missing the required parameter 'fields' when calling AIApi.get_fields"
|
74
|
+
end
|
75
|
+
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
|
76
|
+
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_fields, must be smaller than or equal to 30000.'
|
77
|
+
end
|
78
|
+
|
79
|
+
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
|
80
|
+
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_fields, must be greater than or equal to 1.'
|
81
|
+
end
|
82
|
+
|
83
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
|
84
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_fields, must be smaller than or equal to 20000.'
|
85
|
+
end
|
86
|
+
|
87
|
+
if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
|
88
|
+
fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling AIApi.get_fields, must be greater than or equal to 1.'
|
89
|
+
end
|
90
|
+
|
91
|
+
allowable_values = ["datacenter", "residential"]
|
92
|
+
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
93
|
+
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
94
|
+
end
|
95
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
96
|
+
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
97
|
+
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
98
|
+
end
|
99
|
+
allowable_values = ["desktop", "mobile", "tablet"]
|
100
|
+
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
101
|
+
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
102
|
+
end
|
103
|
+
# resource path
|
104
|
+
local_var_path = '/ai/fields'
|
105
|
+
|
106
|
+
# query parameters
|
107
|
+
query_params = opts[:query_params] || {}
|
108
|
+
query_params[:'url'] = url
|
109
|
+
query_params[:'fields'] = fields
|
110
|
+
query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
|
111
|
+
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
112
|
+
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
113
|
+
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
114
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
115
|
+
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
116
|
+
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
117
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
118
|
+
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
119
|
+
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
120
|
+
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
121
|
+
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
122
|
+
|
123
|
+
# header parameters
|
124
|
+
header_params = opts[:header_params] || {}
|
125
|
+
# HTTP header 'Accept' (if needed)
|
126
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json']) unless header_params['Accept']
|
127
|
+
|
128
|
+
# form parameters
|
129
|
+
form_params = opts[:form_params] || {}
|
130
|
+
|
131
|
+
# http body (model)
|
132
|
+
post_body = opts[:debug_body]
|
133
|
+
|
134
|
+
# return_type
|
135
|
+
return_type = opts[:debug_return_type] || 'Hash<String, String>'
|
136
|
+
|
137
|
+
# auth_names
|
138
|
+
auth_names = opts[:debug_auth_names] || ['api_key']
|
139
|
+
|
140
|
+
new_options = opts.merge(
|
141
|
+
:operation => :"AIApi.get_fields",
|
142
|
+
:header_params => header_params,
|
143
|
+
:query_params => query_params,
|
144
|
+
:form_params => form_params,
|
145
|
+
:body => post_body,
|
146
|
+
:auth_names => auth_names,
|
147
|
+
:return_type => return_type
|
148
|
+
)
|
149
|
+
|
150
|
+
data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
|
151
|
+
if @api_client.config.debugging
|
152
|
+
@api_client.config.logger.debug "API called: AIApi#get_fields\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
153
|
+
end
|
154
|
+
return data, status_code, headers
|
155
|
+
end
|
156
|
+
|
22
157
|
# Get an answer to a question about a given web page
|
23
158
|
# Returns the answer in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing, then the answer is extracted using an LLM model.
|
24
159
|
# @param url [String] URL of the target page.
|
25
160
|
# @param [Hash] opts the optional parameters
|
26
161
|
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
27
|
-
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 4000)
|
28
|
-
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
|
29
|
-
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'error')
|
30
162
|
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
31
163
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
32
164
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
33
165
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
166
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
34
167
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
35
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
168
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
169
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
36
170
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
37
171
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
38
172
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
39
173
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
174
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
40
175
|
# @return [String]
|
41
176
|
def get_question(url, opts = {})
|
42
177
|
data, _status_code, _headers = get_question_with_http_info(url, opts)
|
@@ -48,19 +183,19 @@ module WebScrapingAI
|
|
48
183
|
# @param url [String] URL of the target page.
|
49
184
|
# @param [Hash] opts the optional parameters
|
50
185
|
# @option opts [String] :question Question or instructions to ask the LLM model about the target page.
|
51
|
-
# @option opts [Integer] :context_limit Maximum number of tokens to use as context for the LLM model (4000 by default). (default to 4000)
|
52
|
-
# @option opts [Integer] :response_tokens Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (default to 100)
|
53
|
-
# @option opts [String] :on_context_limit What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (default to 'error')
|
54
186
|
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
55
187
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
56
188
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
57
189
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
190
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
58
191
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
59
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
192
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
193
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
60
194
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
61
195
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
62
196
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
63
197
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
198
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
64
199
|
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
65
200
|
def get_question_with_http_info(url, opts = {})
|
66
201
|
if @api_client.config.debugging
|
@@ -70,14 +205,6 @@ module WebScrapingAI
|
|
70
205
|
if @api_client.config.client_side_validation && url.nil?
|
71
206
|
fail ArgumentError, "Missing the required parameter 'url' when calling AIApi.get_question"
|
72
207
|
end
|
73
|
-
allowable_values = [4000, 8000, 16000]
|
74
|
-
if @api_client.config.client_side_validation && opts[:'context_limit'] && !allowable_values.include?(opts[:'context_limit'])
|
75
|
-
fail ArgumentError, "invalid value for \"context_limit\", must be one of #{allowable_values}"
|
76
|
-
end
|
77
|
-
allowable_values = ["truncate", "error"]
|
78
|
-
if @api_client.config.client_side_validation && opts[:'on_context_limit'] && !allowable_values.include?(opts[:'on_context_limit'])
|
79
|
-
fail ArgumentError, "invalid value for \"on_context_limit\", must be one of #{allowable_values}"
|
80
|
-
end
|
81
208
|
if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
|
82
209
|
fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling AIApi.get_question, must be smaller than or equal to 30000.'
|
83
210
|
end
|
@@ -98,7 +225,7 @@ module WebScrapingAI
|
|
98
225
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
99
226
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
100
227
|
end
|
101
|
-
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
228
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
102
229
|
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
103
230
|
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
104
231
|
end
|
@@ -106,6 +233,10 @@ module WebScrapingAI
|
|
106
233
|
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
107
234
|
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
108
235
|
end
|
236
|
+
allowable_values = ["json", "text"]
|
237
|
+
if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
|
238
|
+
fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
|
239
|
+
end
|
109
240
|
# resource path
|
110
241
|
local_var_path = '/ai/question'
|
111
242
|
|
@@ -113,24 +244,24 @@ module WebScrapingAI
|
|
113
244
|
query_params = opts[:query_params] || {}
|
114
245
|
query_params[:'url'] = url
|
115
246
|
query_params[:'question'] = opts[:'question'] if !opts[:'question'].nil?
|
116
|
-
query_params[:'context_limit'] = opts[:'context_limit'] if !opts[:'context_limit'].nil?
|
117
|
-
query_params[:'response_tokens'] = opts[:'response_tokens'] if !opts[:'response_tokens'].nil?
|
118
|
-
query_params[:'on_context_limit'] = opts[:'on_context_limit'] if !opts[:'on_context_limit'].nil?
|
119
247
|
query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
|
120
248
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
121
249
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
122
250
|
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
251
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
123
252
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
124
253
|
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
254
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
125
255
|
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
126
256
|
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
127
257
|
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
128
258
|
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
259
|
+
query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
|
129
260
|
|
130
261
|
# header parameters
|
131
262
|
header_params = opts[:header_params] || {}
|
132
263
|
# HTTP header 'Accept' (if needed)
|
133
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
264
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
|
134
265
|
|
135
266
|
# form parameters
|
136
267
|
form_params = opts[:form_params] || {}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -27,13 +27,16 @@ module WebScrapingAI
|
|
27
27
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
28
28
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
29
29
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
30
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
30
31
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
31
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
32
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
33
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
32
34
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
33
35
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
34
36
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
35
37
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
36
38
|
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
|
39
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
37
40
|
# @return [String]
|
38
41
|
def get_html(url, opts = {})
|
39
42
|
data, _status_code, _headers = get_html_with_http_info(url, opts)
|
@@ -48,13 +51,16 @@ module WebScrapingAI
|
|
48
51
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
49
52
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
50
53
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
54
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
51
55
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
52
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
56
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
57
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
53
58
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
54
59
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
55
60
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
56
61
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
57
62
|
# @option opts [Boolean] :return_script_result Return result of the custom JavaScript code (js_script parameter) execution on the target page (false by default, page HTML will be returned). (default to false)
|
63
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
58
64
|
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
59
65
|
def get_html_with_http_info(url, opts = {})
|
60
66
|
if @api_client.config.debugging
|
@@ -84,7 +90,7 @@ module WebScrapingAI
|
|
84
90
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
85
91
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
86
92
|
end
|
87
|
-
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
93
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
88
94
|
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
89
95
|
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
90
96
|
end
|
@@ -92,6 +98,10 @@ module WebScrapingAI
|
|
92
98
|
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
93
99
|
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
94
100
|
end
|
101
|
+
allowable_values = ["json", "text"]
|
102
|
+
if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
|
103
|
+
fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
|
104
|
+
end
|
95
105
|
# resource path
|
96
106
|
local_var_path = '/html'
|
97
107
|
|
@@ -102,18 +112,21 @@ module WebScrapingAI
|
|
102
112
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
103
113
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
104
114
|
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
115
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
105
116
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
106
117
|
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
118
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
107
119
|
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
108
120
|
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
109
121
|
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
110
122
|
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
111
123
|
query_params[:'return_script_result'] = opts[:'return_script_result'] if !opts[:'return_script_result'].nil?
|
124
|
+
query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
|
112
125
|
|
113
126
|
# header parameters
|
114
127
|
header_params = opts[:header_params] || {}
|
115
128
|
# HTTP header 'Accept' (if needed)
|
116
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
129
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
|
117
130
|
|
118
131
|
# form parameters
|
119
132
|
form_params = opts[:form_params] || {}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -28,12 +28,15 @@ module WebScrapingAI
|
|
28
28
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
29
29
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
30
30
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
31
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
31
32
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
32
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
33
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
34
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
33
35
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
34
36
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
35
37
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
36
38
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
39
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
37
40
|
# @return [String]
|
38
41
|
def get_selected(url, opts = {})
|
39
42
|
data, _status_code, _headers = get_selected_with_http_info(url, opts)
|
@@ -49,12 +52,15 @@ module WebScrapingAI
|
|
49
52
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
50
53
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
51
54
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
55
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
52
56
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
53
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
57
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
58
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
54
59
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
55
60
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
56
61
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
57
62
|
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
63
|
+
# @option opts [String] :format Format of the response (text by default). \"json\" will return a JSON object with the response, \"text\" will return a plain text/HTML response. (default to 'json')
|
58
64
|
# @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
|
59
65
|
def get_selected_with_http_info(url, opts = {})
|
60
66
|
if @api_client.config.debugging
|
@@ -84,7 +90,7 @@ module WebScrapingAI
|
|
84
90
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
85
91
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
86
92
|
end
|
87
|
-
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
93
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
88
94
|
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
89
95
|
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
90
96
|
end
|
@@ -92,6 +98,10 @@ module WebScrapingAI
|
|
92
98
|
if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
|
93
99
|
fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
|
94
100
|
end
|
101
|
+
allowable_values = ["json", "text"]
|
102
|
+
if @api_client.config.client_side_validation && opts[:'format'] && !allowable_values.include?(opts[:'format'])
|
103
|
+
fail ArgumentError, "invalid value for \"format\", must be one of #{allowable_values}"
|
104
|
+
end
|
95
105
|
# resource path
|
96
106
|
local_var_path = '/selected'
|
97
107
|
|
@@ -103,17 +113,20 @@ module WebScrapingAI
|
|
103
113
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
104
114
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
105
115
|
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
116
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
106
117
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
107
118
|
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
119
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
108
120
|
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
109
121
|
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
110
122
|
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
111
123
|
query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
|
124
|
+
query_params[:'format'] = opts[:'format'] if !opts[:'format'].nil?
|
112
125
|
|
113
126
|
# header parameters
|
114
127
|
header_params = opts[:header_params] || {}
|
115
128
|
# HTTP header 'Accept' (if needed)
|
116
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
|
129
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html']) unless header_params['Accept']
|
117
130
|
|
118
131
|
# form parameters
|
119
132
|
form_params = opts[:form_params] || {}
|
@@ -153,8 +166,10 @@ module WebScrapingAI
|
|
153
166
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
154
167
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
155
168
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
169
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
156
170
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
157
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
171
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
172
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
158
173
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
159
174
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
160
175
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
@@ -174,8 +189,10 @@ module WebScrapingAI
|
|
174
189
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
175
190
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
176
191
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
192
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
177
193
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
178
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
194
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
195
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
179
196
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
180
197
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
181
198
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
@@ -209,7 +226,7 @@ module WebScrapingAI
|
|
209
226
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
210
227
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
211
228
|
end
|
212
|
-
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
229
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
213
230
|
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
214
231
|
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
215
232
|
end
|
@@ -228,8 +245,10 @@ module WebScrapingAI
|
|
228
245
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
229
246
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
230
247
|
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
248
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
231
249
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
232
250
|
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
251
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
233
252
|
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
234
253
|
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
235
254
|
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
@@ -238,7 +257,7 @@ module WebScrapingAI
|
|
238
257
|
# header parameters
|
239
258
|
header_params = opts[:header_params] || {}
|
240
259
|
# HTTP header 'Accept' (if needed)
|
241
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json'])
|
260
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json']) unless header_params['Accept']
|
242
261
|
|
243
262
|
# form parameters
|
244
263
|
form_params = opts[:form_params] || {}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
=begin
|
2
2
|
#WebScraping.AI
|
3
3
|
|
4
|
-
#WebScraping.AI scraping API provides
|
4
|
+
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
5
5
|
|
6
|
-
The version of the OpenAPI document: 3.
|
6
|
+
The version of the OpenAPI document: 3.2.0
|
7
7
|
Contact: support@webscraping.ai
|
8
8
|
Generated by: https://openapi-generator.tech
|
9
|
-
|
9
|
+
Generator version: 7.11.0
|
10
10
|
|
11
11
|
=end
|
12
12
|
|
@@ -20,7 +20,7 @@ module WebScrapingAI
|
|
20
20
|
@api_client = api_client
|
21
21
|
end
|
22
22
|
# Page text by URL
|
23
|
-
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to
|
23
|
+
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
24
24
|
# @param url [String] URL of the target page.
|
25
25
|
# @param [Hash] opts the optional parameters
|
26
26
|
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys. (default to 'plain')
|
@@ -29,8 +29,10 @@ module WebScrapingAI
|
|
29
29
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
30
30
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
31
31
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
32
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
32
33
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
33
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
34
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
35
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
34
36
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
35
37
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
36
38
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
@@ -42,7 +44,7 @@ module WebScrapingAI
|
|
42
44
|
end
|
43
45
|
|
44
46
|
# Page text by URL
|
45
|
-
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to
|
47
|
+
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
46
48
|
# @param url [String] URL of the target page.
|
47
49
|
# @param [Hash] opts the optional parameters
|
48
50
|
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys. (default to 'plain')
|
@@ -51,8 +53,10 @@ module WebScrapingAI
|
|
51
53
|
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (default to 10000)
|
52
54
|
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default). (default to true)
|
53
55
|
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (default to 2000)
|
56
|
+
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
54
57
|
# @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (default to 'datacenter')
|
55
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
58
|
+
# @option opts [String] :country Country of the proxy to use (US by default). (default to 'us')
|
59
|
+
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
56
60
|
# @option opts [String] :device Type of device emulation. (default to 'desktop')
|
57
61
|
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default). (default to false)
|
58
62
|
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default). (default to false)
|
@@ -90,7 +94,7 @@ module WebScrapingAI
|
|
90
94
|
if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
|
91
95
|
fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
|
92
96
|
end
|
93
|
-
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
|
97
|
+
allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr", "in"]
|
94
98
|
if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
|
95
99
|
fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
|
96
100
|
end
|
@@ -110,8 +114,10 @@ module WebScrapingAI
|
|
110
114
|
query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
|
111
115
|
query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
|
112
116
|
query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
|
117
|
+
query_params[:'wait_for'] = opts[:'wait_for'] if !opts[:'wait_for'].nil?
|
113
118
|
query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
|
114
119
|
query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
|
120
|
+
query_params[:'custom_proxy'] = opts[:'custom_proxy'] if !opts[:'custom_proxy'].nil?
|
115
121
|
query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
|
116
122
|
query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
|
117
123
|
query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
|
@@ -120,7 +126,7 @@ module WebScrapingAI
|
|
120
126
|
# header parameters
|
121
127
|
header_params = opts[:header_params] || {}
|
122
128
|
# HTTP header 'Accept' (if needed)
|
123
|
-
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml'])
|
129
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml']) unless header_params['Accept']
|
124
130
|
|
125
131
|
# form parameters
|
126
132
|
form_params = opts[:form_params] || {}
|