webscraping_ai 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +58 -21
- data/docs/Error.md +17 -0
- data/docs/HTMLApi.md +136 -0
- data/docs/PageError.md +19 -0
- data/docs/SelectedHTMLApi.md +274 -0
- data/lib/webscraping_ai.rb +7 -5
- data/lib/webscraping_ai/api/html_api.rb +133 -35
- data/lib/webscraping_ai/api/selected_html_api.rb +382 -0
- data/lib/webscraping_ai/api_client.rb +11 -9
- data/lib/webscraping_ai/api_error.rb +4 -4
- data/lib/webscraping_ai/configuration.rb +7 -7
- data/lib/webscraping_ai/models/error.rb +207 -0
- data/lib/webscraping_ai/models/{scrapped_page.rb → page_error.rb} +16 -36
- data/lib/webscraping_ai/version.rb +5 -5
- data/spec/api/html_api_spec.rb +36 -15
- data/spec/api/selected_html_api_spec.rb +103 -0
- data/spec/api_client_spec.rb +5 -5
- data/spec/configuration_spec.rb +7 -7
- data/spec/models/error_spec.rb +41 -0
- data/spec/models/page_error_spec.rb +47 -0
- data/spec/spec_helper.rb +4 -4
- data/webscraping_ai.gemspec +5 -5
- metadata +30 -23
- data/Gemfile.lock +0 -70
- data/docs/HtmlApi.md +0 -73
- data/docs/ScrappedPage.md +0 -23
- data/spec/models/scrapped_page_spec.rb +0 -59
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07d4d1eb8c57b1363022778dda4537e1e4780a88bdeedd261e26a9e8f319c664
|
4
|
+
data.tar.gz: 473ebf4a64f87b3fdb5d04209b3a359a8e841a501ec97b26f71dd8261f40efda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26b7ab5c70da5df4802990aae74c3448751d9942d3fe2cfb7dcba186ef13f53887b9ae95b8bb495624c28a2f5400436212d5180facc631bda134499901e19e17
|
7
|
+
data.tar.gz: a0ae994a5d415e381effa2bb8d7169954b781452d967de888cf04374715092ac8d1e0c912be3bd6378c1b8f72522056d4fe827ad9c4ec00ea5a98d9b950b2d96
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,21 +2,52 @@
|
|
2
2
|
|
3
3
|
WebScrapingAI - the Ruby gem for the WebScraping.AI
|
4
4
|
|
5
|
-
A client for https://webscraping.ai API. It provides Chrome JS rendering, rotating proxies and HTML parsing
|
5
|
+
A client for https://webscraping.ai API. It provides a web scraping automation API with Chrome JS rendering, rotating proxies and built-in HTML parsing.
|
6
6
|
|
7
7
|
This SDK is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
|
8
8
|
|
9
|
-
- API version:
|
10
|
-
- Package version:
|
9
|
+
- API version: 2.0.0
|
10
|
+
- Package version: 2.0.0
|
11
11
|
- Build package: org.openapitools.codegen.languages.RubyClientCodegen
|
12
|
+
For more information, please visit [https://webscraping.ai](https://webscraping.ai)
|
12
13
|
|
13
14
|
## Installation
|
14
15
|
|
15
|
-
###
|
16
|
+
### Build a gem
|
16
17
|
|
17
|
-
|
18
|
+
To build the Ruby code into a gem:
|
18
19
|
|
19
|
-
|
20
|
+
```shell
|
21
|
+
gem build webscraping_ai.gemspec
|
22
|
+
```
|
23
|
+
|
24
|
+
Then either install the gem locally:
|
25
|
+
|
26
|
+
```shell
|
27
|
+
gem install ./webscraping_ai-2.0.0.gem
|
28
|
+
```
|
29
|
+
|
30
|
+
(for development, run `gem install --dev ./webscraping_ai-2.0.0.gem` to install the development dependencies)
|
31
|
+
|
32
|
+
or publish the gem to a gem hosting service, e.g. [RubyGems](https://rubygems.org/).
|
33
|
+
|
34
|
+
Finally add this to the Gemfile:
|
35
|
+
|
36
|
+
gem 'webscraping_ai', '~> 2.0.0'
|
37
|
+
|
38
|
+
### Install from Git
|
39
|
+
|
40
|
+
If the Ruby gem is hosted at a git repository: https://github.com/webscraping-ai/webscraping-ai-ruby, then add the following in the Gemfile:
|
41
|
+
|
42
|
+
gem 'webscraping_ai', :git => 'https://github.com/webscraping-ai/webscraping-ai-ruby.git'
|
43
|
+
|
44
|
+
### Include the Ruby code directly
|
45
|
+
|
46
|
+
Include the Ruby code directly using `-I` as follows:
|
47
|
+
|
48
|
+
```shell
|
49
|
+
ruby -Ilib script.rb
|
50
|
+
```
|
20
51
|
|
21
52
|
## Getting Started
|
22
53
|
|
@@ -29,41 +60,47 @@ require 'webscraping_ai'
|
|
29
60
|
# Setup authorization
|
30
61
|
WebScrapingAI.configure do |config|
|
31
62
|
# Configure API key authorization: api_key
|
32
|
-
config.api_key['api_key'] = '
|
63
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
64
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
65
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
33
66
|
end
|
34
67
|
|
35
|
-
api_instance = WebScrapingAI::
|
36
|
-
url = 'https://example.com' # String | URL of the page
|
68
|
+
api_instance = WebScrapingAI::HTMLApi.new
|
69
|
+
url = 'https://example.com' # String | URL of the target page
|
37
70
|
opts = {
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
inline_css: false # Boolean | Inline included CSS files to make page viewable on other domains (false by default)
|
71
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
72
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
73
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
74
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
43
75
|
}
|
44
76
|
|
45
77
|
begin
|
46
|
-
#
|
47
|
-
|
48
|
-
p result
|
78
|
+
#Page HTML by URL
|
79
|
+
api_instance.get_html(url, opts)
|
49
80
|
rescue WebScrapingAI::ApiError => e
|
50
|
-
puts "Exception when calling
|
81
|
+
puts "Exception when calling HTMLApi->get_html: #{e}"
|
51
82
|
end
|
52
83
|
|
53
84
|
```
|
54
85
|
|
55
86
|
## Documentation for API Endpoints
|
56
87
|
|
57
|
-
All URIs are relative to *https://webscraping.ai
|
88
|
+
All URIs are relative to *https://api.webscraping.ai*
|
58
89
|
|
59
90
|
Class | Method | HTTP request | Description
|
60
91
|
------------ | ------------- | ------------- | -------------
|
61
|
-
*WebScrapingAI::
|
92
|
+
*WebScrapingAI::HTMLApi* | [**get_html**](docs/HTMLApi.md#get_html) | **GET** /html | Page HTML by URL
|
93
|
+
*WebScrapingAI::HTMLApi* | [**post_html**](docs/HTMLApi.md#post_html) | **POST** /html | Page HTML by URL with POST request to the target page
|
94
|
+
*WebScrapingAI::SelectedHTMLApi* | [**get_selected**](docs/SelectedHTMLApi.md#get_selected) | **GET** /selected | HTML of a selected page area by URL and CSS selector
|
95
|
+
*WebScrapingAI::SelectedHTMLApi* | [**get_selected_multiple**](docs/SelectedHTMLApi.md#get_selected_multiple) | **GET** /selected-multiple | HTML of multiple page areas by URL and CSS selectors
|
96
|
+
*WebScrapingAI::SelectedHTMLApi* | [**post_selected**](docs/SelectedHTMLApi.md#post_selected) | **POST** /selected | HTML of a selected page area by URL and CSS selector, with POST request to the target page
|
97
|
+
*WebScrapingAI::SelectedHTMLApi* | [**post_selected_multiple**](docs/SelectedHTMLApi.md#post_selected_multiple) | **POST** /selected-multiple | HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
|
62
98
|
|
63
99
|
|
64
100
|
## Documentation for Models
|
65
101
|
|
66
|
-
- [WebScrapingAI::
|
102
|
+
- [WebScrapingAI::Error](docs/Error.md)
|
103
|
+
- [WebScrapingAI::PageError](docs/PageError.md)
|
67
104
|
|
68
105
|
|
69
106
|
## Documentation for Authorization
|
data/docs/Error.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# WebScrapingAI::Error
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
|
5
|
+
Name | Type | Description | Notes
|
6
|
+
------------ | ------------- | ------------- | -------------
|
7
|
+
**message** | **String** | Error description | [optional]
|
8
|
+
|
9
|
+
## Code Sample
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require 'webscraping_ai'
|
13
|
+
|
14
|
+
instance = WebScrapingAI::Error.new(message: nil)
|
15
|
+
```
|
16
|
+
|
17
|
+
|
data/docs/HTMLApi.md
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
# WebScrapingAI::HTMLApi
|
2
|
+
|
3
|
+
All URIs are relative to *https://api.webscraping.ai*
|
4
|
+
|
5
|
+
Method | HTTP request | Description
|
6
|
+
------------- | ------------- | -------------
|
7
|
+
[**get_html**](HTMLApi.md#get_html) | **GET** /html | Page HTML by URL
|
8
|
+
[**post_html**](HTMLApi.md#post_html) | **POST** /html | Page HTML by URL with POST request to the target page
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
## get_html
|
13
|
+
|
14
|
+
> get_html(url, opts)
|
15
|
+
|
16
|
+
Page HTML by URL
|
17
|
+
|
18
|
+
Returns just HTML on success, JSON on error
|
19
|
+
|
20
|
+
### Example
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
# load the gem
|
24
|
+
require 'webscraping_ai'
|
25
|
+
# setup authorization
|
26
|
+
WebScrapingAI.configure do |config|
|
27
|
+
# Configure API key authorization: api_key
|
28
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
29
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
30
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
31
|
+
end
|
32
|
+
|
33
|
+
api_instance = WebScrapingAI::HTMLApi.new
|
34
|
+
url = 'https://example.com' # String | URL of the target page
|
35
|
+
opts = {
|
36
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
37
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
38
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
39
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
40
|
+
}
|
41
|
+
|
42
|
+
begin
|
43
|
+
#Page HTML by URL
|
44
|
+
api_instance.get_html(url, opts)
|
45
|
+
rescue WebScrapingAI::ApiError => e
|
46
|
+
puts "Exception when calling HTMLApi->get_html: #{e}"
|
47
|
+
end
|
48
|
+
```
|
49
|
+
|
50
|
+
### Parameters
|
51
|
+
|
52
|
+
|
53
|
+
Name | Type | Description | Notes
|
54
|
+
------------- | ------------- | ------------- | -------------
|
55
|
+
**url** | **String**| URL of the target page |
|
56
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
57
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
58
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
59
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
60
|
+
|
61
|
+
### Return type
|
62
|
+
|
63
|
+
nil (empty response body)
|
64
|
+
|
65
|
+
### Authorization
|
66
|
+
|
67
|
+
[api_key](../README.md#api_key)
|
68
|
+
|
69
|
+
### HTTP request headers
|
70
|
+
|
71
|
+
- **Content-Type**: Not defined
|
72
|
+
- **Accept**: application/json, text/html
|
73
|
+
|
74
|
+
|
75
|
+
## post_html
|
76
|
+
|
77
|
+
> post_html(url, opts)
|
78
|
+
|
79
|
+
Page HTML by URL with POST request to the target page
|
80
|
+
|
81
|
+
Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
82
|
+
|
83
|
+
### Example
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
# load the gem
|
87
|
+
require 'webscraping_ai'
|
88
|
+
# setup authorization
|
89
|
+
WebScrapingAI.configure do |config|
|
90
|
+
# Configure API key authorization: api_key
|
91
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
92
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
93
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
94
|
+
end
|
95
|
+
|
96
|
+
api_instance = WebScrapingAI::HTMLApi.new
|
97
|
+
url = 'https://example.com' # String | URL of the target page
|
98
|
+
opts = {
|
99
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
100
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
101
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
102
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
103
|
+
}
|
104
|
+
|
105
|
+
begin
|
106
|
+
#Page HTML by URL with POST request to the target page
|
107
|
+
api_instance.post_html(url, opts)
|
108
|
+
rescue WebScrapingAI::ApiError => e
|
109
|
+
puts "Exception when calling HTMLApi->post_html: #{e}"
|
110
|
+
end
|
111
|
+
```
|
112
|
+
|
113
|
+
### Parameters
|
114
|
+
|
115
|
+
|
116
|
+
Name | Type | Description | Notes
|
117
|
+
------------- | ------------- | ------------- | -------------
|
118
|
+
**url** | **String**| URL of the target page |
|
119
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
120
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
121
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
122
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
123
|
+
|
124
|
+
### Return type
|
125
|
+
|
126
|
+
nil (empty response body)
|
127
|
+
|
128
|
+
### Authorization
|
129
|
+
|
130
|
+
[api_key](../README.md#api_key)
|
131
|
+
|
132
|
+
### HTTP request headers
|
133
|
+
|
134
|
+
- **Content-Type**: Not defined
|
135
|
+
- **Accept**: application/json, text/html
|
136
|
+
|
data/docs/PageError.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# WebScrapingAI::PageError
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
|
5
|
+
Name | Type | Description | Notes
|
6
|
+
------------ | ------------- | ------------- | -------------
|
7
|
+
**status_code** | **Integer** | Response HTTP status code (403, 500, etc) | [optional]
|
8
|
+
**status_message** | **String** | Response HTTP status message | [optional]
|
9
|
+
|
10
|
+
## Code Sample
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
require 'webscraping_ai'
|
14
|
+
|
15
|
+
instance = WebScrapingAI::PageError.new(status_code: nil,
|
16
|
+
status_message: nil)
|
17
|
+
```
|
18
|
+
|
19
|
+
|
@@ -0,0 +1,274 @@
|
|
1
|
+
# WebScrapingAI::SelectedHTMLApi
|
2
|
+
|
3
|
+
All URIs are relative to *https://api.webscraping.ai*
|
4
|
+
|
5
|
+
Method | HTTP request | Description
|
6
|
+
------------- | ------------- | -------------
|
7
|
+
[**get_selected**](SelectedHTMLApi.md#get_selected) | **GET** /selected | HTML of a selected page area by URL and CSS selector
|
8
|
+
[**get_selected_multiple**](SelectedHTMLApi.md#get_selected_multiple) | **GET** /selected-multiple | HTML of multiple page areas by URL and CSS selectors
|
9
|
+
[**post_selected**](SelectedHTMLApi.md#post_selected) | **POST** /selected | HTML of a selected page area by URL and CSS selector, with POST request to the target page
|
10
|
+
[**post_selected_multiple**](SelectedHTMLApi.md#post_selected_multiple) | **POST** /selected-multiple | HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
## get_selected
|
15
|
+
|
16
|
+
> get_selected(url, opts)
|
17
|
+
|
18
|
+
HTML of a selected page area by URL and CSS selector
|
19
|
+
|
20
|
+
Returns just HTML on success, JSON on error
|
21
|
+
|
22
|
+
### Example
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
# load the gem
|
26
|
+
require 'webscraping_ai'
|
27
|
+
# setup authorization
|
28
|
+
WebScrapingAI.configure do |config|
|
29
|
+
# Configure API key authorization: api_key
|
30
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
31
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
32
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
33
|
+
end
|
34
|
+
|
35
|
+
api_instance = WebScrapingAI::SelectedHTMLApi.new
|
36
|
+
url = 'https://example.com' # String | URL of the target page
|
37
|
+
opts = {
|
38
|
+
selector: 'h1', # String | CSS selector (null by default, returns whole page HTML)
|
39
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
40
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
41
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
42
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
43
|
+
}
|
44
|
+
|
45
|
+
begin
|
46
|
+
#HTML of a selected page area by URL and CSS selector
|
47
|
+
api_instance.get_selected(url, opts)
|
48
|
+
rescue WebScrapingAI::ApiError => e
|
49
|
+
puts "Exception when calling SelectedHTMLApi->get_selected: #{e}"
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
### Parameters
|
54
|
+
|
55
|
+
|
56
|
+
Name | Type | Description | Notes
|
57
|
+
------------- | ------------- | ------------- | -------------
|
58
|
+
**url** | **String**| URL of the target page |
|
59
|
+
**selector** | **String**| CSS selector (null by default, returns whole page HTML) | [optional]
|
60
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
61
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
62
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
63
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
64
|
+
|
65
|
+
### Return type
|
66
|
+
|
67
|
+
nil (empty response body)
|
68
|
+
|
69
|
+
### Authorization
|
70
|
+
|
71
|
+
[api_key](../README.md#api_key)
|
72
|
+
|
73
|
+
### HTTP request headers
|
74
|
+
|
75
|
+
- **Content-Type**: Not defined
|
76
|
+
- **Accept**: application/json, text/html
|
77
|
+
|
78
|
+
|
79
|
+
## get_selected_multiple
|
80
|
+
|
81
|
+
> Array<String> get_selected_multiple(url, opts)
|
82
|
+
|
83
|
+
HTML of multiple page areas by URL and CSS selectors
|
84
|
+
|
85
|
+
Always returns JSON
|
86
|
+
|
87
|
+
### Example
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
# load the gem
|
91
|
+
require 'webscraping_ai'
|
92
|
+
# setup authorization
|
93
|
+
WebScrapingAI.configure do |config|
|
94
|
+
# Configure API key authorization: api_key
|
95
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
96
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
97
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
98
|
+
end
|
99
|
+
|
100
|
+
api_instance = WebScrapingAI::SelectedHTMLApi.new
|
101
|
+
url = 'https://example.com' # String | URL of the target page
|
102
|
+
opts = {
|
103
|
+
selectors: ['[\"h1\"]'], # Array<String> | Multiple CSS selectors (null by default, returns whole page HTML)
|
104
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
105
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
106
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
107
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
108
|
+
}
|
109
|
+
|
110
|
+
begin
|
111
|
+
#HTML of multiple page areas by URL and CSS selectors
|
112
|
+
result = api_instance.get_selected_multiple(url, opts)
|
113
|
+
p result
|
114
|
+
rescue WebScrapingAI::ApiError => e
|
115
|
+
puts "Exception when calling SelectedHTMLApi->get_selected_multiple: #{e}"
|
116
|
+
end
|
117
|
+
```
|
118
|
+
|
119
|
+
### Parameters
|
120
|
+
|
121
|
+
|
122
|
+
Name | Type | Description | Notes
|
123
|
+
------------- | ------------- | ------------- | -------------
|
124
|
+
**url** | **String**| URL of the target page |
|
125
|
+
**selectors** | [**Array<String>**](String.md)| Multiple CSS selectors (null by default, returns whole page HTML) | [optional]
|
126
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
127
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
128
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
129
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
130
|
+
|
131
|
+
### Return type
|
132
|
+
|
133
|
+
**Array<String>**
|
134
|
+
|
135
|
+
### Authorization
|
136
|
+
|
137
|
+
[api_key](../README.md#api_key)
|
138
|
+
|
139
|
+
### HTTP request headers
|
140
|
+
|
141
|
+
- **Content-Type**: Not defined
|
142
|
+
- **Accept**: application/json
|
143
|
+
|
144
|
+
|
145
|
+
## post_selected
|
146
|
+
|
147
|
+
> post_selected(url, opts)
|
148
|
+
|
149
|
+
HTML of a selected page area by URL and CSS selector, with POST request to the target page
|
150
|
+
|
151
|
+
Returns just HTML on success, JSON on error. Request body will be passed to the target page.
|
152
|
+
|
153
|
+
### Example
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
# load the gem
|
157
|
+
require 'webscraping_ai'
|
158
|
+
# setup authorization
|
159
|
+
WebScrapingAI.configure do |config|
|
160
|
+
# Configure API key authorization: api_key
|
161
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
162
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
163
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
164
|
+
end
|
165
|
+
|
166
|
+
api_instance = WebScrapingAI::SelectedHTMLApi.new
|
167
|
+
url = 'https://example.com' # String | URL of the target page
|
168
|
+
opts = {
|
169
|
+
selector: 'h1', # String | CSS selector (null by default, returns whole page HTML)
|
170
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
171
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
172
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
173
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
174
|
+
}
|
175
|
+
|
176
|
+
begin
|
177
|
+
#HTML of a selected page area by URL and CSS selector, with POST request to the target page
|
178
|
+
api_instance.post_selected(url, opts)
|
179
|
+
rescue WebScrapingAI::ApiError => e
|
180
|
+
puts "Exception when calling SelectedHTMLApi->post_selected: #{e}"
|
181
|
+
end
|
182
|
+
```
|
183
|
+
|
184
|
+
### Parameters
|
185
|
+
|
186
|
+
|
187
|
+
Name | Type | Description | Notes
|
188
|
+
------------- | ------------- | ------------- | -------------
|
189
|
+
**url** | **String**| URL of the target page |
|
190
|
+
**selector** | **String**| CSS selector (null by default, returns whole page HTML) | [optional]
|
191
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
192
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
193
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
194
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
195
|
+
|
196
|
+
### Return type
|
197
|
+
|
198
|
+
nil (empty response body)
|
199
|
+
|
200
|
+
### Authorization
|
201
|
+
|
202
|
+
[api_key](../README.md#api_key)
|
203
|
+
|
204
|
+
### HTTP request headers
|
205
|
+
|
206
|
+
- **Content-Type**: Not defined
|
207
|
+
- **Accept**: application/json, text/html
|
208
|
+
|
209
|
+
|
210
|
+
## post_selected_multiple
|
211
|
+
|
212
|
+
> Array<String> post_selected_multiple(url, opts)
|
213
|
+
|
214
|
+
HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
|
215
|
+
|
216
|
+
Always returns JSON. Request body will be passed to the target page.
|
217
|
+
|
218
|
+
### Example
|
219
|
+
|
220
|
+
```ruby
|
221
|
+
# load the gem
|
222
|
+
require 'webscraping_ai'
|
223
|
+
# setup authorization
|
224
|
+
WebScrapingAI.configure do |config|
|
225
|
+
# Configure API key authorization: api_key
|
226
|
+
config.api_key['api_key'] = 'YOUR API KEY'
|
227
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
228
|
+
#config.api_key_prefix['api_key'] = 'Bearer'
|
229
|
+
end
|
230
|
+
|
231
|
+
api_instance = WebScrapingAI::SelectedHTMLApi.new
|
232
|
+
url = 'https://example.com' # String | URL of the target page
|
233
|
+
opts = {
|
234
|
+
selectors: ['[\"h1\"]'], # Array<String> | Multiple CSS selectors (null by default, returns whole page HTML)
|
235
|
+
headers: {'key' => '{\"Cookie\":\"session=some_id\"}'}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"})
|
236
|
+
timeout: 5000, # Integer | Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000)
|
237
|
+
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default), costs 2 requests
|
238
|
+
proxy: 'datacenter' # String | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default)
|
239
|
+
}
|
240
|
+
|
241
|
+
begin
|
242
|
+
#HTML of multiple page areas by URL and CSS selectors, with POST request to the target page
|
243
|
+
result = api_instance.post_selected_multiple(url, opts)
|
244
|
+
p result
|
245
|
+
rescue WebScrapingAI::ApiError => e
|
246
|
+
puts "Exception when calling SelectedHTMLApi->post_selected_multiple: #{e}"
|
247
|
+
end
|
248
|
+
```
|
249
|
+
|
250
|
+
### Parameters
|
251
|
+
|
252
|
+
|
253
|
+
Name | Type | Description | Notes
|
254
|
+
------------- | ------------- | ------------- | -------------
|
255
|
+
**url** | **String**| URL of the target page |
|
256
|
+
**selectors** | [**Array<String>**](String.md)| Multiple CSS selectors (null by default, returns whole page HTML) | [optional]
|
257
|
+
**headers** | [**Hash<String, String>**](String.md)| HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}) | [optional]
|
258
|
+
**timeout** | **Integer**| Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) | [optional] [default to 5000]
|
259
|
+
**js** | **Boolean**| Execute on-page JavaScript using a headless browser (true by default), costs 2 requests | [optional] [default to true]
|
260
|
+
**proxy** | **String**| Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) | [optional] [default to 'datacenter']
|
261
|
+
|
262
|
+
### Return type
|
263
|
+
|
264
|
+
**Array<String>**
|
265
|
+
|
266
|
+
### Authorization
|
267
|
+
|
268
|
+
[api_key](../README.md#api_key)
|
269
|
+
|
270
|
+
### HTTP request headers
|
271
|
+
|
272
|
+
- **Content-Type**: Not defined
|
273
|
+
- **Accept**: application/json
|
274
|
+
|