webscraping_ai 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +58 -21
- data/docs/Error.md +17 -0
- data/docs/HTMLApi.md +136 -0
- data/docs/PageError.md +19 -0
- data/docs/SelectedHTMLApi.md +274 -0
- data/lib/webscraping_ai.rb +7 -5
- data/lib/webscraping_ai/api/html_api.rb +133 -35
- data/lib/webscraping_ai/api/selected_html_api.rb +382 -0
- data/lib/webscraping_ai/api_client.rb +11 -9
- data/lib/webscraping_ai/api_error.rb +4 -4
- data/lib/webscraping_ai/configuration.rb +7 -7
- data/lib/webscraping_ai/models/error.rb +207 -0
- data/lib/webscraping_ai/models/{scrapped_page.rb → page_error.rb} +16 -36
- data/lib/webscraping_ai/version.rb +5 -5
- data/spec/api/html_api_spec.rb +36 -15
- data/spec/api/selected_html_api_spec.rb +103 -0
- data/spec/api_client_spec.rb +5 -5
- data/spec/configuration_spec.rb +7 -7
- data/spec/models/error_spec.rb +41 -0
- data/spec/models/page_error_spec.rb +47 -0
- data/spec/spec_helper.rb +4 -4
- data/webscraping_ai.gemspec +5 -5
- metadata +30 -23
- data/Gemfile.lock +0 -70
- data/docs/HtmlApi.md +0 -73
- data/docs/ScrappedPage.md +0 -23
- data/spec/models/scrapped_page_spec.rb +0 -59
data/docs/HtmlApi.md
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
# WebScrapingAI::HtmlApi
|
2
|
-
|
3
|
-
All URIs are relative to *https://webscraping.ai/api*
|
4
|
-
|
5
|
-
Method | HTTP request | Description
|
6
|
-
------------- | ------------- | -------------
|
7
|
-
[**get_page**](HtmlApi.md#get_page) | **GET** / | Get page HTML by URL (renders JS in Chrome and uses rotating proxies)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
## get_page
|
12
|
-
|
13
|
-
> ScrappedPage get_page(url, opts)
|
14
|
-
|
15
|
-
Get page HTML by URL (renders JS in Chrome and uses rotating proxies)
|
16
|
-
|
17
|
-
### Example
|
18
|
-
|
19
|
-
```ruby
|
20
|
-
# load the gem
|
21
|
-
require 'webscraping_ai'
|
22
|
-
# setup authorization
|
23
|
-
WebScrapingAI.configure do |config|
|
24
|
-
# Configure API key authorization: api_key
|
25
|
-
config.api_key['api_key'] = 'YOUR API KEY'
|
26
|
-
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
27
|
-
#config.api_key_prefix['api_key'] = 'Bearer'
|
28
|
-
end
|
29
|
-
|
30
|
-
api_instance = WebScrapingAI::HtmlApi.new
|
31
|
-
url = 'https://example.com' # String | URL of the page to get
|
32
|
-
opts = {
|
33
|
-
selector: 'html', # String | CSS selector to get a part of the page (null by default, returns whole page HTML)
|
34
|
-
outer_html: false, # Boolean | Return outer HTML of the selected element (false by default, returns inner HTML)
|
35
|
-
proxy: 'US', # String | Proxy country code, for geotargeting (US by default)
|
36
|
-
disable_js: false, # Boolean | Disable JS execution (false by default)
|
37
|
-
inline_css: false # Boolean | Inline included CSS files to make page viewable on other domains (false by default)
|
38
|
-
}
|
39
|
-
|
40
|
-
begin
|
41
|
-
#Get page HTML by URL (renders JS in Chrome and uses rotating proxies)
|
42
|
-
result = api_instance.get_page(url, opts)
|
43
|
-
p result
|
44
|
-
rescue WebScrapingAI::ApiError => e
|
45
|
-
puts "Exception when calling HtmlApi->get_page: #{e}"
|
46
|
-
end
|
47
|
-
```
|
48
|
-
|
49
|
-
### Parameters
|
50
|
-
|
51
|
-
|
52
|
-
Name | Type | Description | Notes
|
53
|
-
------------- | ------------- | ------------- | -------------
|
54
|
-
**url** | **String**| URL of the page to get |
|
55
|
-
**selector** | **String**| CSS selector to get a part of the page (null by default, returns whole page HTML) | [optional]
|
56
|
-
**outer_html** | **Boolean**| Return outer HTML of the selected element (false by default, returns inner HTML) | [optional]
|
57
|
-
**proxy** | **String**| Proxy country code, for geotargeting (US by default) | [optional]
|
58
|
-
**disable_js** | **Boolean**| Disable JS execution (false by default) | [optional]
|
59
|
-
**inline_css** | **Boolean**| Inline included CSS files to make page viewable on other domains (false by default) | [optional]
|
60
|
-
|
61
|
-
### Return type
|
62
|
-
|
63
|
-
[**ScrappedPage**](ScrappedPage.md)
|
64
|
-
|
65
|
-
### Authorization
|
66
|
-
|
67
|
-
[api_key](../README.md#api_key)
|
68
|
-
|
69
|
-
### HTTP request headers
|
70
|
-
|
71
|
-
- **Content-Type**: Not defined
|
72
|
-
- **Accept**: application/json
|
73
|
-
|
data/docs/ScrappedPage.md
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# WebScrapingAI::ScrappedPage
|
2
|
-
|
3
|
-
## Properties
|
4
|
-
|
5
|
-
Name | Type | Description | Notes
|
6
|
-
------------ | ------------- | ------------- | -------------
|
7
|
-
**size_bytes** | **Integer** | Page HTML content size in bytes | [optional]
|
8
|
-
**html** | **String** | HTML of the full page or a selected area | [optional]
|
9
|
-
**status** | **Integer** | Response HTTP status code (200, 404, 302, etc) | [optional]
|
10
|
-
**status_message** | **String** | Response HTTP status message | [optional]
|
11
|
-
|
12
|
-
## Code Sample
|
13
|
-
|
14
|
-
```ruby
|
15
|
-
require 'WebScrapingAI'
|
16
|
-
|
17
|
-
instance = WebScrapingAI::ScrappedPage.new(size_bytes: null,
|
18
|
-
html: null,
|
19
|
-
status: null,
|
20
|
-
status_message: null)
|
21
|
-
```
|
22
|
-
|
23
|
-
|
@@ -1,59 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
#WebScraping.AI
|
3
|
-
|
4
|
-
#This is a sample server Petstore server. For this sample, you can use the api key `special-key` to test the authorization filters.
|
5
|
-
|
6
|
-
The version of the OpenAPI document: 1.0.0
|
7
|
-
|
8
|
-
Generated by: https://openapi-generator.tech
|
9
|
-
OpenAPI Generator version: 4.2.3
|
10
|
-
|
11
|
-
=end
|
12
|
-
|
13
|
-
require 'spec_helper'
|
14
|
-
require 'json'
|
15
|
-
require 'date'
|
16
|
-
|
17
|
-
# Unit tests for WebScrapingAI::ScrappedPage
|
18
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
19
|
-
# Please update as you see appropriate
|
20
|
-
describe 'ScrappedPage' do
|
21
|
-
before do
|
22
|
-
# run before each test
|
23
|
-
@instance = WebScrapingAI::ScrappedPage.new
|
24
|
-
end
|
25
|
-
|
26
|
-
after do
|
27
|
-
# run after each test
|
28
|
-
end
|
29
|
-
|
30
|
-
describe 'test an instance of ScrappedPage' do
|
31
|
-
it 'should create an instance of ScrappedPage' do
|
32
|
-
expect(@instance).to be_instance_of(WebScrapingAI::ScrappedPage)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
describe 'test attribute "size_bytes"' do
|
36
|
-
it 'should work' do
|
37
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
describe 'test attribute "html"' do
|
42
|
-
it 'should work' do
|
43
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
describe 'test attribute "status"' do
|
48
|
-
it 'should work' do
|
49
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
describe 'test attribute "status_message"' do
|
54
|
-
it 'should work' do
|
55
|
-
# assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|