webscraping_ai 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE +21 -0
- data/README.md +110 -85
- data/lib/webscraping_ai/client.rb +130 -0
- data/lib/webscraping_ai/configuration.rb +10 -300
- data/lib/webscraping_ai/errors.rb +44 -0
- data/lib/webscraping_ai/query_encoder.rb +74 -0
- data/lib/webscraping_ai/version.rb +1 -13
- data/lib/webscraping_ai.rb +15 -40
- data/webscraping_ai.gemspec +33 -36
- metadata +27 -74
- data/Gemfile +0 -9
- data/Rakefile +0 -10
- data/docs/AIApi.md +0 -209
- data/docs/Account.md +0 -24
- data/docs/AccountApi.md +0 -76
- data/docs/Error.md +0 -24
- data/docs/HTMLApi.md +0 -109
- data/docs/SelectedHTMLApi.md +0 -209
- data/docs/TextApi.md +0 -109
- data/git_push.sh +0 -57
- data/lib/webscraping_ai/api/account_api.rb +0 -79
- data/lib/webscraping_ai/api/ai_api.rb +0 -295
- data/lib/webscraping_ai/api/html_api.rb +0 -160
- data/lib/webscraping_ai/api/selected_html_api.rb +0 -291
- data/lib/webscraping_ai/api/text_api.rb +0 -160
- data/lib/webscraping_ai/api_client.rb +0 -397
- data/lib/webscraping_ai/api_error.rb +0 -58
- data/lib/webscraping_ai/api_model_base.rb +0 -88
- data/lib/webscraping_ai/models/account.rb +0 -178
- data/lib/webscraping_ai/models/error.rb +0 -178
- data/spec/api/account_api_spec.rb +0 -46
- data/spec/api/ai_api_spec.rb +0 -86
- data/spec/api/html_api_spec.rb +0 -61
- data/spec/api/selected_html_api_spec.rb +0 -86
- data/spec/api/text_api_spec.rb +0 -61
- data/spec/models/account_spec.rb +0 -54
- data/spec/models/error_spec.rb +0 -54
- data/spec/spec_helper.rb +0 -111
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9118cbe0e21d02653f6d56c66be8c05a21735241529782164f1e5352236d8567
|
|
4
|
+
data.tar.gz: 3e6d144a7dac8202e8e1405a612c1cacfda06301d17cb6c815ba35e6333237f7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: af8e1bab5b5887224e6a0bae94ce178e2cc1a7405310fa60f4d11de621a6fcc1dbe28034b11247a29ff38cc33a63b7a3fc12908e88d54efe4500fe3cf617f801
|
|
7
|
+
data.tar.gz: 1e90885ed4a79632c94102863325eabfe5968cbe1901dec0f6a568526a416a3cf22550f8b8534927008c8aeef6e55c33372cdba6b50c62f74832cd74213879bd
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file. This project follows [Semantic Versioning](https://semver.org/).
|
|
4
|
+
|
|
5
|
+
## [4.0.0] - Unreleased
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- **Complete rewrite**: the gem is now a hand-written, idiomatic Ruby client rather than OpenAPI-generated code.
|
|
10
|
+
- New unified entry point: `WebScrapingAI::Client.new(api_key: ...)` with one method per endpoint (`#html`, `#text`, `#selected`, `#selected_multiple`, `#question`, `#fields`, `#account`).
|
|
11
|
+
- Switched HTTP layer from `typhoeus` to `faraday ~> 2.0`.
|
|
12
|
+
- Minimum Ruby version is now `3.1`.
|
|
13
|
+
|
|
14
|
+
### Removed
|
|
15
|
+
|
|
16
|
+
- `WebScrapingAI::HTMLApi`, `WebScrapingAI::TextApi`, `WebScrapingAI::SelectedHTMLApi`, `WebScrapingAI::AIApi`, `WebScrapingAI::AccountApi` classes and all generated model classes. This is a hard break — see the README for the new API surface.
|
|
17
|
+
- `typhoeus` runtime dependency.
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- Typed error hierarchy: `BadRequestError`, `PaymentRequiredError`, `AuthenticationError`, `RateLimitError`, `ServerError`, `GatewayTimeoutError` (all `< WebScrapingAI::ApiError`), plus `TimeoutError` and `ConnectionError` for transport failures.
|
|
22
|
+
- Module-level configuration: `WebScrapingAI.configure { |c| c.api_key = "..." }`.
|
|
23
|
+
- `WEBSCRAPING_AI_API_KEY` environment variable picked up by default.
|
|
24
|
+
- RSpec test suite with WebMock-based stubs.
|
|
25
|
+
- GitHub Actions workflows for CI and RubyGems trusted publishing on release.
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) WebScraping.AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
|
@@ -1,128 +1,153 @@
|
|
|
1
|
-
#
|
|
1
|
+
# WebScraping.AI Ruby Client
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Official Ruby client for the [WebScraping.AI](https://webscraping.ai) API. Provides LLM-powered web scraping with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
This SDK is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
|
|
8
|
-
|
|
9
|
-
- API version: 3.2.1
|
|
10
|
-
- Package version: 3.2.1
|
|
11
|
-
- Generator version: 7.22.0
|
|
12
|
-
- Build package: org.openapitools.codegen.languages.RubyClientCodegen
|
|
13
|
-
For more information, please visit [https://webscraping.ai](https://webscraping.ai)
|
|
5
|
+
[](https://rubygems.org/gems/webscraping_ai)
|
|
14
6
|
|
|
15
7
|
## Installation
|
|
16
8
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
```shell
|
|
22
|
-
gem build webscraping_ai.gemspec
|
|
9
|
+
```ruby
|
|
10
|
+
# Gemfile
|
|
11
|
+
gem "webscraping_ai", "~> 4.0"
|
|
23
12
|
```
|
|
24
13
|
|
|
25
|
-
|
|
14
|
+
Or:
|
|
26
15
|
|
|
27
|
-
```
|
|
28
|
-
gem install
|
|
16
|
+
```bash
|
|
17
|
+
gem install webscraping_ai
|
|
29
18
|
```
|
|
30
19
|
|
|
31
|
-
|
|
20
|
+
Requires Ruby 3.1+.
|
|
32
21
|
|
|
33
|
-
|
|
22
|
+
## Quick start
|
|
34
23
|
|
|
35
|
-
|
|
24
|
+
```ruby
|
|
25
|
+
require "webscraping_ai"
|
|
26
|
+
|
|
27
|
+
client = WebScrapingAI::Client.new(api_key: ENV.fetch("WEBSCRAPING_AI_API_KEY"))
|
|
36
28
|
|
|
37
|
-
|
|
29
|
+
# Page HTML
|
|
30
|
+
html = client.html("https://example.com", js: true)
|
|
38
31
|
|
|
39
|
-
|
|
32
|
+
# Visible text
|
|
33
|
+
text = client.text("https://example.com")
|
|
40
34
|
|
|
41
|
-
|
|
35
|
+
# CSS-selected fragment
|
|
36
|
+
title = client.selected("https://example.com", selector: "h1")
|
|
42
37
|
|
|
43
|
-
|
|
38
|
+
# Multiple selectors at once
|
|
39
|
+
fragments = client.selected_multiple("https://example.com", selectors: ["h1", ".price"])
|
|
44
40
|
|
|
45
|
-
|
|
41
|
+
# Ask the LLM a question about the page
|
|
42
|
+
answer = client.question("https://example.com", question: "What is the main product?")
|
|
46
43
|
|
|
47
|
-
|
|
44
|
+
# Extract structured fields with the LLM
|
|
45
|
+
data = client.fields(
|
|
46
|
+
"https://example.com",
|
|
47
|
+
fields: {
|
|
48
|
+
title: "Main product title",
|
|
49
|
+
price: "Current product price",
|
|
50
|
+
description: "Full product description"
|
|
51
|
+
}
|
|
52
|
+
)
|
|
48
53
|
|
|
49
|
-
|
|
50
|
-
|
|
54
|
+
# Check your account quota
|
|
55
|
+
info = client.account
|
|
56
|
+
# => { "remaining_api_calls" => 200_000, "resets_at" => 1_617_073_667, "remaining_concurrency" => 100 }
|
|
51
57
|
```
|
|
52
58
|
|
|
53
|
-
##
|
|
59
|
+
## Configuration
|
|
54
60
|
|
|
55
|
-
|
|
61
|
+
Configure globally once, then create clients without arguments:
|
|
56
62
|
|
|
57
63
|
```ruby
|
|
58
|
-
# Load the gem
|
|
59
|
-
require 'webscraping_ai'
|
|
60
|
-
|
|
61
|
-
# Setup authorization
|
|
62
64
|
WebScrapingAI.configure do |config|
|
|
63
|
-
|
|
64
|
-
config.
|
|
65
|
-
|
|
66
|
-
# config.api_key_prefix['api_key'] = 'Bearer'
|
|
65
|
+
config.api_key = ENV.fetch("WEBSCRAPING_AI_API_KEY")
|
|
66
|
+
config.timeout = 60 # seconds, total request timeout
|
|
67
|
+
config.open_timeout = 10 # seconds, connection timeout
|
|
67
68
|
end
|
|
68
69
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
headers: { key: { key: 'inner_example'}}, # Hash<String, String> | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers=[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
74
|
-
timeout: 10000, # Integer | Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
75
|
-
js: true, # Boolean | Execute on-page JavaScript using a headless browser (true by default).
|
|
76
|
-
js_timeout: 2000, # Integer | Maximum JavaScript rendering time in ms. Increase it in case if you see a loading indicator instead of data on the target page.
|
|
77
|
-
wait_for: 'wait_for_example', # String | CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
78
|
-
proxy: 'datacenter', # String | Type of proxy. Use `residential` if your site restricts traffic from datacenters, or `stealth` for the most heavily protected sites with advanced anti-bot detection (`datacenter` by default). Residential and stealth proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
79
|
-
country: 'us', # String | Country of the proxy to use (US by default).
|
|
80
|
-
custom_proxy: 'custom_proxy_example', # String | Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
81
|
-
device: 'desktop', # String | Type of device emulation.
|
|
82
|
-
error_on_404: false, # Boolean | Return error on 404 HTTP status on the target page (false by default).
|
|
83
|
-
error_on_redirect: false, # Boolean | Return error on redirect on the target page (false by default).
|
|
84
|
-
js_script: 'document.querySelector('button').click();' # String | Custom JavaScript code to execute on the target page.
|
|
85
|
-
}
|
|
70
|
+
client = WebScrapingAI::Client.new
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The gem also reads `WEBSCRAPING_AI_API_KEY` from the environment automatically.
|
|
86
74
|
|
|
87
|
-
|
|
88
|
-
#Extract structured data fields from a web page
|
|
89
|
-
result = api_instance.get_fields(url, fields, opts)
|
|
90
|
-
p result
|
|
91
|
-
rescue WebScrapingAI::ApiError => e
|
|
92
|
-
puts "Exception when calling AIApi->get_fields: #{e}"
|
|
93
|
-
end
|
|
75
|
+
Per-instance overrides:
|
|
94
76
|
|
|
77
|
+
```ruby
|
|
78
|
+
client = WebScrapingAI::Client.new(
|
|
79
|
+
api_key: "...",
|
|
80
|
+
timeout: 90,
|
|
81
|
+
base_url: "https://api.webscraping.ai"
|
|
82
|
+
)
|
|
95
83
|
```
|
|
96
84
|
|
|
97
|
-
##
|
|
85
|
+
## Endpoints and options
|
|
98
86
|
|
|
99
|
-
All
|
|
87
|
+
All page-fetching endpoints accept these common options (passed as keyword arguments):
|
|
100
88
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
89
|
+
| Option | Type | Default | Description |
|
|
90
|
+
| --- | --- | --- | --- |
|
|
91
|
+
| `headers` | `Hash` | — | HTTP headers to send to the target page (e.g. `{ "Cookie" => "session=..." }`) |
|
|
92
|
+
| `timeout` | `Integer` | `10000` | Page retrieval timeout in ms (1–30000) |
|
|
93
|
+
| `js` | `Boolean` | `true` | Execute on-page JavaScript via headless Chromium |
|
|
94
|
+
| `js_timeout` | `Integer` | `2000` | JS rendering timeout in ms (1–20000) |
|
|
95
|
+
| `wait_for` | `String` | — | CSS selector to wait for before returning (overrides `js_timeout`) |
|
|
96
|
+
| `proxy` | `String` | `"datacenter"` | One of `datacenter`, `residential`, `stealth` |
|
|
97
|
+
| `country` | `String` | `"us"` | Proxy country: `us`, `gb`, `de`, `it`, `fr`, `ca`, `es`, `ru`, `jp`, `kr`, `in`, `hk`, `tr` |
|
|
98
|
+
| `custom_proxy` | `String` | — | Your own proxy in `http://user:pass@host:port` form |
|
|
99
|
+
| `device` | `String` | `"desktop"` | One of `desktop`, `mobile`, `tablet` |
|
|
100
|
+
| `error_on_404` | `Boolean` | `false` | Raise an error if the target page returns 404 |
|
|
101
|
+
| `error_on_redirect` | `Boolean` | `false` | Raise an error if the target page redirects |
|
|
102
|
+
| `js_script` | `String` | — | Custom JS to execute on the page |
|
|
110
103
|
|
|
104
|
+
Endpoint-specific options:
|
|
111
105
|
|
|
112
|
-
|
|
106
|
+
- `#html` — `return_script_result` (`Boolean`), `format` (`"json"`/`"text"`)
|
|
107
|
+
- `#text` — `text_format` (`"plain"`/`"xml"`/`"json"`), `return_links` (`Boolean`, only with `text_format: "json"`)
|
|
108
|
+
- `#selected` — `selector` (`String`), `format` (`"json"`/`"text"`)
|
|
109
|
+
- `#selected_multiple` — `selectors` (`Array<String>` or single `String`)
|
|
110
|
+
- `#question` — `question` (`String`, required), `format` (`"json"`/`"text"`)
|
|
111
|
+
- `#fields` — `fields` (`Hash<String, String>`, required) — keys are field names, values are descriptions
|
|
113
112
|
|
|
114
|
-
|
|
115
|
-
- [WebScrapingAI::Error](docs/Error.md)
|
|
113
|
+
Returns: `String` for HTML/text responses, `Hash`/`Array` for JSON responses.
|
|
116
114
|
|
|
115
|
+
## Error handling
|
|
117
116
|
|
|
118
|
-
|
|
117
|
+
All API errors inherit from `WebScrapingAI::ApiError` and expose `#status`, `#message`, `#status_code`, `#status_message`, `#body`, and `#response_body`.
|
|
119
118
|
|
|
119
|
+
```ruby
|
|
120
|
+
begin
|
|
121
|
+
client.html("https://example.com")
|
|
122
|
+
rescue WebScrapingAI::RateLimitError => e
|
|
123
|
+
# 429 — too many concurrent requests
|
|
124
|
+
sleep 1; retry
|
|
125
|
+
rescue WebScrapingAI::PaymentRequiredError => e
|
|
126
|
+
# 402 — out of API credits
|
|
127
|
+
rescue WebScrapingAI::AuthenticationError => e
|
|
128
|
+
# 403 — wrong API key
|
|
129
|
+
rescue WebScrapingAI::BadRequestError => e
|
|
130
|
+
# 400 — invalid parameters
|
|
131
|
+
rescue WebScrapingAI::ServerError => e
|
|
132
|
+
# 500 — target page returned a non-2xx code, or unexpected error.
|
|
133
|
+
# e.status_code / e.status_message expose the target page's response.
|
|
134
|
+
rescue WebScrapingAI::GatewayTimeoutError => e
|
|
135
|
+
# 504 — page took longer than `timeout` ms to load. Try a higher `timeout:`.
|
|
136
|
+
rescue WebScrapingAI::TimeoutError => e
|
|
137
|
+
# Client-side: the HTTP request exceeded `Client#timeout`.
|
|
138
|
+
rescue WebScrapingAI::ConnectionError => e
|
|
139
|
+
# Network failure before a response was received.
|
|
140
|
+
end
|
|
141
|
+
```
|
|
120
142
|
|
|
121
|
-
|
|
122
|
-
### api_key
|
|
143
|
+
## Development
|
|
123
144
|
|
|
145
|
+
```bash
|
|
146
|
+
bin/setup # bundle install
|
|
147
|
+
bundle exec rspec
|
|
148
|
+
bundle exec rubocop
|
|
149
|
+
```
|
|
124
150
|
|
|
125
|
-
|
|
126
|
-
- **API key parameter name**: api_key
|
|
127
|
-
- **Location**: URL query string
|
|
151
|
+
## License
|
|
128
152
|
|
|
153
|
+
MIT — see [LICENSE](LICENSE).
|
|
# frozen_string_literal: true

# data/lib/webscraping_ai/client.rb — HTTP client for the WebScraping.AI API.
require "faraday"
require "json"

module WebScrapingAI
  # Faraday-backed API client. One public method per endpoint; all of them
  # funnel through #get, which authenticates the request, parses the body,
  # and maps HTTP error statuses onto the gem's typed error hierarchy
  # (STATUS_TO_ERROR is declared in errors.rb).
  class Client
    # Values accepted by the API for the corresponding options. Not validated
    # client-side; the server rejects unknown values with a 400.
    PROXY_TYPES = %w[datacenter residential stealth].freeze
    COUNTRIES = %w[us gb de it fr ca es ru jp kr in hk tr].freeze
    DEVICES = %w[desktop mobile tablet].freeze
    TEXT_FORMATS = %w[plain xml json].freeze
    FORMATS = %w[json text].freeze

    # Options shared by every page-fetching endpoint; used to whitelist
    # caller-supplied keyword arguments before they become query parameters.
    PAGE_FETCH_OPTIONS = %i[
      headers timeout js js_timeout wait_for proxy country
      custom_proxy device error_on_404 error_on_redirect js_script
    ].freeze

    attr_reader :configuration

    # Builds a client. Any argument left nil falls back to the corresponding
    # module-level WebScrapingAI.configuration value.
    #
    # @raise [ConfigurationError] when no api_key is available from either source.
    def initialize(api_key: nil, base_url: nil, timeout: nil, open_timeout: nil, adapter: nil, user_agent: nil)
      global = WebScrapingAI.configuration
      @configuration = Configuration.new.tap do |c|
        c.api_key = api_key || global.api_key
        c.base_url = base_url || global.base_url
        c.timeout = timeout || global.timeout
        c.open_timeout = open_timeout || global.open_timeout
        c.adapter = adapter || global.adapter
        c.user_agent = user_agent || global.user_agent
      end

      # `.to_s.empty?` already treats nil as blank, so one check suffices.
      return unless @configuration.api_key.to_s.empty?

      raise ConfigurationError,
            "api_key is required (pass api_key: or set WebScrapingAI.configure { |c| c.api_key = ... })"
    end

    # GET /ai/question — returns the LLM's answer about the page.
    # Returns a String by default, or a Hash when format: "json".
    def question(url, question:, **opts)
      get("/ai/question", url: url, question: question, **opts.slice(*PAGE_FETCH_OPTIONS, :format))
    end

    # GET /ai/fields — extracts the named fields from the page.
    # `fields` is a Hash of { field_name => description }. Returns a Hash.
    def fields(url, fields:, **opts)
      get("/ai/fields", url: url, fields: fields, **opts.slice(*PAGE_FETCH_OPTIONS))
    end

    # GET /html — returns the full page HTML as a String.
    def html(url, **opts)
      get("/html", url: url, **opts.slice(*PAGE_FETCH_OPTIONS, :return_script_result, :format))
    end

    # GET /text — returns the visible text content of the page.
    # Returns a String when text_format is "plain"/"xml" (default), or a Hash
    # when text_format: "json".
    def text(url, **opts)
      get("/text", url: url, **opts.slice(*PAGE_FETCH_OPTIONS, :text_format, :return_links))
    end

    # GET /selected — returns HTML of the element matching `selector` as a String.
    def selected(url, selector: nil, **opts)
      get("/selected", url: url, selector: selector, **opts.slice(*PAGE_FETCH_OPTIONS, :format))
    end

    # GET /selected-multiple — returns an Array of HTML strings, one per selector.
    # Array() lets callers pass a single String as well as an Array.
    def selected_multiple(url, selectors:, **opts)
      get("/selected-multiple", url: url, selectors: Array(selectors), **opts.slice(*PAGE_FETCH_OPTIONS))
    end

    # GET /account — returns Hash with remaining_api_calls, resets_at,
    # remaining_concurrency, email.
    def account
      get("/account")
    end

    private

    # Lazily-built Faraday connection shared by all requests on this client.
    def connection
      @connection ||= Faraday.new(url: configuration.base_url) do |conn|
        conn.options.timeout = configuration.timeout
        conn.options.open_timeout = configuration.open_timeout
        conn.options.params_encoder = QueryEncoder
        conn.headers["User-Agent"] = configuration.user_agent
        conn.headers["Accept"] = "application/json, text/html, text/xml, text/plain"
        conn.adapter(configuration.adapter || Faraday.default_adapter)
      end
    end

    # Performs an authenticated GET and maps transport failures onto the gem's
    # own error classes so callers never have to rescue Faraday errors.
    def get(path, **params)
      response = connection.get(path) do |req|
        # compact: drop options the caller left nil (e.g. #selected without a
        # selector) instead of serializing empty query parameters.
        req.params = params.compact.merge(api_key: configuration.api_key)
      end
      handle_response(response)
    rescue Faraday::TimeoutError => e
      raise TimeoutError, e.message
    rescue Faraday::ConnectionFailed => e
      raise ConnectionError, e.message
    end

    # Returns the parsed body for 2xx responses; otherwise raises the
    # ApiError subclass mapped from the HTTP status.
    def handle_response(response)
      return parse_body(response) if response.status.between?(200, 299)

      error_class = STATUS_TO_ERROR.fetch(response.status, ApiError)
      data = safe_parse_json(response.body) || {}
      raise error_class.new(
        message: data["message"] || "HTTP #{response.status}",
        status: response.status,
        status_code: data["status_code"],
        status_message: data["status_message"],
        body: data["body"],
        response_body: response.body
      )
    end

    # JSON-decodes JSON responses; returns the raw body text otherwise.
    def parse_body(response)
      content_type = response.headers["content-type"].to_s
      if content_type.include?("application/json")
        JSON.parse(response.body)
      else
        response.body
      end
    end

    # Best-effort JSON parse used for error bodies; nil on blank or invalid input.
    def safe_parse_json(body)
      return nil if body.nil? || body.empty?

      JSON.parse(body)
    rescue JSON::ParserError
      nil
    end
  end
end