webscraping_ai 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE +21 -0
- data/README.md +110 -85
- data/lib/webscraping_ai/client.rb +130 -0
- data/lib/webscraping_ai/configuration.rb +10 -300
- data/lib/webscraping_ai/errors.rb +44 -0
- data/lib/webscraping_ai/query_encoder.rb +74 -0
- data/lib/webscraping_ai/version.rb +1 -13
- data/lib/webscraping_ai.rb +15 -40
- data/webscraping_ai.gemspec +33 -36
- metadata +27 -74
- data/Gemfile +0 -9
- data/Rakefile +0 -10
- data/docs/AIApi.md +0 -209
- data/docs/Account.md +0 -24
- data/docs/AccountApi.md +0 -76
- data/docs/Error.md +0 -24
- data/docs/HTMLApi.md +0 -109
- data/docs/SelectedHTMLApi.md +0 -209
- data/docs/TextApi.md +0 -109
- data/git_push.sh +0 -57
- data/lib/webscraping_ai/api/account_api.rb +0 -79
- data/lib/webscraping_ai/api/ai_api.rb +0 -295
- data/lib/webscraping_ai/api/html_api.rb +0 -160
- data/lib/webscraping_ai/api/selected_html_api.rb +0 -291
- data/lib/webscraping_ai/api/text_api.rb +0 -160
- data/lib/webscraping_ai/api_client.rb +0 -397
- data/lib/webscraping_ai/api_error.rb +0 -58
- data/lib/webscraping_ai/api_model_base.rb +0 -88
- data/lib/webscraping_ai/models/account.rb +0 -178
- data/lib/webscraping_ai/models/error.rb +0 -178
- data/spec/api/account_api_spec.rb +0 -46
- data/spec/api/ai_api_spec.rb +0 -86
- data/spec/api/html_api_spec.rb +0 -61
- data/spec/api/selected_html_api_spec.rb +0 -86
- data/spec/api/text_api_spec.rb +0 -61
- data/spec/models/account_spec.rb +0 -54
- data/spec/models/error_spec.rb +0 -54
- data/spec/spec_helper.rb +0 -111
data/spec/api/text_api_spec.rb
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.1
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.22.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
|
|
16
|
-
# Unit tests for WebScrapingAI::TextApi
|
|
17
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
18
|
-
# Please update as you see appropriate
|
|
19
|
-
describe 'TextApi' do
|
|
20
|
-
before do
|
|
21
|
-
# run before each test
|
|
22
|
-
@api_instance = WebScrapingAI::TextApi.new
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
after do
|
|
26
|
-
# run after each test
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe 'test an instance of TextApi' do
|
|
30
|
-
it 'should create an instance of TextApi' do
|
|
31
|
-
expect(@api_instance).to be_instance_of(WebScrapingAI::TextApi)
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# unit tests for get_text
|
|
36
|
-
# Page text by URL
|
|
37
|
-
# Returns the visible text content of a webpage specified by the URL. Can be used to feed data to LLM models. The response can be in plain text, JSON, or XML format based on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
|
|
38
|
-
# @param url URL of the target page.
|
|
39
|
-
# @param [Hash] opts the optional parameters
|
|
40
|
-
# @option opts [String] :text_format Format of the text response (plain by default). \"plain\" will return only the page body text. \"json\" and \"xml\" will return a json/xml with \"title\", \"description\" and \"content\" keys.
|
|
41
|
-
# @option opts [Boolean] :return_links [Works only with text_format=json] Return links from the page body text (false by default). Useful for building web crawlers.
|
|
42
|
-
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON-encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}).
|
|
43
|
-
# @option opts [Integer] :timeout Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000).
|
|
44
|
-
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default).
|
|
45
|
-
# @option opts [Integer] :js_timeout Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page.
|
|
46
|
-
# @option opts [String] :wait_for CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout.
|
|
47
|
-
# @option opts [String] :proxy Type of proxy. Use `residential` if your site restricts traffic from datacenters, or `stealth` for the most heavily protected sites with advanced anti-bot detection (`datacenter` by default). Residential and stealth proxy requests are more expensive than datacenter, see the pricing page for details.
|
|
48
|
-
# @option opts [String] :country Country of the proxy to use (US by default).
|
|
49
|
-
# @option opts [String] :custom_proxy Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example).
|
|
50
|
-
# @option opts [String] :device Type of device emulation.
|
|
51
|
-
# @option opts [Boolean] :error_on_404 Return error on 404 HTTP status on the target page (false by default).
|
|
52
|
-
# @option opts [Boolean] :error_on_redirect Return error on redirect on the target page (false by default).
|
|
53
|
-
# @option opts [String] :js_script Custom JavaScript code to execute on the target page.
|
|
54
|
-
# @return [String]
|
|
55
|
-
describe 'get_text test' do
|
|
56
|
-
it 'should work' do
|
|
57
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|
data/spec/models/account_spec.rb
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.1
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.22.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
require 'date'
|
|
16
|
-
|
|
17
|
-
# Unit tests for WebScrapingAI::Account
|
|
18
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
19
|
-
# Please update as you see appropriate
|
|
20
|
-
describe WebScrapingAI::Account do
|
|
21
|
-
#let(:instance) { WebScrapingAI::Account.new }
|
|
22
|
-
|
|
23
|
-
describe 'test an instance of Account' do
|
|
24
|
-
it 'should create an instance of Account' do
|
|
25
|
-
# uncomment below to test the instance creation
|
|
26
|
-
#expect(instance).to be_instance_of(WebScrapingAI::Account)
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
describe 'test attribute "email"' do
|
|
31
|
-
it 'should work' do
|
|
32
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
describe 'test attribute "remaining_api_calls"' do
|
|
37
|
-
it 'should work' do
|
|
38
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
describe 'test attribute "resets_at"' do
|
|
43
|
-
it 'should work' do
|
|
44
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
describe 'test attribute "remaining_concurrency"' do
|
|
49
|
-
it 'should work' do
|
|
50
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|
data/spec/models/error_spec.rb
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.1
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.22.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
require 'spec_helper'
|
|
14
|
-
require 'json'
|
|
15
|
-
require 'date'
|
|
16
|
-
|
|
17
|
-
# Unit tests for WebScrapingAI::Error
|
|
18
|
-
# Automatically generated by openapi-generator (https://openapi-generator.tech)
|
|
19
|
-
# Please update as you see appropriate
|
|
20
|
-
describe WebScrapingAI::Error do
|
|
21
|
-
#let(:instance) { WebScrapingAI::Error.new }
|
|
22
|
-
|
|
23
|
-
describe 'test an instance of Error' do
|
|
24
|
-
it 'should create an instance of Error' do
|
|
25
|
-
# uncomment below to test the instance creation
|
|
26
|
-
#expect(instance).to be_instance_of(WebScrapingAI::Error)
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
describe 'test attribute "message"' do
|
|
31
|
-
it 'should work' do
|
|
32
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
describe 'test attribute "status_code"' do
|
|
37
|
-
it 'should work' do
|
|
38
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
describe 'test attribute "status_message"' do
|
|
43
|
-
it 'should work' do
|
|
44
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
describe 'test attribute "body"' do
|
|
49
|
-
it 'should work' do
|
|
50
|
-
# assertion here. ref: https://rspec.info/features/3-12/rspec-expectations/built-in-matchers/
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|
data/spec/spec_helper.rb
DELETED
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
=begin
|
|
2
|
-
#WebScraping.AI
|
|
3
|
-
|
|
4
|
-
#WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
|
|
5
|
-
|
|
6
|
-
The version of the OpenAPI document: 3.2.1
|
|
7
|
-
Contact: support@webscraping.ai
|
|
8
|
-
Generated by: https://openapi-generator.tech
|
|
9
|
-
Generator version: 7.22.0
|
|
10
|
-
|
|
11
|
-
=end
|
|
12
|
-
|
|
13
|
-
# load the gem
|
|
14
|
-
require 'webscraping_ai'
|
|
15
|
-
|
|
16
|
-
# The following was generated by the `rspec --init` command. Conventionally, all
|
|
17
|
-
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
|
18
|
-
# The generated `.rspec` file contains `--require spec_helper` which will cause
|
|
19
|
-
# this file to always be loaded, without a need to explicitly require it in any
|
|
20
|
-
# files.
|
|
21
|
-
#
|
|
22
|
-
# Given that it is always loaded, you are encouraged to keep this file as
|
|
23
|
-
# light-weight as possible. Requiring heavyweight dependencies from this file
|
|
24
|
-
# will add to the boot time of your test suite on EVERY test run, even for an
|
|
25
|
-
# individual file that may not need all of that loaded. Instead, consider making
|
|
26
|
-
# a separate helper file that requires the additional dependencies and performs
|
|
27
|
-
# the additional setup, and require it from the spec files that actually need
|
|
28
|
-
# it.
|
|
29
|
-
#
|
|
30
|
-
# The `.rspec` file also contains a few flags that are not defaults but that
|
|
31
|
-
# users commonly want.
|
|
32
|
-
#
|
|
33
|
-
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
|
34
|
-
RSpec.configure do |config|
|
|
35
|
-
# rspec-expectations config goes here. You can use an alternate
|
|
36
|
-
# assertion/expectation library such as wrong or the stdlib/minitest
|
|
37
|
-
# assertions if you prefer.
|
|
38
|
-
config.expect_with :rspec do |expectations|
|
|
39
|
-
# This option will default to `true` in RSpec 4. It makes the `description`
|
|
40
|
-
# and `failure_message` of custom matchers include text for helper methods
|
|
41
|
-
# defined using `chain`, e.g.:
|
|
42
|
-
# be_bigger_than(2).and_smaller_than(4).description
|
|
43
|
-
# # => "be bigger than 2 and smaller than 4"
|
|
44
|
-
# ...rather than:
|
|
45
|
-
# # => "be bigger than 2"
|
|
46
|
-
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# rspec-mocks config goes here. You can use an alternate test double
|
|
50
|
-
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
|
51
|
-
config.mock_with :rspec do |mocks|
|
|
52
|
-
# Prevents you from mocking or stubbing a method that does not exist on
|
|
53
|
-
# a real object. This is generally recommended, and will default to
|
|
54
|
-
# `true` in RSpec 4.
|
|
55
|
-
mocks.verify_partial_doubles = true
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# The settings below are suggested to provide a good initial experience
|
|
59
|
-
# with RSpec, but feel free to customize to your heart's content.
|
|
60
|
-
=begin
|
|
61
|
-
# These two settings work together to allow you to limit a spec run
|
|
62
|
-
# to individual examples or groups you care about by tagging them with
|
|
63
|
-
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
|
|
64
|
-
# get run.
|
|
65
|
-
config.filter_run :focus
|
|
66
|
-
config.run_all_when_everything_filtered = true
|
|
67
|
-
|
|
68
|
-
# Allows RSpec to persist some state between runs in order to support
|
|
69
|
-
# the `--only-failures` and `--next-failure` CLI options. We recommend
|
|
70
|
-
# you configure your source control system to ignore this file.
|
|
71
|
-
config.example_status_persistence_file_path = "spec/examples.txt"
|
|
72
|
-
|
|
73
|
-
# Limits the available syntax to the non-monkey patched syntax that is
|
|
74
|
-
# recommended. For more details, see:
|
|
75
|
-
# - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
|
|
76
|
-
# - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
|
77
|
-
# - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
|
|
78
|
-
config.disable_monkey_patching!
|
|
79
|
-
|
|
80
|
-
# This setting enables warnings. It's recommended, but in some cases may
|
|
81
|
-
# be too noisy due to issues in dependencies.
|
|
82
|
-
config.warnings = true
|
|
83
|
-
|
|
84
|
-
# Many RSpec users commonly either run the entire suite or an individual
|
|
85
|
-
# file, and it's useful to allow more verbose output when running an
|
|
86
|
-
# individual spec file.
|
|
87
|
-
if config.files_to_run.one?
|
|
88
|
-
# Use the documentation formatter for detailed output,
|
|
89
|
-
# unless a formatter has already been configured
|
|
90
|
-
# (e.g. via a command-line flag).
|
|
91
|
-
config.default_formatter = 'doc'
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# Print the 10 slowest examples and example groups at the
|
|
95
|
-
# end of the spec run, to help surface which specs are running
|
|
96
|
-
# particularly slow.
|
|
97
|
-
config.profile_examples = 10
|
|
98
|
-
|
|
99
|
-
# Run specs in random order to surface order dependencies. If you find an
|
|
100
|
-
# order dependency and want to debug it, you can fix the order by providing
|
|
101
|
-
# the seed, which is printed after each run.
|
|
102
|
-
# --seed 1234
|
|
103
|
-
config.order = :random
|
|
104
|
-
|
|
105
|
-
# Seed global randomization in this process using the `--seed` CLI option.
|
|
106
|
-
# Setting this allows you to use `--seed` to deterministically reproduce
|
|
107
|
-
# test failures related to randomization by passing the same `--seed` value
|
|
108
|
-
# as the one that triggered the failure.
|
|
109
|
-
Kernel.srand config.seed
|
|
110
|
-
=end
|
|
111
|
-
end
|