proxycrawl 0.3.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -1
- data/lib/proxycrawl/api.rb +10 -2
- data/lib/proxycrawl/leads_api.rb +11 -2
- data/lib/proxycrawl/storage_api.rb +12 -2
- data/lib/proxycrawl/version.rb +1 -1
- data/proxycrawl.gemspec +3 -0
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c2e422acaf7de6fef3bc43e6241265852a9d699dcf8a114af5aab12b84def901
|
|
4
|
+
data.tar.gz: a78a2265f2dde7c80918b36167499667d6c85871bb33c2c63e59d727cf9db47e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8b0d9ec78313bdccc5083432f53a1c384280ac2f7725d9309fef20cb488e0e5bc7f625ad03865a34735cd7724d562a45cf91c1abd86e11f7dc610c36aabb354f
|
|
7
|
+
data.tar.gz: 95a92e684149d66a072f47c98d41f4de23cd538d0a85685ec17ff979e3c773e6ca8061191d3455411f6a6dddefe272b6c291cc09e117da2e2d367fca60311fec
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
# DEPRECATION NOTICE
|
|
2
|
+
|
|
3
|
+
> :warning: **IMPORTANT:** This gem is no longer maintained or supported. For the latest updates, please use our new gem at [crawlbase-ruby](https://github.com/crawlbase-source/crawlbase-ruby).
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
1
7
|
# ProxyCrawl
|
|
2
8
|
|
|
3
9
|
Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
|
|
@@ -361,4 +367,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
|
|
|
361
367
|
|
|
362
368
|
---
|
|
363
369
|
|
|
364
|
-
Copyright
|
|
370
|
+
Copyright 2023 ProxyCrawl
|
data/lib/proxycrawl/api.rb
CHANGED
|
@@ -6,7 +6,7 @@ require 'uri'
|
|
|
6
6
|
|
|
7
7
|
module ProxyCrawl
|
|
8
8
|
class API
|
|
9
|
-
attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
|
|
9
|
+
attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
|
|
10
10
|
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
|
12
12
|
INVALID_URL = 'URL is required'
|
|
@@ -15,14 +15,22 @@ module ProxyCrawl
|
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil?
|
|
16
16
|
|
|
17
17
|
@token = options[:token]
|
|
18
|
+
@timeout = options[:timeout] || 120
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
def get(url, options = {})
|
|
21
22
|
raise INVALID_URL if url.empty?
|
|
22
23
|
|
|
23
24
|
uri = prepare_uri(url, options)
|
|
25
|
+
req = Net::HTTP::Get.new(uri)
|
|
24
26
|
|
|
25
|
-
|
|
27
|
+
req_options = {
|
|
28
|
+
read_timeout: timeout,
|
|
29
|
+
use_ssl: uri.scheme == 'https',
|
|
30
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
|
26
34
|
|
|
27
35
|
prepare_response(response, options[:format])
|
|
28
36
|
|
data/lib/proxycrawl/leads_api.rb
CHANGED
|
@@ -6,7 +6,7 @@ require 'uri'
|
|
|
6
6
|
|
|
7
7
|
module ProxyCrawl
|
|
8
8
|
class LeadsAPI
|
|
9
|
-
attr_reader :token, :body, :status_code, :success, :remaining_requests
|
|
9
|
+
attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
|
|
10
10
|
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
|
12
12
|
INVALID_DOMAIN = 'Domain is required'
|
|
@@ -15,6 +15,7 @@ module ProxyCrawl
|
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
|
16
16
|
|
|
17
17
|
@token = options[:token]
|
|
18
|
+
@timeout = options[:timeout] || 120
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
def get(domain)
|
|
@@ -23,7 +24,15 @@ module ProxyCrawl
|
|
|
23
24
|
uri = URI('https://api.proxycrawl.com/leads')
|
|
24
25
|
uri.query = URI.encode_www_form({ token: token, domain: domain })
|
|
25
26
|
|
|
26
|
-
|
|
27
|
+
req = Net::HTTP::Get.new(uri)
|
|
28
|
+
|
|
29
|
+
req_options = {
|
|
30
|
+
read_timeout: timeout,
|
|
31
|
+
use_ssl: uri.scheme == 'https',
|
|
32
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
|
27
36
|
@status_code = response.code.to_i
|
|
28
37
|
@body = response.body
|
|
29
38
|
|
|
@@ -6,7 +6,7 @@ require 'uri'
|
|
|
6
6
|
|
|
7
7
|
module ProxyCrawl
|
|
8
8
|
class StorageAPI
|
|
9
|
-
attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
|
9
|
+
attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
|
10
10
|
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
|
12
12
|
INVALID_RID = 'RID is required'
|
|
@@ -18,6 +18,7 @@ module ProxyCrawl
|
|
|
18
18
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
|
19
19
|
|
|
20
20
|
@token = options[:token]
|
|
21
|
+
@timeout = options[:timeout] || 120
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
def get(url_or_rid, format = 'html')
|
|
@@ -25,7 +26,16 @@ module ProxyCrawl
|
|
|
25
26
|
|
|
26
27
|
uri = URI(BASE_URL)
|
|
27
28
|
uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
|
|
28
|
-
|
|
29
|
+
|
|
30
|
+
req = Net::HTTP::Get.new(uri)
|
|
31
|
+
|
|
32
|
+
req_options = {
|
|
33
|
+
read_timeout: timeout,
|
|
34
|
+
use_ssl: uri.scheme == 'https',
|
|
35
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
|
29
39
|
|
|
30
40
|
res = format == 'json' ? JSON.parse(response.body) : response
|
|
31
41
|
|
data/lib/proxycrawl/version.rb
CHANGED
data/proxycrawl.gemspec
CHANGED
|
@@ -28,4 +28,7 @@ Gem::Specification.new do |spec|
|
|
|
28
28
|
spec.add_development_dependency "webmock", "~> 3.4"
|
|
29
29
|
spec.add_development_dependency "bundler", "~> 2.0"
|
|
30
30
|
spec.add_development_dependency "rake", "~> 12.3.3"
|
|
31
|
+
|
|
32
|
+
# Deprecation warning
|
|
33
|
+
spec.post_install_message = "DEPRECATION WARNING: This package is no longer maintained due to rebranding. Please use the 'crawlbase' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby"
|
|
31
34
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: proxycrawl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1
|
|
4
|
+
version: 1.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- proxycrawl
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-07-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|
|
@@ -94,7 +94,8 @@ homepage: https://github.com/proxycrawl/proxycrawl-ruby
|
|
|
94
94
|
licenses:
|
|
95
95
|
- MIT
|
|
96
96
|
metadata: {}
|
|
97
|
-
post_install_message:
|
|
97
|
+
post_install_message: 'DEPRECATION WARNING: This package is no longer maintained due
|
|
98
|
+
to rebranding. Please use the ''crawlbase'' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby'
|
|
98
99
|
rdoc_options: []
|
|
99
100
|
require_paths:
|
|
100
101
|
- lib
|
|
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
109
110
|
- !ruby/object:Gem::Version
|
|
110
111
|
version: '0'
|
|
111
112
|
requirements: []
|
|
112
|
-
rubygems_version: 3.1.
|
|
113
|
+
rubygems_version: 3.1.2
|
|
113
114
|
signing_key:
|
|
114
115
|
specification_version: 4
|
|
115
116
|
summary: ProxyCrawl API client for web scraping and crawling
|