proxycrawl 0.3.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/proxycrawl/api.rb +10 -2
- data/lib/proxycrawl/leads_api.rb +11 -2
- data/lib/proxycrawl/storage_api.rb +12 -2
- data/lib/proxycrawl/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c225955f21200efc0790c401385727e3b9dd9bc20f785ce768f44f687f55105b
|
4
|
+
data.tar.gz: 0a38e44493065a9779366d4ab5372771bf92b020da513059c0cf20b69e2d696a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d81900546063ff3ddbbe660f528b8cbc04336916bce468f37c8791370b7dd99c36d32915dd471ff886bc6fb8fb498ddccc4adfb40c95009849e25a1f4938805
|
7
|
+
data.tar.gz: cd167890621d7d8bcbdf1c2502e545c9ea40c95abf437927e443dc1e3694d8393fae4d1c521e7b34227c4a962e172cb085697169d43f9ee1ff99369649e6c36f
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
data/lib/proxycrawl/api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class API
|
9
|
-
attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
|
9
|
+
attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_URL = 'URL is required'
|
@@ -15,14 +15,22 @@ module ProxyCrawl
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil?
|
16
16
|
|
17
17
|
@token = options[:token]
|
18
|
+
@timeout = options[:timeout] || 120
|
18
19
|
end
|
19
20
|
|
20
21
|
def get(url, options = {})
|
21
22
|
raise INVALID_URL if url.empty?
|
22
23
|
|
23
24
|
uri = prepare_uri(url, options)
|
25
|
+
req = Net::HTTP::Get.new(uri)
|
24
26
|
|
25
|
-
|
27
|
+
req_options = {
|
28
|
+
read_timeout: timeout,
|
29
|
+
use_ssl: uri.scheme == 'https',
|
30
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
31
|
+
}
|
32
|
+
|
33
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
26
34
|
|
27
35
|
prepare_response(response, options[:format])
|
28
36
|
|
data/lib/proxycrawl/leads_api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class LeadsAPI
|
9
|
-
attr_reader :token, :body, :status_code, :success, :remaining_requests
|
9
|
+
attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_DOMAIN = 'Domain is required'
|
@@ -15,6 +15,7 @@ module ProxyCrawl
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
16
16
|
|
17
17
|
@token = options[:token]
|
18
|
+
@timeout = options[:timeout] || 120
|
18
19
|
end
|
19
20
|
|
20
21
|
def get(domain)
|
@@ -23,7 +24,15 @@ module ProxyCrawl
|
|
23
24
|
uri = URI('https://api.proxycrawl.com/leads')
|
24
25
|
uri.query = URI.encode_www_form({ token: token, domain: domain })
|
25
26
|
|
26
|
-
|
27
|
+
req = Net::HTTP::Get.new(uri)
|
28
|
+
|
29
|
+
req_options = {
|
30
|
+
read_timeout: timeout,
|
31
|
+
use_ssl: uri.scheme == 'https',
|
32
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
33
|
+
}
|
34
|
+
|
35
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
27
36
|
@status_code = response.code.to_i
|
28
37
|
@body = response.body
|
29
38
|
|
data/lib/proxycrawl/storage_api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class StorageAPI
|
9
|
-
attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
9
|
+
attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_RID = 'RID is required'
|
@@ -18,6 +18,7 @@ module ProxyCrawl
|
|
18
18
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
19
19
|
|
20
20
|
@token = options[:token]
|
21
|
+
@timeout = options[:timeout] || 120
|
21
22
|
end
|
22
23
|
|
23
24
|
def get(url_or_rid, format = 'html')
|
@@ -25,7 +26,16 @@ module ProxyCrawl
|
|
25
26
|
|
26
27
|
uri = URI(BASE_URL)
|
27
28
|
uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
|
28
|
-
|
29
|
+
|
30
|
+
req = Net::HTTP::Get.new(uri)
|
31
|
+
|
32
|
+
req_options = {
|
33
|
+
read_timeout: timeout,
|
34
|
+
use_ssl: uri.scheme == 'https',
|
35
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
36
|
+
}
|
37
|
+
|
38
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
29
39
|
|
30
40
|
res = format == 'json' ? JSON.parse(response.body) : response
|
31
41
|
|
data/lib/proxycrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxycrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- proxycrawl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -109,7 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
|
-
rubygems_version: 3.1.
|
112
|
+
rubygems_version: 3.1.2
|
113
113
|
signing_key:
|
114
114
|
specification_version: 4
|
115
115
|
summary: ProxyCrawl API client for web scraping and crawling
|