proxycrawl 0.3.1 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -1
- data/lib/proxycrawl/api.rb +10 -2
- data/lib/proxycrawl/leads_api.rb +11 -2
- data/lib/proxycrawl/storage_api.rb +12 -2
- data/lib/proxycrawl/version.rb +1 -1
- data/proxycrawl.gemspec +3 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c2e422acaf7de6fef3bc43e6241265852a9d699dcf8a114af5aab12b84def901
|
4
|
+
data.tar.gz: a78a2265f2dde7c80918b36167499667d6c85871bb33c2c63e59d727cf9db47e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b0d9ec78313bdccc5083432f53a1c384280ac2f7725d9309fef20cb488e0e5bc7f625ad03865a34735cd7724d562a45cf91c1abd86e11f7dc610c36aabb354f
|
7
|
+
data.tar.gz: 95a92e684149d66a072f47c98d41f4de23cd538d0a85685ec17ff979e3c773e6ca8061191d3455411f6a6dddefe272b6c291cc09e117da2e2d367fca60311fec
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# DEPRECATION NOTICE
|
2
|
+
|
3
|
+
> :warning: **IMPORTANT:** This gem is no longer maintained or supported. For the latest updates, please use our new gem at [crawlbase-ruby](https://github.com/crawlbase-source/crawlbase-ruby).
|
4
|
+
|
5
|
+
---
|
6
|
+
|
1
7
|
# ProxyCrawl
|
2
8
|
|
3
9
|
Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
|
@@ -361,4 +367,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
|
|
361
367
|
|
362
368
|
---
|
363
369
|
|
364
|
-
Copyright
|
370
|
+
Copyright 2023 ProxyCrawl
|
data/lib/proxycrawl/api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class API
|
9
|
-
attr_reader :token, :body, :status_code, :original_status, :pc_status, :url, :storage_url
|
9
|
+
attr_reader :token, :body, :timeout, :status_code, :original_status, :pc_status, :url, :storage_url
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_URL = 'URL is required'
|
@@ -15,14 +15,22 @@ module ProxyCrawl
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil?
|
16
16
|
|
17
17
|
@token = options[:token]
|
18
|
+
@timeout = options[:timeout] || 120
|
18
19
|
end
|
19
20
|
|
20
21
|
def get(url, options = {})
|
21
22
|
raise INVALID_URL if url.empty?
|
22
23
|
|
23
24
|
uri = prepare_uri(url, options)
|
25
|
+
req = Net::HTTP::Get.new(uri)
|
24
26
|
|
25
|
-
|
27
|
+
req_options = {
|
28
|
+
read_timeout: timeout,
|
29
|
+
use_ssl: uri.scheme == 'https',
|
30
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
31
|
+
}
|
32
|
+
|
33
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
26
34
|
|
27
35
|
prepare_response(response, options[:format])
|
28
36
|
|
data/lib/proxycrawl/leads_api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class LeadsAPI
|
9
|
-
attr_reader :token, :body, :status_code, :success, :remaining_requests
|
9
|
+
attr_reader :token, :timeout, :body, :status_code, :success, :remaining_requests
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_DOMAIN = 'Domain is required'
|
@@ -15,6 +15,7 @@ module ProxyCrawl
|
|
15
15
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
16
16
|
|
17
17
|
@token = options[:token]
|
18
|
+
@timeout = options[:timeout] || 120
|
18
19
|
end
|
19
20
|
|
20
21
|
def get(domain)
|
@@ -23,7 +24,15 @@ module ProxyCrawl
|
|
23
24
|
uri = URI('https://api.proxycrawl.com/leads')
|
24
25
|
uri.query = URI.encode_www_form({ token: token, domain: domain })
|
25
26
|
|
26
|
-
|
27
|
+
req = Net::HTTP::Get.new(uri)
|
28
|
+
|
29
|
+
req_options = {
|
30
|
+
read_timeout: timeout,
|
31
|
+
use_ssl: uri.scheme == 'https',
|
32
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
33
|
+
}
|
34
|
+
|
35
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
27
36
|
@status_code = response.code.to_i
|
28
37
|
@body = response.body
|
29
38
|
|
data/lib/proxycrawl/storage_api.rb
CHANGED
@@ -6,7 +6,7 @@ require 'uri'
|
|
6
6
|
|
7
7
|
module ProxyCrawl
|
8
8
|
class StorageAPI
|
9
|
-
attr_reader :token, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
9
|
+
attr_reader :token, :timeout, :original_status, :pc_status, :url, :status_code, :rid, :body, :stored_at
|
10
10
|
|
11
11
|
INVALID_TOKEN = 'Token is required'
|
12
12
|
INVALID_RID = 'RID is required'
|
@@ -18,6 +18,7 @@ module ProxyCrawl
|
|
18
18
|
raise INVALID_TOKEN if options[:token].nil? || options[:token].empty?
|
19
19
|
|
20
20
|
@token = options[:token]
|
21
|
+
@timeout = options[:timeout] || 120
|
21
22
|
end
|
22
23
|
|
23
24
|
def get(url_or_rid, format = 'html')
|
@@ -25,7 +26,16 @@ module ProxyCrawl
|
|
25
26
|
|
26
27
|
uri = URI(BASE_URL)
|
27
28
|
uri.query = URI.encode_www_form({ token: token, format: format }.merge(decide_url_or_rid(url_or_rid)))
|
28
|
-
|
29
|
+
|
30
|
+
req = Net::HTTP::Get.new(uri)
|
31
|
+
|
32
|
+
req_options = {
|
33
|
+
read_timeout: timeout,
|
34
|
+
use_ssl: uri.scheme == 'https',
|
35
|
+
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
36
|
+
}
|
37
|
+
|
38
|
+
response = Net::HTTP.start(uri.hostname, uri.port, req_options) { |http| http.request(req) }
|
29
39
|
|
30
40
|
res = format == 'json' ? JSON.parse(response.body) : response
|
31
41
|
|
data/lib/proxycrawl/version.rb
CHANGED
data/proxycrawl.gemspec
CHANGED
@@ -28,4 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_development_dependency "webmock", "~> 3.4"
|
29
29
|
spec.add_development_dependency "bundler", "~> 2.0"
|
30
30
|
spec.add_development_dependency "rake", "~> 12.3.3"
|
31
|
+
|
32
|
+
# Deprecation warning
|
33
|
+
spec.post_install_message = "DEPRECATION WARNING: This package is no longer maintained due to rebranding. Please use the 'crawlbase' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby"
|
31
34
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxycrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- proxycrawl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -94,7 +94,8 @@ homepage: https://github.com/proxycrawl/proxycrawl-ruby
|
|
94
94
|
licenses:
|
95
95
|
- MIT
|
96
96
|
metadata: {}
|
97
|
-
post_install_message:
|
97
|
+
post_install_message: 'DEPRECATION WARNING: This package is no longer maintained due
|
98
|
+
to rebranding. Please use the ''crawlbase'' gem instead. More info: https://github.com/crawlbase-source/crawlbase-ruby'
|
98
99
|
rdoc_options: []
|
99
100
|
require_paths:
|
100
101
|
- lib
|
@@ -109,7 +110,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
110
|
- !ruby/object:Gem::Version
|
110
111
|
version: '0'
|
111
112
|
requirements: []
|
112
|
-
rubygems_version: 3.1.
|
113
|
+
rubygems_version: 3.1.2
|
113
114
|
signing_key:
|
114
115
|
specification_version: 4
|
115
116
|
summary: ProxyCrawl API client for web scraping and crawling
|