proxycrawl 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/LICENSE.txt +1 -1
- data/README.md +56 -4
- data/lib/proxycrawl.rb +5 -1
- data/lib/proxycrawl/api.rb +8 -5
- data/lib/proxycrawl/leads_api.rb +34 -0
- data/lib/proxycrawl/scraper_api.rb +16 -0
- data/lib/proxycrawl/version.rb +3 -1
- data/proxycrawl.gemspec +2 -2
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
+  data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
+  data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec
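The digests recorded above can be checked against a locally downloaded copy of the release. A minimal sketch, assuming `proxycrawl-0.2.1.gem` has been fetched into the current directory (for example with `gem fetch proxycrawl -v 0.2.1`); `Gem::Package#verify` recomputes the checksums of the archives inside the `.gem` and raises if they do not match the recorded values:

```ruby
require 'rubygems/package'

# Assumes `gem fetch proxycrawl -v 0.2.1` has placed the .gem file here.
package = Gem::Package.new('proxycrawl-0.2.1.gem')

# Re-reads checksums.yaml.gz from the archive and compares it against the
# digests of metadata.gz and data.tar.gz; raises on any mismatch.
package.verify

puts package.spec.version # => 0.2.1
```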
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
-#
+# ProxyCrawl
 
 Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
 
@@ -18,14 +18,20 @@ Or install it yourself as:
 
     $ gem install proxycrawl
 
-## Usage
+## Crawling API Usage
+
+Require the gem in your project
+
+```ruby
+require 'proxycrawl'
+```
 
 Initialize the API with one of your account tokens, either normal or javascript token. Then make get or post requests accordingly.
 
-You can get a token for free by creating a ProxyCrawl account and 1000 free testing requests. You can use them for tcp calls or javascript calls or both.
+You can get a token for free by [creating a ProxyCrawl account](https://proxycrawl.com/signup) and 1000 free testing requests. You can use them for tcp calls or javascript calls or both.
 
 ```ruby
-
+api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
 ```
 
 ### GET requests
@@ -124,6 +130,48 @@ puts response.original_status
 puts response.pc_status
 ```
 
+## Scraper API usage
+
+Initialize the Scraper API using your normal token and call the `get` method.
+
+```ruby
+scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')
+```
+
+Pass the url that you want to scrape plus any options from the ones available in the [Scraper API documentation](https://proxycrawl.com/docs/scraper-api/parameters).
+
+```ruby
+api.get(url, options)
+```
+
+Example:
+
+```ruby
+begin
+  response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
+## Leads API usage
+
+Initialize with your Leads API token and call the `get` method.
+
+```ruby
+leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
+
+begin
+  response = leads_api.get('stripe.com')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 
 ## Development
@@ -143,3 +191,7 @@ The gem is available as open source under the terms of the [MIT License](http://
 ## Code of Conduct
 
 Everyone interacting in the Proxycrawl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/proxycrawl/proxycrawl-ruby/blob/master/CODE_OF_CONDUCT.md).
+
+---
+
+Copyright 2020 ProxyCrawl
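Taken together, the README now documents three clients: the existing `ProxyCrawl::API` crawler plus the new `ScraperAPI` and `LeadsAPI`. A combined sketch of the documented calls, assuming `require 'proxycrawl'` loads the two new files (the change to `data/lib/proxycrawl.rb` in this release suggests it does); the crawl target and the token placeholders are illustrative:

```ruby
require 'proxycrawl'

token = 'YOUR_TOKEN' # normal or javascript token for the Crawling API

# Crawling API: plain GET through the proxy network.
api = ProxyCrawl::API.new(token: token)
page = api.get('https://www.example.com')
puts page.original_status
puts page.pc_status

# Scraper API: same GET interface, served from the /scraper endpoint.
scraper_api = ProxyCrawl::ScraperAPI.new(token: token)
product = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
puts product.status_code
puts product.body

# Leads API: initialized with the separate Leads API token.
leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_LEADS_TOKEN')
leads = leads_api.get('stripe.com')
puts leads.status_code
puts leads.body
```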
data/lib/proxycrawl.rb
CHANGED
data/lib/proxycrawl/api.rb
CHANGED
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+
 require 'net/http'
 require 'json'
 require 'uri'
@@ -7,8 +8,6 @@ module ProxyCrawl
   class API
     attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
 
-    BASE_URL = 'https://api.proxycrawl.com'
-
     INVALID_TOKEN = 'Token is required'
     INVALID_URL = 'URL is required'
 
@@ -58,15 +57,19 @@ module ProxyCrawl
 
     private
 
+    def base_url
+      'https://api.proxycrawl.com'
+    end
+
     def prepare_uri(url, options)
-      uri = URI(
+      uri = URI(base_url)
       uri.query = URI.encode_www_form({ token: @token, url: url }.merge(options))
 
       uri
     end
 
     def prepare_response(response, format)
-      if format == 'json'
+      if format == 'json' || base_url.include?('/scraper')
        @status_code = response.code.to_i
        @body = response.body
      else
@@ -78,4 +81,4 @@ module ProxyCrawl
       end
     end
   end
-end
+end
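The change above swaps the `BASE_URL` constant for a private `base_url` method, so a subclass can reuse all of the request, URI and response handling and only override the endpoint; the new `ScraperAPI` below does exactly that. A sketch of the pattern with a hypothetical subclass (`LinksAPI` and its `/links` path are invented for illustration and are not part of the gem):

```ruby
require 'proxycrawl'

module ProxyCrawl
  # Hypothetical subclass: get, prepare_uri and prepare_response are all inherited.
  class LinksAPI < ProxyCrawl::API
    private

    # Overriding base_url is the only change needed to retarget requests;
    # the /links path is made up for this example.
    def base_url
      'https://api.proxycrawl.com/links'
    end
  end
end

client = ProxyCrawl::LinksAPI.new(token: 'YOUR_TOKEN')
puts client.send(:base_url) # peek at the private override => "https://api.proxycrawl.com/links"
```

Note that `prepare_response` only special-cases endpoints whose `base_url` contains `/scraper`; any other subclass still goes through the HTML branch unless `format == 'json'` is requested.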
data/lib/proxycrawl/leads_api.rb
ADDED
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'json'
+require 'uri'
+
+module ProxyCrawl
+  class LeadsAPI
+    attr_reader :token, :body, :status_code
+
+    INVALID_TOKEN = 'Token is required'
+    INVALID_DOMAIN = 'Domain is required'
+
+    def initialize(options = {})
+      raise INVALID_TOKEN if options[:token].nil?
+
+      @token = options[:token]
+    end
+
+    def get(domain)
+      raise INVALID_DOMAIN if domain.empty?
+
+      uri = URI('https://api.proxycrawl.com/leads')
+      uri.query = URI.encode_www_form({ token: token, domain: domain })
+
+      response = Net::HTTP.get_response(uri)
+
+      @status_code = response.code.to_i
+      @body = response.body
+
+      self
+    end
+  end
+end
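Unlike `ScraperAPI`, `LeadsAPI` is a standalone client rather than an `API` subclass: it builds its own URI against the `/leads` endpoint, issues a single `Net::HTTP.get_response`, and returns `self` so the caller reads `status_code` and `body` off the same object. A short usage sketch (the token placeholder is illustrative; the domain comes from the README example):

```ruby
require 'proxycrawl'

leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')

result = leads_api.get('stripe.com') # returns the LeadsAPI instance itself
puts result.status_code              # HTTP status of the /leads call
puts result.body                     # raw response body as a string

# Only the raw body is stored; decode it yourself, e.g. JSON.parse(result.body)
# if the endpoint returns JSON. Passing an empty domain raises 'Domain is required'.
```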
data/lib/proxycrawl/scraper_api.rb
ADDED
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module ProxyCrawl
+  class ScraperAPI < ProxyCrawl::API
+
+    def post
+      raise 'Only GET is allowed for the ScraperAPI'
+    end
+
+    private
+
+    def base_url
+      'https://api.proxycrawl.com/scraper'
+    end
+  end
+end
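`ScraperAPI` overrides exactly two things: `base_url`, which points the inherited request plumbing at `/scraper`, and `post`, which is stubbed out because the endpoint is GET-only. A short sketch of both behaviours, reusing the token placeholder and product URL from the README examples above:

```ruby
require 'proxycrawl'

scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')

# get is inherited from ProxyCrawl::API; the overridden base_url makes
# prepare_uri build https://api.proxycrawl.com/scraper?token=...&url=...
response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
puts response.status_code
puts response.body

# post is deliberately disabled for this endpoint.
begin
  scraper_api.post
rescue RuntimeError => e
  puts e.message # => "Only GET is allowed for the ScraperAPI"
end
```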
data/lib/proxycrawl/version.rb
CHANGED
data/proxycrawl.gemspec
CHANGED
@@ -26,6 +26,6 @@ Gem::Specification.new do |spec|
 
   spec.add_development_dependency "rspec", "~> 3.2"
   spec.add_development_dependency "webmock", "~> 3.4"
-  spec.add_development_dependency "bundler", "~>
-  spec.add_development_dependency "rake", "~>
+  spec.add_development_dependency "bundler", "~> 2.0"
+  spec.add_development_dependency "rake", "~> 12.3.3"
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: proxycrawl
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - proxycrawl
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2020-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -44,28 +44,28 @@ dependencies:
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: '
+        version: '2.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: '
+        version: '2.0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 12.3.3
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 12.3.3
 description: Ruby based client for the ProxyCrawl API that helps developers crawl
   or scrape thousands of web pages anonymously
 email:
@@ -84,6 +84,8 @@ files:
 - bin/setup
 - lib/proxycrawl.rb
 - lib/proxycrawl/api.rb
+- lib/proxycrawl/leads_api.rb
+- lib/proxycrawl/scraper_api.rb
 - lib/proxycrawl/version.rb
 - proxycrawl.gemspec
 homepage: https://github.com/proxycrawl/proxycrawl-ruby
@@ -105,8 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.5.2
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: ProxyCrawl API client for web scraping and crawling