proxycrawl 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: ab95c07f3180f82c6b77af90fa5c947fc2af592e
4
- data.tar.gz: daa80f3339e9cefe929a9600c0865319cf790bbd
2
+ SHA256:
3
+ metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
4
+ data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
5
5
  SHA512:
6
- metadata.gz: 5f9ce2ddfdfb1e2865f64c3a7f505fb6dadaaacb9a5a3c5ef337af1db83ded51dcb4d8e6bb67269876365f2f190f1231dba3477ae683f784aabd9770735e6133
7
- data.tar.gz: b6eedbe01aa37e9a7bb3bd481a761460b98c1ba6be5f48e51fbcf5326e279bba7badc6eecc1337d6fba929d0326e12be29ba47b9a31cbe0894ba092f7a166ae2
6
+ metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
7
+ data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2018 ProxyCrawl
3
+ Copyright (c) 2020 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Proxycrawl
1
+ # ProxyCrawl
2
2
 
3
3
  Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
4
4
 
@@ -18,14 +18,20 @@ Or install it yourself as:
18
18
 
19
19
  $ gem install proxycrawl
20
20
 
21
- ## Usage
21
+ ## Crawling API Usage
22
+
23
+ Require the gem in your project
24
+
25
+ ```ruby
26
+ require 'proxycrawl'
27
+ ```
22
28
 
23
29
  Initialize the API with one of your account tokens, either normal or javascript token. Then make get or post requests accordingly.
24
30
 
25
- You can get a token for free by creating a ProxyCrawl account and 1000 free testing requests. You can use them for tcp calls or javascript calls or both.
31
+ You can get a token for free, along with 1000 free testing requests, by [creating a ProxyCrawl account](https://proxycrawl.com/signup). You can use them for TCP calls, JavaScript calls, or both.
26
32
 
27
33
  ```ruby
28
- api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
34
+ api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
29
35
  ```
30
36
 
31
37
  ### GET requests
@@ -124,6 +130,48 @@ puts response.original_status
124
130
  puts response.pc_status
125
131
  ```
126
132
 
133
+ ## Scraper API usage
134
+
135
+ Initialize the Scraper API using your normal token and call the `get` method.
136
+
137
+ ```ruby
138
+ scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')
139
+ ```
140
+
141
+ Pass the url that you want to scrape plus any options from the ones available in the [Scraper API documentation](https://proxycrawl.com/docs/scraper-api/parameters).
142
+
143
+ ```ruby
144
+ scraper_api.get(url, options)
145
+ ```
146
+
147
+ Example:
148
+
149
+ ```ruby
150
+ begin
151
+ response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
152
+ puts response.status_code
153
+ puts response.body
154
+ rescue => exception
155
+ puts exception.backtrace
156
+ end
157
+ ```
158
+
159
+ ## Leads API usage
160
+
161
+ Initialize with your Leads API token and call the `get` method.
162
+
163
+ ```ruby
164
+ leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
165
+
166
+ begin
167
+ response = leads_api.get('stripe.com')
168
+ puts response.status_code
169
+ puts response.body
170
+ rescue => exception
171
+ puts exception.backtrace
172
+ end
173
+ ```
174
+
127
175
  If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
128
176
 
129
177
  ## Development
@@ -143,3 +191,7 @@ The gem is available as open source under the terms of the [MIT License](http://
143
191
  ## Code of Conduct
144
192
 
145
193
  Everyone interacting in the Proxycrawl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/proxycrawl/proxycrawl-ruby/blob/master/CODE_OF_CONDUCT.md).
194
+
195
+ ---
196
+
197
+ Copyright 2020 ProxyCrawl
@@ -1,5 +1,9 @@
1
- require "proxycrawl/version"
1
+ # frozen_string_literal: true
2
+
3
+ require 'proxycrawl/version'
2
4
  require 'proxycrawl/api'
5
+ require 'proxycrawl/scraper_api'
6
+ require 'proxycrawl/leads_api'
3
7
 
4
8
  module ProxyCrawl
5
9
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require 'net/http'
3
4
  require 'json'
4
5
  require 'uri'
@@ -7,8 +8,6 @@ module ProxyCrawl
7
8
  class API
8
9
  attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
9
10
 
10
- BASE_URL = 'https://api.proxycrawl.com'
11
-
12
11
  INVALID_TOKEN = 'Token is required'
13
12
  INVALID_URL = 'URL is required'
14
13
 
@@ -58,15 +57,19 @@ module ProxyCrawl
58
57
 
59
58
  private
60
59
 
60
+ def base_url
61
+ 'https://api.proxycrawl.com'
62
+ end
63
+
61
64
  def prepare_uri(url, options)
62
- uri = URI(BASE_URL)
65
+ uri = URI(base_url)
63
66
  uri.query = URI.encode_www_form({ token: @token, url: url }.merge(options))
64
67
 
65
68
  uri
66
69
  end
67
70
 
68
71
  def prepare_response(response, format)
69
- if format == 'json'
72
+ if format == 'json' || base_url.include?('/scraper')
70
73
  @status_code = response.code.to_i
71
74
  @body = response.body
72
75
  else
@@ -78,4 +81,4 @@ module ProxyCrawl
78
81
  end
79
82
  end
80
83
  end
81
- end
84
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'json'
5
+ require 'uri'
6
+
7
+ module ProxyCrawl
8
+ class LeadsAPI
9
+ attr_reader :token, :body, :status_code
10
+
11
+ INVALID_TOKEN = 'Token is required'
12
+ INVALID_DOMAIN = 'Domain is required'
13
+
14
+ def initialize(options = {})
15
+ raise INVALID_TOKEN if options[:token].nil?
16
+
17
+ @token = options[:token]
18
+ end
19
+
20
+ def get(domain)
21
+ raise INVALID_DOMAIN if domain.empty?
22
+
23
+ uri = URI('https://api.proxycrawl.com/leads')
24
+ uri.query = URI.encode_www_form({ token: token, domain: domain })
25
+
26
+ response = Net::HTTP.get_response(uri)
27
+
28
+ @status_code = response.code.to_i
29
+ @body = response.body
30
+
31
+ self
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyCrawl
4
+ class ScraperAPI < ProxyCrawl::API
5
+
6
+ def post
7
+ raise 'Only GET is allowed for the ScraperAPI'
8
+ end
9
+
10
+ private
11
+
12
+ def base_url
13
+ 'https://api.proxycrawl.com/scraper'
14
+ end
15
+ end
16
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyCrawl
2
- VERSION = "0.2.0"
4
+ VERSION = '0.2.1'
3
5
  end
@@ -26,6 +26,6 @@ Gem::Specification.new do |spec|
26
26
 
27
27
  spec.add_development_dependency "rspec", "~> 3.2"
28
28
  spec.add_development_dependency "webmock", "~> 3.4"
29
- spec.add_development_dependency "bundler", "~> 1.15"
30
- spec.add_development_dependency "rake", "~> 10.0"
29
+ spec.add_development_dependency "bundler", "~> 2.0"
30
+ spec.add_development_dependency "rake", "~> 12.3.3"
31
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-06-07 00:00:00.000000000 Z
11
+ date: 2020-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -44,28 +44,28 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.15'
47
+ version: '2.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.15'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10.0'
61
+ version: 12.3.3
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '10.0'
68
+ version: 12.3.3
69
69
  description: Ruby based client for the ProxyCrawl API that helps developers crawl
70
70
  or scrape thousands of web pages anonymously
71
71
  email:
@@ -84,6 +84,8 @@ files:
84
84
  - bin/setup
85
85
  - lib/proxycrawl.rb
86
86
  - lib/proxycrawl/api.rb
87
+ - lib/proxycrawl/leads_api.rb
88
+ - lib/proxycrawl/scraper_api.rb
87
89
  - lib/proxycrawl/version.rb
88
90
  - proxycrawl.gemspec
89
91
  homepage: https://github.com/proxycrawl/proxycrawl-ruby
@@ -105,8 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
107
  - !ruby/object:Gem::Version
106
108
  version: '0'
107
109
  requirements: []
108
- rubyforge_project:
109
- rubygems_version: 2.5.2
110
+ rubygems_version: 3.1.4
110
111
  signing_key:
111
112
  specification_version: 4
112
113
  summary: ProxyCrawl API client for web scraping and crawling