RubyGems - proxycrawl - Versions diffs - 0.2.0 → 0.2.1 - Mend

proxycrawl 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +5 -5
data/LICENSE.txt +1 -1
data/README.md +56 -4
data/lib/proxycrawl.rb +5 -1
data/lib/proxycrawl/api.rb +8 -5
data/lib/proxycrawl/leads_api.rb +34 -0
data/lib/proxycrawl/scraper_api.rb +16 -0
data/lib/proxycrawl/version.rb +3 -1
data/proxycrawl.gemspec +2 -2
metadata +9 -8

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: ab95c07f3180f82c6b77af90fa5c947fc2af592e
-  data.tar.gz: daa80f3339e9cefe929a9600c0865319cf790bbd
+SHA256:
+  metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
+  data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
 SHA512:
-  metadata.gz: 5f9ce2ddfdfb1e2865f64c3a7f505fb6dadaaacb9a5a3c5ef337af1db83ded51dcb4d8e6bb67269876365f2f190f1231dba3477ae683f784aabd9770735e6133
-  data.tar.gz: b6eedbe01aa37e9a7bb3bd481a761460b98c1ba6be5f48e51fbcf5326e279bba7badc6eecc1337d6fba929d0326e12be29ba47b9a31cbe0894ba092f7a166ae2
+  metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
+  data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec

data/LICENSE.txt CHANGED

@@ -1,6 +1,6 @@
 The MIT License (MIT)
-Copyright (c) 2018 ProxyCrawl
+Copyright (c) 2020 ProxyCrawl
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

data/README.md CHANGED

@@ -1,4 +1,4 @@
-# Proxycrawl
+# ProxyCrawl
 Dependency free gem for scraping and crawling websites using the ProxyCrawl API.
@@ -18,14 +18,20 @@ Or install it yourself as:
     $ gem install proxycrawl
-## Usage
+## Crawling API Usage
+Require the gem in your project
+```ruby
+require 'proxycrawl'
+```
 Initialize the API with one of your account tokens, either normal or javascript token. Then make get or post requests accordingly.
-You can get a token for free by creating a ProxyCrawl account and 1000 free testing requests. You can use them for tcp calls or javascript calls or both.
+You can get a token for free by [creating a ProxyCrawl account](https://proxycrawl.com/signup), which comes with 1000 free testing requests. You can use them for tcp calls or javascript calls or both.
 ```ruby
-  api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
+api = ProxyCrawl::API.new(token: 'YOUR_TOKEN')
 ```
 ### GET requests
@@ -124,6 +130,48 @@ puts response.original_status
 puts response.pc_status
 ```
+## Scraper API usage
+Initialize the Scraper API using your normal token and call the `get` method.
+```ruby
+scraper_api = ProxyCrawl::ScraperAPI.new(token: 'YOUR_TOKEN')
+```
+Pass the url that you want to scrape plus any options from the ones available in the [Scraper API documentation](https://proxycrawl.com/docs/scraper-api/parameters).
+```ruby
+scraper_api.get(url, options)
+```
+Example:
+```ruby
+begin
+  response = scraper_api.get('https://www.amazon.com/Halo-SleepSack-Swaddle-Triangle-Neutral/dp/B01LAG1TOS')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
+## Leads API usage
+Initialize with your Leads API token and call the `get` method.
+```ruby
+leads_api = ProxyCrawl::LeadsAPI.new(token: 'YOUR_TOKEN')
+begin
+  response = leads_api.get('stripe.com')
+  puts response.status_code
+  puts response.body
+rescue => exception
+  puts exception.backtrace
+end
+```
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 ## Development
@@ -143,3 +191,7 @@ The gem is available as open source under the terms of the [MIT License](http://
 ## Code of Conduct
 Everyone interacting in the ProxyCrawl project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/proxycrawl/proxycrawl-ruby/blob/master/CODE_OF_CONDUCT.md).
+---
+Copyright 2020 ProxyCrawl

data/lib/proxycrawl.rb CHANGED

@@ -1,5 +1,9 @@
-require "proxycrawl/version"
+# frozen_string_literal: true
+require 'proxycrawl/version'
 require 'proxycrawl/api'
+require 'proxycrawl/scraper_api'
+require 'proxycrawl/leads_api'
 module ProxyCrawl
 end

data/lib/proxycrawl/api.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 require 'net/http'
 require 'json'
 require 'uri'
@@ -7,8 +8,6 @@ module ProxyCrawl
   class API
     attr_reader :token, :body, :status_code, :original_status, :pc_status, :url
-    BASE_URL = 'https://api.proxycrawl.com'
     INVALID_TOKEN = 'Token is required'
     INVALID_URL = 'URL is required'
@@ -58,15 +57,19 @@ module ProxyCrawl
     private
+    def base_url
+      'https://api.proxycrawl.com'
+    end
     def prepare_uri(url, options)
-      uri = URI(BASE_URL)
+      uri = URI(base_url)
       uri.query = URI.encode_www_form({ token: @token, url: url }.merge(options))
       uri
     end
     def prepare_response(response, format)
-      if format == 'json'
+      if format == 'json' || base_url.include?('/scraper')
         @status_code = response.code.to_i
         @body = response.body
       else
@@ -78,4 +81,4 @@ module ProxyCrawl
       end
     end
   end
-end
+end

data/lib/proxycrawl/leads_api.rb ADDED

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+require 'net/http'
+require 'json'
+require 'uri'
+module ProxyCrawl
+  class LeadsAPI
+    attr_reader :token, :body, :status_code
+    INVALID_TOKEN = 'Token is required'
+    INVALID_DOMAIN = 'Domain is required'
+    def initialize(options = {})
+      raise INVALID_TOKEN if options[:token].nil?
+      @token = options[:token]
+    end
+    def get(domain)
+      raise INVALID_DOMAIN if domain.empty?
+      uri = URI('https://api.proxycrawl.com/leads')
+      uri.query = URI.encode_www_form({ token: token, domain: domain })
+      response = Net::HTTP.get_response(uri)
+      @status_code = response.code.to_i
+      @body = response.body
+      self
+    end
+  end
+end

data/lib/proxycrawl/scraper_api.rb ADDED

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+module ProxyCrawl
+  class ScraperAPI < ProxyCrawl::API
+    def post
+      raise 'Only GET is allowed for the ScraperAPI'
+    end
+    private
+    def base_url
+      'https://api.proxycrawl.com/scraper'
+    end
+  end
+end

data/lib/proxycrawl/version.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module ProxyCrawl
-  VERSION = "0.2.0"
+  VERSION = '0.2.1'
 end

data/proxycrawl.gemspec CHANGED

@@ -26,6 +26,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rspec", "~> 3.2"
   spec.add_development_dependency "webmock", "~> 3.4"
-  spec.add_development_dependency "bundler", "~> 1.15"
-  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "bundler", "~> 2.0"
+  spec.add_development_dependency "rake", "~> 12.3.3"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: proxycrawl
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - proxycrawl
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-06-07 00:00:00.000000000 Z
+date: 2020-10-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -44,28 +44,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.15'
+        version: '2.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.15'
+        version: '2.0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: 12.3.3
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: 12.3.3
 description: Ruby based client for the ProxyCrawl API that helps developers crawl
   or scrape thousands of web pages anonymously
 email:
@@ -84,6 +84,8 @@ files:
 - bin/setup
 - lib/proxycrawl.rb
 - lib/proxycrawl/api.rb
+- lib/proxycrawl/leads_api.rb
+- lib/proxycrawl/scraper_api.rb
 - lib/proxycrawl/version.rb
 - proxycrawl.gemspec
 homepage: https://github.com/proxycrawl/proxycrawl-ruby
@@ -105,8 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.5.2
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: ProxyCrawl API client for web scraping and crawling