RubyGems - proxycrawl - Versions diffs - 0.2.1 → 0.3.0 - Mend

proxycrawl 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/LICENSE.txt +1 -1
data/README.md +52 -1
data/lib/proxycrawl.rb +1 -0
data/lib/proxycrawl/api.rb +6 -2
data/lib/proxycrawl/scraper_api.rb +7 -0
data/lib/proxycrawl/screenshots_api.rb +52 -0
data/lib/proxycrawl/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
-  data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
+  metadata.gz: 8217ab7a72ae67d28e565375f1903aa2c485c250ebf17bd4afa24e03f1d124b1
+  data.tar.gz: 8626caed930b16ef6287d9075b45d430f6e01de11565abc5d15352908e0b187c
 SHA512:
-  metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
-  data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec
+  metadata.gz: c60c5baea5a6f7638a5d0ee773f1225a05b40d31db781d5e44db6b3673c0c02d4d32a868663ddaf8476b756dae68ed6b49d4e682b50bd74dc874e7759a604fc2
+  data.tar.gz: 336172ab96bd5b80f3a44ea1fe8efd7717eaadd3ad6b06f2a534ca9df2177b6e42fc8e02ee7595705879bfce95b8788d22789ed8a9ae82d1805087a33f636623

data/LICENSE.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 The MIT License (MIT)
-Copyright (c) 2020 ProxyCrawl
+Copyright (c) 2021 ProxyCrawl
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

data/README.md CHANGED Viewed

@@ -174,6 +174,57 @@ end
 If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
+## Screenshots API usage
+Initialize with your Screenshots API token and call the `get` method.
+```ruby
+screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
+begin
+  response = screenshots_api.get('https://www.apple.com')
+  puts response.status_code
+  puts response.screenshot_path # do something with screenshot_path here
+rescue => exception
+  puts exception.backtrace
+end
+```
+or using a block
+```ruby
+screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
+begin
+  response = screenshots_api.get('https://www.apple.com') do |file|
+    # do something (reading/writing) with the image file here
+  end
+  puts response.status_code
+rescue => exception
+  puts exception.backtrace
+end
+```
+or specifying a file path
+```ruby
+screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
+begin
+  response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
+    # do something (reading/writing) with the image file here
+  end
+  puts response.status_code
+rescue => exception
+  puts exception.backtrace
+end
+```
+Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
+If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -194,4 +245,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
 ---
-Copyright 2020 ProxyCrawl
+Copyright 2021 ProxyCrawl

data/lib/proxycrawl.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'proxycrawl/version'
 require 'proxycrawl/api'
 require 'proxycrawl/scraper_api'
 require 'proxycrawl/leads_api'
+require 'proxycrawl/screenshots_api'
 module ProxyCrawl
 end

data/lib/proxycrawl/api.rb CHANGED Viewed

@@ -70,15 +70,19 @@ module ProxyCrawl
     def prepare_response(response, format)
       if format == 'json' || base_url.include?('/scraper')
+        json_body = JSON.parse(response.body)
+        @original_status = json_body['original_status'].to_i
+        @pc_status = json_body['pc_status'].to_i
+        @url = json_body['url']
         @status_code = response.code.to_i
-        @body = response.body
       else
         @original_status = response['original_status'].to_i
         @status_code = response.code.to_i
         @pc_status = response['pc_status'].to_i
         @url = response['url']
-        @body = response.body
       end
+      @body = response.body
     end
   end
 end

data/lib/proxycrawl/scraper_api.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 module ProxyCrawl
   class ScraperAPI < ProxyCrawl::API
+    attr_reader :remaining_requests
     def post
       raise 'Only GET is allowed for the ScraperAPI'
@@ -9,6 +10,12 @@ module ProxyCrawl
     private
+    def prepare_response(response, format)
+      super(response, format)
+      json_body = JSON.parse(response.body)
+      @remaining_requests = json_body['remaining_requests'].to_i
+    end
     def base_url
       'https://api.proxycrawl.com/scraper'
     end

data/lib/proxycrawl/screenshots_api.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+require 'securerandom'
+require 'tmpdir'
+module ProxyCrawl
+  class ScreenshotsAPI < ProxyCrawl::API
+    attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url
+    INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
+    SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)$/.freeze
+    def post
+      raise 'Only GET is allowed for the ScreenshotsAPI'
+    end
+    def get(url, options = {})
+      screenshot_path = options.delete(:save_to_path) || generate_file_path
+      raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path
+      response = super(url, options)
+      file = File.open(screenshot_path, 'w+')
+      file.write(response.body&.force_encoding('UTF-8'))
+      @screenshot_path = screenshot_path
+      yield(file) if block_given?
+      response
+    ensure
+      file&.close
+    end
+    private
+    def prepare_response(response, format)
+      super(response, format)
+      @remaining_requests = response['remaining_requests'].to_i
+      @success = response['success'] == 'true'
+      @screenshot_url = response['screenshot_url']
+    end
+    def base_url
+      'https://api.proxycrawl.com/screenshots'
+    end
+    def generate_file_name
+      "#{SecureRandom.urlsafe_base64}.jpg"
+    end
+    def generate_file_path
+      File.join(Dir.tmpdir, generate_file_name)
+    end
+  end
+end

data/lib/proxycrawl/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module ProxyCrawl
-  VERSION = '0.2.1'
+  VERSION = '0.3.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: proxycrawl
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.0
 platform: ruby
 authors:
 - proxycrawl
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-10-28 00:00:00.000000000 Z
+date: 2021-07-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -86,6 +86,7 @@ files:
 - lib/proxycrawl/api.rb
 - lib/proxycrawl/leads_api.rb
 - lib/proxycrawl/scraper_api.rb
+- lib/proxycrawl/screenshots_api.rb
 - lib/proxycrawl/version.rb
 - proxycrawl.gemspec
 homepage: https://github.com/proxycrawl/proxycrawl-ruby