proxycrawl 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
4
- data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
3
+ metadata.gz: 8217ab7a72ae67d28e565375f1903aa2c485c250ebf17bd4afa24e03f1d124b1
4
+ data.tar.gz: 8626caed930b16ef6287d9075b45d430f6e01de11565abc5d15352908e0b187c
5
5
  SHA512:
6
- metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
7
- data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec
6
+ metadata.gz: c60c5baea5a6f7638a5d0ee773f1225a05b40d31db781d5e44db6b3673c0c02d4d32a868663ddaf8476b756dae68ed6b49d4e682b50bd74dc874e7759a604fc2
7
+ data.tar.gz: 336172ab96bd5b80f3a44ea1fe8efd7717eaadd3ad6b06f2a534ca9df2177b6e42fc8e02ee7595705879bfce95b8788d22789ed8a9ae82d1805087a33f636623
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2020 ProxyCrawl
3
+ Copyright (c) 2021 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -174,6 +174,57 @@ end
174
174
 
175
175
  If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
176
176
 
177
+
178
+ ## Screenshots API usage
179
+
180
+ Initialize with your Screenshots API token and call the `get` method.
181
+
182
+ ```ruby
183
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
184
+
185
+ begin
186
+ response = screenshots_api.get('https://www.apple.com')
187
+ puts response.status_code
188
+ puts response.screenshot_path # do something with screenshot_path here
189
+ rescue => exception
190
+ puts exception.backtrace
191
+ end
192
+ ```
193
+
194
+ or using a block
195
+
196
+ ```ruby
197
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
198
+
199
+ begin
200
+ response = screenshots_api.get('https://www.apple.com') do |file|
201
+ # do something (reading/writing) with the image file here
202
+ end
203
+ puts response.status_code
204
+ rescue => exception
205
+ puts exception.backtrace
206
+ end
207
+ ```
208
+
209
+ or specifying a file path
210
+
211
+ ```ruby
212
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
213
+
214
+ begin
215
+ response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
216
+ # do something (reading/writing) with the image file here
217
+ end
218
+ puts response.status_code
219
+ rescue => exception
220
+ puts exception.backtrace
221
+ end
222
+ ```
223
+
224
+ Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
225
+
226
+ If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
227
+
177
228
  ## Development
178
229
 
179
230
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -194,4 +245,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
194
245
 
195
246
  ---
196
247
 
197
- Copyright 2020 ProxyCrawl
248
+ Copyright 2021 ProxyCrawl
data/lib/proxycrawl.rb CHANGED
@@ -4,6 +4,7 @@ require 'proxycrawl/version'
4
4
  require 'proxycrawl/api'
5
5
  require 'proxycrawl/scraper_api'
6
6
  require 'proxycrawl/leads_api'
7
+ require 'proxycrawl/screenshots_api'
7
8
 
8
9
  module ProxyCrawl
9
10
  end
@@ -70,15 +70,19 @@ module ProxyCrawl
70
70
 
71
71
  def prepare_response(response, format)
72
72
  if format == 'json' || base_url.include?('/scraper')
73
+ json_body = JSON.parse(response.body)
74
+ @original_status = json_body['original_status'].to_i
75
+ @pc_status = json_body['pc_status'].to_i
76
+ @url = json_body['url']
73
77
  @status_code = response.code.to_i
74
- @body = response.body
75
78
  else
76
79
  @original_status = response['original_status'].to_i
77
80
  @status_code = response.code.to_i
78
81
  @pc_status = response['pc_status'].to_i
79
82
  @url = response['url']
80
- @body = response.body
81
83
  end
84
+
85
+ @body = response.body
82
86
  end
83
87
  end
84
88
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module ProxyCrawl
4
4
  class ScraperAPI < ProxyCrawl::API
5
+ attr_reader :remaining_requests
5
6
 
6
7
  def post
7
8
  raise 'Only GET is allowed for the ScraperAPI'
@@ -9,6 +10,12 @@ module ProxyCrawl
9
10
 
10
11
  private
11
12
 
13
+ def prepare_response(response, format)
14
+ super(response, format)
15
+ json_body = JSON.parse(response.body)
16
+ @remaining_requests = json_body['remaining_requests'].to_i
17
+ end
18
+
12
19
  def base_url
13
20
  'https://api.proxycrawl.com/scraper'
14
21
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'tmpdir'
5
+
6
+ module ProxyCrawl
7
+ class ScreenshotsAPI < ProxyCrawl::API
8
+ attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url
9
+
10
+ INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
11
+ SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)$/.freeze
12
+
13
+ def post
14
+ raise 'Only GET is allowed for the ScreenshotsAPI'
15
+ end
16
+
17
+ def get(url, options = {})
18
+ screenshot_path = options.delete(:save_to_path) || generate_file_path
19
+ raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path
20
+
21
+ response = super(url, options)
22
+ file = File.open(screenshot_path, 'w+')
23
+ file.write(response.body&.force_encoding('UTF-8'))
24
+ @screenshot_path = screenshot_path
25
+ yield(file) if block_given?
26
+ response
27
+ ensure
28
+ file&.close
29
+ end
30
+
31
+ private
32
+
33
+ def prepare_response(response, format)
34
+ super(response, format)
35
+ @remaining_requests = response['remaining_requests'].to_i
36
+ @success = response['success'] == 'true'
37
+ @screenshot_url = response['screenshot_url']
38
+ end
39
+
40
+ def base_url
41
+ 'https://api.proxycrawl.com/screenshots'
42
+ end
43
+
44
+ def generate_file_name
45
+ "#{SecureRandom.urlsafe_base64}.jpg"
46
+ end
47
+
48
+ def generate_file_path
49
+ File.join(Dir.tmpdir, generate_file_name)
50
+ end
51
+ end
52
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.2.1'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-28 00:00:00.000000000 Z
11
+ date: 2021-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -86,6 +86,7 @@ files:
86
86
  - lib/proxycrawl/api.rb
87
87
  - lib/proxycrawl/leads_api.rb
88
88
  - lib/proxycrawl/scraper_api.rb
89
+ - lib/proxycrawl/screenshots_api.rb
89
90
  - lib/proxycrawl/version.rb
90
91
  - proxycrawl.gemspec
91
92
  homepage: https://github.com/proxycrawl/proxycrawl-ruby