proxycrawl 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db0f27951f09d662cc5ff949b088c79e4cdf2620aeea573fb25471568d73c811
4
- data.tar.gz: 6dd316888c926279d847e1f2a58c813e2e08ddb72e970ca02cb8d5baedcb145c
3
+ metadata.gz: 8217ab7a72ae67d28e565375f1903aa2c485c250ebf17bd4afa24e03f1d124b1
4
+ data.tar.gz: 8626caed930b16ef6287d9075b45d430f6e01de11565abc5d15352908e0b187c
5
5
  SHA512:
6
- metadata.gz: 96acc3f7de05710c91492e507781648f0b9b32214338a8f727a16f47c2c1d832d1ff9e6f0c8e7733873b99795a18441a25fad0c2da044f6c478586369ab31704
7
- data.tar.gz: 970aa1619a944fa799584286caded25e7c573199738d21944ec1b47d1251c2b1a828e328c711b05043ece5dc61ae97979ec6be0ee4b568a0e89a375dbda8daec
6
+ metadata.gz: c60c5baea5a6f7638a5d0ee773f1225a05b40d31db781d5e44db6b3673c0c02d4d32a868663ddaf8476b756dae68ed6b49d4e682b50bd74dc874e7759a604fc2
7
+ data.tar.gz: 336172ab96bd5b80f3a44ea1fe8efd7717eaadd3ad6b06f2a534ca9df2177b6e42fc8e02ee7595705879bfce95b8788d22789ed8a9ae82d1805087a33f636623
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2020 ProxyCrawl
3
+ Copyright (c) 2021 ProxyCrawl
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -174,6 +174,57 @@ end
174
174
 
175
175
  If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
176
176
 
177
+
178
+ ## Screenshots API usage
179
+
180
+ Initialize with your Screenshots API token and call the `get` method.
181
+
182
+ ```ruby
183
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
184
+
185
+ begin
186
+ response = screenshots_api.get('https://www.apple.com')
187
+ puts response.status_code
188
+ puts response.screenshot_path # do something with screenshot_path here
189
+ rescue => exception
190
+ puts exception.backtrace
191
+ end
192
+ ```
193
+
194
+ or using a block
195
+
196
+ ```ruby
197
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
198
+
199
+ begin
200
+ response = screenshots_api.get('https://www.apple.com') do |file|
201
+ # do something (reading/writing) with the image file here
202
+ end
203
+ puts response.status_code
204
+ rescue => exception
205
+ puts exception.backtrace
206
+ end
207
+ ```
208
+
209
+ or specifying a file path
210
+
211
+ ```ruby
212
+ screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
213
+
214
+ begin
215
+ response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
216
+ # do something (reading/writing) with the image file here
217
+ end
218
+ puts response.status_code
219
+ rescue => exception
220
+ puts exception.backtrace
221
+ end
222
+ ```
223
+
224
+ Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash of Screenshots API parameters.
225
+
226
+ If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
227
+
177
228
  ## Development
178
229
 
179
230
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -194,4 +245,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
194
245
 
195
246
  ---
196
247
 
197
- Copyright 2020 ProxyCrawl
248
+ Copyright 2021 ProxyCrawl
data/lib/proxycrawl.rb CHANGED
@@ -4,6 +4,7 @@ require 'proxycrawl/version'
4
4
  require 'proxycrawl/api'
5
5
  require 'proxycrawl/scraper_api'
6
6
  require 'proxycrawl/leads_api'
7
+ require 'proxycrawl/screenshots_api'
7
8
 
8
9
  module ProxyCrawl
9
10
  end
@@ -70,15 +70,19 @@ module ProxyCrawl
70
70
 
71
71
  def prepare_response(response, format)
72
72
  if format == 'json' || base_url.include?('/scraper')
73
+ json_body = JSON.parse(response.body)
74
+ @original_status = json_body['original_status'].to_i
75
+ @pc_status = json_body['pc_status'].to_i
76
+ @url = json_body['url']
73
77
  @status_code = response.code.to_i
74
- @body = response.body
75
78
  else
76
79
  @original_status = response['original_status'].to_i
77
80
  @status_code = response.code.to_i
78
81
  @pc_status = response['pc_status'].to_i
79
82
  @url = response['url']
80
- @body = response.body
81
83
  end
84
+
85
+ @body = response.body
82
86
  end
83
87
  end
84
88
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module ProxyCrawl
4
4
  class ScraperAPI < ProxyCrawl::API
5
+ attr_reader :remaining_requests
5
6
 
6
7
  def post
7
8
  raise 'Only GET is allowed for the ScraperAPI'
@@ -9,6 +10,12 @@ module ProxyCrawl
9
10
 
10
11
  private
11
12
 
13
+ def prepare_response(response, format)
14
+ super(response, format)
15
+ json_body = JSON.parse(response.body)
16
+ @remaining_requests = json_body['remaining_requests'].to_i
17
+ end
18
+
12
19
  def base_url
13
20
  'https://api.proxycrawl.com/scraper'
14
21
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require 'tmpdir'
5
+
6
+ module ProxyCrawl
7
+ class ScreenshotsAPI < ProxyCrawl::API
8
+ attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url
9
+
10
+ INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
11
+ SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)$/.freeze
12
+
13
+ def post
14
+ raise 'Only GET is allowed for the ScreenshotsAPI'
15
+ end
16
+
17
+ def get(url, options = {})
18
+ screenshot_path = options.delete(:save_to_path) || generate_file_path
19
+ raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path
20
+
21
+ response = super(url, options)
22
+ file = File.open(screenshot_path, 'w+')
23
+ file.write(response.body&.force_encoding('UTF-8'))
24
+ @screenshot_path = screenshot_path
25
+ yield(file) if block_given?
26
+ response
27
+ ensure
28
+ file&.close
29
+ end
30
+
31
+ private
32
+
33
+ def prepare_response(response, format)
34
+ super(response, format)
35
+ @remaining_requests = response['remaining_requests'].to_i
36
+ @success = response['success'] == 'true'
37
+ @screenshot_url = response['screenshot_url']
38
+ end
39
+
40
+ def base_url
41
+ 'https://api.proxycrawl.com/screenshots'
42
+ end
43
+
44
+ def generate_file_name
45
+ "#{SecureRandom.urlsafe_base64}.jpg"
46
+ end
47
+
48
+ def generate_file_path
49
+ File.join(Dir.tmpdir, generate_file_name)
50
+ end
51
+ end
52
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ProxyCrawl
4
- VERSION = '0.2.1'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxycrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - proxycrawl
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-28 00:00:00.000000000 Z
11
+ date: 2021-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -86,6 +86,7 @@ files:
86
86
  - lib/proxycrawl/api.rb
87
87
  - lib/proxycrawl/leads_api.rb
88
88
  - lib/proxycrawl/scraper_api.rb
89
+ - lib/proxycrawl/screenshots_api.rb
89
90
  - lib/proxycrawl/version.rb
90
91
  - proxycrawl.gemspec
91
92
  homepage: https://github.com/proxycrawl/proxycrawl-ruby