proxycrawl 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +52 -1
- data/lib/proxycrawl.rb +1 -0
- data/lib/proxycrawl/api.rb +6 -2
- data/lib/proxycrawl/scraper_api.rb +7 -0
- data/lib/proxycrawl/screenshots_api.rb +52 -0
- data/lib/proxycrawl/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8217ab7a72ae67d28e565375f1903aa2c485c250ebf17bd4afa24e03f1d124b1
|
4
|
+
data.tar.gz: 8626caed930b16ef6287d9075b45d430f6e01de11565abc5d15352908e0b187c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c60c5baea5a6f7638a5d0ee773f1225a05b40d31db781d5e44db6b3673c0c02d4d32a868663ddaf8476b756dae68ed6b49d4e682b50bd74dc874e7759a604fc2
|
7
|
+
data.tar.gz: 336172ab96bd5b80f3a44ea1fe8efd7717eaadd3ad6b06f2a534ca9df2177b6e42fc8e02ee7595705879bfce95b8788d22789ed8a9ae82d1805087a33f636623
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -174,6 +174,57 @@ end
|
|
174
174
|
|
175
175
|
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
|
176
176
|
|
177
|
+
|
178
|
+
## Screenshots API usage
|
179
|
+
|
180
|
+
Initialize with your Screenshots API token and call the `get` method.
|
181
|
+
|
182
|
+
```ruby
|
183
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
184
|
+
|
185
|
+
begin
|
186
|
+
response = screenshots_api.get('https://www.apple.com')
|
187
|
+
puts response.status_code
|
188
|
+
puts response.screenshot_path # do something with screenshot_path here
|
189
|
+
rescue => exception
|
190
|
+
puts exception.backtrace
|
191
|
+
end
|
192
|
+
```
|
193
|
+
|
194
|
+
or using a block
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
198
|
+
|
199
|
+
begin
|
200
|
+
response = screenshots_api.get('https://www.apple.com') do |file|
|
201
|
+
# do something (reading/writing) with the image file here
|
202
|
+
end
|
203
|
+
puts response.status_code
|
204
|
+
rescue => exception
|
205
|
+
puts exception.backtrace
|
206
|
+
end
|
207
|
+
```
|
208
|
+
|
209
|
+
or specifying a file path
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
213
|
+
|
214
|
+
begin
|
215
|
+
response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
|
216
|
+
# do something (reading/writing) with the image file here
|
217
|
+
end
|
218
|
+
puts response.status_code
|
219
|
+
rescue => exception
|
220
|
+
puts exception.backtrace
|
221
|
+
end
|
222
|
+
```
|
223
|
+
|
224
|
+
Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash as its second argument.
|
225
|
+
|
226
|
+
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
|
227
|
+
|
177
228
|
## Development
|
178
229
|
|
179
230
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -194,4 +245,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
|
|
194
245
|
|
195
246
|
---
|
196
247
|
|
197
|
-
Copyright
|
248
|
+
Copyright 2021 ProxyCrawl
|
data/lib/proxycrawl.rb
CHANGED
data/lib/proxycrawl/api.rb
CHANGED
@@ -70,15 +70,19 @@ module ProxyCrawl
|
|
70
70
|
|
71
71
|
def prepare_response(response, format)
|
72
72
|
if format == 'json' || base_url.include?('/scraper')
|
73
|
+
json_body = JSON.parse(response.body)
|
74
|
+
@original_status = json_body['original_status'].to_i
|
75
|
+
@pc_status = json_body['pc_status'].to_i
|
76
|
+
@url = json_body['url']
|
73
77
|
@status_code = response.code.to_i
|
74
|
-
@body = response.body
|
75
78
|
else
|
76
79
|
@original_status = response['original_status'].to_i
|
77
80
|
@status_code = response.code.to_i
|
78
81
|
@pc_status = response['pc_status'].to_i
|
79
82
|
@url = response['url']
|
80
|
-
@body = response.body
|
81
83
|
end
|
84
|
+
|
85
|
+
@body = response.body
|
82
86
|
end
|
83
87
|
end
|
84
88
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
module ProxyCrawl
|
4
4
|
class ScraperAPI < ProxyCrawl::API
|
5
|
+
attr_reader :remaining_requests
|
5
6
|
|
6
7
|
def post
|
7
8
|
raise 'Only GET is allowed for the ScraperAPI'
|
@@ -9,6 +10,12 @@ module ProxyCrawl
|
|
9
10
|
|
10
11
|
private
|
11
12
|
|
13
|
+
def prepare_response(response, format)
|
14
|
+
super(response, format)
|
15
|
+
json_body = JSON.parse(response.body)
|
16
|
+
@remaining_requests = json_body['remaining_requests'].to_i
|
17
|
+
end
|
18
|
+
|
12
19
|
def base_url
|
13
20
|
'https://api.proxycrawl.com/scraper'
|
14
21
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require 'tmpdir'
|
5
|
+
|
6
|
+
module ProxyCrawl
|
7
|
+
class ScreenshotsAPI < ProxyCrawl::API
|
8
|
+
attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url
|
9
|
+
|
10
|
+
INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
|
11
|
+
SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)$/.freeze
|
12
|
+
|
13
|
+
def post
|
14
|
+
raise 'Only GET is allowed for the ScreenshotsAPI'
|
15
|
+
end
|
16
|
+
|
17
|
+
def get(url, options = {})
|
18
|
+
screenshot_path = options.delete(:save_to_path) || generate_file_path
|
19
|
+
raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path
|
20
|
+
|
21
|
+
response = super(url, options)
|
22
|
+
file = File.open(screenshot_path, 'w+')
|
23
|
+
file.write(response.body&.force_encoding('UTF-8'))
|
24
|
+
@screenshot_path = screenshot_path
|
25
|
+
yield(file) if block_given?
|
26
|
+
response
|
27
|
+
ensure
|
28
|
+
file&.close
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def prepare_response(response, format)
|
34
|
+
super(response, format)
|
35
|
+
@remaining_requests = response['remaining_requests'].to_i
|
36
|
+
@success = response['success'] == 'true'
|
37
|
+
@screenshot_url = response['screenshot_url']
|
38
|
+
end
|
39
|
+
|
40
|
+
def base_url
|
41
|
+
'https://api.proxycrawl.com/screenshots'
|
42
|
+
end
|
43
|
+
|
44
|
+
def generate_file_name
|
45
|
+
"#{SecureRandom.urlsafe_base64}.jpg"
|
46
|
+
end
|
47
|
+
|
48
|
+
def generate_file_path
|
49
|
+
File.join(Dir.tmpdir, generate_file_name)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/proxycrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxycrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- proxycrawl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/proxycrawl/api.rb
|
87
87
|
- lib/proxycrawl/leads_api.rb
|
88
88
|
- lib/proxycrawl/scraper_api.rb
|
89
|
+
- lib/proxycrawl/screenshots_api.rb
|
89
90
|
- lib/proxycrawl/version.rb
|
90
91
|
- proxycrawl.gemspec
|
91
92
|
homepage: https://github.com/proxycrawl/proxycrawl-ruby
|