proxycrawl 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +52 -1
- data/lib/proxycrawl.rb +1 -0
- data/lib/proxycrawl/api.rb +6 -2
- data/lib/proxycrawl/scraper_api.rb +7 -0
- data/lib/proxycrawl/screenshots_api.rb +52 -0
- data/lib/proxycrawl/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8217ab7a72ae67d28e565375f1903aa2c485c250ebf17bd4afa24e03f1d124b1
|
4
|
+
data.tar.gz: 8626caed930b16ef6287d9075b45d430f6e01de11565abc5d15352908e0b187c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c60c5baea5a6f7638a5d0ee773f1225a05b40d31db781d5e44db6b3673c0c02d4d32a868663ddaf8476b756dae68ed6b49d4e682b50bd74dc874e7759a604fc2
|
7
|
+
data.tar.gz: 336172ab96bd5b80f3a44ea1fe8efd7717eaadd3ad6b06f2a534ca9df2177b6e42fc8e02ee7595705879bfce95b8788d22789ed8a9ae82d1805087a33f636623
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -174,6 +174,57 @@ end
|
|
174
174
|
|
175
175
|
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
|
176
176
|
|
177
|
+
|
178
|
+
## Screenshots API usage
|
179
|
+
|
180
|
+
Initialize with your Screenshots API token and call the `get` method.
|
181
|
+
|
182
|
+
```ruby
|
183
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
184
|
+
|
185
|
+
begin
|
186
|
+
response = screenshots_api.get('https://www.apple.com')
|
187
|
+
puts response.status_code
|
188
|
+
puts response.screenshot_path # do something with screenshot_path here
|
189
|
+
rescue => exception
|
190
|
+
puts exception.backtrace
|
191
|
+
end
|
192
|
+
```
|
193
|
+
|
194
|
+
or using a block
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
198
|
+
|
199
|
+
begin
|
200
|
+
response = screenshots_api.get('https://www.apple.com') do |file|
|
201
|
+
# do something (reading/writing) with the image file here
|
202
|
+
end
|
203
|
+
puts response.status_code
|
204
|
+
rescue => exception
|
205
|
+
puts exception.backtrace
|
206
|
+
end
|
207
|
+
```
|
208
|
+
|
209
|
+
or specifying a file path
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
screenshots_api = ProxyCrawl::ScreenshotsAPI.new(token: 'YOUR_TOKEN')
|
213
|
+
|
214
|
+
begin
|
215
|
+
response = screenshots_api.get('https://www.apple.com', save_to_path: '~/screenshot.jpg') do |file|
|
216
|
+
# do something (reading/writing) with the image file here
|
217
|
+
end
|
218
|
+
puts response.status_code
|
219
|
+
rescue => exception
|
220
|
+
puts exception.backtrace
|
221
|
+
end
|
222
|
+
```
|
223
|
+
|
224
|
+
Note that the `screenshots_api.get(url, options)` method accepts an [options](https://proxycrawl.com/docs/screenshots-api/parameters) hash.
|
225
|
+
|
226
|
+
If you have questions or need help using the library, please open an issue or [contact us](https://proxycrawl.com/contact).
|
227
|
+
|
177
228
|
## Development
|
178
229
|
|
179
230
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -194,4 +245,4 @@ Everyone interacting in the Proxycrawl project’s codebases, issue trackers, ch
|
|
194
245
|
|
195
246
|
---
|
196
247
|
|
197
|
-
Copyright
|
248
|
+
Copyright 2021 ProxyCrawl
|
data/lib/proxycrawl.rb
CHANGED
data/lib/proxycrawl/api.rb
CHANGED
@@ -70,15 +70,19 @@ module ProxyCrawl
|
|
70
70
|
|
71
71
|
def prepare_response(response, format)
|
72
72
|
if format == 'json' || base_url.include?('/scraper')
|
73
|
+
json_body = JSON.parse(response.body)
|
74
|
+
@original_status = json_body['original_status'].to_i
|
75
|
+
@pc_status = json_body['pc_status'].to_i
|
76
|
+
@url = json_body['url']
|
73
77
|
@status_code = response.code.to_i
|
74
|
-
@body = response.body
|
75
78
|
else
|
76
79
|
@original_status = response['original_status'].to_i
|
77
80
|
@status_code = response.code.to_i
|
78
81
|
@pc_status = response['pc_status'].to_i
|
79
82
|
@url = response['url']
|
80
|
-
@body = response.body
|
81
83
|
end
|
84
|
+
|
85
|
+
@body = response.body
|
82
86
|
end
|
83
87
|
end
|
84
88
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
module ProxyCrawl
|
4
4
|
class ScraperAPI < ProxyCrawl::API
|
5
|
+
attr_reader :remaining_requests
|
5
6
|
|
6
7
|
def post
|
7
8
|
raise 'Only GET is allowed for the ScraperAPI'
|
@@ -9,6 +10,12 @@ module ProxyCrawl
|
|
9
10
|
|
10
11
|
private
|
11
12
|
|
13
|
+
def prepare_response(response, format)
|
14
|
+
super(response, format)
|
15
|
+
json_body = JSON.parse(response.body)
|
16
|
+
@remaining_requests = json_body['remaining_requests'].to_i
|
17
|
+
end
|
18
|
+
|
12
19
|
def base_url
|
13
20
|
'https://api.proxycrawl.com/scraper'
|
14
21
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require 'tmpdir'
|
5
|
+
|
6
|
+
module ProxyCrawl
  # Client for the ProxyCrawl Screenshots API.
  #
  # Issues a GET through ProxyCrawl::API and stores the response body as a
  # JPEG file, either at a caller-supplied path or at a generated path inside
  # the system temp directory.
  class ScreenshotsAPI < ProxyCrawl::API
    attr_reader :screenshot_path, :success, :remaining_requests, :screenshot_url

    INVALID_SAVE_TO_PATH_FILENAME = 'Filename must end with .jpg or .jpeg'
    # \z (not $) anchors at the true end of the string, so a value such as
    # "shot.jpg\n" is rejected instead of slipping through the line anchor.
    SAVE_TO_PATH_FILENAME_PATTERN = /.+\.(jpg|JPG|jpeg|JPEG)\z/.freeze

    # POST is not supported by this API.
    #
    # Accepts (and ignores) any arguments so that callers always get the
    # explanatory error below rather than an ArgumentError about arity.
    #
    # @raise [RuntimeError] always
    def post(*)
      raise 'Only GET is allowed for the ScreenshotsAPI'
    end

    # Requests a screenshot of +url+ and writes it to disk.
    #
    # @param url [String] page to capture
    # @param options [Hash] request options forwarded to ProxyCrawl::API#get;
    #   the extra key +:save_to_path+ selects the destination file and is not
    #   forwarded. The caller's hash is left untouched.
    # @yield [file] the open screenshot File (read/write, binary mode), after
    #   the image bytes have been written and flushed; the file is closed when
    #   this method returns
    # @return whatever ProxyCrawl::API#get returns
    # @raise [RuntimeError] if the destination does not end in .jpg/.jpeg
    def get(url, options = {})
      # Work on a copy so removing :save_to_path does not mutate the caller's hash.
      request_options = options.dup
      screenshot_path = request_options.delete(:save_to_path) || generate_file_path
      raise INVALID_SAVE_TO_PATH_FILENAME unless SAVE_TO_PATH_FILENAME_PATTERN =~ screenshot_path

      # File.open does not expand "~"; resolve home-relative paths (as used in
      # the README example) up front, before spending a request.
      screenshot_path = File.expand_path(screenshot_path) if screenshot_path.start_with?('~')

      response = super(url, request_options)
      # Binary mode: the body is image data and must reach the disk
      # byte-for-byte, with no newline translation or transcoding.
      file = File.open(screenshot_path, 'wb+')
      # force_encoding is retained only so the in-memory body keeps the same
      # encoding label as before; in binary mode it does not affect the bytes written.
      file.write(response.body&.force_encoding('UTF-8'))
      # Make the full image visible to a block that re-opens the file by path.
      file.flush
      @screenshot_path = screenshot_path
      yield(file) if block_given?
      response
    ensure
      file&.close
    end

    private

    # Extends the base response handling with the screenshot-specific values
    # read from +response+ by key.
    def prepare_response(response, format)
      super(response, format)
      @remaining_requests = response['remaining_requests'].to_i
      @success = response['success'] == 'true'
      @screenshot_url = response['screenshot_url']
    end

    def base_url
      'https://api.proxycrawl.com/screenshots'
    end

    # Random, URL-safe .jpg file name used when no :save_to_path is given.
    def generate_file_name
      "#{SecureRandom.urlsafe_base64}.jpg"
    end

    # Full path for a generated file name inside the system temp directory.
    def generate_file_path
      File.join(Dir.tmpdir, generate_file_name)
    end
  end
end
|
data/lib/proxycrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxycrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- proxycrawl
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -86,6 +86,7 @@ files:
|
|
86
86
|
- lib/proxycrawl/api.rb
|
87
87
|
- lib/proxycrawl/leads_api.rb
|
88
88
|
- lib/proxycrawl/scraper_api.rb
|
89
|
+
- lib/proxycrawl/screenshots_api.rb
|
89
90
|
- lib/proxycrawl/version.rb
|
90
91
|
- proxycrawl.gemspec
|
91
92
|
homepage: https://github.com/proxycrawl/proxycrawl-ruby
|