snapcrawl 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/snapcrawl.rb +1 -1
- data/lib/snapcrawl/crawler.rb +4 -12
- data/lib/snapcrawl/exceptions.rb +4 -0
- data/lib/snapcrawl/version.rb +1 -1
- metadata +2 -2
- data/lib/snapcrawl/templates/docopt.txt +0 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 327da92cb63a1a8e6f58e58a4806d4e10b2cfa491960306544165be6423c9b3e
|
4
|
+
data.tar.gz: c6f2056f9ca5614a76bce68bdf2f001668ab626764bce89cf1b1bc4a8f68f833
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91b00e39fbf5943501cc7f67eb1c684811d10ae3f5acc0263a3a6259ae64ad51d01d89aba75576990d7517a07a53660ec1f63d13adbbaf5bdd6380b7d9dd8050
|
7
|
+
data.tar.gz: 318d7c11aa087a20a8f5c0dd922e9f11f2eeca1be7c165bfa04d54d775d0688e3d3532434a4987cca039c362bb3817542331ce135f8ec686f0178d6e5fa343e5
|
data/lib/snapcrawl.rb
CHANGED
data/lib/snapcrawl/crawler.rb
CHANGED
@@ -5,16 +5,12 @@ require 'httparty'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'ostruct'
|
7
7
|
require 'pstore'
|
8
|
-
require 'uri'
|
9
8
|
require 'addressable/uri'
|
10
9
|
require 'webshot'
|
11
10
|
|
12
11
|
module Snapcrawl
|
13
12
|
include Colsole
|
14
13
|
|
15
|
-
class MissingPhantomJS < StandardError; end
|
16
|
-
class MissingImageMagick < StandardError; end
|
17
|
-
|
18
14
|
class Crawler
|
19
15
|
include Singleton
|
20
16
|
|
@@ -206,11 +202,7 @@ module Snapcrawl
|
|
206
202
|
|
207
203
|
# Convert relative links to absolute
|
208
204
|
begin
|
209
|
-
link = URI.join( @opts.base, link ).to_s.dup
|
210
|
-
rescue URI::InvalidURIError
|
211
|
-
escaped_link = Addressable::URI.encode link
|
212
|
-
warnings << { link: link, message: "Using escaped link: #{escaped_link}" }
|
213
|
-
link = URI.join( @opts.base, escaped_link ).to_s.dup
|
205
|
+
link = Addressable::URI.join( @opts.base, link ).to_s.dup
|
214
206
|
rescue => e
|
215
207
|
warnings << { link: link, message: "#{e.class} #{e.message}" }
|
216
208
|
next
|
@@ -226,11 +218,11 @@ module Snapcrawl
|
|
226
218
|
end
|
227
219
|
|
228
220
|
def doc
|
229
|
-
@doc ||= File.read
|
221
|
+
@doc ||= File.read docopt
|
230
222
|
end
|
231
223
|
|
232
|
-
def
|
233
|
-
File.expand_path
|
224
|
+
def docopt
|
225
|
+
File.expand_path "docopt.txt", __dir__
|
234
226
|
end
|
235
227
|
|
236
228
|
def opts_from_args(args)
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snapcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
@@ -112,7 +112,7 @@ files:
|
|
112
112
|
- bin/snapcrawl
|
113
113
|
- lib/snapcrawl.rb
|
114
114
|
- lib/snapcrawl/crawler.rb
|
115
|
-
- lib/snapcrawl/templates/docopt.txt
|
115
|
+
- lib/snapcrawl/exceptions.rb
|
116
116
|
- lib/snapcrawl/version.rb
|
117
117
|
homepage: https://github.com/DannyBen/snapcrawl
|
118
118
|
licenses:
|
@@ -1,48 +0,0 @@
|
|
1
|
-
Snapcrawl
|
2
|
-
|
3
|
-
Usage:
|
4
|
-
snapcrawl URL [options]
|
5
|
-
snapcrawl -h | --help
|
6
|
-
snapcrawl -v | --version
|
7
|
-
|
8
|
-
Options:
|
9
|
-
-f, --folder PATH
|
10
|
-
Where to save screenshots [default: snaps]
|
11
|
-
|
12
|
-
-n, --name TEMPLATE
|
13
|
-
Filename template. Include the string '%{url}' anywhere in the name to
|
14
|
-
use the captured URL in the filename [default: %{url}]
|
15
|
-
|
16
|
-
-a, --age SECONDS
|
17
|
-
Number of seconds to consider screenshots fresh [default: 86400]
|
18
|
-
|
19
|
-
-d, --depth LEVELS
|
20
|
-
Number of levels to crawl [default: 1]
|
21
|
-
|
22
|
-
-W, --width PIXELS
|
23
|
-
Screen width in pixels [default: 1280]
|
24
|
-
|
25
|
-
-H, --height PIXELS
|
26
|
-
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
27
|
-
|
28
|
-
-s, --selector SELECTOR
|
29
|
-
CSS selector to capture
|
30
|
-
|
31
|
-
-o, --only REGEX
|
32
|
-
Include only URLs that match REGEX
|
33
|
-
|
34
|
-
-h, --help
|
35
|
-
Show this screen
|
36
|
-
|
37
|
-
-v, --version
|
38
|
-
Show version number
|
39
|
-
|
40
|
-
Examples:
|
41
|
-
snapcrawl example.com
|
42
|
-
snapcrawl example.com -d2 -fscreens
|
43
|
-
snapcrawl example.com -d2 > out.txt 2> err.txt &
|
44
|
-
snapcrawl example.com -W360 -H480
|
45
|
-
snapcrawl example.com --selector "#main-content"
|
46
|
-
snapcrawl example.com --only "products|collections"
|
47
|
-
snapcrawl example.com --name "screenshot-%{url}"
|
48
|
-
snapcrawl example.com --name "`date +%Y%m%d`_%{url}"
|