snapcrawl 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -24
- data/lib/snapcrawl/crawler.rb +5 -4
- data/lib/snapcrawl/templates/docopt.txt +33 -12
- data/lib/snapcrawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2500895cfd465dd633ac9650fc702f87b684ae4a81aa2feb979a16fad17bedc2
|
4
|
+
data.tar.gz: 042f0e14f8139d3b33bdd0061731c7973f7c445e57e2c474d32c3b226d333acb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7603f985467c47f26b5bbf8c3cebf5654de708d0f7039c0268b4c4824089c259c17b687be482ed3e5813f44fef2274407542c76e3291f064aed1b06c2af1837a
|
7
|
+
data.tar.gz: 3246ea728fa90fc65da1732964492da9383ae0c99a47746d1194f5c991b2efadd333c69478e2c97fd99274acb47fc38773285a53c9fd5505e7a605af08f58cf2
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
Snapcrawl - crawl a website and take screenshots
|
2
2
|
==================================================
|
3
3
|
|
4
|
+
[![Build Status](https://travis-ci.com/DannyBen/snapcrawl.svg?branch=master)](https://travis-ci.com/DannyBen/snapcrawl)
|
4
5
|
[![Gem Version](https://badge.fury.io/rb/snapcrawl.svg)](http://badge.fury.io/rb/snapcrawl)
|
5
6
|
[![Code Climate](https://codeclimate.com/github/DannyBen/snapcrawl/badges/gpa.svg)](https://codeclimate.com/github/DannyBen/snapcrawl)
|
6
7
|
|
@@ -56,22 +57,41 @@ Usage
|
|
56
57
|
Snapcrawl
|
57
58
|
|
58
59
|
Usage:
|
59
|
-
snapcrawl go
|
60
|
+
snapcrawl go URL [options]
|
60
61
|
snapcrawl -h | --help
|
61
62
|
snapcrawl -v | --version
|
62
63
|
|
63
64
|
Options:
|
64
|
-
-f --folder
|
65
|
-
|
66
|
-
|
67
|
-
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
-
|
72
|
-
|
73
|
-
|
74
|
-
-
|
65
|
+
-f, --folder PATH
|
66
|
+
Where to save screenshots [default: snaps]
|
67
|
+
|
68
|
+
-n, --name TEMPLATE
|
69
|
+
Filename template. Include the string '%{url}' anywhere in the name to
|
70
|
+
use the captured URL in the filename [default: %{url}]
|
71
|
+
|
72
|
+
-a, --age SECONDS
|
73
|
+
Number of seconds to consider screenshots fresh [default: 86400]
|
74
|
+
|
75
|
+
-d, --depth LEVELS
|
76
|
+
Number of levels to crawl [default: 1]
|
77
|
+
|
78
|
+
-W, --width PIXELS
|
79
|
+
Screen width in pixels [default: 1280]
|
80
|
+
|
81
|
+
-H, --height PIXELS
|
82
|
+
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
83
|
+
|
84
|
+
-s, --selector SELECTOR
|
85
|
+
CSS selector to capture
|
86
|
+
|
87
|
+
-o, --only REGEX
|
88
|
+
Include only URLs that match REGEX
|
89
|
+
|
90
|
+
-h, --help
|
91
|
+
Show this screen
|
92
|
+
|
93
|
+
-v, --version
|
94
|
+
Show version number
|
75
95
|
|
76
96
|
Examples:
|
77
97
|
snapcrawl go example.com
|
@@ -80,18 +100,8 @@ Usage
|
|
80
100
|
snapcrawl go example.com -W360 -H480
|
81
101
|
snapcrawl go example.com --selector "#main-content"
|
82
102
|
snapcrawl go example.com --only "products|collections"
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
Notes
|
87
|
-
--------------------------------------------------
|
88
|
-
|
89
|
-
If a URL cannot be found, Snapcrawl will report to stderr.
|
90
|
-
You can create a report by running
|
91
|
-
|
92
|
-
$ snapcrawl go example.com 2> err.txt
|
93
|
-
|
94
|
-
|
103
|
+
snapcrawl go example.com --name "screenshot-%{url}"
|
104
|
+
snapcrawl go example.com --name "`date +%Y%m%d`_%{url}"
|
95
105
|
|
96
106
|
---
|
97
107
|
|
data/lib/snapcrawl/crawler.rb
CHANGED
@@ -35,7 +35,7 @@ module Snapcrawl
|
|
35
35
|
def execute(args)
|
36
36
|
raise MissingPhantomJS unless command_exist? "phantomjs"
|
37
37
|
raise MissingImageMagick unless command_exist? "convert"
|
38
|
-
crawl args['
|
38
|
+
crawl args['URL'].dup, opts_from_args(args)
|
39
39
|
end
|
40
40
|
|
41
41
|
def clear_cache
|
@@ -51,7 +51,8 @@ module Snapcrawl
|
|
51
51
|
depth: 1,
|
52
52
|
age: 86400,
|
53
53
|
folder: 'snaps',
|
54
|
-
|
54
|
+
name: '%{url}',
|
55
|
+
base: url,
|
55
56
|
}
|
56
57
|
urls = [protocolize(url)]
|
57
58
|
|
@@ -155,7 +156,7 @@ module Snapcrawl
|
|
155
156
|
|
156
157
|
# Return proper image path for a URL
|
157
158
|
def image_path_for(url)
|
158
|
-
"#{@opts.folder}/#{handelize(url)}
|
159
|
+
"#{@opts.folder}/#{@opts.name}.png" % { url: handelize(url) }
|
159
160
|
end
|
160
161
|
|
161
162
|
# Add protocol to a URL if needed
|
@@ -214,7 +215,7 @@ module Snapcrawl
|
|
214
215
|
|
215
216
|
def opts_from_args(args)
|
216
217
|
opts = {}
|
217
|
-
%w[folder selector only].each do |opt|
|
218
|
+
%w[folder name selector only].each do |opt|
|
218
219
|
opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
|
219
220
|
end
|
220
221
|
|
data/lib/snapcrawl/templates/docopt.txt
CHANGED
@@ -1,22 +1,41 @@
|
|
1
1
|
Snapcrawl
|
2
2
|
|
3
3
|
Usage:
|
4
|
-
snapcrawl go
|
4
|
+
snapcrawl go URL [options]
|
5
5
|
snapcrawl -h | --help
|
6
6
|
snapcrawl -v | --version
|
7
7
|
|
8
8
|
Options:
|
9
|
-
-f --folder
|
10
|
-
|
11
|
-
|
12
|
-
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
-
|
17
|
-
|
18
|
-
|
19
|
-
-
|
9
|
+
-f, --folder PATH
|
10
|
+
Where to save screenshots [default: snaps]
|
11
|
+
|
12
|
+
-n, --name TEMPLATE
|
13
|
+
Filename template. Include the string '%{url}' anywhere in the name to
|
14
|
+
use the captured URL in the filename [default: %{url}]
|
15
|
+
|
16
|
+
-a, --age SECONDS
|
17
|
+
Number of seconds to consider screenshots fresh [default: 86400]
|
18
|
+
|
19
|
+
-d, --depth LEVELS
|
20
|
+
Number of levels to crawl [default: 1]
|
21
|
+
|
22
|
+
-W, --width PIXELS
|
23
|
+
Screen width in pixels [default: 1280]
|
24
|
+
|
25
|
+
-H, --height PIXELS
|
26
|
+
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
27
|
+
|
28
|
+
-s, --selector SELECTOR
|
29
|
+
CSS selector to capture
|
30
|
+
|
31
|
+
-o, --only REGEX
|
32
|
+
Include only URLs that match REGEX
|
33
|
+
|
34
|
+
-h, --help
|
35
|
+
Show this screen
|
36
|
+
|
37
|
+
-v, --version
|
38
|
+
Show version number
|
20
39
|
|
21
40
|
Examples:
|
22
41
|
snapcrawl go example.com
|
@@ -25,3 +44,5 @@ Examples:
|
|
25
44
|
snapcrawl go example.com -W360 -H480
|
26
45
|
snapcrawl go example.com --selector "#main-content"
|
27
46
|
snapcrawl go example.com --only "products|collections"
|
47
|
+
snapcrawl go example.com --name "screenshot-%{url}"
|
48
|
+
snapcrawl go example.com --name "`date +%Y%m%d`_%{url}"
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snapcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colsole
|