snapcrawl 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -24
- data/lib/snapcrawl/crawler.rb +5 -4
- data/lib/snapcrawl/templates/docopt.txt +33 -12
- data/lib/snapcrawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2500895cfd465dd633ac9650fc702f87b684ae4a81aa2feb979a16fad17bedc2
|
|
4
|
+
data.tar.gz: 042f0e14f8139d3b33bdd0061731c7973f7c445e57e2c474d32c3b226d333acb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7603f985467c47f26b5bbf8c3cebf5654de708d0f7039c0268b4c4824089c259c17b687be482ed3e5813f44fef2274407542c76e3291f064aed1b06c2af1837a
|
|
7
|
+
data.tar.gz: 3246ea728fa90fc65da1732964492da9383ae0c99a47746d1194f5c991b2efadd333c69478e2c97fd99274acb47fc38773285a53c9fd5505e7a605af08f58cf2
|
data/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
Snapcrawl - crawl a website and take screenshots
|
|
2
2
|
==================================================
|
|
3
3
|
|
|
4
|
+
[](https://travis-ci.com/DannyBen/snapcrawl)
|
|
4
5
|
[](http://badge.fury.io/rb/snapcrawl)
|
|
5
6
|
[](https://codeclimate.com/github/DannyBen/snapcrawl)
|
|
6
7
|
|
|
@@ -56,22 +57,41 @@ Usage
|
|
|
56
57
|
Snapcrawl
|
|
57
58
|
|
|
58
59
|
Usage:
|
|
59
|
-
snapcrawl go
|
|
60
|
+
snapcrawl go URL [options]
|
|
60
61
|
snapcrawl -h | --help
|
|
61
62
|
snapcrawl -v | --version
|
|
62
63
|
|
|
63
64
|
Options:
|
|
64
|
-
-f --folder
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
-
|
|
65
|
+
-f, --folder PATH
|
|
66
|
+
Where to save screenshots [default: snaps]
|
|
67
|
+
|
|
68
|
+
-n, --name TEMPLATE
|
|
69
|
+
Filename template. Include the string '%{url}' anywhere in the name to
|
|
70
|
+
use the captured URL in the filename [default: %{url}]
|
|
71
|
+
|
|
72
|
+
-a, --age SECONDS
|
|
73
|
+
Number of seconds to consider screenshots fresh [default: 86400]
|
|
74
|
+
|
|
75
|
+
-d, --depth LEVELS
|
|
76
|
+
Number of levels to crawl [default: 1]
|
|
77
|
+
|
|
78
|
+
-W, --width PIXELS
|
|
79
|
+
Screen width in pixels [default: 1280]
|
|
80
|
+
|
|
81
|
+
-H, --height PIXELS
|
|
82
|
+
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
|
83
|
+
|
|
84
|
+
-s, --selector SELECTOR
|
|
85
|
+
CSS selector to capture
|
|
86
|
+
|
|
87
|
+
-o, --only REGEX
|
|
88
|
+
Include only URLs that match REGEX
|
|
89
|
+
|
|
90
|
+
-h, --help
|
|
91
|
+
Show this screen
|
|
92
|
+
|
|
93
|
+
-v, --version
|
|
94
|
+
Show version number
|
|
75
95
|
|
|
76
96
|
Examples:
|
|
77
97
|
snapcrawl go example.com
|
|
@@ -80,18 +100,8 @@ Usage
|
|
|
80
100
|
snapcrawl go example.com -W360 -H480
|
|
81
101
|
snapcrawl go example.com --selector "#main-content"
|
|
82
102
|
snapcrawl go example.com --only "products|collections"
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
Notes
|
|
87
|
-
--------------------------------------------------
|
|
88
|
-
|
|
89
|
-
If a URL cannot be found, Snapcrawl will report to stderr.
|
|
90
|
-
You can create a report by running
|
|
91
|
-
|
|
92
|
-
$ snapcrawl go example.com 2> err.txt
|
|
93
|
-
|
|
94
|
-
|
|
103
|
+
snapcrawl go example.com --name "screenshot-%{url}"
|
|
104
|
+
snapcrawl go example.com --name "`date +%Y%m%d`_%{url}"
|
|
95
105
|
|
|
96
106
|
---
|
|
97
107
|
|
data/lib/snapcrawl/crawler.rb
CHANGED
|
@@ -35,7 +35,7 @@ module Snapcrawl
|
|
|
35
35
|
def execute(args)
|
|
36
36
|
raise MissingPhantomJS unless command_exist? "phantomjs"
|
|
37
37
|
raise MissingImageMagick unless command_exist? "convert"
|
|
38
|
-
crawl args['
|
|
38
|
+
crawl args['URL'].dup, opts_from_args(args)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
def clear_cache
|
|
@@ -51,7 +51,8 @@ module Snapcrawl
|
|
|
51
51
|
depth: 1,
|
|
52
52
|
age: 86400,
|
|
53
53
|
folder: 'snaps',
|
|
54
|
-
|
|
54
|
+
name: '%{url}',
|
|
55
|
+
base: url,
|
|
55
56
|
}
|
|
56
57
|
urls = [protocolize(url)]
|
|
57
58
|
|
|
@@ -155,7 +156,7 @@ module Snapcrawl
|
|
|
155
156
|
|
|
156
157
|
# Return proper image path for a URL
|
|
157
158
|
def image_path_for(url)
|
|
158
|
-
"#{@opts.folder}/#{handelize(url)}
|
|
159
|
+
"#{@opts.folder}/#{@opts.name}.png" % { url: handelize(url) }
|
|
159
160
|
end
|
|
160
161
|
|
|
161
162
|
# Add protocol to a URL if needed
|
|
@@ -214,7 +215,7 @@ module Snapcrawl
|
|
|
214
215
|
|
|
215
216
|
def opts_from_args(args)
|
|
216
217
|
opts = {}
|
|
217
|
-
%w[folder selector only].each do |opt|
|
|
218
|
+
%w[folder name selector only].each do |opt|
|
|
218
219
|
opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
|
|
219
220
|
end
|
|
220
221
|
|
|
@@ -1,22 +1,41 @@
|
|
|
1
1
|
Snapcrawl
|
|
2
2
|
|
|
3
3
|
Usage:
|
|
4
|
-
snapcrawl go
|
|
4
|
+
snapcrawl go URL [options]
|
|
5
5
|
snapcrawl -h | --help
|
|
6
6
|
snapcrawl -v | --version
|
|
7
7
|
|
|
8
8
|
Options:
|
|
9
|
-
-f --folder
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
-
|
|
9
|
+
-f, --folder PATH
|
|
10
|
+
Where to save screenshots [default: snaps]
|
|
11
|
+
|
|
12
|
+
-n, --name TEMPLATE
|
|
13
|
+
Filename template. Include the string '%{url}' anywhere in the name to
|
|
14
|
+
use the captured URL in the filename [default: %{url}]
|
|
15
|
+
|
|
16
|
+
-a, --age SECONDS
|
|
17
|
+
Number of seconds to consider screenshots fresh [default: 86400]
|
|
18
|
+
|
|
19
|
+
-d, --depth LEVELS
|
|
20
|
+
Number of levels to crawl [default: 1]
|
|
21
|
+
|
|
22
|
+
-W, --width PIXELS
|
|
23
|
+
Screen width in pixels [default: 1280]
|
|
24
|
+
|
|
25
|
+
-H, --height PIXELS
|
|
26
|
+
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
|
27
|
+
|
|
28
|
+
-s, --selector SELECTOR
|
|
29
|
+
CSS selector to capture
|
|
30
|
+
|
|
31
|
+
-o, --only REGEX
|
|
32
|
+
Include only URLs that match REGEX
|
|
33
|
+
|
|
34
|
+
-h, --help
|
|
35
|
+
Show this screen
|
|
36
|
+
|
|
37
|
+
-v, --version
|
|
38
|
+
Show version number
|
|
20
39
|
|
|
21
40
|
Examples:
|
|
22
41
|
snapcrawl go example.com
|
|
@@ -25,3 +44,5 @@ Examples:
|
|
|
25
44
|
snapcrawl go example.com -W360 -H480
|
|
26
45
|
snapcrawl go example.com --selector "#main-content"
|
|
27
46
|
snapcrawl go example.com --only "products|collections"
|
|
47
|
+
snapcrawl go example.com --name "screenshot-%{url}"
|
|
48
|
+
snapcrawl go example.com --name "`date +%Y%m%d`_%{url}"
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: snapcrawl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.7
|
|
4
|
+
version: 0.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Danny Ben Shitrit
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-06-
|
|
11
|
+
date: 2019-06-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: colsole
|