snapcrawl 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +38 -5
- data/lib/snapcrawl/crawler.rb +17 -7
- data/lib/snapcrawl/templates/docopt.txt +12 -0
- data/lib/snapcrawl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f8bfcb13d6d049104a97fe95b4f20527c9e93f9
|
4
|
+
data.tar.gz: 5d39a2e40270cbe8ddfd5e0863016a665059e747
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0ad7d74dff9e73d5892870cf162c2b61c96a3f316f6ffedf2b9bd84c09c080bf0e330d32a9f567d4f3e8bec14964afe2773e010177236f438046e3f06b87624
|
7
|
+
data.tar.gz: 015a6dd81b525bcd59cc52360c5d4989542646af72e84e48a350719cb4b88751e6def5ab201b81de9c256c7036e9894dc5ca03ae30829e7a8d4ab2de0af47aaa
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# SnapCrawl - crawl a website and take screenshots
|
2
2
|
|
3
|
-
|
4
|
-
screenshots.
|
3
|
+
SnapCrawl is a command line utility for crawling a website and saving
|
4
|
+
screenshots.
|
5
5
|
|
6
6
|
## Features
|
7
7
|
|
@@ -18,6 +18,39 @@ screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
$ snapcrawl --help
|
22
|
-
|
23
|
-
|
21
|
+
$ snapcrawl --help
|
22
|
+
|
23
|
+
Snapcrawl
|
24
|
+
|
25
|
+
Usage:
|
26
|
+
snapcrawl go <url> [options]
|
27
|
+
snapcrawl -h | --help
|
28
|
+
snapcrawl -v | --version
|
29
|
+
|
30
|
+
Options:
|
31
|
+
-f --folder <path> Where to save screenshots [default: snaps]
|
32
|
+
-a --age <n> Number of seconds to consider screenshots fresh
|
33
|
+
[default: 86400]
|
34
|
+
-d --depth <n> Number of levels to crawl [default: 1]
|
35
|
+
-W --width <n> Screen width in pixels [default: 1280]
|
36
|
+
-H --height <n> Screen height in pixels. Use 0 to capture the full
|
37
|
+
page [default: 0]
|
38
|
+
-s --selector <s> CSS selector to capture
|
39
|
+
-o --only <regex> Include only URLs that match <regex>
|
40
|
+
-h --help Show this screen
|
41
|
+
-v --version Show version
|
42
|
+
|
43
|
+
Examples:
|
44
|
+
snapcrawl go example.com
|
45
|
+
snapcrawl go example.com -d2 -fscreens
|
46
|
+
snapcrawl go example.com -d2 > out.txt 2> err.txt &
|
47
|
+
snapcrawl go example.com -W360 -H480
|
48
|
+
snapcrawl go example.com --selector "#main-content"
|
49
|
+
snapcrawl go example.com --only "products|collections"
|
50
|
+
|
51
|
+
---
|
52
|
+
|
53
|
+
## Notes
|
54
|
+
|
55
|
+
1. If a URL cannot be found, SnapCrawl will report to stderr.
|
56
|
+
You can create a report by running `snapcrawl go example.com 2> err.txt`
|
data/lib/snapcrawl/crawler.rb
CHANGED
@@ -36,6 +36,8 @@ module Snapcrawl
|
|
36
36
|
|
37
37
|
def crawl(url, opts={})
|
38
38
|
defaults = {
|
39
|
+
width: 1280,
|
40
|
+
height: 0,
|
39
41
|
depth: 1,
|
40
42
|
age: 86400,
|
41
43
|
dir: 'snaps',
|
@@ -60,7 +62,11 @@ module Snapcrawl
|
|
60
62
|
next if @done.include? url
|
61
63
|
@done << url
|
62
64
|
say "\n!txtgrn!-----> Visit: #{url}"
|
63
|
-
|
65
|
+
if @opts.only and url !~ /#{@opts.only}/
|
66
|
+
say " Snap: Skipping. Does not match regex"
|
67
|
+
else
|
68
|
+
snap url
|
69
|
+
end
|
64
70
|
new_urls += extract_urls_from url
|
65
71
|
end
|
66
72
|
new_urls
|
@@ -84,9 +90,8 @@ module Snapcrawl
|
|
84
90
|
fetch_opts = {}
|
85
91
|
fetch_opts[:output] = image_path_for(url)
|
86
92
|
fetch_opts[:width] = @opts.width
|
87
|
-
fetch_opts[:height] = @opts.height if @opts.height
|
88
|
-
|
89
|
-
# :div => '.header', # selector for a specific element to take screenshot of
|
93
|
+
fetch_opts[:height] = @opts.height if @opts.height > 0
|
94
|
+
fetch_opts[:div] = @opts.selector if @opts.selector
|
90
95
|
# :top => 0, :left => 0, :width => 100, :height => 100 # dimensions for a specific area
|
91
96
|
|
92
97
|
screenshot = f.fetch fetch_opts
|
@@ -191,9 +196,14 @@ module Snapcrawl
|
|
191
196
|
|
192
197
|
def opts_from_args(args)
|
193
198
|
opts = {}
|
194
|
-
|
195
|
-
|
196
|
-
|
199
|
+
%w[folder selector only].each do |opt|
|
200
|
+
opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
|
201
|
+
end
|
202
|
+
|
203
|
+
%w[age depth width height].each do |opt|
|
204
|
+
opts[opt.to_sym] = args["--#{opt}"].to_i if args["--#{opt}"]
|
205
|
+
end
|
206
|
+
|
197
207
|
opts
|
198
208
|
end
|
199
209
|
end
|
data/lib/snapcrawl/templates/docopt.txt
CHANGED
@@ -10,6 +10,18 @@ Options:
|
|
10
10
|
-a --age <n> Number of seconds to consider screenshots fresh
|
11
11
|
[default: 86400]
|
12
12
|
-d --depth <n> Number of levels to crawl [default: 1]
|
13
|
+
-W --width <n> Screen width in pixels [default: 1280]
|
14
|
+
-H --height <n> Screen height in pixels. Use 0 to capture the full
|
15
|
+
page [default: 0]
|
16
|
+
-s --selector <s> CSS selector to capture
|
17
|
+
-o --only <regex> Include only URLs that match <regex>
|
13
18
|
-h --help Show this screen
|
14
19
|
-v --version Show version
|
15
20
|
|
21
|
+
Examples:
|
22
|
+
snapcrawl go example.com
|
23
|
+
snapcrawl go example.com -d2 -fscreens
|
24
|
+
snapcrawl go example.com -d2 > out.txt 2> err.txt &
|
25
|
+
snapcrawl go example.com -W360 -H480
|
26
|
+
snapcrawl go example.com --selector "#main-content"
|
27
|
+
snapcrawl go example.com --only "products|collections"
|
data/lib/snapcrawl/version.rb
CHANGED