snapcrawl 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8de37399949606ebc180cc7a80bf3ad91869e064
4
- data.tar.gz: 0b56bd7207f4b0f0e44ecdfcffe890d50bf1d34c
3
+ metadata.gz: 7f8bfcb13d6d049104a97fe95b4f20527c9e93f9
4
+ data.tar.gz: 5d39a2e40270cbe8ddfd5e0863016a665059e747
5
5
  SHA512:
6
- metadata.gz: 1ef25f70c86ce8f8d626b43d080d4f4048cae9a421111a3fbd232ebc6f26d9d4b67d69a90bbe306b3ff995c136ea8a4252b693dac6e1f2f82c1ff0d8a7e379e9
7
- data.tar.gz: 9fb48c4490dc64c14284cbfd2bd4809ec1c6de67d535a6e4175aed4140ecc7366ebe92c791551668c08ef5b30a0b9d4f9ff6eeebee83af715d6910b9ab9f9df4
6
+ metadata.gz: c0ad7d74dff9e73d5892870cf162c2b61c96a3f316f6ffedf2b9bd84c09c080bf0e330d32a9f567d4f3e8bec14964afe2773e010177236f438046e3f06b87624
7
+ data.tar.gz: 015a6dd81b525bcd59cc52360c5d4989542646af72e84e48a350719cb4b88751e6def5ab201b81de9c256c7036e9894dc5ca03ae30829e7a8d4ab2de0af47aaa
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # SnapCrawl - crawl a website and take screenshots
2
2
 
3
- `snapcrawl` is a command line utility for crawling a website and saving
4
- screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
3
+ SnapCrawl is a command line utility for crawling a website and saving
4
+ screenshots.
5
5
 
6
6
  ## Features
7
7
 
@@ -18,6 +18,39 @@ screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
18
18
 
19
19
  ## Usage
20
20
 
21
- $ snapcrawl --help
22
-
23
-
21
+ $ snapcrawl --help
22
+
23
+ Snapcrawl
24
+
25
+ Usage:
26
+ snapcrawl go <url> [options]
27
+ snapcrawl -h | --help
28
+ snapcrawl -v | --version
29
+
30
+ Options:
31
+ -f --folder <path> Where to save screenshots [default: snaps]
32
+ -a --age <n> Number of seconds to consider screenshots fresh
33
+ [default: 86400]
34
+ -d --depth <n> Number of levels to crawl [default: 1]
35
+ -W --width <n> Screen width in pixels [default: 1280]
36
+ -H --height <n> Screen height in pixels. Use 0 to capture the full
37
+ page [default: 0]
38
+ -s --selector <s> CSS selector to capture
39
+ -o --only <regex> Include only URLs that match <regex>
40
+ -h --help Show this screen
41
+ -v --version Show version
42
+
43
+ Examples:
44
+ snapcrawl go example.com
45
+ snapcrawl go example.com -d2 -fscreens
46
+ snapcrawl go example.com -d2 > out.txt 2> err.txt &
47
+ snapcrawl go example.com -W360 -H480
48
+ snapcrawl go example.com --selector "#main-content"
49
+ snapcrawl go example.com --only "products|collections"
50
+
51
+ ---
52
+
53
+ ## Notes
54
+
55
+ 1. If a URL cannot be found, SnapCrawl will report to stderr.
56
+ You can create a report by running `snapcrawl go example.com 2> err.txt`
@@ -36,6 +36,8 @@ module Snapcrawl
36
36
 
37
37
  def crawl(url, opts={})
38
38
  defaults = {
39
+ width: 1280,
40
+ height: 0,
39
41
  depth: 1,
40
42
  age: 86400,
41
43
  dir: 'snaps',
@@ -60,7 +62,11 @@ module Snapcrawl
60
62
  next if @done.include? url
61
63
  @done << url
62
64
  say "\n!txtgrn!-----> Visit: #{url}"
63
- snap url
65
+ if @opts.only and url !~ /#{@opts.only}/
66
+ say " Snap: Skipping. Does not match regex"
67
+ else
68
+ snap url
69
+ end
64
70
  new_urls += extract_urls_from url
65
71
  end
66
72
  new_urls
@@ -84,9 +90,8 @@ module Snapcrawl
84
90
  fetch_opts = {}
85
91
  fetch_opts[:output] = image_path_for(url)
86
92
  fetch_opts[:width] = @opts.width
87
- fetch_opts[:height] = @opts.height if @opts.height
88
- # :height => 768,
89
- # :div => '.header', # selector for a specific element to take screenshot of
93
+ fetch_opts[:height] = @opts.height if @opts.height > 0
94
+ fetch_opts[:div] = @opts.selector if @opts.selector
90
95
  # :top => 0, :left => 0, :width => 100, :height => 100 # dimensions for a specific area
91
96
 
92
97
  screenshot = f.fetch fetch_opts
@@ -191,9 +196,14 @@ module Snapcrawl
191
196
 
192
197
  def opts_from_args(args)
193
198
  opts = {}
194
- opts[:folder] = args['--folder'] if args['--folder']
195
- opts[:age] = args['--age'].to_i if args['--age']
196
- opts[:depth] = args['--depth'].to_i if args['--depth']
199
+ %w[folder selector only].each do |opt|
200
+ opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
201
+ end
202
+
203
+ %w[age depth width height].each do |opt|
204
+ opts[opt.to_sym] = args["--#{opt}"].to_i if args["--#{opt}"]
205
+ end
206
+
197
207
  opts
198
208
  end
199
209
  end
@@ -10,6 +10,18 @@ Options:
10
10
  -a --age <n> Number of seconds to consider screenshots fresh
11
11
  [default: 86400]
12
12
  -d --depth <n> Number of levels to crawl [default: 1]
13
+ -W --width <n> Screen width in pixels [default: 1280]
14
+ -H --height <n> Screen height in pixels. Use 0 to capture the full
15
+ page [default: 0]
16
+ -s --selector <s> CSS selector to capture
17
+ -o --only <regex> Include only URLs that match <regex>
13
18
  -h --help Show this screen
14
19
  -v --version Show version
15
20
 
21
+ Examples:
22
+ snapcrawl go example.com
23
+ snapcrawl go example.com -d2 -fscreens
24
+ snapcrawl go example.com -d2 > out.txt 2> err.txt &
25
+ snapcrawl go example.com -W360 -H480
26
+ snapcrawl go example.com --selector "#main-content"
27
+ snapcrawl go example.com --only "products|collections"
@@ -1,3 +1,3 @@
1
1
  module Snapcrawl
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snapcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit