snapcrawl 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8de37399949606ebc180cc7a80bf3ad91869e064
-  data.tar.gz: 0b56bd7207f4b0f0e44ecdfcffe890d50bf1d34c
+  metadata.gz: 7f8bfcb13d6d049104a97fe95b4f20527c9e93f9
+  data.tar.gz: 5d39a2e40270cbe8ddfd5e0863016a665059e747
 SHA512:
-  metadata.gz: 1ef25f70c86ce8f8d626b43d080d4f4048cae9a421111a3fbd232ebc6f26d9d4b67d69a90bbe306b3ff995c136ea8a4252b693dac6e1f2f82c1ff0d8a7e379e9
-  data.tar.gz: 9fb48c4490dc64c14284cbfd2bd4809ec1c6de67d535a6e4175aed4140ecc7366ebe92c791551668c08ef5b30a0b9d4f9ff6eeebee83af715d6910b9ab9f9df4
+  metadata.gz: c0ad7d74dff9e73d5892870cf162c2b61c96a3f316f6ffedf2b9bd84c09c080bf0e330d32a9f567d4f3e8bec14964afe2773e010177236f438046e3f06b87624
+  data.tar.gz: 015a6dd81b525bcd59cc52360c5d4989542646af72e84e48a350719cb4b88751e6def5ab201b81de9c256c7036e9894dc5ca03ae30829e7a8d4ab2de0af47aaa
data/README.md CHANGED
@@ -1,7 +1,7 @@
 # SnapCrawl - crawl a website and take screenshots
 
-`snapcrawl` is a command line utility for crawling a website and saving
-screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
+SnapCrawl is a command line utility for crawling a website and saving
+screenshots.
 
 ## Features
 
@@ -18,6 +18,39 @@ screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
 
 ## Usage
 
-    $ snapcrawl --help
-
-
+    $ snapcrawl --help
+
+    Snapcrawl
+
+    Usage:
+      snapcrawl go <url> [options]
+      snapcrawl -h | --help
+      snapcrawl -v | --version
+
+    Options:
+      -f --folder <path>   Where to save screenshots [default: snaps]
+      -a --age <n>         Number of seconds to consider screenshots fresh
+                           [default: 86400]
+      -d --depth <n>       Number of levels to crawl [default: 1]
+      -W --width <n>       Screen width in pixels [default: 1280]
+      -H --height <n>      Screen height in pixels. Use 0 to capture the full
+                           page [default: 0]
+      -s --selector <s>    CSS selector to capture
+      -o --only <regex>    Include only URLs that match <regex>
+      -h --help            Show this screen
+      -v --version         Show version
+
+    Examples:
+      snapcrawl go example.com
+      snapcrawl go example.com -d2 -fscreens
+      snapcrawl go example.com -d2 > out.txt 2> err.txt &
+      snapcrawl go example.com -W360 -H480
+      snapcrawl go example.com --selector "#main-content"
+      snapcrawl go example.com --only "products|collections"
+
+---
+
+## Notes
+
+1. If a URL cannot be found, SnapCrawl will report to stderr.
+   You can create a report by running `snapcrawl go example.com 2> err.txt`
@@ -36,6 +36,8 @@ module Snapcrawl
 
   def crawl(url, opts={})
     defaults = {
+      width: 1280,
+      height: 0,
       depth: 1,
      age: 86400,
       dir: 'snaps',
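
The new `width` and `height` keys give every crawl an explicit viewport default. Below is a minimal sketch of how such a defaults hash typically takes effect, assuming the merged result is wrapped in an `OpenStruct` (the `@opts.width` accessor style elsewhere in the diff suggests this; the wrapping code itself is not part of this hunk):

```ruby
require 'ostruct'

# Assumed illustration only: caller-supplied options override the
# defaults, and the merged hash is wrapped so values read as
# @opts.width / @opts.height.
defaults = { width: 1280, height: 0, depth: 1, age: 86400, dir: 'snaps' }
opts = OpenStruct.new defaults.merge(height: 480)

puts opts.width   # => 1280 (default kept)
puts opts.height  # => 480  (caller override wins)
```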
@@ -60,7 +62,11 @@ module Snapcrawl
       next if @done.include? url
       @done << url
       say "\n!txtgrn!-----> Visit: #{url}"
-      snap url
+      if @opts.only and url !~ /#{@opts.only}/
+        say "       Snap: Skipping. Does not match regex"
+      else
+        snap url
+      end
       new_urls += extract_urls_from url
     end
     new_urls
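
The `--only` filter above skips any URL that fails to match the user-supplied pattern. Here is a self-contained sketch of the same check (the `snap_allowed?` helper name is hypothetical, not part of the gem):

```ruby
# Hypothetical helper mirroring the new filter: snap a URL only when no
# regex was given, or when the URL matches it. Interpolating the string
# into /.../ compiles it to a Regexp, exactly as in the hunk above.
def snap_allowed?(url, only)
  only.nil? || url.match?(/#{only}/)
end

puts snap_allowed?('http://example.com/products/1', 'products|collections') # true
puts snap_allowed?('http://example.com/about', 'products|collections')      # false
puts snap_allowed?('http://example.com/about', nil)                         # true
```

Because the pattern is interpolated into a regex literal, metacharacters in the user's `--only` value are interpreted as regex syntax, which is what makes `"products|collections"` work as an alternation.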
@@ -84,9 +90,8 @@ module Snapcrawl
     fetch_opts = {}
     fetch_opts[:output] = image_path_for(url)
     fetch_opts[:width] = @opts.width
-    fetch_opts[:height] = @opts.height if @opts.height
-    # :height => 768,
-    # :div => '.header', # selector for a specific element to take screenshot of
+    fetch_opts[:height] = @opts.height if @opts.height > 0
+    fetch_opts[:div] = @opts.selector if @opts.selector
     # :top => 0, :left => 0, :width => 100, :height => 100 # dimensions for a specific area
 
     screenshot = f.fetch fetch_opts
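
Two behavioral fixes land here: a height of 0 now means "capture the full page" (the `:height` key is simply omitted), and a CSS selector, when given, is forwarded as `:div`. A standalone sketch of the assembled options hash (the `build_fetch_opts` helper is hypothetical):

```ruby
# Hypothetical helper showing how the screenshot options are assembled:
# :height is only set for positive values (0 = full page), and :div is
# only set when a CSS selector was supplied.
def build_fetch_opts(output, width, height, selector)
  opts = { output: output, width: width }
  opts[:height] = height if height > 0
  opts[:div] = selector if selector
  opts
end

p build_fetch_opts('snaps/home.png', 1280, 0, nil)
# => {:output=>"snaps/home.png", :width=>1280}
p build_fetch_opts('snaps/home.png', 360, 480, '#main-content')
# => {:output=>"snaps/home.png", :width=>360, :height=>480, :div=>"#main-content"}
```

The previous `if @opts.height` guard was always truthy for integers (0 is truthy in Ruby), so the explicit `> 0` comparison is what actually enables full-page capture.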
@@ -191,9 +196,14 @@ module Snapcrawl
 
   def opts_from_args(args)
     opts = {}
-    opts[:folder] = args['--folder'] if args['--folder']
-    opts[:age] = args['--age'].to_i if args['--age']
-    opts[:depth] = args['--depth'].to_i if args['--depth']
+    %w[folder selector only].each do |opt|
+      opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
+    end
+
+    %w[age depth width height].each do |opt|
+      opts[opt.to_sym] = args["--#{opt}"].to_i if args["--#{opt}"]
+    end
+
     opts
   end
 end
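
The refactor replaces one `if` per flag with two loops: one for string-valued options and one for integer options. Fed a docopt-style hash, the new parser behaves as below (the hash literal is an assumed example input, not output captured from the gem):

```ruby
def opts_from_args(args)
  opts = {}
  # String-valued options are copied through as-is.
  %w[folder selector only].each do |opt|
    opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
  end
  # Numeric options are converted with to_i; absent flags are skipped.
  %w[age depth width height].each do |opt|
    opts[opt.to_sym] = args["--#{opt}"].to_i if args["--#{opt}"]
  end
  opts
end

p opts_from_args('--folder' => 'screens', '--depth' => '2', '--only' => nil)
# => {:folder=>"screens", :depth=>2}
```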
@@ -10,6 +10,18 @@ Options:
   -a --age <n>         Number of seconds to consider screenshots fresh
                        [default: 86400]
   -d --depth <n>       Number of levels to crawl [default: 1]
+  -W --width <n>       Screen width in pixels [default: 1280]
+  -H --height <n>      Screen height in pixels. Use 0 to capture the full
+                       page [default: 0]
+  -s --selector <s>    CSS selector to capture
+  -o --only <regex>    Include only URLs that match <regex>
   -h --help            Show this screen
   -v --version         Show version
 
+Examples:
+  snapcrawl go example.com
+  snapcrawl go example.com -d2 -fscreens
+  snapcrawl go example.com -d2 > out.txt 2> err.txt &
+  snapcrawl go example.com -W360 -H480
+  snapcrawl go example.com --selector "#main-content"
+  snapcrawl go example.com --only "products|collections"
@@ -1,3 +1,3 @@
 module Snapcrawl
-  VERSION = "0.2.0"
+  VERSION = "0.2.1"
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: snapcrawl
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Danny Ben Shitrit