RubyGems - snapcrawl - Versions diffs - 0.2.0 → 0.2.1 - Mend

snapcrawl 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/README.md +38 -5
data/lib/snapcrawl/crawler.rb +17 -7
data/lib/snapcrawl/templates/docopt.txt +12 -0
data/lib/snapcrawl/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8de37399949606ebc180cc7a80bf3ad91869e064
-  data.tar.gz: 0b56bd7207f4b0f0e44ecdfcffe890d50bf1d34c
+  metadata.gz: 7f8bfcb13d6d049104a97fe95b4f20527c9e93f9
+  data.tar.gz: 5d39a2e40270cbe8ddfd5e0863016a665059e747
 SHA512:
-  metadata.gz: 1ef25f70c86ce8f8d626b43d080d4f4048cae9a421111a3fbd232ebc6f26d9d4b67d69a90bbe306b3ff995c136ea8a4252b693dac6e1f2f82c1ff0d8a7e379e9
-  data.tar.gz: 9fb48c4490dc64c14284cbfd2bd4809ec1c6de67d535a6e4175aed4140ecc7366ebe92c791551668c08ef5b30a0b9d4f9ff6eeebee83af715d6910b9ab9f9df4
+  metadata.gz: c0ad7d74dff9e73d5892870cf162c2b61c96a3f316f6ffedf2b9bd84c09c080bf0e330d32a9f567d4f3e8bec14964afe2773e010177236f438046e3f06b87624
+  data.tar.gz: 015a6dd81b525bcd59cc52360c5d4989542646af72e84e48a350719cb4b88751e6def5ab201b81de9c256c7036e9894dc5ca03ae30829e7a8d4ab2de0af47aaa

data/README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # SnapCrawl - crawl a website and take screenshots
-`snapcrawl` is a command line utility for crawling a website and saving
-screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
+SnapCrawl is a command line utility for crawling a website and saving
+screenshots.
 ## Features
@@ -18,6 +18,39 @@ screenshots. It is using [Runfile](https://github.com/DannyBen/runfile).
 ## Usage
-	$ snapcrawl --help
+	$ snapcrawl --help
+    Snapcrawl
+    Usage:
+      snapcrawl go <url> [options]
+      snapcrawl -h | --help
+      snapcrawl -v | --version
+    Options:
+      -f --folder <path>     Where to save screenshots [default: snaps]
+      -a --age <n>           Number of seconds to consider screenshots fresh
+                             [default: 86400]
+      -d --depth <n>         Number of levels to crawl [default: 1]
+      -W --width <n>         Screen width in pixels [default: 1280]
+      -H --height <n>        Screen height in pixels. Use 0 to capture the full
+                             page [default: 0]
+      -s --selector <s>      CSS selector to capture
+      -o --only <regex>      Include only URLs that match <regex>
+      -h --help              Show this screen
+      -v --version           Show version
+    Examples:
+      snapcrawl go example.com
+      snapcrawl go example.com -d2 -fscreens
+      snapcrawl go example.com -d2 > out.txt 2> err.txt &
+      snapcrawl go example.com -W360 -H480
+      snapcrawl go example.com --selector "#main-content"
+      snapcrawl go example.com --only "products|collections"
+---
+## Notes
+1. If a URL cannot be found, SnapCrawl will report to stderr.
+   You can create a report by running `snapcrawl go example.com 2> err.txt`

data/lib/snapcrawl/crawler.rb CHANGED Viewed

@@ -36,6 +36,8 @@ module Snapcrawl
     def crawl(url, opts={})
       defaults = {
+        width: 1280,
+        height: 0,
         depth: 1,
         age: 86400,
         dir: 'snaps',
@@ -60,7 +62,11 @@ module Snapcrawl
         next if @done.include? url
         @done << url
         say "\n!txtgrn!-----> Visit: #{url}"
-        snap url
+        if @opts.only and url !~ /#{@opts.only}/
+          say "       Snap:  Skipping. Does not match regex"
+        else
+          snap url
+        end
         new_urls += extract_urls_from url
       end
       new_urls
@@ -84,9 +90,8 @@ module Snapcrawl
       fetch_opts = {}
       fetch_opts[:output] = image_path_for(url)
       fetch_opts[:width]  = @opts.width
-      fetch_opts[:height] = @opts.height if @opts.height
-      # :height => 768,
-      # :div => '.header', # selector for a specific element to take screenshot of
+      fetch_opts[:height] = @opts.height if @opts.height > 0
+      fetch_opts[:div]    = @opts.selector if @opts.selector
       # :top => 0, :left => 0, :width => 100, :height => 100 # dimensions for a specific area
       screenshot = f.fetch fetch_opts
@@ -191,9 +196,14 @@ module Snapcrawl
     def opts_from_args(args)
       opts = {}
-      opts[:folder] = args['--folder'] if args['--folder']
-      opts[:age] = args['--age'].to_i if args['--age']
-      opts[:depth] = args['--depth'].to_i if args['--depth']
+      %w[folder selector only].each do |opt|
+        opts[opt.to_sym] = args["--#{opt}"] if args["--#{opt}"]
+      end
+      %w[age depth width height].each do |opt|
+        opts[opt.to_sym] = args["--#{opt}"].to_i if args["--#{opt}"]
+      end
       opts
     end
   end

data/lib/snapcrawl/templates/docopt.txt CHANGED Viewed

@@ -10,6 +10,18 @@ Options:
   -a --age <n>           Number of seconds to consider screenshots fresh
                          [default: 86400]
   -d --depth <n>         Number of levels to crawl [default: 1]
+  -W --width <n>         Screen width in pixels [default: 1280]
+  -H --height <n>        Screen height in pixels. Use 0 to capture the full
+                         page [default: 0]
+  -s --selector <s>      CSS selector to capture
+  -o --only <regex>      Include only URLs that match <regex>
   -h --help              Show this screen
   -v --version           Show version
+Examples:
+  snapcrawl go example.com
+  snapcrawl go example.com -d2 -fscreens
+  snapcrawl go example.com -d2 > out.txt 2> err.txt &
+  snapcrawl go example.com -W360 -H480
+  snapcrawl go example.com --selector "#main-content"
+  snapcrawl go example.com --only "products|collections"

data/lib/snapcrawl/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Snapcrawl
-  VERSION = "0.2.0"
+  VERSION = "0.2.1"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: snapcrawl
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Danny Ben Shitrit