snapcrawl 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 64c0d3c62258c70417520749bdd71b7d33c8d920ec94f2dfb0aaed961310563b
4
- data.tar.gz: 72ece0ddb0f5f9f2b578d4c369814507bb1b2343170d799eb27497a3c937475a
3
+ metadata.gz: 327da92cb63a1a8e6f58e58a4806d4e10b2cfa491960306544165be6423c9b3e
4
+ data.tar.gz: c6f2056f9ca5614a76bce68bdf2f001668ab626764bce89cf1b1bc4a8f68f833
5
5
  SHA512:
6
- metadata.gz: b60088e4dab7944269f76344094a6f80e6b304c0591fff2c3e9aa0e18ffac89b2a9927bf2759e7a17f9944dd925b01b342f46ec6587754ac206bf614741be0c3
7
- data.tar.gz: ae1b3dda9b0339440b9f37727503ad87f1f5a71d2eadc3283250f481b9391f1d530213a05a0f5ce0adc7a1f9bd183c049c5cfc7a7fcd1b06d5286c312d8d0747
6
+ metadata.gz: 91b00e39fbf5943501cc7f67eb1c684811d10ae3f5acc0263a3a6259ae64ad51d01d89aba75576990d7517a07a53660ec1f63d13adbbaf5bdd6380b7d9dd8050
7
+ data.tar.gz: 318d7c11aa087a20a8f5c0dd922e9f11f2eeca1be7c165bfa04d54d775d0688e3d3532434a4987cca039c362bb3817542331ce135f8ec686f0178d6e5fa343e5
data/lib/snapcrawl.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'snapcrawl/version'
2
+ require 'snapcrawl/exceptions'
2
3
  require 'snapcrawl/crawler'
3
4
 
4
5
  require 'byebug' if ENV['BYEBUG']
5
6
 
6
- self.extend Snapcrawl
@@ -5,16 +5,12 @@ require 'httparty'
5
5
  require 'nokogiri'
6
6
  require 'ostruct'
7
7
  require 'pstore'
8
- require 'uri'
9
8
  require 'addressable/uri'
10
9
  require 'webshot'
11
10
 
12
11
  module Snapcrawl
13
12
  include Colsole
14
13
 
15
- class MissingPhantomJS < StandardError; end
16
- class MissingImageMagick < StandardError; end
17
-
18
14
  class Crawler
19
15
  include Singleton
20
16
 
@@ -206,11 +202,7 @@ module Snapcrawl
206
202
 
207
203
  # Convert relative links to absolute
208
204
  begin
209
- link = URI.join( @opts.base, link ).to_s.dup
210
- rescue URI::InvalidURIError
211
- escaped_link = Addressable::URI.encode link
212
- warnings << { link: link, message: "Using escaped link: #{escaped_link}" }
213
- link = URI.join( @opts.base, escaped_link ).to_s.dup
205
+ link = Addressable::URI.join( @opts.base, link ).to_s.dup
214
206
  rescue => e
215
207
  warnings << { link: link, message: "#{e.class} #{e.message}" }
216
208
  next
@@ -226,11 +218,11 @@ module Snapcrawl
226
218
  end
227
219
 
228
220
  def doc
229
- @doc ||= File.read template 'docopt.txt'
221
+ @doc ||= File.read docopt
230
222
  end
231
223
 
232
- def template(file)
233
- File.expand_path("../templates/#{file}", __FILE__)
224
+ def docopt
225
+ File.expand_path "docopt.txt", __dir__
234
226
  end
235
227
 
236
228
  def opts_from_args(args)
@@ -0,0 +1,4 @@
1
+ module Snapcrawl
2
+ class MissingPhantomJS < StandardError; end
3
+ class MissingImageMagick < StandardError; end
4
+ end
@@ -1,3 +1,3 @@
1
1
  module Snapcrawl
2
- VERSION = "0.4.1"
2
+ VERSION = "0.4.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snapcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
@@ -112,7 +112,7 @@ files:
112
112
  - bin/snapcrawl
113
113
  - lib/snapcrawl.rb
114
114
  - lib/snapcrawl/crawler.rb
115
- - lib/snapcrawl/templates/docopt.txt
115
+ - lib/snapcrawl/exceptions.rb
116
116
  - lib/snapcrawl/version.rb
117
117
  homepage: https://github.com/DannyBen/snapcrawl
118
118
  licenses:
@@ -1,48 +0,0 @@
1
- Snapcrawl
2
-
3
- Usage:
4
- snapcrawl URL [options]
5
- snapcrawl -h | --help
6
- snapcrawl -v | --version
7
-
8
- Options:
9
- -f, --folder PATH
10
- Where to save screenshots [default: snaps]
11
-
12
- -n, --name TEMPLATE
13
- Filename template. Include the string '%{url}' anywhere in the name to
14
- use the captured URL in the filename [default: %{url}]
15
-
16
- -a, --age SECONDS
17
- Number of seconds to consider screenshots fresh [default: 86400]
18
-
19
- -d, --depth LEVELS
20
- Number of levels to crawl [default: 1]
21
-
22
- -W, --width PIXELS
23
- Screen width in pixels [default: 1280]
24
-
25
- -H, --height PIXELS
26
- Screen height in pixels. Use 0 to capture the full page [default: 0]
27
-
28
- -s, --selector SELECTOR
29
- CSS selector to capture
30
-
31
- -o, --only REGEX
32
- Include only URLs that match REGEX
33
-
34
- -h, --help
35
- Show this screen
36
-
37
- -v, --version
38
- Show version number
39
-
40
- Examples:
41
- snapcrawl example.com
42
- snapcrawl example.com -d2 -fscreens
43
- snapcrawl example.com -d2 > out.txt 2> err.txt &
44
- snapcrawl example.com -W360 -H480
45
- snapcrawl example.com --selector "#main-content"
46
- snapcrawl example.com --only "products|collections"
47
- snapcrawl example.com --name "screenshot-%{url}"
48
- snapcrawl example.com --name "`date +%Y%m%d`_%{url}"