snapcrawl 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 64c0d3c62258c70417520749bdd71b7d33c8d920ec94f2dfb0aaed961310563b
4
- data.tar.gz: 72ece0ddb0f5f9f2b578d4c369814507bb1b2343170d799eb27497a3c937475a
3
+ metadata.gz: 327da92cb63a1a8e6f58e58a4806d4e10b2cfa491960306544165be6423c9b3e
4
+ data.tar.gz: c6f2056f9ca5614a76bce68bdf2f001668ab626764bce89cf1b1bc4a8f68f833
5
5
  SHA512:
6
- metadata.gz: b60088e4dab7944269f76344094a6f80e6b304c0591fff2c3e9aa0e18ffac89b2a9927bf2759e7a17f9944dd925b01b342f46ec6587754ac206bf614741be0c3
7
- data.tar.gz: ae1b3dda9b0339440b9f37727503ad87f1f5a71d2eadc3283250f481b9391f1d530213a05a0f5ce0adc7a1f9bd183c049c5cfc7a7fcd1b06d5286c312d8d0747
6
+ metadata.gz: 91b00e39fbf5943501cc7f67eb1c684811d10ae3f5acc0263a3a6259ae64ad51d01d89aba75576990d7517a07a53660ec1f63d13adbbaf5bdd6380b7d9dd8050
7
+ data.tar.gz: 318d7c11aa087a20a8f5c0dd922e9f11f2eeca1be7c165bfa04d54d775d0688e3d3532434a4987cca039c362bb3817542331ce135f8ec686f0178d6e5fa343e5
data/lib/snapcrawl.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'snapcrawl/version'
2
+ require 'snapcrawl/exceptions'
2
3
  require 'snapcrawl/crawler'
3
4
 
4
5
  require 'byebug' if ENV['BYEBUG']
5
6
 
6
- self.extend Snapcrawl
@@ -5,16 +5,12 @@ require 'httparty'
5
5
  require 'nokogiri'
6
6
  require 'ostruct'
7
7
  require 'pstore'
8
- require 'uri'
9
8
  require 'addressable/uri'
10
9
  require 'webshot'
11
10
 
12
11
  module Snapcrawl
13
12
  include Colsole
14
13
 
15
- class MissingPhantomJS < StandardError; end
16
- class MissingImageMagick < StandardError; end
17
-
18
14
  class Crawler
19
15
  include Singleton
20
16
 
@@ -206,11 +202,7 @@ module Snapcrawl
206
202
 
207
203
  # Convert relative links to absolute
208
204
  begin
209
- link = URI.join( @opts.base, link ).to_s.dup
210
- rescue URI::InvalidURIError
211
- escaped_link = Addressable::URI.encode link
212
- warnings << { link: link, message: "Using escaped link: #{escaped_link}" }
213
- link = URI.join( @opts.base, escaped_link ).to_s.dup
205
+ link = Addressable::URI.join( @opts.base, link ).to_s.dup
214
206
  rescue => e
215
207
  warnings << { link: link, message: "#{e.class} #{e.message}" }
216
208
  next
@@ -226,11 +218,11 @@ module Snapcrawl
226
218
  end
227
219
 
228
220
  def doc
229
- @doc ||= File.read template 'docopt.txt'
221
+ @doc ||= File.read docopt
230
222
  end
231
223
 
232
- def template(file)
233
- File.expand_path("../templates/#{file}", __FILE__)
224
+ def docopt
225
+ File.expand_path "docopt.txt", __dir__
234
226
  end
235
227
 
236
228
  def opts_from_args(args)
@@ -0,0 +1,4 @@
1
+ module Snapcrawl
2
+ class MissingPhantomJS < StandardError; end
3
+ class MissingImageMagick < StandardError; end
4
+ end
@@ -1,3 +1,3 @@
1
1
  module Snapcrawl
2
- VERSION = "0.4.1"
2
+ VERSION = "0.4.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snapcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
@@ -112,7 +112,7 @@ files:
112
112
  - bin/snapcrawl
113
113
  - lib/snapcrawl.rb
114
114
  - lib/snapcrawl/crawler.rb
115
- - lib/snapcrawl/templates/docopt.txt
115
+ - lib/snapcrawl/exceptions.rb
116
116
  - lib/snapcrawl/version.rb
117
117
  homepage: https://github.com/DannyBen/snapcrawl
118
118
  licenses:
@@ -1,48 +0,0 @@
1
- Snapcrawl
2
-
3
- Usage:
4
- snapcrawl URL [options]
5
- snapcrawl -h | --help
6
- snapcrawl -v | --version
7
-
8
- Options:
9
- -f, --folder PATH
10
- Where to save screenshots [default: snaps]
11
-
12
- -n, --name TEMPLATE
13
- Filename template. Include the string '%{url}' anywhere in the name to
14
- use the captured URL in the filename [default: %{url}]
15
-
16
- -a, --age SECONDS
17
- Number of seconds to consider screenshots fresh [default: 86400]
18
-
19
- -d, --depth LEVELS
20
- Number of levels to crawl [default: 1]
21
-
22
- -W, --width PIXELS
23
- Screen width in pixels [default: 1280]
24
-
25
- -H, --height PIXELS
26
- Screen height in pixels. Use 0 to capture the full page [default: 0]
27
-
28
- -s, --selector SELECTOR
29
- CSS selector to capture
30
-
31
- -o, --only REGEX
32
- Include only URLs that match REGEX
33
-
34
- -h, --help
35
- Show this screen
36
-
37
- -v, --version
38
- Show version number
39
-
40
- Examples:
41
- snapcrawl example.com
42
- snapcrawl example.com -d2 -fscreens
43
- snapcrawl example.com -d2 > out.txt 2> err.txt &
44
- snapcrawl example.com -W360 -H480
45
- snapcrawl example.com --selector "#main-content"
46
- snapcrawl example.com --only "products|collections"
47
- snapcrawl example.com --name "screenshot-%{url}"
48
- snapcrawl example.com --name "`date +%Y%m%d`_%{url}"