wraith 1.2.2 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6737fd591c334bd813dbc07970efe9bea443d762
4
- data.tar.gz: c1191e025c5fb12056dd8fa15b45a4be5c8027a9
3
+ metadata.gz: 0ad5c9e4eecd6cbe1a694bc29c62410ff45b7c86
4
+ data.tar.gz: c84b6e122617e02962a281ad885717342c919d35
5
5
  SHA512:
6
- metadata.gz: a633654b6891f6803e074dffdd3ce37be5b55be336dbe33dfc0872dd16ec07d64878f779a1667e7b67eb864f5447d6eb3ca0e29f44f9b492a74bd2091653e8a2
7
- data.tar.gz: 80cd0a7d9f9ed8216ab2e9b368f8c44c1e562a7a06dead497096a4f4a8e7d3e2ea27e007c64f02e2585c4262cf4e8e243c799277720637577fcd44f7de967e5f
6
+ metadata.gz: 5d415de27039b977ba067eac6a0f17edacdd301ea5a43e5058454b8b8b35151f1a1bc9cc0b0216695374ff9b2d11bce6afc90ffe11299c5ca14af8818ade9917
7
+ data.tar.gz: 68d6c30abf6631ba704fe705290b9ca909d95859b8268e0886ef19773c991fdb90aa36972fba343ad3a7089729ca41f3e45365005b4a3c90ad8110f81d642419
data/configs/config.yaml CHANGED
@@ -44,3 +44,17 @@ fuzz: '20%'
44
44
  #Set the number of days to keep the site spider file
45
45
  spider_days:
46
46
  - 10
47
+
48
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
49
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
50
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
51
+ #
52
+ # spider_skips:
53
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
54
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
55
+
56
+ #Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
57
+ #alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
58
+ #diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
59
+ #diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
60
+ #mode: diffs_first
@@ -41,3 +41,11 @@ fuzz: '20%'
41
41
  #Set the number of days to keep the site spider file
42
42
  spider_days:
43
43
  - 10
44
+
45
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
46
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
47
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
48
+ #
49
+ # spider_skips:
50
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
51
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
@@ -44,3 +44,10 @@ fuzz: '20%'
44
44
  #Set the number of days to keep the site spider file
45
45
  spider_days:
46
46
  - 10
47
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
48
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
49
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
50
+ #
51
+ # spider_skips:
52
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
53
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
@@ -16,7 +16,6 @@ class Wraith::CompareImages
16
16
  diff = base.gsub(/([a-z0-9]+).png$/, 'diff.png')
17
17
  info = base.gsub(/([a-z0-9]+).png$/, 'data.txt')
18
18
  compare_task(base, compare, diff, info)
19
- Dir.glob("#{wraith.directory}/*/*.txt").map { |f| "\n#{f}\n#{File.read(f)}" }
20
19
  puts 'Saved diff'
21
20
  end
22
21
  end
data/lib/wraith/folder.rb CHANGED
@@ -33,7 +33,7 @@ class Wraith::FolderManager
33
33
  spider_paths.each do |folder_label, path|
34
34
  unless path
35
35
  path = folder_label
36
- folder_label = path.gsub('/', '_')
36
+ folder_label = path.gsub('/', '__')
37
37
  end
38
38
 
39
39
  FileUtils.mkdir_p("#{dir}/thumbnails/#{folder_label}")
@@ -41,4 +41,20 @@ class Wraith::FolderManager
41
41
  end
42
42
  puts 'Creating Folders'
43
43
  end
44
+
45
+ # Tidy up the shots folder, removing unnecessary files
46
+ #
47
+ def tidy_shots_folder dirs
48
+ if wraith.mode == 'diffs_only'
49
+ dirs.each do |a, b|
50
+ # If we are running in "diffs_only" mode, and none of the variants show a difference
51
+ # we remove the file from the shots folder
52
+ if b.none? {|k, v| v[:data] > 0}
53
+ FileUtils.rm_rf("#{wraith.directory}/#{a}")
54
+ dirs.delete(a)
55
+ end
56
+ end
57
+ end
58
+ end
59
+
44
60
  end
@@ -15,6 +15,7 @@ class Wraith::GalleryGenerator
15
15
  def initialize(config)
16
16
  @wraith = Wraith::Wraith.new(config)
17
17
  @location = wraith.directory
18
+ @folder_manager = Wraith::FolderManager.new(config)
18
19
  end
19
20
 
20
21
  def parse_directories(dirname)
@@ -55,7 +56,7 @@ class Wraith::GalleryGenerator
55
56
  filename: filepath, thumb: thumbnail
56
57
  }
57
58
  when 'data'
58
- size_dict[:data] = File.read("#{dirname}/#{filepath}")
59
+ size_dict[:data] = File.read("#{dirname}/#{filepath}").to_f
59
60
  else
60
61
  size_dict[:variants] << {
61
62
  name: group,
@@ -68,7 +69,14 @@ class Wraith::GalleryGenerator
68
69
  end
69
70
  end
70
71
  end
71
- @dirs
72
+ @folder_manager.tidy_shots_folder(@dirs)
73
+ if [ 'diffs_only', 'diffs_first' ].include?(wraith.mode)
74
+ @sorted = @dirs.sort_by { |category, sizes| -1 * sizes.max_by { |size, dict| dict[:data]}[1][:data] }
75
+ else
76
+ @sorted = @dirs.sort_by { |category, sizes| category }
77
+ end
78
+ # The sort has made this into an enumerable, convert it back to a Hash
79
+ Hash[@sorted]
72
80
  end
73
81
 
74
82
  def generate_html(location, directories, template, destination, path)
@@ -20,17 +20,17 @@
20
20
  <div class="panel">
21
21
  <div class="panel-heading">Screenshots:</div>
22
22
  <ul class="list-group list-group-flush">
23
- <% directories.keys.sort.each do |dir| %>
24
- <li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir%></a></li>
23
+ <% directories.keys.each do |dir| %>
24
+ <li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir.gsub('__', '/')%></a></li>
25
25
  <% end %>
26
26
  </ul>
27
27
  </div>
28
28
  </div>
29
29
  <div class="col-lg-10">
30
- <% directories.to_a.sort.each do |dir, sizes| %>
30
+ <% directories.each do |dir, sizes| %>
31
31
  <div class="row">
32
32
  <a name="<%= dir %>"></a>
33
- <h2><%= dir %></h2>
33
+ <h2><%= dir.gsub('__', '/') %></h2>
34
34
  </div>
35
35
  <% sizes.to_a.sort.each do |size, files| %>
36
36
  <div class="row">
@@ -54,7 +54,7 @@ class Wraith::SaveImages
54
54
  check_paths.each do |label, path|
55
55
  unless path
56
56
  path = label
57
- label = path.gsub('/', '_')
57
+ label = path.gsub('/', '__')
58
58
  end
59
59
 
60
60
  base_url = base_urls(path)
data/lib/wraith/spider.rb CHANGED
@@ -1,64 +1,108 @@
1
1
  require 'wraith'
2
2
  require 'anemone'
3
+ require 'nokogiri'
3
4
  require 'uri'
4
5
 
5
6
  class Wraith::Spidering
6
- attr_reader :wraith
7
7
 
8
8
  def initialize(config)
9
9
  @wraith = Wraith::Wraith.new(config)
10
10
  end
11
11
 
12
12
  def check_for_paths
13
- unless wraith.paths
14
- puts 'no paths defined'
15
- spider
13
+ if @wraith.paths.nil?
14
+ unless @wraith.sitemap.nil?
15
+ puts 'no paths defined in config, loading paths from sitemap'
16
+ spider = Wraith::Sitemap.new(@wraith)
17
+ else
18
+ puts 'no paths defined in config, crawling from site root'
19
+ spider = Wraith::Crawler.new(@wraith)
20
+ end
21
+ spider.determine_paths
16
22
  end
17
23
  end
24
+ end
25
+
26
+ class Wraith::Spider
27
+
28
+ def initialize(wraith)
29
+ @wraith = wraith
30
+ @paths = {}
31
+ end
32
+
33
+ def determine_paths
34
+ spider
35
+ write_file
36
+ end
37
+
38
+ private
39
+
40
+ def write_file
41
+ File.open(@wraith.spider_file, 'w+') { |file| file.write(@paths) }
42
+ end
43
+
44
+ def add_path(path)
45
+ @paths[path == '/' ? 'home' : path.gsub('/', '__').chomp('__').downcase] = path.downcase
46
+ end
18
47
 
19
48
  def spider
20
- if File.exist?(wraith.spider_file)
21
- check_file
22
- else
23
- puts 'creating new spider file'
24
- spider_base_domain
25
- end
49
+
26
50
  end
27
51
 
28
- def check_file
29
- if (Time.now - File.ctime(wraith.spider_file)) / (24 * 3600) < wraith.spider_days[0]
52
+ end
53
+
54
+ class Wraith::Crawler < Wraith::Spider
55
+
56
+ EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
57
+ gz jar js css dtd xsd ico raw mp3 mp4 \
58
+ wav wmv ape aac ac3 wma aiff mpg mpeg \
59
+ avi mov ogg mkv mka asx asf mp2 m1v \
60
+ m3u f4v pdf doc xls ppt pps bin exe rss xml)
61
+
62
+ def spider
63
+ if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
30
64
  puts 'using existing spider file'
31
65
  else
32
66
  puts 'creating new spider file'
33
- spider_base_domain
67
+ spider_list = []
68
+ Anemone.crawl(@wraith.base_domain) do |anemone|
69
+ anemone.skip_links_like(/\.#{EXT.join('|')}$/)
70
+ # Add user specified skips
71
+ anemone.skip_links_like(@wraith.spider_skips)
72
+ anemone.on_every_page { |page| add_path(page.url.path) }
73
+ end
34
74
  end
35
75
  end
36
76
 
37
- def spider_base_domain
38
- @spider_list = []
39
- crawl_url = wraith.base_domain
40
- ext = %w(flv swf png jpg gif asx zip rar tar 7z \
41
- gz jar js css dtd xsd ico raw mp3 mp4 \
42
- wav wmv ape aac ac3 wma aiff mpg mpeg \
43
- avi mov ogg mkv mka asx asf mp2 m1v \
44
- m3u f4v pdf doc xls ppt pps bin exe rss xml)
45
-
46
- Anemone.crawl(crawl_url) do |anemone|
47
- anemone.skip_links_like(/\.#{ext.join('|')}$/)
48
- anemone.on_every_page { |page| @spider_list << page.url.path }
49
- end
50
- create_spider_file
77
+ def modified_since(file, since)
78
+ (Time.now - File.ctime(file)) / (24 * 3600) < since
51
79
  end
52
80
 
53
- def create_spider_file
54
- @i = 0
55
- spider = Hash.new { |h, k| h[k] = [] }
56
- while @i < @spider_list.length
57
- lab = @spider_list[@i].to_s.split('/').last
58
- lab = 'home' if @spider_list[@i] == '/'
59
- spider[lab] = @spider_list[@i]
60
- @i += 1
81
+ end
82
+
83
+ class Wraith::Sitemap < Wraith::Spider
84
+
85
+ def spider
86
+ unless @wraith.sitemap.nil?
87
+ puts "reading sitemap.xml from #{@wraith.sitemap}"
88
+ if @wraith.sitemap =~ URI::regexp
89
+ sitemap = Nokogiri::XML(open(@wraith.sitemap))
90
+ else
91
+ sitemap = Nokogiri::XML(File.open(@wraith.sitemap))
92
+ end
93
+ urls = {}
94
+ sitemap.css('loc').each do |loc|
95
+ path = loc.content
96
+ # Allow use of either domain in the sitemap.xml
97
+ @wraith.domains.each do |k, v|
98
+ path.sub!(v, '')
99
+ end
100
+ if @wraith.spider_skips.nil? || @wraith.spider_skips.none? { |regex| regex.match(path) }
101
+ add_path(path)
102
+ end
103
+ end
61
104
  end
62
- File.open(wraith.spider_file, 'w+') { |file| file.write(spider) }
63
105
  end
106
+
64
107
  end
108
+
@@ -1,3 +1,3 @@
1
1
  module Wraith
2
- VERSION = '1.2.2'
2
+ VERSION = '1.3.0'
3
3
  end
data/lib/wraith/wraith.rb CHANGED
@@ -47,6 +47,14 @@ class Wraith::Wraith
47
47
  @config['spider_days']
48
48
  end
49
49
 
50
+ def sitemap
51
+ @config['sitemap']
52
+ end
53
+
54
+ def spider_skips
55
+ @config['spider_skips']
56
+ end
57
+
50
58
  def paths
51
59
  @config['paths']
52
60
  end
@@ -59,13 +67,21 @@ class Wraith::Wraith
59
67
  @config['fuzz']
60
68
  end
61
69
 
70
+ def mode
71
+ if ['diffs_only', 'diffs_first', 'alphanumeric'].include?(@config['mode'])
72
+ @config['mode']
73
+ else
74
+ 'alphanumeric'
75
+ end
76
+ end
77
+
62
78
  def capture_page_image(browser, url, width, file_name)
63
79
  puts `"#{browser}" #{@config['phantomjs_options']} "#{snap_file}" "#{url}" "#{width}" "#{file_name}"`
64
80
  end
65
81
 
66
82
  def self.crop_images(crop, height)
67
83
  # For compatibility with windows file structures switch commenting on the following 2 lines
68
- puts `convert #{crop} -background none -extent 0x#{height} #{crop}`
84
+ `convert #{crop} -background none -extent 0x#{height} #{crop}`
69
85
  # puts `convert #{crop.gsub('/', '\\')} -background none -extent 0x#{height} #{crop.gsub('/', '\\')}`
70
86
  end
71
87
 
@@ -75,7 +91,7 @@ class Wraith::Wraith
75
91
 
76
92
  def set_image_width(image, width)
77
93
  # For compatibility with windows file structures switch commenting on the following 2 lines
78
- puts `convert #{image} -background none -extent #{width}x0 #{image}`
94
+ `convert #{image} -background none -extent #{width}x0 #{image}`
79
95
  # puts `convert #{image.gsub('/', '\\')} -background none -extent #{width}x0 #{image.gsub('/', '\\')}`
80
96
  end
81
97
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wraith
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dave Blooman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-26 00:00:00.000000000 Z
12
+ date: 2014-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pry