wraith 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6737fd591c334bd813dbc07970efe9bea443d762
4
- data.tar.gz: c1191e025c5fb12056dd8fa15b45a4be5c8027a9
3
+ metadata.gz: 0ad5c9e4eecd6cbe1a694bc29c62410ff45b7c86
4
+ data.tar.gz: c84b6e122617e02962a281ad885717342c919d35
5
5
  SHA512:
6
- metadata.gz: a633654b6891f6803e074dffdd3ce37be5b55be336dbe33dfc0872dd16ec07d64878f779a1667e7b67eb864f5447d6eb3ca0e29f44f9b492a74bd2091653e8a2
7
- data.tar.gz: 80cd0a7d9f9ed8216ab2e9b368f8c44c1e562a7a06dead497096a4f4a8e7d3e2ea27e007c64f02e2585c4262cf4e8e243c799277720637577fcd44f7de967e5f
6
+ metadata.gz: 5d415de27039b977ba067eac6a0f17edacdd301ea5a43e5058454b8b8b35151f1a1bc9cc0b0216695374ff9b2d11bce6afc90ffe11299c5ca14af8818ade9917
7
+ data.tar.gz: 68d6c30abf6631ba704fe705290b9ca909d95859b8268e0886ef19773c991fdb90aa36972fba343ad3a7089729ca41f3e45365005b4a3c90ad8110f81d642419
data/configs/config.yaml CHANGED
@@ -44,3 +44,17 @@ fuzz: '20%'
44
44
  #Set the number of days to keep the site spider file
45
45
  spider_days:
46
46
  - 10
47
+
48
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
49
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
50
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
51
+ #
52
+ # spider_skips:
53
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
54
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
55
+
56
+ #Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
57
+ #alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
58
+ #diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
59
+ #diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
60
+ #mode: diffs_first
@@ -41,3 +41,11 @@ fuzz: '20%'
41
41
  #Set the number of days to keep the site spider file
42
42
  spider_days:
43
43
  - 10
44
+
45
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
46
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
47
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
48
+ #
49
+ # spider_skips:
50
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
51
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
@@ -44,3 +44,10 @@ fuzz: '20%'
44
44
  #Set the number of days to keep the site spider file
45
45
  spider_days:
46
46
  - 10
47
+ #A list of URLs to skip when spidering. Ruby regular expressions can be
48
+ #used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
49
+ #http://www.yaml.org/YAML_for_ruby.html#regexps
50
+ #
51
+ # spider_skips:
52
+ # - /foo/bar.html # Matches /foo/bar.html explicitly
53
+ # - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
@@ -16,7 +16,6 @@ class Wraith::CompareImages
16
16
  diff = base.gsub(/([a-z0-9]+).png$/, 'diff.png')
17
17
  info = base.gsub(/([a-z0-9]+).png$/, 'data.txt')
18
18
  compare_task(base, compare, diff, info)
19
- Dir.glob("#{wraith.directory}/*/*.txt").map { |f| "\n#{f}\n#{File.read(f)}" }
20
19
  puts 'Saved diff'
21
20
  end
22
21
  end
data/lib/wraith/folder.rb CHANGED
@@ -33,7 +33,7 @@ class Wraith::FolderManager
33
33
  spider_paths.each do |folder_label, path|
34
34
  unless path
35
35
  path = folder_label
36
- folder_label = path.gsub('/', '_')
36
+ folder_label = path.gsub('/', '__')
37
37
  end
38
38
 
39
39
  FileUtils.mkdir_p("#{dir}/thumbnails/#{folder_label}")
@@ -41,4 +41,20 @@ class Wraith::FolderManager
41
41
  end
42
42
  puts 'Creating Folders'
43
43
  end
44
+
45
+ # Tidy up the shots folder, removing unnecessary files
46
+ #
47
+ def tidy_shots_folder dirs
48
+ if wraith.mode == 'diffs_only'
49
+ dirs.each do |a, b|
50
+ # If we are running in "diffs_only" mode, and none of the variants show a difference
51
+ # we remove the file from the shots folder
52
+ if b.none? {|k, v| v[:data] > 0}
53
+ FileUtils.rm_rf("#{wraith.directory}/#{a}")
54
+ dirs.delete(a)
55
+ end
56
+ end
57
+ end
58
+ end
59
+
44
60
  end
@@ -15,6 +15,7 @@ class Wraith::GalleryGenerator
15
15
  def initialize(config)
16
16
  @wraith = Wraith::Wraith.new(config)
17
17
  @location = wraith.directory
18
+ @folder_manager = Wraith::FolderManager.new(config)
18
19
  end
19
20
 
20
21
  def parse_directories(dirname)
@@ -55,7 +56,7 @@ class Wraith::GalleryGenerator
55
56
  filename: filepath, thumb: thumbnail
56
57
  }
57
58
  when 'data'
58
- size_dict[:data] = File.read("#{dirname}/#{filepath}")
59
+ size_dict[:data] = File.read("#{dirname}/#{filepath}").to_f
59
60
  else
60
61
  size_dict[:variants] << {
61
62
  name: group,
@@ -68,7 +69,14 @@ class Wraith::GalleryGenerator
68
69
  end
69
70
  end
70
71
  end
71
- @dirs
72
+ @folder_manager.tidy_shots_folder(@dirs)
73
+ if [ 'diffs_only', 'diffs_first' ].include?(wraith.mode)
74
+ @sorted = @dirs.sort_by { |category, sizes| -1 * sizes.max_by { |size, dict| dict[:data]}[1][:data] }
75
+ else
76
+ @sorted = @dirs.sort_by { |category, sizes| category }
77
+ end
78
+ # The sort has made this into an enumerable, convert it back to a Hash
79
+ Hash[@sorted]
72
80
  end
73
81
 
74
82
  def generate_html(location, directories, template, destination, path)
@@ -20,17 +20,17 @@
20
20
  <div class="panel">
21
21
  <div class="panel-heading">Screenshots:</div>
22
22
  <ul class="list-group list-group-flush">
23
- <% directories.keys.sort.each do |dir| %>
24
- <li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir%></a></li>
23
+ <% directories.keys.each do |dir| %>
24
+ <li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir.gsub('__', '/')%></a></li>
25
25
  <% end %>
26
26
  </ul>
27
27
  </div>
28
28
  </div>
29
29
  <div class="col-lg-10">
30
- <% directories.to_a.sort.each do |dir, sizes| %>
30
+ <% directories.each do |dir, sizes| %>
31
31
  <div class="row">
32
32
  <a name="<%= dir %>"></a>
33
- <h2><%= dir %></h2>
33
+ <h2><%= dir.gsub('__', '/') %></h2>
34
34
  </div>
35
35
  <% sizes.to_a.sort.each do |size, files| %>
36
36
  <div class="row">
@@ -54,7 +54,7 @@ class Wraith::SaveImages
54
54
  check_paths.each do |label, path|
55
55
  unless path
56
56
  path = label
57
- label = path.gsub('/', '_')
57
+ label = path.gsub('/', '__')
58
58
  end
59
59
 
60
60
  base_url = base_urls(path)
data/lib/wraith/spider.rb CHANGED
@@ -1,64 +1,108 @@
1
1
  require 'wraith'
2
2
  require 'anemone'
3
+ require 'nokogiri'
3
4
  require 'uri'
4
5
 
5
6
  class Wraith::Spidering
6
- attr_reader :wraith
7
7
 
8
8
  def initialize(config)
9
9
  @wraith = Wraith::Wraith.new(config)
10
10
  end
11
11
 
12
12
  def check_for_paths
13
- unless wraith.paths
14
- puts 'no paths defined'
15
- spider
13
+ if @wraith.paths.nil?
14
+ unless @wraith.sitemap.nil?
15
+ puts 'no paths defined in config, loading paths from sitemap'
16
+ spider = Wraith::Sitemap.new(@wraith)
17
+ else
18
+ puts 'no paths defined in config, crawling from site root'
19
+ spider = Wraith::Crawler.new(@wraith)
20
+ end
21
+ spider.determine_paths
16
22
  end
17
23
  end
24
+ end
25
+
26
+ class Wraith::Spider
27
+
28
+ def initialize(wraith)
29
+ @wraith = wraith
30
+ @paths = {}
31
+ end
32
+
33
+ def determine_paths
34
+ spider
35
+ write_file
36
+ end
37
+
38
+ private
39
+
40
+ def write_file
41
+ File.open(@wraith.spider_file, 'w+') { |file| file.write(@paths) }
42
+ end
43
+
44
+ def add_path(path)
45
+ @paths[path == '/' ? 'home' : path.gsub('/', '__').chomp('__').downcase] = path.downcase
46
+ end
18
47
 
19
48
  def spider
20
- if File.exist?(wraith.spider_file)
21
- check_file
22
- else
23
- puts 'creating new spider file'
24
- spider_base_domain
25
- end
49
+
26
50
  end
27
51
 
28
- def check_file
29
- if (Time.now - File.ctime(wraith.spider_file)) / (24 * 3600) < wraith.spider_days[0]
52
+ end
53
+
54
+ class Wraith::Crawler < Wraith::Spider
55
+
56
+ EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
57
+ gz jar js css dtd xsd ico raw mp3 mp4 \
58
+ wav wmv ape aac ac3 wma aiff mpg mpeg \
59
+ avi mov ogg mkv mka asx asf mp2 m1v \
60
+ m3u f4v pdf doc xls ppt pps bin exe rss xml)
61
+
62
+ def spider
63
+ if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
30
64
  puts 'using existing spider file'
31
65
  else
32
66
  puts 'creating new spider file'
33
- spider_base_domain
67
+ spider_list = []
68
+ Anemone.crawl(@wraith.base_domain) do |anemone|
69
+ anemone.skip_links_like(/\.#{EXT.join('|')}$/)
70
+ # Add user specified skips
71
+ anemone.skip_links_like(@wraith.spider_skips)
72
+ anemone.on_every_page { |page| add_path(page.url.path) }
73
+ end
34
74
  end
35
75
  end
36
76
 
37
- def spider_base_domain
38
- @spider_list = []
39
- crawl_url = wraith.base_domain
40
- ext = %w(flv swf png jpg gif asx zip rar tar 7z \
41
- gz jar js css dtd xsd ico raw mp3 mp4 \
42
- wav wmv ape aac ac3 wma aiff mpg mpeg \
43
- avi mov ogg mkv mka asx asf mp2 m1v \
44
- m3u f4v pdf doc xls ppt pps bin exe rss xml)
45
-
46
- Anemone.crawl(crawl_url) do |anemone|
47
- anemone.skip_links_like(/\.#{ext.join('|')}$/)
48
- anemone.on_every_page { |page| @spider_list << page.url.path }
49
- end
50
- create_spider_file
77
+ def modified_since(file, since)
78
+ (Time.now - File.ctime(file)) / (24 * 3600) < since
51
79
  end
52
80
 
53
- def create_spider_file
54
- @i = 0
55
- spider = Hash.new { |h, k| h[k] = [] }
56
- while @i < @spider_list.length
57
- lab = @spider_list[@i].to_s.split('/').last
58
- lab = 'home' if @spider_list[@i] == '/'
59
- spider[lab] = @spider_list[@i]
60
- @i += 1
81
+ end
82
+
83
+ class Wraith::Sitemap < Wraith::Spider
84
+
85
+ def spider
86
+ unless @wraith.sitemap.nil?
87
+ puts "reading sitemap.xml from #{@wraith.sitemap}"
88
+ if @wraith.sitemap =~ URI::regexp
89
+ sitemap = Nokogiri::XML(open(@wraith.sitemap))
90
+ else
91
+ sitemap = Nokogiri::XML(File.open(@wraith.sitemap))
92
+ end
93
+ urls = {}
94
+ sitemap.css('loc').each do |loc|
95
+ path = loc.content
96
+ # Allow use of either domain in the sitemap.xml
97
+ @wraith.domains.each do |k, v|
98
+ path.sub!(v, '')
99
+ end
100
+ if @wraith.spider_skips.nil? || @wraith.spider_skips.none? { |regex| regex.match(path) }
101
+ add_path(path)
102
+ end
103
+ end
61
104
  end
62
- File.open(wraith.spider_file, 'w+') { |file| file.write(spider) }
63
105
  end
106
+
64
107
  end
108
+
@@ -1,3 +1,3 @@
1
1
  module Wraith
2
- VERSION = '1.2.2'
2
+ VERSION = '1.3.0'
3
3
  end
data/lib/wraith/wraith.rb CHANGED
@@ -47,6 +47,14 @@ class Wraith::Wraith
47
47
  @config['spider_days']
48
48
  end
49
49
 
50
+ def sitemap
51
+ @config['sitemap']
52
+ end
53
+
54
+ def spider_skips
55
+ @config['spider_skips']
56
+ end
57
+
50
58
  def paths
51
59
  @config['paths']
52
60
  end
@@ -59,13 +67,21 @@ class Wraith::Wraith
59
67
  @config['fuzz']
60
68
  end
61
69
 
70
+ def mode
71
+ if ['diffs_only', 'diffs_first', 'alphanumeric'].include?(@config['mode'])
72
+ @config['mode']
73
+ else
74
+ 'alphanumeric'
75
+ end
76
+ end
77
+
62
78
  def capture_page_image(browser, url, width, file_name)
63
79
  puts `"#{browser}" #{@config['phantomjs_options']} "#{snap_file}" "#{url}" "#{width}" "#{file_name}"`
64
80
  end
65
81
 
66
82
  def self.crop_images(crop, height)
67
83
  # For compatibility with windows file structures switch commenting on the following 2 lines
68
- puts `convert #{crop} -background none -extent 0x#{height} #{crop}`
84
+ `convert #{crop} -background none -extent 0x#{height} #{crop}`
69
85
  # puts `convert #{crop.gsub('/', '\\')} -background none -extent 0x#{height} #{crop.gsub('/', '\\')}`
70
86
  end
71
87
 
@@ -75,7 +91,7 @@ class Wraith::Wraith
75
91
 
76
92
  def set_image_width(image, width)
77
93
  # For compatibility with windows file structures switch commenting on the following 2 lines
78
- puts `convert #{image} -background none -extent #{width}x0 #{image}`
94
+ `convert #{image} -background none -extent #{width}x0 #{image}`
79
95
  # puts `convert #{image.gsub('/', '\\')} -background none -extent #{width}x0 #{image.gsub('/', '\\')}`
80
96
  end
81
97
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wraith
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.2
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dave Blooman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-26 00:00:00.000000000 Z
12
+ date: 2014-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: pry