wraith 1.2.2 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/configs/config.yaml +14 -0
- data/configs/config_nojs.yaml +8 -0
- data/configs/test_config.yaml +7 -0
- data/lib/wraith/compare_images.rb +0 -1
- data/lib/wraith/folder.rb +17 -1
- data/lib/wraith/gallery.rb +10 -2
- data/lib/wraith/gallery_template/gallery_template.erb +4 -4
- data/lib/wraith/save_images.rb +1 -1
- data/lib/wraith/spider.rb +80 -36
- data/lib/wraith/version.rb +1 -1
- data/lib/wraith/wraith.rb +18 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ad5c9e4eecd6cbe1a694bc29c62410ff45b7c86
|
4
|
+
data.tar.gz: c84b6e122617e02962a281ad885717342c919d35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d415de27039b977ba067eac6a0f17edacdd301ea5a43e5058454b8b8b35151f1a1bc9cc0b0216695374ff9b2d11bce6afc90ffe11299c5ca14af8818ade9917
|
7
|
+
data.tar.gz: 68d6c30abf6631ba704fe705290b9ca909d95859b8268e0886ef19773c991fdb90aa36972fba343ad3a7089729ca41f3e45365005b4a3c90ad8110f81d642419
|
data/configs/config.yaml
CHANGED
@@ -44,3 +44,17 @@ fuzz: '20%'
|
|
44
44
|
#Set the number of days to keep the site spider file
|
45
45
|
spider_days:
|
46
46
|
- 10
|
47
|
+
|
48
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
49
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
50
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
51
|
+
#
|
52
|
+
# spider_skips:
|
53
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
54
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
55
|
+
|
56
|
+
#Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
|
57
|
+
#alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
|
58
|
+
#diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
|
59
|
+
#diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
|
60
|
+
#mode: diffs_first
|
data/configs/config_nojs.yaml
CHANGED
@@ -41,3 +41,11 @@ fuzz: '20%'
|
|
41
41
|
#Set the number of days to keep the site spider file
|
42
42
|
spider_days:
|
43
43
|
- 10
|
44
|
+
|
45
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
46
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
47
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
48
|
+
#
|
49
|
+
# spider_skips:
|
50
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
51
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
data/configs/test_config.yaml
CHANGED
@@ -44,3 +44,10 @@ fuzz: '20%'
|
|
44
44
|
#Set the number of days to keep the site spider file
|
45
45
|
spider_days:
|
46
46
|
- 10
|
47
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
48
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
49
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
50
|
+
#
|
51
|
+
# spider_skips:
|
52
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
53
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
@@ -16,7 +16,6 @@ class Wraith::CompareImages
|
|
16
16
|
diff = base.gsub(/([a-z0-9]+).png$/, 'diff.png')
|
17
17
|
info = base.gsub(/([a-z0-9]+).png$/, 'data.txt')
|
18
18
|
compare_task(base, compare, diff, info)
|
19
|
-
Dir.glob("#{wraith.directory}/*/*.txt").map { |f| "\n#{f}\n#{File.read(f)}" }
|
20
19
|
puts 'Saved diff'
|
21
20
|
end
|
22
21
|
end
|
data/lib/wraith/folder.rb
CHANGED
@@ -33,7 +33,7 @@ class Wraith::FolderManager
|
|
33
33
|
spider_paths.each do |folder_label, path|
|
34
34
|
unless path
|
35
35
|
path = folder_label
|
36
|
-
folder_label = path.gsub('/', '
|
36
|
+
folder_label = path.gsub('/', '__')
|
37
37
|
end
|
38
38
|
|
39
39
|
FileUtils.mkdir_p("#{dir}/thumbnails/#{folder_label}")
|
@@ -41,4 +41,20 @@ class Wraith::FolderManager
|
|
41
41
|
end
|
42
42
|
puts 'Creating Folders'
|
43
43
|
end
|
44
|
+
|
45
|
+
# Tidy up the shots folder, removing unnecessary files
|
46
|
+
#
|
47
|
+
def tidy_shots_folder dirs
|
48
|
+
if wraith.mode == 'diffs_only'
|
49
|
+
dirs.each do |a, b|
|
50
|
+
# If we are running in "diffs_only" mode, and none of the variants show a difference
|
51
|
+
# we remove the file from the shots folder
|
52
|
+
if b.none? {|k, v| v[:data] > 0}
|
53
|
+
FileUtils.rm_rf("#{wraith.directory}/#{a}")
|
54
|
+
dirs.delete(a)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
44
60
|
end
|
data/lib/wraith/gallery.rb
CHANGED
@@ -15,6 +15,7 @@ class Wraith::GalleryGenerator
|
|
15
15
|
def initialize(config)
|
16
16
|
@wraith = Wraith::Wraith.new(config)
|
17
17
|
@location = wraith.directory
|
18
|
+
@folder_manager = Wraith::FolderManager.new(config)
|
18
19
|
end
|
19
20
|
|
20
21
|
def parse_directories(dirname)
|
@@ -55,7 +56,7 @@ class Wraith::GalleryGenerator
|
|
55
56
|
filename: filepath, thumb: thumbnail
|
56
57
|
}
|
57
58
|
when 'data'
|
58
|
-
size_dict[:data] = File.read("#{dirname}/#{filepath}")
|
59
|
+
size_dict[:data] = File.read("#{dirname}/#{filepath}").to_f
|
59
60
|
else
|
60
61
|
size_dict[:variants] << {
|
61
62
|
name: group,
|
@@ -68,7 +69,14 @@ class Wraith::GalleryGenerator
|
|
68
69
|
end
|
69
70
|
end
|
70
71
|
end
|
71
|
-
@dirs
|
72
|
+
@folder_manager.tidy_shots_folder(@dirs)
|
73
|
+
if [ 'diffs_only', 'diffs_first' ].include?(wraith.mode)
|
74
|
+
@sorted = @dirs.sort_by { |category, sizes| -1 * sizes.max_by { |size, dict| dict[:data]}[1][:data] }
|
75
|
+
else
|
76
|
+
@sorted = @dirs.sort_by { |category, sizes| category }
|
77
|
+
end
|
78
|
+
# The sort has made this into an enumerable, convert it back to a Hash
|
79
|
+
Hash[@sorted]
|
72
80
|
end
|
73
81
|
|
74
82
|
def generate_html(location, directories, template, destination, path)
|
@@ -20,17 +20,17 @@
|
|
20
20
|
<div class="panel">
|
21
21
|
<div class="panel-heading">Screenshots:</div>
|
22
22
|
<ul class="list-group list-group-flush">
|
23
|
-
<% directories.keys.
|
24
|
-
<li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir%></a></li>
|
23
|
+
<% directories.keys.each do |dir| %>
|
24
|
+
<li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir.gsub('__', '/')%></a></li>
|
25
25
|
<% end %>
|
26
26
|
</ul>
|
27
27
|
</div>
|
28
28
|
</div>
|
29
29
|
<div class="col-lg-10">
|
30
|
-
<% directories.
|
30
|
+
<% directories.each do |dir, sizes| %>
|
31
31
|
<div class="row">
|
32
32
|
<a name="<%= dir %>"></a>
|
33
|
-
<h2><%= dir %></h2>
|
33
|
+
<h2><%= dir.gsub('__', '/') %></h2>
|
34
34
|
</div>
|
35
35
|
<% sizes.to_a.sort.each do |size, files| %>
|
36
36
|
<div class="row">
|
data/lib/wraith/save_images.rb
CHANGED
data/lib/wraith/spider.rb
CHANGED
@@ -1,64 +1,108 @@
|
|
1
1
|
require 'wraith'
|
2
2
|
require 'anemone'
|
3
|
+
require 'nokogiri'
|
3
4
|
require 'uri'
|
4
5
|
|
5
6
|
class Wraith::Spidering
|
6
|
-
attr_reader :wraith
|
7
7
|
|
8
8
|
def initialize(config)
|
9
9
|
@wraith = Wraith::Wraith.new(config)
|
10
10
|
end
|
11
11
|
|
12
12
|
def check_for_paths
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @wraith.paths.nil?
|
14
|
+
unless @wraith.sitemap.nil?
|
15
|
+
puts 'no paths defined in config, loading paths from sitemap'
|
16
|
+
spider = Wraith::Sitemap.new(@wraith)
|
17
|
+
else
|
18
|
+
puts 'no paths defined in config, crawling from site root'
|
19
|
+
spider = Wraith::Crawler.new(@wraith)
|
20
|
+
end
|
21
|
+
spider.determine_paths
|
16
22
|
end
|
17
23
|
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Wraith::Spider
|
27
|
+
|
28
|
+
def initialize(wraith)
|
29
|
+
@wraith = wraith
|
30
|
+
@paths = {}
|
31
|
+
end
|
32
|
+
|
33
|
+
def determine_paths
|
34
|
+
spider
|
35
|
+
write_file
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def write_file
|
41
|
+
File.open(@wraith.spider_file, 'w+') { |file| file.write(@paths) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_path(path)
|
45
|
+
@paths[path == '/' ? 'home' : path.gsub('/', '__').chomp('__').downcase] = path.downcase
|
46
|
+
end
|
18
47
|
|
19
48
|
def spider
|
20
|
-
|
21
|
-
check_file
|
22
|
-
else
|
23
|
-
puts 'creating new spider file'
|
24
|
-
spider_base_domain
|
25
|
-
end
|
49
|
+
|
26
50
|
end
|
27
51
|
|
28
|
-
|
29
|
-
|
52
|
+
end
|
53
|
+
|
54
|
+
class Wraith::Crawler < Wraith::Spider
|
55
|
+
|
56
|
+
EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
|
57
|
+
gz jar js css dtd xsd ico raw mp3 mp4 \
|
58
|
+
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
59
|
+
avi mov ogg mkv mka asx asf mp2 m1v \
|
60
|
+
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
61
|
+
|
62
|
+
def spider
|
63
|
+
if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
|
30
64
|
puts 'using existing spider file'
|
31
65
|
else
|
32
66
|
puts 'creating new spider file'
|
33
|
-
|
67
|
+
spider_list = []
|
68
|
+
Anemone.crawl(@wraith.base_domain) do |anemone|
|
69
|
+
anemone.skip_links_like(/\.#{EXT.join('|')}$/)
|
70
|
+
# Add user specified skips
|
71
|
+
anemone.skip_links_like(@wraith.spider_skips)
|
72
|
+
anemone.on_every_page { |page| add_path(page.url.path) }
|
73
|
+
end
|
34
74
|
end
|
35
75
|
end
|
36
76
|
|
37
|
-
def
|
38
|
-
|
39
|
-
crawl_url = wraith.base_domain
|
40
|
-
ext = %w(flv swf png jpg gif asx zip rar tar 7z \
|
41
|
-
gz jar js css dtd xsd ico raw mp3 mp4 \
|
42
|
-
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
43
|
-
avi mov ogg mkv mka asx asf mp2 m1v \
|
44
|
-
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
45
|
-
|
46
|
-
Anemone.crawl(crawl_url) do |anemone|
|
47
|
-
anemone.skip_links_like(/\.#{ext.join('|')}$/)
|
48
|
-
anemone.on_every_page { |page| @spider_list << page.url.path }
|
49
|
-
end
|
50
|
-
create_spider_file
|
77
|
+
def modified_since(file, since)
|
78
|
+
(Time.now - File.ctime(file)) / (24 * 3600) < since
|
51
79
|
end
|
52
80
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
@
|
81
|
+
end
|
82
|
+
|
83
|
+
class Wraith::Sitemap < Wraith::Spider
|
84
|
+
|
85
|
+
def spider
|
86
|
+
unless @wraith.sitemap.nil?
|
87
|
+
puts "reading sitemap.xml from #{@wraith.sitemap}"
|
88
|
+
if @wraith.sitemap =~ URI::regexp
|
89
|
+
sitemap = Nokogiri::XML(open(@wraith.sitemap))
|
90
|
+
else
|
91
|
+
sitemap = Nokogiri::XML(File.open(@wraith.sitemap))
|
92
|
+
end
|
93
|
+
urls = {}
|
94
|
+
sitemap.css('loc').each do |loc|
|
95
|
+
path = loc.content
|
96
|
+
# Allow use of either domain in the sitemap.xml
|
97
|
+
@wraith.domains.each do |k, v|
|
98
|
+
path.sub!(v, '')
|
99
|
+
end
|
100
|
+
if @wraith.spider_skips.nil? || @wraith.spider_skips.none? { |regex| regex.match(path) }
|
101
|
+
add_path(path)
|
102
|
+
end
|
103
|
+
end
|
61
104
|
end
|
62
|
-
File.open(wraith.spider_file, 'w+') { |file| file.write(spider) }
|
63
105
|
end
|
106
|
+
|
64
107
|
end
|
108
|
+
|
data/lib/wraith/version.rb
CHANGED
data/lib/wraith/wraith.rb
CHANGED
@@ -47,6 +47,14 @@ class Wraith::Wraith
|
|
47
47
|
@config['spider_days']
|
48
48
|
end
|
49
49
|
|
50
|
+
def sitemap
|
51
|
+
@config['sitemap']
|
52
|
+
end
|
53
|
+
|
54
|
+
def spider_skips
|
55
|
+
@config['spider_skips']
|
56
|
+
end
|
57
|
+
|
50
58
|
def paths
|
51
59
|
@config['paths']
|
52
60
|
end
|
@@ -59,13 +67,21 @@ class Wraith::Wraith
|
|
59
67
|
@config['fuzz']
|
60
68
|
end
|
61
69
|
|
70
|
+
def mode
|
71
|
+
if ['diffs_only', 'diffs_first', 'alphanumeric'].include?(@config['mode'])
|
72
|
+
@config['mode']
|
73
|
+
else
|
74
|
+
'alphanumeric'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
62
78
|
def capture_page_image(browser, url, width, file_name)
|
63
79
|
puts `"#{browser}" #{@config['phantomjs_options']} "#{snap_file}" "#{url}" "#{width}" "#{file_name}"`
|
64
80
|
end
|
65
81
|
|
66
82
|
def self.crop_images(crop, height)
|
67
83
|
# For compatibility with windows file structures switch commenting on the following 2 lines
|
68
|
-
|
84
|
+
`convert #{crop} -background none -extent 0x#{height} #{crop}`
|
69
85
|
# puts `convert #{crop.gsub('/', '\\')} -background none -extent 0x#{height} #{crop.gsub('/', '\\')}`
|
70
86
|
end
|
71
87
|
|
@@ -75,7 +91,7 @@ class Wraith::Wraith
|
|
75
91
|
|
76
92
|
def set_image_width(image, width)
|
77
93
|
# For compatibility with windows file structures switch commenting on the following 2 lines
|
78
|
-
|
94
|
+
`convert #{image} -background none -extent #{width}x0 #{image}`
|
79
95
|
# puts `convert #{image.gsub('/', '\\')} -background none -extent #{width}x0 #{image.gsub('/', '\\')}`
|
80
96
|
end
|
81
97
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wraith
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dave Blooman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pry
|