wraith 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/configs/config.yaml +14 -0
- data/configs/config_nojs.yaml +8 -0
- data/configs/test_config.yaml +7 -0
- data/lib/wraith/compare_images.rb +0 -1
- data/lib/wraith/folder.rb +17 -1
- data/lib/wraith/gallery.rb +10 -2
- data/lib/wraith/gallery_template/gallery_template.erb +4 -4
- data/lib/wraith/save_images.rb +1 -1
- data/lib/wraith/spider.rb +80 -36
- data/lib/wraith/version.rb +1 -1
- data/lib/wraith/wraith.rb +18 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ad5c9e4eecd6cbe1a694bc29c62410ff45b7c86
|
4
|
+
data.tar.gz: c84b6e122617e02962a281ad885717342c919d35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d415de27039b977ba067eac6a0f17edacdd301ea5a43e5058454b8b8b35151f1a1bc9cc0b0216695374ff9b2d11bce6afc90ffe11299c5ca14af8818ade9917
|
7
|
+
data.tar.gz: 68d6c30abf6631ba704fe705290b9ca909d95859b8268e0886ef19773c991fdb90aa36972fba343ad3a7089729ca41f3e45365005b4a3c90ad8110f81d642419
|
data/configs/config.yaml
CHANGED
@@ -44,3 +44,17 @@ fuzz: '20%'
|
|
44
44
|
#Set the number of days to keep the site spider file
|
45
45
|
spider_days:
|
46
46
|
- 10
|
47
|
+
|
48
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
49
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
50
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
51
|
+
#
|
52
|
+
# spider_skips:
|
53
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
54
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
55
|
+
|
56
|
+
#Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
|
57
|
+
#alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
|
58
|
+
#diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
|
59
|
+
#diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
|
60
|
+
#mode: diffs_first
|
data/configs/config_nojs.yaml
CHANGED
@@ -41,3 +41,11 @@ fuzz: '20%'
|
|
41
41
|
#Set the number of days to keep the site spider file
|
42
42
|
spider_days:
|
43
43
|
- 10
|
44
|
+
|
45
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
46
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
47
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
48
|
+
#
|
49
|
+
# spider_skips:
|
50
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
51
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
data/configs/test_config.yaml
CHANGED
@@ -44,3 +44,10 @@ fuzz: '20%'
|
|
44
44
|
#Set the number of days to keep the site spider file
|
45
45
|
spider_days:
|
46
46
|
- 10
|
47
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
48
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
49
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
50
|
+
#
|
51
|
+
# spider_skips:
|
52
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
53
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
@@ -16,7 +16,6 @@ class Wraith::CompareImages
|
|
16
16
|
diff = base.gsub(/([a-z0-9]+).png$/, 'diff.png')
|
17
17
|
info = base.gsub(/([a-z0-9]+).png$/, 'data.txt')
|
18
18
|
compare_task(base, compare, diff, info)
|
19
|
-
Dir.glob("#{wraith.directory}/*/*.txt").map { |f| "\n#{f}\n#{File.read(f)}" }
|
20
19
|
puts 'Saved diff'
|
21
20
|
end
|
22
21
|
end
|
data/lib/wraith/folder.rb
CHANGED
@@ -33,7 +33,7 @@ class Wraith::FolderManager
|
|
33
33
|
spider_paths.each do |folder_label, path|
|
34
34
|
unless path
|
35
35
|
path = folder_label
|
36
|
-
folder_label = path.gsub('/', '
|
36
|
+
folder_label = path.gsub('/', '__')
|
37
37
|
end
|
38
38
|
|
39
39
|
FileUtils.mkdir_p("#{dir}/thumbnails/#{folder_label}")
|
@@ -41,4 +41,20 @@ class Wraith::FolderManager
|
|
41
41
|
end
|
42
42
|
puts 'Creating Folders'
|
43
43
|
end
|
44
|
+
|
45
|
+
# Tidy up the shots folder, removing unnecessary files
|
46
|
+
#
|
47
|
+
def tidy_shots_folder dirs
|
48
|
+
if wraith.mode == 'diffs_only'
|
49
|
+
dirs.each do |a, b|
|
50
|
+
# If we are running in "diffs_only" mode, and none of the variants show a difference
|
51
|
+
# we remove the file from the shots folder
|
52
|
+
if b.none? {|k, v| v[:data] > 0}
|
53
|
+
FileUtils.rm_rf("#{wraith.directory}/#{a}")
|
54
|
+
dirs.delete(a)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
44
60
|
end
|
data/lib/wraith/gallery.rb
CHANGED
@@ -15,6 +15,7 @@ class Wraith::GalleryGenerator
|
|
15
15
|
def initialize(config)
|
16
16
|
@wraith = Wraith::Wraith.new(config)
|
17
17
|
@location = wraith.directory
|
18
|
+
@folder_manager = Wraith::FolderManager.new(config)
|
18
19
|
end
|
19
20
|
|
20
21
|
def parse_directories(dirname)
|
@@ -55,7 +56,7 @@ class Wraith::GalleryGenerator
|
|
55
56
|
filename: filepath, thumb: thumbnail
|
56
57
|
}
|
57
58
|
when 'data'
|
58
|
-
size_dict[:data] = File.read("#{dirname}/#{filepath}")
|
59
|
+
size_dict[:data] = File.read("#{dirname}/#{filepath}").to_f
|
59
60
|
else
|
60
61
|
size_dict[:variants] << {
|
61
62
|
name: group,
|
@@ -68,7 +69,14 @@ class Wraith::GalleryGenerator
|
|
68
69
|
end
|
69
70
|
end
|
70
71
|
end
|
71
|
-
@dirs
|
72
|
+
@folder_manager.tidy_shots_folder(@dirs)
|
73
|
+
if [ 'diffs_only', 'diffs_first' ].include?(wraith.mode)
|
74
|
+
@sorted = @dirs.sort_by { |category, sizes| -1 * sizes.max_by { |size, dict| dict[:data]}[1][:data] }
|
75
|
+
else
|
76
|
+
@sorted = @dirs.sort_by { |category, sizes| category }
|
77
|
+
end
|
78
|
+
# The sort has made this into an enumerable, convert it back to a Hash
|
79
|
+
Hash[@sorted]
|
72
80
|
end
|
73
81
|
|
74
82
|
def generate_html(location, directories, template, destination, path)
|
@@ -20,17 +20,17 @@
|
|
20
20
|
<div class="panel">
|
21
21
|
<div class="panel-heading">Screenshots:</div>
|
22
22
|
<ul class="list-group list-group-flush">
|
23
|
-
<% directories.keys.
|
24
|
-
<li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir%></a></li>
|
23
|
+
<% directories.keys.each do |dir| %>
|
24
|
+
<li class="list-group-item"><a href="#<%=path%><%=dir%>"><%=dir.gsub('__', '/')%></a></li>
|
25
25
|
<% end %>
|
26
26
|
</ul>
|
27
27
|
</div>
|
28
28
|
</div>
|
29
29
|
<div class="col-lg-10">
|
30
|
-
<% directories.
|
30
|
+
<% directories.each do |dir, sizes| %>
|
31
31
|
<div class="row">
|
32
32
|
<a name="<%= dir %>"></a>
|
33
|
-
<h2><%= dir %></h2>
|
33
|
+
<h2><%= dir.gsub('__', '/') %></h2>
|
34
34
|
</div>
|
35
35
|
<% sizes.to_a.sort.each do |size, files| %>
|
36
36
|
<div class="row">
|
data/lib/wraith/save_images.rb
CHANGED
data/lib/wraith/spider.rb
CHANGED
@@ -1,64 +1,108 @@
|
|
1
1
|
require 'wraith'
|
2
2
|
require 'anemone'
|
3
|
+
require 'nokogiri'
|
3
4
|
require 'uri'
|
4
5
|
|
5
6
|
class Wraith::Spidering
|
6
|
-
attr_reader :wraith
|
7
7
|
|
8
8
|
def initialize(config)
|
9
9
|
@wraith = Wraith::Wraith.new(config)
|
10
10
|
end
|
11
11
|
|
12
12
|
def check_for_paths
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
if @wraith.paths.nil?
|
14
|
+
unless @wraith.sitemap.nil?
|
15
|
+
puts 'no paths defined in config, loading paths from sitemap'
|
16
|
+
spider = Wraith::Sitemap.new(@wraith)
|
17
|
+
else
|
18
|
+
puts 'no paths defined in config, crawling from site root'
|
19
|
+
spider = Wraith::Crawler.new(@wraith)
|
20
|
+
end
|
21
|
+
spider.determine_paths
|
16
22
|
end
|
17
23
|
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Wraith::Spider
|
27
|
+
|
28
|
+
def initialize(wraith)
|
29
|
+
@wraith = wraith
|
30
|
+
@paths = {}
|
31
|
+
end
|
32
|
+
|
33
|
+
def determine_paths
|
34
|
+
spider
|
35
|
+
write_file
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def write_file
|
41
|
+
File.open(@wraith.spider_file, 'w+') { |file| file.write(@paths) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_path(path)
|
45
|
+
@paths[path == '/' ? 'home' : path.gsub('/', '__').chomp('__').downcase] = path.downcase
|
46
|
+
end
|
18
47
|
|
19
48
|
def spider
|
20
|
-
|
21
|
-
check_file
|
22
|
-
else
|
23
|
-
puts 'creating new spider file'
|
24
|
-
spider_base_domain
|
25
|
-
end
|
49
|
+
|
26
50
|
end
|
27
51
|
|
28
|
-
|
29
|
-
|
52
|
+
end
|
53
|
+
|
54
|
+
class Wraith::Crawler < Wraith::Spider
|
55
|
+
|
56
|
+
EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
|
57
|
+
gz jar js css dtd xsd ico raw mp3 mp4 \
|
58
|
+
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
59
|
+
avi mov ogg mkv mka asx asf mp2 m1v \
|
60
|
+
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
61
|
+
|
62
|
+
def spider
|
63
|
+
if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
|
30
64
|
puts 'using existing spider file'
|
31
65
|
else
|
32
66
|
puts 'creating new spider file'
|
33
|
-
|
67
|
+
spider_list = []
|
68
|
+
Anemone.crawl(@wraith.base_domain) do |anemone|
|
69
|
+
anemone.skip_links_like(/\.#{EXT.join('|')}$/)
|
70
|
+
# Add user specified skips
|
71
|
+
anemone.skip_links_like(@wraith.spider_skips)
|
72
|
+
anemone.on_every_page { |page| add_path(page.url.path) }
|
73
|
+
end
|
34
74
|
end
|
35
75
|
end
|
36
76
|
|
37
|
-
def
|
38
|
-
|
39
|
-
crawl_url = wraith.base_domain
|
40
|
-
ext = %w(flv swf png jpg gif asx zip rar tar 7z \
|
41
|
-
gz jar js css dtd xsd ico raw mp3 mp4 \
|
42
|
-
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
43
|
-
avi mov ogg mkv mka asx asf mp2 m1v \
|
44
|
-
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
45
|
-
|
46
|
-
Anemone.crawl(crawl_url) do |anemone|
|
47
|
-
anemone.skip_links_like(/\.#{ext.join('|')}$/)
|
48
|
-
anemone.on_every_page { |page| @spider_list << page.url.path }
|
49
|
-
end
|
50
|
-
create_spider_file
|
77
|
+
def modified_since(file, since)
|
78
|
+
(Time.now - File.ctime(file)) / (24 * 3600) < since
|
51
79
|
end
|
52
80
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
@
|
81
|
+
end
|
82
|
+
|
83
|
+
class Wraith::Sitemap < Wraith::Spider
|
84
|
+
|
85
|
+
def spider
|
86
|
+
unless @wraith.sitemap.nil?
|
87
|
+
puts "reading sitemap.xml from #{@wraith.sitemap}"
|
88
|
+
if @wraith.sitemap =~ URI::regexp
|
89
|
+
sitemap = Nokogiri::XML(open(@wraith.sitemap))
|
90
|
+
else
|
91
|
+
sitemap = Nokogiri::XML(File.open(@wraith.sitemap))
|
92
|
+
end
|
93
|
+
urls = {}
|
94
|
+
sitemap.css('loc').each do |loc|
|
95
|
+
path = loc.content
|
96
|
+
# Allow use of either domain in the sitemap.xml
|
97
|
+
@wraith.domains.each do |k, v|
|
98
|
+
path.sub!(v, '')
|
99
|
+
end
|
100
|
+
if @wraith.spider_skips.nil? || @wraith.spider_skips.none? { |regex| regex.match(path) }
|
101
|
+
add_path(path)
|
102
|
+
end
|
103
|
+
end
|
61
104
|
end
|
62
|
-
File.open(wraith.spider_file, 'w+') { |file| file.write(spider) }
|
63
105
|
end
|
106
|
+
|
64
107
|
end
|
108
|
+
|
data/lib/wraith/version.rb
CHANGED
data/lib/wraith/wraith.rb
CHANGED
@@ -47,6 +47,14 @@ class Wraith::Wraith
|
|
47
47
|
@config['spider_days']
|
48
48
|
end
|
49
49
|
|
50
|
+
def sitemap
|
51
|
+
@config['sitemap']
|
52
|
+
end
|
53
|
+
|
54
|
+
def spider_skips
|
55
|
+
@config['spider_skips']
|
56
|
+
end
|
57
|
+
|
50
58
|
def paths
|
51
59
|
@config['paths']
|
52
60
|
end
|
@@ -59,13 +67,21 @@ class Wraith::Wraith
|
|
59
67
|
@config['fuzz']
|
60
68
|
end
|
61
69
|
|
70
|
+
def mode
|
71
|
+
if ['diffs_only', 'diffs_first', 'alphanumeric'].include?(@config['mode'])
|
72
|
+
@config['mode']
|
73
|
+
else
|
74
|
+
'alphanumeric'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
62
78
|
def capture_page_image(browser, url, width, file_name)
|
63
79
|
puts `"#{browser}" #{@config['phantomjs_options']} "#{snap_file}" "#{url}" "#{width}" "#{file_name}"`
|
64
80
|
end
|
65
81
|
|
66
82
|
def self.crop_images(crop, height)
|
67
83
|
# For compatibility with windows file structures switch commenting on the following 2 lines
|
68
|
-
|
84
|
+
`convert #{crop} -background none -extent 0x#{height} #{crop}`
|
69
85
|
# puts `convert #{crop.gsub('/', '\\')} -background none -extent 0x#{height} #{crop.gsub('/', '\\')}`
|
70
86
|
end
|
71
87
|
|
@@ -75,7 +91,7 @@ class Wraith::Wraith
|
|
75
91
|
|
76
92
|
def set_image_width(image, width)
|
77
93
|
# For compatibility with windows file structures switch commenting on the following 2 lines
|
78
|
-
|
94
|
+
`convert #{image} -background none -extent #{width}x0 #{image}`
|
79
95
|
# puts `convert #{image.gsub('/', '\\')} -background none -extent #{width}x0 #{image.gsub('/', '\\')}`
|
80
96
|
end
|
81
97
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wraith
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dave Blooman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pry
|