wraith 3.2.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wraith/cli.rb +15 -23
- data/lib/wraith/helpers/custom_exceptions.rb +6 -0
- data/lib/wraith/helpers/utilities.rb +41 -0
- data/lib/wraith/save_images.rb +1 -1
- data/lib/wraith/spider.rb +27 -83
- data/lib/wraith/validate.rb +12 -31
- data/lib/wraith/version.rb +1 -1
- data/lib/wraith/wraith.rb +46 -5
- data/spec/before_capture_spec.rb +2 -2
- data/spec/config_spec.rb +19 -9
- data/spec/configs/test_config--imports.yaml +4 -0
- data/spec/configs/test_config--spider.yaml +38 -0
- data/spec/validate_spec.rb +40 -11
- data/wraith.gemspec +0 -1
- metadata +6 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56d5e05f1ed1c519e36e5a1327881abe8f6d312a
|
4
|
+
data.tar.gz: f42c42db043a716a7f4053653edb517867b5e401
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1eb913d7f7fd2b98f0381ceb2279251524b29075372b16223d3a5efe1a135ed619910c41394e17e4f9b712d42387b3a29d4ca892c8b1ae735f7935151d01b05
|
7
|
+
data.tar.gz: ba88e5406d884f0b3a8785561756d8f531ea6f8420ba586e5d02b520d95e2599bc6518138ab272594dcb7083d88042073c627730b3dda8691a08c885d0726ed5
|
data/lib/wraith/cli.rb
CHANGED
@@ -22,26 +22,6 @@ class Wraith::CLI < Thor
|
|
22
22
|
File.expand_path("../../../", __FILE__)
|
23
23
|
end
|
24
24
|
|
25
|
-
# define internal methods which user should not be able to run directly
|
26
|
-
no_commands do
|
27
|
-
def within_acceptable_limits
|
28
|
-
yield
|
29
|
-
rescue CustomError => e
|
30
|
-
logger.error e.message
|
31
|
-
# other errors, such as SystemError, will not be caught nicely and will give a stack trace (which we'd need)
|
32
|
-
end
|
33
|
-
|
34
|
-
def check_for_paths(config_name)
|
35
|
-
spider = Wraith::Spidering.new(config_name)
|
36
|
-
spider.check_for_paths
|
37
|
-
end
|
38
|
-
|
39
|
-
def copy_old_shots(config_name)
|
40
|
-
create = Wraith::FolderManager.new(config_name)
|
41
|
-
create.copy_old_shots
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
25
|
desc "validate [config_name]", "checks your configuration and validates that all required properties exist"
|
46
26
|
def validate(config_name)
|
47
27
|
within_acceptable_limits do
|
@@ -57,6 +37,15 @@ class Wraith::CLI < Thor
|
|
57
37
|
end
|
58
38
|
end
|
59
39
|
|
40
|
+
desc "spider [config_name]", "crawls a site for paths and stores them to YML file"
|
41
|
+
def spider(config)
|
42
|
+
within_acceptable_limits do
|
43
|
+
logger.info Wraith::Validate.new(config, { imports_must_resolve: false }).validate("spider")
|
44
|
+
spider = Wraith::Spider.new(config)
|
45
|
+
spider.crawl
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
60
49
|
desc "reset_shots [config_name]", "removes all the files in the shots folder"
|
61
50
|
def reset_shots(config_name)
|
62
51
|
within_acceptable_limits do
|
@@ -131,7 +120,6 @@ class Wraith::CLI < Thor
|
|
131
120
|
within_acceptable_limits do
|
132
121
|
logger.info Wraith::Validate.new(config).validate("capture")
|
133
122
|
reset_shots(config)
|
134
|
-
check_for_paths(config)
|
135
123
|
setup_folders(config)
|
136
124
|
save_images(config)
|
137
125
|
crop_images(config)
|
@@ -156,10 +144,9 @@ class Wraith::CLI < Thor
|
|
156
144
|
within_acceptable_limits do
|
157
145
|
logger.info Wraith::Validate.new(config).validate("history")
|
158
146
|
reset_shots(config)
|
159
|
-
check_for_paths(config)
|
160
147
|
setup_folders(config)
|
161
148
|
save_images(config)
|
162
|
-
|
149
|
+
Wraith::FolderManager.new(config).copy_old_shots
|
163
150
|
end
|
164
151
|
end
|
165
152
|
|
@@ -177,6 +164,11 @@ class Wraith::CLI < Thor
|
|
177
164
|
end
|
178
165
|
end
|
179
166
|
|
167
|
+
desc "info", "Show various info about your system"
|
168
|
+
def info
|
169
|
+
list_debug_information
|
170
|
+
end
|
171
|
+
|
180
172
|
desc "version", "Show the version of Wraith"
|
181
173
|
map ["--version", "-version", "-v"] => "version"
|
182
174
|
def version
|
@@ -10,6 +10,12 @@ end
|
|
10
10
|
class ConfigFileDoesNotExistError < CustomError
|
11
11
|
end
|
12
12
|
|
13
|
+
class PropertyOutOfContextError < CustomError
|
14
|
+
end
|
15
|
+
|
16
|
+
class InvalidYamlError < CustomError
|
17
|
+
end
|
18
|
+
|
13
19
|
class MissingImageError < CustomError
|
14
20
|
def initialize(msg = false)
|
15
21
|
default_msg = "Something went wrong! It looks like you're missing some images. Check your output directory and make sure that each path has four files for every screen size (data.txt, diff, base, latest). If in doubt, delete your output directory and run Wraith again."
|
@@ -1,5 +1,18 @@
|
|
1
1
|
require "wraith/helpers/custom_exceptions"
|
2
2
|
|
3
|
+
def within_acceptable_limits
|
4
|
+
yield
|
5
|
+
rescue CustomError => e
|
6
|
+
logger.error e.message
|
7
|
+
# other errors, such as SystemError, will not be caught nicely and will give a stack trace (which we'd need)
|
8
|
+
end
|
9
|
+
|
10
|
+
def absolute_path_of_dir(filepath)
|
11
|
+
path_parts = filepath.split('/')
|
12
|
+
path_to_dir = path_parts.first path_parts.size - 1
|
13
|
+
path_to_dir.join('/')
|
14
|
+
end
|
15
|
+
|
3
16
|
def convert_to_absolute(filepath)
|
4
17
|
if !filepath
|
5
18
|
"false"
|
@@ -14,3 +27,31 @@ def convert_to_absolute(filepath)
|
|
14
27
|
"#{Dir.pwd}/#{filepath}"
|
15
28
|
end
|
16
29
|
end
|
30
|
+
|
31
|
+
def list_debug_information
|
32
|
+
wraith_version = Wraith::VERSION
|
33
|
+
command_run = ARGV.join ' '
|
34
|
+
ruby_version = run_command_safely("ruby -v") || "Ruby not installed"
|
35
|
+
phantomjs_version = run_command_safely("phantomjs --version") || "PhantomJS not installed"
|
36
|
+
casperjs_version = run_command_safely("casperjs --version") || "CasperJS not installed"
|
37
|
+
imagemagick_version = run_command_safely("convert -version") || "ImageMagick not installed"
|
38
|
+
|
39
|
+
logger.debug "#################################################"
|
40
|
+
logger.debug " Command run: #{command_run}"
|
41
|
+
logger.debug " Wraith version: #{wraith_version}"
|
42
|
+
logger.debug " Ruby version: #{ruby_version}"
|
43
|
+
logger.debug " ImageMagick: #{imagemagick_version}"
|
44
|
+
logger.debug " PhantomJS version: #{phantomjs_version}"
|
45
|
+
logger.debug " CasperJS version: #{casperjs_version}"
|
46
|
+
# @TODO - add a SlimerJS equivalent
|
47
|
+
logger.debug "#################################################"
|
48
|
+
end
|
49
|
+
|
50
|
+
def run_command_safely(command)
|
51
|
+
begin
|
52
|
+
output = `#{command}`
|
53
|
+
rescue StandardError
|
54
|
+
return false
|
55
|
+
end
|
56
|
+
output.lines.first.chomp
|
57
|
+
end
|
data/lib/wraith/save_images.rb
CHANGED
@@ -11,7 +11,7 @@ class Wraith::SaveImages
|
|
11
11
|
attr_reader :wraith, :history, :meta
|
12
12
|
|
13
13
|
def initialize(config, history = false, yaml_passed = false)
|
14
|
-
@wraith = Wraith::Wraith.new(config, yaml_passed)
|
14
|
+
@wraith = Wraith::Wraith.new(config, { yaml_passed: yaml_passed })
|
15
15
|
@history = history
|
16
16
|
@meta = SaveMetadata.new(@wraith, history)
|
17
17
|
end
|
data/lib/wraith/spider.rb
CHANGED
@@ -1,108 +1,52 @@
|
|
1
1
|
require "wraith"
|
2
2
|
require "wraith/helpers/logger"
|
3
|
+
require "yaml"
|
3
4
|
require "anemone"
|
4
|
-
require "nokogiri"
|
5
5
|
require "uri"
|
6
6
|
|
7
|
-
class Wraith::Spidering
|
7
|
+
class Wraith::Spider
|
8
8
|
include Logging
|
9
|
+
|
10
|
+
EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
|
11
|
+
gz jar js css dtd xsd ico raw mp3 mp4 m4a \
|
12
|
+
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
13
|
+
avi mov ogg mkv mka asx asf mp2 m1v \
|
14
|
+
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
15
|
+
|
9
16
|
attr_reader :wraith
|
10
17
|
|
11
18
|
def initialize(config)
|
12
19
|
@wraith = Wraith::Wraith.new(config)
|
20
|
+
@paths = {}
|
13
21
|
end
|
14
22
|
|
15
|
-
def check_for_paths
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
def crawl
|
24
|
+
logger.info "Crawling #{wraith.base_domain}"
|
25
|
+
Anemone.crawl(wraith.base_domain) do |anemone|
|
26
|
+
anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
|
27
|
+
# Add user specified skips
|
28
|
+
anemone.skip_links_like(wraith.spider_skips)
|
29
|
+
anemone.on_every_page do |page|
|
30
|
+
logger.info " #{page.url.path}"
|
31
|
+
add_path(page.url.path)
|
23
32
|
end
|
24
|
-
spider.determine_paths
|
25
33
|
end
|
26
|
-
end
|
27
|
-
end
|
28
34
|
|
29
|
-
|
30
|
-
attr_reader :wraith
|
31
|
-
|
32
|
-
def initialize(wraith)
|
33
|
-
@wraith = wraith
|
34
|
-
@paths = {}
|
35
|
-
end
|
36
|
-
|
37
|
-
def determine_paths
|
38
|
-
spider
|
35
|
+
logger.info "Crawl complete."
|
39
36
|
write_file
|
40
37
|
end
|
41
38
|
|
42
|
-
private
|
43
|
-
|
44
|
-
def write_file
|
45
|
-
File.open(wraith.spider_file, "w+") { |file| file.write(@paths) }
|
46
|
-
end
|
47
|
-
|
48
39
|
def add_path(path)
|
49
40
|
@paths[path == "/" ? "home" : path.gsub("/", "__").chomp("__").downcase] = path.downcase
|
50
41
|
end
|
51
42
|
|
52
|
-
def
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
|
60
|
-
gz jar js css dtd xsd ico raw mp3 mp4 \
|
61
|
-
wav wmv ape aac ac3 wma aiff mpg mpeg \
|
62
|
-
avi mov ogg mkv mka asx asf mp2 m1v \
|
63
|
-
m3u f4v pdf doc xls ppt pps bin exe rss xml)
|
64
|
-
|
65
|
-
def spider
|
66
|
-
if File.exist?(wraith.spider_file) && modified_since(wraith.spider_file, wraith.spider_days[0])
|
67
|
-
logger.info "using existing spider file"
|
68
|
-
@paths = eval(File.read(wraith.spider_file))
|
69
|
-
else
|
70
|
-
logger.info "creating new spider file"
|
71
|
-
Anemone.crawl(wraith.base_domain) do |anemone|
|
72
|
-
anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
|
73
|
-
# Add user specified skips
|
74
|
-
anemone.skip_links_like(wraith.spider_skips)
|
75
|
-
anemone.on_every_page { |page| add_path(page.url.path) }
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def modified_since(file, since)
|
81
|
-
(Time.now - File.ctime(file)) / (24 * 3600) < since
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
class Wraith::Sitemap < Wraith::Spider
|
86
|
-
include Logging
|
87
|
-
|
88
|
-
def spider
|
89
|
-
unless wraith.sitemap.nil?
|
90
|
-
logger.info "reading sitemap.xml from #{wraith.sitemap}"
|
91
|
-
if wraith.sitemap =~ URI.regexp
|
92
|
-
sitemap = Nokogiri::XML(open(wraith.sitemap))
|
93
|
-
else
|
94
|
-
sitemap = Nokogiri::XML(File.open(wraith.sitemap))
|
95
|
-
end
|
96
|
-
sitemap.css("loc").each do |loc|
|
97
|
-
path = loc.content
|
98
|
-
# Allow use of either domain in the sitemap.xml
|
99
|
-
wraith.domains.each do |_k, v|
|
100
|
-
path.sub!(v, "")
|
101
|
-
end
|
102
|
-
if wraith.spider_skips.nil? || wraith.spider_skips.none? { |regex| regex.match(path) }
|
103
|
-
add_path(path)
|
104
|
-
end
|
105
|
-
end
|
43
|
+
def write_file
|
44
|
+
logger.info "Writing to YML file..."
|
45
|
+
config = {}
|
46
|
+
config['paths'] = @paths
|
47
|
+
File.open("#{wraith.config_dir}/#{wraith.imports}", "w+") do |file|
|
48
|
+
file.write(config.to_yaml)
|
49
|
+
logger.info "Spider paths written to #{wraith.imports}"
|
106
50
|
end
|
107
51
|
end
|
108
52
|
end
|
data/lib/wraith/validate.rb
CHANGED
@@ -6,8 +6,8 @@ class Wraith::Validate
|
|
6
6
|
include Logging
|
7
7
|
attr_reader :wraith
|
8
8
|
|
9
|
-
def initialize(config,
|
10
|
-
@wraith = Wraith::Wraith.new(config,
|
9
|
+
def initialize(config, options = {})
|
10
|
+
@wraith = Wraith::Wraith.new(config, options)
|
11
11
|
end
|
12
12
|
|
13
13
|
def validate(mode = false)
|
@@ -37,6 +37,8 @@ class Wraith::Validate
|
|
37
37
|
when "latest"
|
38
38
|
validate_history_mode
|
39
39
|
validate_base_shots_exist
|
40
|
+
when "spider"
|
41
|
+
validate_spider_mode
|
40
42
|
else
|
41
43
|
logger.warn "Wraith doesn't know how to validate mode '#{mode}'. Continuing..."
|
42
44
|
end
|
@@ -56,6 +58,14 @@ class Wraith::Validate
|
|
56
58
|
fail InvalidDomainsError, "History mode requires exactly one domain. #{docs_prompt}" if wraith.domains.length != 1
|
57
59
|
end
|
58
60
|
|
61
|
+
def validate_spider_mode
|
62
|
+
fail MissingRequiredPropertyError, "You must specify an `imports` YML"\
|
63
|
+
" before running `wraith spider`. #{docs_prompt}" unless wraith.imports
|
64
|
+
|
65
|
+
#fail PropertyOutOfContextError, "Tried running `wraith spider` but you have already"\
|
66
|
+
# " specified paths in your YML. #{docs_prompt}" if wraith.paths
|
67
|
+
end
|
68
|
+
|
59
69
|
def validate_base_shots_exist
|
60
70
|
unless File.directory?(wraith.history_dir)
|
61
71
|
logger.error "You need to run `wraith history` at least once before you can run `wraith latest`!"
|
@@ -65,33 +75,4 @@ class Wraith::Validate
|
|
65
75
|
def docs_prompt
|
66
76
|
"See the docs at http://bbc-news.github.io/wraith/"
|
67
77
|
end
|
68
|
-
|
69
|
-
def list_debug_information
|
70
|
-
wraith_version = Wraith::VERSION
|
71
|
-
command_run = ARGV.join ' '
|
72
|
-
ruby_version = run_command_safely("ruby -v") || "Ruby not installed"
|
73
|
-
phantomjs_version = run_command_safely("phantomjs --version") || "PhantomJS not installed"
|
74
|
-
casperjs_version = run_command_safely("casperjs --version") || "CasperJS not installed"
|
75
|
-
imagemagick_version = run_command_safely("convert -version") || "ImageMagick not installed"
|
76
|
-
|
77
|
-
logger.debug "#################################################"
|
78
|
-
logger.debug " Command run: #{command_run}"
|
79
|
-
logger.debug " Wraith version: #{wraith_version}"
|
80
|
-
logger.debug " Ruby version: #{ruby_version}"
|
81
|
-
logger.debug " ImageMagick: #{imagemagick_version}"
|
82
|
-
logger.debug " PhantomJS version: #{phantomjs_version}"
|
83
|
-
logger.debug " CasperJS version: #{casperjs_version}"
|
84
|
-
# @TODO - add a SlimerJS equivalent
|
85
|
-
logger.debug "#################################################"
|
86
|
-
logger.debug ""
|
87
|
-
end
|
88
|
-
|
89
|
-
def run_command_safely(command)
|
90
|
-
begin
|
91
|
-
output = `#{command}`
|
92
|
-
rescue StandardError
|
93
|
-
return false
|
94
|
-
end
|
95
|
-
output.lines.first
|
96
|
-
end
|
97
78
|
end
|
data/lib/wraith/version.rb
CHANGED
data/lib/wraith/wraith.rb
CHANGED
@@ -6,12 +6,30 @@ class Wraith::Wraith
|
|
6
6
|
include Logging
|
7
7
|
attr_accessor :config
|
8
8
|
|
9
|
-
def initialize(config,
|
10
|
-
|
9
|
+
def initialize(config, options = {})
|
10
|
+
options = {
|
11
|
+
yaml_passed: false,
|
12
|
+
imports_must_resolve: true,
|
13
|
+
}.merge(options)
|
14
|
+
|
15
|
+
if options[:yaml_passed]
|
16
|
+
@config = config
|
17
|
+
else
|
18
|
+
filepath = determine_config_path config
|
19
|
+
@config = YAML.load_file filepath
|
20
|
+
if !@config
|
21
|
+
fail InvalidYamlError, "could not parse \"#{config}\" as YAML"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
if @config['imports']
|
26
|
+
@config = apply_imported_config(@config['imports'], @config, options[:imports_must_resolve])
|
27
|
+
end
|
28
|
+
|
11
29
|
logger.level = verbose ? Logger::DEBUG : Logger::INFO
|
12
30
|
end
|
13
31
|
|
14
|
-
def
|
32
|
+
def determine_config_path(config_name)
|
15
33
|
possible_filenames = [
|
16
34
|
config_name,
|
17
35
|
"#{config_name}.yml",
|
@@ -22,13 +40,32 @@ class Wraith::Wraith
|
|
22
40
|
|
23
41
|
possible_filenames.each do |filepath|
|
24
42
|
if File.exist?(filepath)
|
25
|
-
|
26
|
-
return
|
43
|
+
@calculated_config_dir = absolute_path_of_dir(convert_to_absolute filepath)
|
44
|
+
return convert_to_absolute filepath
|
27
45
|
end
|
28
46
|
end
|
47
|
+
|
29
48
|
fail ConfigFileDoesNotExistError, "unable to find config \"#{config_name}\""
|
30
49
|
end
|
31
50
|
|
51
|
+
def config_dir
|
52
|
+
@calculated_config_dir
|
53
|
+
end
|
54
|
+
|
55
|
+
def apply_imported_config(config_to_import, config, imports_must_resolve)
|
56
|
+
path_to_config = "#{config_dir}/#{config_to_import}"
|
57
|
+
if File.exist?(path_to_config)
|
58
|
+
yaml = YAML.load_file path_to_config
|
59
|
+
return yaml.merge(config)
|
60
|
+
end
|
61
|
+
|
62
|
+
if imports_must_resolve
|
63
|
+
fail ConfigFileDoesNotExistError, "unable to find referenced imported config \"#{config_to_import}\""
|
64
|
+
else
|
65
|
+
config # return original config
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
32
69
|
def directory
|
33
70
|
# Legacy support for those using array configs
|
34
71
|
@config["directory"].is_a?(Array) ? @config["directory"].first : @config["directory"]
|
@@ -166,6 +203,10 @@ class Wraith::Wraith
|
|
166
203
|
@config["phantomjs_options"]
|
167
204
|
end
|
168
205
|
|
206
|
+
def imports
|
207
|
+
@config['imports'] || false
|
208
|
+
end
|
209
|
+
|
169
210
|
def verbose
|
170
211
|
# @TODO - also add a `--verbose` CLI flag which overrides whatever you have set in the config
|
171
212
|
@config["verbose"] || false
|
data/spec/before_capture_spec.rb
CHANGED
@@ -28,7 +28,7 @@ describe Wraith do
|
|
28
28
|
browser: casperjs
|
29
29
|
before_capture: javascript/do_something.js
|
30
30
|
'
|
31
|
-
wraith = Wraith::Wraith.new(config, true)
|
31
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
32
32
|
# not sure about having code IN the test, but we want to get this right.
|
33
33
|
expect(wraith.before_capture).to eq(Dir.pwd + "/javascript/do_something.js")
|
34
34
|
end
|
@@ -38,7 +38,7 @@ describe Wraith do
|
|
38
38
|
browser: casperjs
|
39
39
|
before_capture: /Users/some_user/wraith/javascript/do_something.js
|
40
40
|
'
|
41
|
-
wraith = Wraith::Wraith.new(config, true)
|
41
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
42
42
|
expect(wraith.before_capture).to eq("/Users/some_user/wraith/javascript/do_something.js")
|
43
43
|
end
|
44
44
|
end
|
data/spec/config_spec.rb
CHANGED
@@ -16,6 +16,16 @@ describe "wraith config" do
|
|
16
16
|
it "contains shot options" do
|
17
17
|
expect(wraith.config).to include "directory" => "shots"
|
18
18
|
end
|
19
|
+
|
20
|
+
it "should be able to import other configs" do
|
21
|
+
config_name = get_path_relative_to __FILE__, "./configs/test_config--imports.yaml"
|
22
|
+
wraith = Wraith::Wraith.new(config_name)
|
23
|
+
|
24
|
+
# retain the imported config settings
|
25
|
+
expect(wraith.paths).to eq("home" => "/", "uk_index" => "/uk")
|
26
|
+
# ...but override the imported config in places
|
27
|
+
expect(wraith.widths).to eq [1337]
|
28
|
+
end
|
19
29
|
end
|
20
30
|
|
21
31
|
describe "When creating a wraith worker" do
|
@@ -71,7 +81,7 @@ describe "wraith config" do
|
|
71
81
|
describe "different ways of initialising browser engine" do
|
72
82
|
it "should let us directly specify the engine" do
|
73
83
|
config = YAML.load "browser: phantomjs"
|
74
|
-
wraith = Wraith::Wraith.new(config, true)
|
84
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
75
85
|
|
76
86
|
expect(wraith.engine).to eq "phantomjs"
|
77
87
|
end
|
@@ -81,7 +91,7 @@ describe "wraith config" do
|
|
81
91
|
browser:
|
82
92
|
phantomjs: "casperjs"
|
83
93
|
'
|
84
|
-
wraith = Wraith::Wraith.new(config, true)
|
94
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
85
95
|
expect(wraith.engine).to eq "casperjs"
|
86
96
|
end
|
87
97
|
end
|
@@ -89,11 +99,11 @@ describe "wraith config" do
|
|
89
99
|
describe "different ways of determining the snap file" do
|
90
100
|
it "should calculate the snap file from the engine" do
|
91
101
|
config = YAML.load "browser: phantomjs"
|
92
|
-
wraith = Wraith::Wraith.new(config, true)
|
102
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
93
103
|
expect(wraith.snap_file).to include "lib/wraith/javascript/phantom.js"
|
94
104
|
|
95
105
|
config = YAML.load "browser: casperjs"
|
96
|
-
wraith = Wraith::Wraith.new(config, true)
|
106
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
97
107
|
expect(wraith.snap_file).to include "lib/wraith/javascript/casper.js"
|
98
108
|
end
|
99
109
|
|
@@ -102,7 +112,7 @@ describe "wraith config" do
|
|
102
112
|
browser:
|
103
113
|
phantomjs: "casperjs"
|
104
114
|
'
|
105
|
-
wraith = Wraith::Wraith.new(config, true)
|
115
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
106
116
|
expect(wraith.snap_file).to include "lib/wraith/javascript/casper.js"
|
107
117
|
end
|
108
118
|
|
@@ -111,7 +121,7 @@ describe "wraith config" do
|
|
111
121
|
browser: casperjs
|
112
122
|
snap_file: path/to/snap.js
|
113
123
|
'
|
114
|
-
wraith = Wraith::Wraith.new(config, true)
|
124
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
115
125
|
# not sure about having code IN the test, but we want to get this right.
|
116
126
|
expect(wraith.snap_file).to eq(Dir.pwd + "/path/to/snap.js")
|
117
127
|
end
|
@@ -121,7 +131,7 @@ describe "wraith config" do
|
|
121
131
|
browser: casperjs
|
122
132
|
snap_file: /Users/my_username/Sites/bbc/wraith/path/to/snap.js
|
123
133
|
'
|
124
|
-
wraith = Wraith::Wraith.new(config, true)
|
134
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
125
135
|
expect(wraith.snap_file).to eq("/Users/my_username/Sites/bbc/wraith/path/to/snap.js")
|
126
136
|
end
|
127
137
|
end
|
@@ -129,13 +139,13 @@ describe "wraith config" do
|
|
129
139
|
describe "different modes of efficiency (resize or reload)" do
|
130
140
|
it "should trigger efficient mode if resize was specified" do
|
131
141
|
config = YAML.load 'resize_or_reload: "resize"'
|
132
|
-
wraith = Wraith::Wraith.new(config, true)
|
142
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
133
143
|
expect(wraith.resize)
|
134
144
|
end
|
135
145
|
|
136
146
|
it "should fall back to slow mode if reload was specified" do
|
137
147
|
config = YAML.load 'resize_or_reload: "reload"'
|
138
|
-
wraith = Wraith::Wraith.new(config, true)
|
148
|
+
wraith = Wraith::Wraith.new(config, { yaml_passed: true })
|
139
149
|
expect(wraith.resize).to eq false
|
140
150
|
end
|
141
151
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
imports: "spider_paths.yaml"
|
2
|
+
|
3
|
+
#Headless browser option
|
4
|
+
browser: "phantomjs"
|
5
|
+
|
6
|
+
# Type the name of the directory that shots will be stored in
|
7
|
+
directory: "shots"
|
8
|
+
|
9
|
+
# Add only 2 domains, key will act as a label
|
10
|
+
domains:
|
11
|
+
personal: "http://ashton.codes"
|
12
|
+
business: "http://webdapper.com"
|
13
|
+
|
14
|
+
#Type screen widths below, here are a couple of examples
|
15
|
+
screen_widths:
|
16
|
+
- 320
|
17
|
+
- 600
|
18
|
+
- 768x1500 # you can also specify the height, as we've done here
|
19
|
+
- 1024
|
20
|
+
- 1280
|
21
|
+
|
22
|
+
#Amount of fuzz ImageMagick will use
|
23
|
+
fuzz: "20%"
|
24
|
+
|
25
|
+
#A list of URLs to skip when spidering. Ruby regular expressions can be
|
26
|
+
#used, if prefixed with !ruby/regexp as defined in the YAML Cookbook
|
27
|
+
#http://www.yaml.org/YAML_for_ruby.html#regexps
|
28
|
+
# spider_skips:
|
29
|
+
# - /foo/bar.html # Matches /foo/bar.html explicitly
|
30
|
+
# - !ruby/regexp /^\/baz\// # Matches any URLs that start with /baz
|
31
|
+
|
32
|
+
#Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
|
33
|
+
#alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
|
34
|
+
#diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
|
35
|
+
#diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
|
36
|
+
mode: diffs_first
|
37
|
+
|
38
|
+
threshold: 15
|
data/spec/validate_spec.rb
CHANGED
@@ -14,17 +14,17 @@ describe "Wraith config validator" do
|
|
14
14
|
|
15
15
|
describe "universal, basic validation for all modes" do
|
16
16
|
it "should validate a basic config" do
|
17
|
-
Wraith::Validate.new(config, true).validate
|
17
|
+
Wraith::Validate.new(config, { yaml_passed: true }).validate
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should complain if the `domains` property is missing" do
|
21
21
|
config["domains"] = nil
|
22
|
-
expect { Wraith::Validate.new(config, true).validate }.to raise_error MissingRequiredPropertyError
|
22
|
+
expect { Wraith::Validate.new(config, { yaml_passed: true }).validate }.to raise_error MissingRequiredPropertyError
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should complain if the `browser` property is missing" do
|
26
26
|
config["browser"] = nil
|
27
|
-
expect { Wraith::Validate.new(config, true).validate }.to raise_error MissingRequiredPropertyError
|
27
|
+
expect { Wraith::Validate.new(config, { yaml_passed: true }).validate }.to raise_error MissingRequiredPropertyError
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should complain if the config file doesn't exist" do
|
@@ -34,7 +34,7 @@ describe "Wraith config validator" do
|
|
34
34
|
|
35
35
|
describe "validation specific to capture mode" do
|
36
36
|
it "should complain if fewer than two domains are specified" do
|
37
|
-
expect { Wraith::Validate.new(config, true).validate("capture") }.to raise_error InvalidDomainsError
|
37
|
+
expect { Wraith::Validate.new(config, { yaml_passed: true }).validate("capture") }.to raise_error InvalidDomainsError
|
38
38
|
end
|
39
39
|
|
40
40
|
it "should complain if more than two domains are specified" do
|
@@ -43,7 +43,7 @@ describe "Wraith config validator" do
|
|
43
43
|
stage: http://something-else.bbc.com
|
44
44
|
live: http://www.bbc.com
|
45
45
|
')
|
46
|
-
expect { Wraith::Validate.new(config, true).validate("capture") }.to raise_error InvalidDomainsError
|
46
|
+
expect { Wraith::Validate.new(config, { yaml_passed: true }).validate("capture") }.to raise_error InvalidDomainsError
|
47
47
|
end
|
48
48
|
|
49
49
|
it "should be happy if exactly two domains are specified" do
|
@@ -51,16 +51,16 @@ describe "Wraith config validator" do
|
|
51
51
|
test: http://something.bbc.com
|
52
52
|
live: http://www.bbc.com
|
53
53
|
')
|
54
|
-
Wraith::Validate.new(config, true).validate("capture")
|
54
|
+
Wraith::Validate.new(config, { yaml_passed: true }).validate("capture")
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
it "should fail if no directory is specified" do
|
58
58
|
config["domains"] = YAML.load('
|
59
59
|
test: http://something.bbc.com
|
60
60
|
live: http://www.bbc.com
|
61
61
|
')
|
62
62
|
config["directory"] = nil
|
63
|
-
expect { Wraith::Validate.new(config, true).validate("capture") }.to raise_error MissingRequiredPropertyError
|
63
|
+
expect { Wraith::Validate.new(config, { yaml_passed: true }).validate("capture") }.to raise_error MissingRequiredPropertyError
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -76,16 +76,45 @@ describe "Wraith config validator" do
|
|
76
76
|
test: http://something.bbc.com
|
77
77
|
live: http://www.bbc.com
|
78
78
|
')
|
79
|
-
expect { Wraith::Validate.new(history_conf, true).validate("history") }.to raise_error InvalidDomainsError
|
79
|
+
expect { Wraith::Validate.new(history_conf, { yaml_passed: true }).validate("history") }.to raise_error InvalidDomainsError
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should complain if no history_dir is specified" do
|
83
83
|
history_conf["history_dir"] = nil
|
84
|
-
expect { Wraith::Validate.new(history_conf, true).validate("history") }.to raise_error MissingRequiredPropertyError
|
84
|
+
expect { Wraith::Validate.new(history_conf, { yaml_passed: true }).validate("history") }.to raise_error MissingRequiredPropertyError
|
85
85
|
end
|
86
86
|
|
87
87
|
it "should be happy if a history_dir and one domain is specified" do
|
88
|
-
Wraith::Validate.new(history_conf, true).validate("history")
|
88
|
+
Wraith::Validate.new(history_conf, { yaml_passed: true }).validate("history")
|
89
89
|
end
|
90
90
|
end
|
91
|
+
|
92
|
+
describe "validations specific to spider mode" do
|
93
|
+
let(:spider_conf) do
|
94
|
+
YAML.load('
|
95
|
+
domains:
|
96
|
+
test: http://www.bbc.com
|
97
|
+
|
98
|
+
browser: "casperjs"
|
99
|
+
|
100
|
+
directory: some/dir
|
101
|
+
|
102
|
+
imports: "spider_paths.yml"
|
103
|
+
')
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should complain if imports is empty" do
|
107
|
+
spider_conf.delete 'imports'
|
108
|
+
expect { Wraith::Validate.new(spider_conf, { yaml_passed: true, imports_must_resolve: false }).validate("spider") }.to raise_error MissingRequiredPropertyError
|
109
|
+
end
|
110
|
+
|
111
|
+
# @TODO - would be good to get this passing. Right now we get a false positive if you've run `wraith spider` once already - thereby 'paths' being set, and this error being raised.
|
112
|
+
# it "should complain if paths is set" do
|
113
|
+
# spider_conf.merge!(YAML.load('
|
114
|
+
# paths:
|
115
|
+
# home: /
|
116
|
+
# '))
|
117
|
+
# expect { Wraith::Validate.new(spider_conf, { yaml_passed: true, imports_must_resolve: false }).validate("spider") }.to raise_error PropertyOutOfContextError
|
118
|
+
# end
|
119
|
+
end
|
91
120
|
end
|
data/wraith.gemspec
CHANGED
@@ -26,7 +26,6 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency 'image_size'
|
27
27
|
spec.add_runtime_dependency 'anemone'
|
28
28
|
spec.add_runtime_dependency 'robotex'
|
29
|
-
spec.add_runtime_dependency 'nokogiri', '~> 1.6.7'
|
30
29
|
spec.add_runtime_dependency 'log4r'
|
31
30
|
spec.add_runtime_dependency 'thor'
|
32
31
|
spec.add_runtime_dependency 'parallel'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wraith
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.1
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dave Blooman
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-11-25 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: pry
|
@@ -110,20 +110,6 @@ dependencies:
|
|
110
110
|
- - '>='
|
111
111
|
- !ruby/object:Gem::Version
|
112
112
|
version: '0'
|
113
|
-
- !ruby/object:Gem::Dependency
|
114
|
-
name: nokogiri
|
115
|
-
requirement: !ruby/object:Gem::Requirement
|
116
|
-
requirements:
|
117
|
-
- - ~>
|
118
|
-
- !ruby/object:Gem::Version
|
119
|
-
version: 1.6.7
|
120
|
-
type: :runtime
|
121
|
-
prerelease: false
|
122
|
-
version_requirements: !ruby/object:Gem::Requirement
|
123
|
-
requirements:
|
124
|
-
- - ~>
|
125
|
-
- !ruby/object:Gem::Version
|
126
|
-
version: 1.6.7
|
127
113
|
- !ruby/object:Gem::Dependency
|
128
114
|
name: log4r
|
129
115
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,7 +206,9 @@ files:
|
|
220
206
|
- spec/before_capture_spec.rb
|
221
207
|
- spec/config_spec.rb
|
222
208
|
- spec/configs/test_config--casper.yaml
|
209
|
+
- spec/configs/test_config--imports.yaml
|
223
210
|
- spec/configs/test_config--phantom.yaml
|
211
|
+
- spec/configs/test_config--spider.yaml
|
224
212
|
- spec/construct_command_spec.rb
|
225
213
|
- spec/gallery_spec.rb
|
226
214
|
- spec/helper_spec.rb
|
@@ -277,7 +265,9 @@ test_files:
|
|
277
265
|
- spec/before_capture_spec.rb
|
278
266
|
- spec/config_spec.rb
|
279
267
|
- spec/configs/test_config--casper.yaml
|
268
|
+
- spec/configs/test_config--imports.yaml
|
280
269
|
- spec/configs/test_config--phantom.yaml
|
270
|
+
- spec/configs/test_config--spider.yaml
|
281
271
|
- spec/construct_command_spec.rb
|
282
272
|
- spec/gallery_spec.rb
|
283
273
|
- spec/helper_spec.rb
|