wraith 3.0.4 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -0
  3. data/Gemfile +1 -1
  4. data/lib/wraith/cli.rb +108 -73
  5. data/lib/wraith/compare_images.rb +4 -2
  6. data/lib/wraith/crop.rb +3 -1
  7. data/lib/wraith/folder.rb +6 -4
  8. data/lib/wraith/gallery.rb +21 -22
  9. data/lib/wraith/helpers/capture_options.rb +52 -0
  10. data/lib/wraith/helpers/custom_exceptions.rb +8 -0
  11. data/lib/wraith/helpers/logger.rb +20 -0
  12. data/lib/wraith/helpers/save_metadata.rb +31 -0
  13. data/lib/wraith/{utilities.rb → helpers/utilities.rb} +5 -4
  14. data/lib/wraith/javascript/_helper.js +0 -2
  15. data/lib/wraith/javascript/casper.js +26 -16
  16. data/lib/wraith/javascript/phantom.js +148 -5
  17. data/lib/wraith/save_images.rb +28 -104
  18. data/lib/wraith/spider.rb +12 -5
  19. data/lib/wraith/thumbnails.rb +0 -2
  20. data/lib/wraith/validate.rb +98 -0
  21. data/lib/wraith/version.rb +1 -1
  22. data/lib/wraith/wraith.rb +19 -12
  23. data/spec/_helpers.rb +4 -10
  24. data/spec/before_capture_spec.rb +19 -10
  25. data/spec/config_spec.rb +2 -9
  26. data/spec/configs/test_config--casper.yaml +1 -5
  27. data/spec/construct_command_spec.rb +43 -0
  28. data/spec/gallery_spec.rb +1 -2
  29. data/spec/js/custom_snap_file.js +26 -16
  30. data/spec/js/global.js +2 -1
  31. data/spec/js/path.js +2 -1
  32. data/spec/resize_reload_spec.rb +4 -8
  33. data/spec/save_images_spec.rb +3 -2
  34. data/spec/validate_spec.rb +76 -0
  35. data/templates/configs/capture.yaml +61 -0
  36. data/templates/configs/history.yaml +81 -0
  37. data/templates/configs/spider.yaml +13 -2
  38. data/templates/javascript/cookies_and_headers--casper.js +16 -0
  39. data/templates/javascript/cookies_and_headers--phantom.js +26 -0
  40. data/templates/javascript/disable_javascript--casper.js +11 -0
  41. data/templates/javascript/disable_javascript--phantom.js +13 -0
  42. data/templates/javascript/interact--casper.js +11 -0
  43. data/templates/javascript/interact--phantom.js +17 -0
  44. data/templates/javascript/wait--casper.js +8 -0
  45. data/templates/javascript/wait--phantom.js +8 -0
  46. data/wraith.gemspec +1 -1
  47. metadata +27 -14
  48. data/lib/wraith/javascript/_phantom__common.js +0 -120
  49. data/lib/wraith/javascript/phantom--nojs.js +0 -6
  50. data/templates/configs/component.yaml +0 -60
  51. data/templates/configs/multiple_domains.yaml +0 -53
  52. data/templates/javascript/beforeCapture--casper_example.js +0 -12
  53. data/templates/javascript/beforeCapture--phantom_example.js +0 -36
data/lib/wraith/spider.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  require "wraith"
2
+ require "wraith/helpers/logger"
2
3
  require "anemone"
3
4
  require "nokogiri"
4
5
  require "uri"
5
6
 
6
7
  class Wraith::Spidering
8
+ include Logging
9
+
7
10
  def initialize(config)
8
11
  @wraith = Wraith::Wraith.new(config)
9
12
  end
@@ -11,10 +14,10 @@ class Wraith::Spidering
11
14
  def check_for_paths
12
15
  if @wraith.paths.nil?
13
16
  unless @wraith.sitemap.nil?
14
- puts "no paths defined in config, loading paths from sitemap"
17
+ logger.info "no paths defined in config, loading paths from sitemap"
15
18
  spider = Wraith::Sitemap.new(@wraith)
16
19
  else
17
- puts "no paths defined in config, crawling from site root"
20
+ logger.info "no paths defined in config, crawling from site root"
18
21
  spider = Wraith::Crawler.new(@wraith)
19
22
  end
20
23
  spider.determine_paths
@@ -48,6 +51,8 @@ class Wraith::Spider
48
51
  end
49
52
 
50
53
  class Wraith::Crawler < Wraith::Spider
54
+ include Logging
55
+
51
56
  EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
52
57
  gz jar js css dtd xsd ico raw mp3 mp4 \
53
58
  wav wmv ape aac ac3 wma aiff mpg mpeg \
@@ -56,10 +61,10 @@ class Wraith::Crawler < Wraith::Spider
56
61
 
57
62
  def spider
58
63
  if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
59
- puts "using existing spider file"
64
+ logger.info "using existing spider file"
60
65
  @paths = eval(File.read(@wraith.spider_file))
61
66
  else
62
- puts "creating new spider file"
67
+ logger.info "creating new spider file"
63
68
  spider_list = []
64
69
  Anemone.crawl(@wraith.base_domain) do |anemone|
65
70
  anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
@@ -76,9 +81,11 @@ class Wraith::Crawler < Wraith::Spider
76
81
  end
77
82
 
78
83
  class Wraith::Sitemap < Wraith::Spider
84
+ include Logging
85
+
79
86
  def spider
80
87
  unless @wraith.sitemap.nil?
81
- puts "reading sitemap.xml from #{@wraith.sitemap}"
88
+ logger.info "reading sitemap.xml from #{@wraith.sitemap}"
82
89
  if @wraith.sitemap =~ URI.regexp
83
90
  sitemap = Nokogiri::XML(open(@wraith.sitemap))
84
91
  else
data/lib/wraith/thumbnails.rb CHANGED
@@ -11,8 +11,6 @@ class Wraith::Thumbnails
11
11
  end
12
12
 
13
13
  def generate_thumbnails
14
- puts "Generating thumbnails"
15
-
16
14
  files = Dir.glob("#{wraith.directory}/*/*.png")
17
15
 
18
16
  Parallel.each(files, :in_processes => Parallel.processor_count) do |filename|
data/lib/wraith/validate.rb ADDED
@@ -0,0 +1,98 @@
1
+ require "wraith/wraith"
2
+ require "wraith/helpers/logger"
3
+ require "wraith/helpers/utilities"
4
+
5
+ class Wraith::Validate
6
+ include Logging
7
+
8
+ def initialize(config, yaml_passed = false)
9
+ @wraith = Wraith::Wraith.new(config, yaml_passed)
10
+ end
11
+
12
+ def validate(mode = false)
13
+ list_debug_information if @wraith.verbose
14
+ validate_basic_properties
15
+ validate_mode_properties(mode) if mode
16
+ # if we get this far, we've only had warnings at worst, not errors.
17
+ "Config validated. No serious issues found."
18
+ end
19
+
20
+ def validate_basic_properties
21
+ if @wraith.engine.nil?
22
+ raise MissingRequiredPropertyError, "You must specify a browser engine! #{docs_prompt}"
23
+ end
24
+ unless @wraith.domains
25
+ raise MissingRequiredPropertyError, "You must specify at least one domain for Wraith to do anything! #{docs_prompt}"
26
+ end
27
+ #@TODO validate fuzz is not nil, etc
28
+ end
29
+
30
+ def validate_mode_properties(mode)
31
+ case mode
32
+ when "capture"
33
+ validate_capture_mode
34
+ when "history"
35
+ validate_history_mode
36
+ when "latest"
37
+ validate_history_mode
38
+ validate_base_shots_exist
39
+ else
40
+ logger.warn "Wraith doesn't know how to validate mode '#{mode}'. Continuing..."
41
+ end
42
+ end
43
+
44
+ def validate_capture_mode
45
+ if @wraith.domains.length != 2
46
+ raise InvalidDomainsError, "`wraith capture` requires exactly two domains. #{docs_prompt}"
47
+ end
48
+ if @wraith.history_dir
49
+ logger.warn "You have specified a `history_dir` in your config, but this is used in `history` mode, NOT `capture` mode. #{docs_prompt}"
50
+ end
51
+ end
52
+
53
+ def validate_history_mode
54
+ unless @wraith.history_dir
55
+ raise MissingRequiredPropertyError, "You must specify a `history_dir` to run Wraith in history mode. #{docs_prompt}"
56
+ end
57
+ if @wraith.domains.length != 1
58
+ raise InvalidDomainsError, "History mode requires exactly one domain. #{docs_prompt}"
59
+ end
60
+ end
61
+
62
+ def validate_base_shots_exist
63
+ unless File.directory?(@wraith.history_dir)
64
+ logger.error "You need to run `wraith history` at least once before you can run `wraith latest`!"
65
+ end
66
+ end
67
+
68
+ def docs_prompt
69
+ "See the docs at http://bbc-news.github.io/wraith/"
70
+ end
71
+
72
+ def list_debug_information
73
+ wraith_version = Wraith::VERSION
74
+ ruby_version = run_command_safely('ruby -v') || 'Ruby not installed'
75
+ phantomjs_version = run_command_safely('phantomjs --version') || 'PhantomJS not installed'
76
+ casperjs_version = run_command_safely('casperjs --version') || 'CasperJS not installed'
77
+ imagemagick_version = run_command_safely('convert -version') || 'ImageMagick not installed'
78
+
79
+ logger.debug "#################################################"
80
+ logger.debug " Wraith version: #{wraith_version}"
81
+ logger.debug " Ruby version: #{ruby_version}"
82
+ logger.debug " ImageMagick: #{imagemagick_version}"
83
+ logger.debug " PhantomJS version: #{phantomjs_version}"
84
+ logger.debug " CasperJS version: #{casperjs_version}"
85
+ # @TODO - add a SlimerJS equivalent
86
+ logger.debug "#################################################"
87
+ logger.debug ""
88
+ end
89
+
90
+ def run_command_safely(command)
91
+ begin
92
+ output = `#{command}`
93
+ rescue Exception => e
94
+ return false
95
+ end
96
+ output.lines.first
97
+ end
98
+ end
data/lib/wraith/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Wraith
2
- VERSION = "3.0.4"
2
+ VERSION = "3.1.0"
3
3
  end
data/lib/wraith/wraith.rb CHANGED
@@ -1,14 +1,18 @@
1
1
  require "yaml"
2
- require "wraith/utilities"
2
+ require "wraith/helpers/logger"
3
+ require "wraith/helpers/utilities"
3
4
 
4
5
  class Wraith::Wraith
6
+ include Logging
5
7
  attr_accessor :config
6
8
 
7
9
  def initialize(config, yaml_passed = false)
8
- @config = yaml_passed ? config : open_config_file(config)
9
- rescue
10
- puts "unable to find config at #{config}"
11
- exit 1
10
+ begin
11
+ @config = yaml_passed ? config : open_config_file(config)
12
+ logger.level = verbose ? Logger::DEBUG : Logger::INFO
13
+ rescue
14
+ logger.error "unable to find config at #{config}"
15
+ end
12
16
  end
13
17
 
14
18
  def open_config_file(config_name)
@@ -26,15 +30,13 @@ class Wraith::Wraith
26
30
  end
27
31
 
28
32
  def history_dir
29
- @config["history_dir"]
33
+ @config["history_dir"] || false
30
34
  end
31
35
 
32
36
  def engine
33
37
  engine = @config["browser"]
34
38
  # Legacy support for those using the old style "browser: \n phantomjs: 'casperjs'" configs
35
- if engine.is_a? Hash
36
- engine = engine.values.first
37
- end
39
+ engine = engine.values.first if engine.is_a? Hash
38
40
  engine
39
41
  end
40
42
 
@@ -51,12 +53,12 @@ class Wraith::Wraith
51
53
  path_to_js_templates + "/casper.js"
52
54
  # @TODO - add a SlimerJS option
53
55
  else
54
- abort "Wraith does not recognise the browser engine '#{engine}'"
56
+ logger.error "Wraith does not recognise the browser engine '#{engine}'"
55
57
  end
56
58
  end
57
59
 
58
60
  def before_capture
59
- @config["before_capture"] ? convert_to_absolute(@config["before_capture"]) : "false"
61
+ @config["before_capture"] ? convert_to_absolute(@config["before_capture"]) : false
60
62
  end
61
63
 
62
64
  def widths
@@ -158,4 +160,9 @@ class Wraith::Wraith
158
160
  def phantomjs_options
159
161
  @config["phantomjs_options"]
160
162
  end
161
- end
163
+
164
+ def verbose
165
+ # @TODO - also add a `--verbose` CLI flag which overrides whatever you have set in the config
166
+ @config['verbose'] || false
167
+ end
168
+ end
data/spec/_helpers.rb CHANGED
@@ -2,8 +2,10 @@ require "rspec"
2
2
  require "./lib/wraith/cli"
3
3
 
4
4
  def create_diff_image
5
- saving.capture_page_image(wraith.engine, test_url1, 320, test_image1, selector, 'false', 'false')
6
- saving.capture_page_image(wraith.engine, test_url2, 320, test_image2, selector, 'false', 'false')
5
+ capture_image = saving.construct_command(320, test_url1, test_image1, selector, false, false)
6
+ `#{capture_image}`
7
+ capture_image = saving.construct_command(320, test_url2, test_image2, selector, false, false)
8
+ `#{capture_image}`
7
9
  end
8
10
 
9
11
  def crop_images
@@ -14,14 +16,6 @@ def compare_images
14
16
  Wraith::CompareImages.new(config_name).compare_task(test_image1, test_image2, diff_image, data_txt)
15
17
  end
16
18
 
17
- def run_js_then_capture(config)
18
- generated_image = 'shots/test/temporary_jsified_image.png'
19
- saving.capture_page_image(config[:engine], test_url1, 320, generated_image, selector, config[:global_js], config[:path_js])
20
- Wraith::CompareImages.new(config_name).compare_task(generated_image, config[:output_should_look_like], diff_image, data_txt)
21
- diff = File.open('shots/test/test.txt', "rb").read
22
- expect(diff).to eq '0.0'
23
- end
24
-
25
19
  def get_path_relative_to(current_file, file_to_find)
26
20
  File.expand_path(File.join(File.dirname(current_file), file_to_find))
27
21
  end
data/spec/before_capture_spec.rb CHANGED
@@ -1,16 +1,27 @@
1
1
  require "_helpers"
2
2
 
3
+ def run_js_then_capture(config)
4
+ saving = Wraith::SaveImages.new(config_name)
5
+ generated_image = 'shots/test/temporary_jsified_image.png'
6
+ capture_image = saving.construct_command(320, "http://www.bbc.com/afrique", generated_image, selector, config[:global_js], config[:path_js])
7
+ `#{capture_image}`
8
+ Wraith::CompareImages.new(config_name).compare_task(generated_image, config[:output_should_look_like], "shots/test/test_diff.png", "shots/test/test.txt")
9
+ diff = File.open('shots/test/test.txt', "rb").read
10
+ expect(diff).to eq '0.0'
11
+ end
12
+
3
13
  describe Wraith do
4
14
  let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--casper.yaml" }
5
- let(:test_url1) { "http://www.bbc.com/afrique" }
6
- let(:diff_image) { "shots/test/test_diff.png" }
7
- let(:data_txt) { "shots/test/test.txt" }
8
- let(:saving) { Wraith::SaveImages.new(config_name) }
9
15
  let(:wraith) { Wraith::Wraith.new(config_name) }
10
16
  let(:selector) { "body" }
11
17
  let(:before_suite_js) { "spec/js/global.js" }
12
18
  let(:before_capture_js) { "spec/js/path.js" }
13
19
 
20
+ before(:each) do
21
+ Wraith::FolderManager.new(config_name).clear_shots_folder
22
+ Dir.mkdir("shots/test")
23
+ end
24
+
14
25
  describe "different ways of determining the before_capture file" do
15
26
  it "should allow users to specify the relative path to the before_capture file" do
16
27
  config = YAML.load '
@@ -19,7 +30,7 @@ describe Wraith do
19
30
  '
20
31
  wraith = Wraith::Wraith.new(config, true)
21
32
  # not sure about having code IN the test, but we want to get this right.
22
- expect(wraith.before_capture).to eq (`pwd`.chomp! + '/javascript/do_something.js')
33
+ expect(wraith.before_capture).to eq (Dir.pwd + '/javascript/do_something.js')
23
34
  end
24
35
 
25
36
  it "should allow users to specify the absolute path to the before_capture file" do
@@ -35,11 +46,10 @@ describe Wraith do
35
46
  # @TODO - we need tests determining the path to "path-level before_capture hooks"
36
47
 
37
48
  describe "When hooking into beforeCapture (CasperJS)" do
38
-
39
49
  it "Executes the global JS before capturing" do
40
50
  run_js_then_capture(
41
51
  global_js: before_suite_js,
42
- path_js: 'false',
52
+ path_js: false,
43
53
  output_should_look_like: 'spec/base/global.png',
44
54
  engine: 'casperjs'
45
55
  )
@@ -47,7 +57,7 @@ describe Wraith do
47
57
 
48
58
  it "Executes the path-level JS before capturing" do
49
59
  run_js_then_capture(
50
- global_js: 'false',
60
+ global_js: false,
51
61
  path_js: before_capture_js,
52
62
  output_should_look_like: 'spec/base/path.png',
53
63
  engine: 'casperjs'
@@ -100,5 +110,4 @@ describe Wraith do
100
110
  # )
101
111
  # end
102
112
  # end
103
-
104
- end
113
+ end
data/spec/config_spec.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "_helpers"
2
2
 
3
3
  describe "wraith config" do
4
-
5
4
  let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--phantom.yaml" }
6
5
  let(:wraith) { Wraith::Wraith.new(config_name) }
7
6
 
@@ -20,7 +19,6 @@ describe "wraith config" do
20
19
  end
21
20
 
22
21
  describe "When creating a wraith worker" do
23
-
24
22
  it "should have a browser engine defined" do
25
23
  expect(wraith.engine).to be_a String
26
24
  end
@@ -71,7 +69,6 @@ describe "wraith config" do
71
69
  end
72
70
 
73
71
  describe "different ways of initialising browser engine" do
74
-
75
72
  it "should let us directly specify the engine" do
76
73
  config = YAML.load 'browser: phantomjs'
77
74
  wraith = Wraith::Wraith.new(config, true)
@@ -90,7 +87,6 @@ describe "wraith config" do
90
87
  end
91
88
 
92
89
  describe "different ways of determining the snap file" do
93
-
94
90
  it "should calculate the snap file from the engine" do
95
91
  config = YAML.load 'browser: phantomjs'
96
92
  wraith = Wraith::Wraith.new(config, true)
@@ -117,7 +113,7 @@ describe "wraith config" do
117
113
  '
118
114
  wraith = Wraith::Wraith.new(config, true)
119
115
  # not sure about having code IN the test, but we want to get this right.
120
- expect(wraith.snap_file).to eq (`pwd`.chomp! + '/path/to/snap.js')
116
+ expect(wraith.snap_file).to eq (Dir.pwd + '/path/to/snap.js')
121
117
  end
122
118
 
123
119
  it "should allow users to specify the absolute path to their own snap file" do
@@ -131,7 +127,6 @@ describe "wraith config" do
131
127
  end
132
128
 
133
129
  describe "different modes of efficiency (resize or reload)" do
134
-
135
130
  it "should trigger efficient mode if resize was specified" do
136
131
  config = YAML.load 'resize_or_reload: "resize"'
137
132
  wraith = Wraith::Wraith.new(config, true)
@@ -143,7 +138,5 @@ describe "wraith config" do
143
138
  wraith = Wraith::Wraith.new(config, true)
144
139
  expect(wraith.resize).to eq false
145
140
  end
146
-
147
141
  end
148
-
149
- end
142
+ end
data/spec/configs/test_config--casper.yaml CHANGED
@@ -4,11 +4,7 @@
4
4
  ##########
5
5
 
6
6
  #Headless browser option
7
- browser:
8
- phantomjs: "casperjs"
9
- # slimerjs: "slimerjs"
10
-
11
- #overriding the snap file
7
+ browser: "casperjs"
12
8
  snap_file: "spec/js/custom_snap_file.js"
13
9
 
14
10
  # Type the name of the directory that shots will be stored in
data/spec/construct_command_spec.rb ADDED
@@ -0,0 +1,43 @@
1
+ require "_helpers"
2
+
3
+ describe "Wraith config to CLI argument mapping" do
4
+ describe "passing variables to construct_command" do
5
+ # set default variables we can override if necessary
6
+ let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--phantom.yaml" }
7
+ let(:saving) { Wraith::SaveImages.new(config_name) }
8
+ let(:width) { 320 }
9
+ let(:url) { 'http://example.com/my-page' }
10
+ let(:file_name) { 'wraith/my-page/320_phantomjs_latest.png' }
11
+ let(:selector) { '.my_selector' }
12
+ let(:global_bc) { 'javascript/before_capture.js' }
13
+ let(:path_bc) { false }
14
+
15
+ it "should take a load of variables and construct a command" do
16
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
17
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
18
+ expect(actual).to eq expected
19
+ end
20
+
21
+ it "should allow hashtags in selectors" do
22
+ selector = '#some-id'
23
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '\\#some-id' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
24
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
25
+ expect(actual).to eq expected
26
+ end
27
+
28
+ it "should be able to pass multiple widths at once" do
29
+ width = [320, 624, 976]
30
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320,624,976' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
31
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
32
+ expect(actual).to eq expected
33
+ end
34
+
35
+ it "should call casperjs when the config says so" do
36
+ config_name = get_path_relative_to(__FILE__, "./configs/test_config--casper.yaml")
37
+ saving = Wraith::SaveImages.new(config_name)
38
+ expected = "casperjs '#{Dir.pwd}/spec/js/custom_snap_file.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
39
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
40
+ expect(actual).to eq expected
41
+ end
42
+ end
43
+ end