wraith 3.0.4 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -0
  3. data/Gemfile +1 -1
  4. data/lib/wraith/cli.rb +108 -73
  5. data/lib/wraith/compare_images.rb +4 -2
  6. data/lib/wraith/crop.rb +3 -1
  7. data/lib/wraith/folder.rb +6 -4
  8. data/lib/wraith/gallery.rb +21 -22
  9. data/lib/wraith/helpers/capture_options.rb +52 -0
  10. data/lib/wraith/helpers/custom_exceptions.rb +8 -0
  11. data/lib/wraith/helpers/logger.rb +20 -0
  12. data/lib/wraith/helpers/save_metadata.rb +31 -0
  13. data/lib/wraith/{utilities.rb → helpers/utilities.rb} +5 -4
  14. data/lib/wraith/javascript/_helper.js +0 -2
  15. data/lib/wraith/javascript/casper.js +26 -16
  16. data/lib/wraith/javascript/phantom.js +148 -5
  17. data/lib/wraith/save_images.rb +28 -104
  18. data/lib/wraith/spider.rb +12 -5
  19. data/lib/wraith/thumbnails.rb +0 -2
  20. data/lib/wraith/validate.rb +98 -0
  21. data/lib/wraith/version.rb +1 -1
  22. data/lib/wraith/wraith.rb +19 -12
  23. data/spec/_helpers.rb +4 -10
  24. data/spec/before_capture_spec.rb +19 -10
  25. data/spec/config_spec.rb +2 -9
  26. data/spec/configs/test_config--casper.yaml +1 -5
  27. data/spec/construct_command_spec.rb +43 -0
  28. data/spec/gallery_spec.rb +1 -2
  29. data/spec/js/custom_snap_file.js +26 -16
  30. data/spec/js/global.js +2 -1
  31. data/spec/js/path.js +2 -1
  32. data/spec/resize_reload_spec.rb +4 -8
  33. data/spec/save_images_spec.rb +3 -2
  34. data/spec/validate_spec.rb +76 -0
  35. data/templates/configs/capture.yaml +61 -0
  36. data/templates/configs/history.yaml +81 -0
  37. data/templates/configs/spider.yaml +13 -2
  38. data/templates/javascript/cookies_and_headers--casper.js +16 -0
  39. data/templates/javascript/cookies_and_headers--phantom.js +26 -0
  40. data/templates/javascript/disable_javascript--casper.js +11 -0
  41. data/templates/javascript/disable_javascript--phantom.js +13 -0
  42. data/templates/javascript/interact--casper.js +11 -0
  43. data/templates/javascript/interact--phantom.js +17 -0
  44. data/templates/javascript/wait--casper.js +8 -0
  45. data/templates/javascript/wait--phantom.js +8 -0
  46. data/wraith.gemspec +1 -1
  47. metadata +27 -14
  48. data/lib/wraith/javascript/_phantom__common.js +0 -120
  49. data/lib/wraith/javascript/phantom--nojs.js +0 -6
  50. data/templates/configs/component.yaml +0 -60
  51. data/templates/configs/multiple_domains.yaml +0 -53
  52. data/templates/javascript/beforeCapture--casper_example.js +0 -12
  53. data/templates/javascript/beforeCapture--phantom_example.js +0 -36
data/lib/wraith/spider.rb CHANGED
@@ -1,9 +1,12 @@
1
1
  require "wraith"
2
+ require "wraith/helpers/logger"
2
3
  require "anemone"
3
4
  require "nokogiri"
4
5
  require "uri"
5
6
 
6
7
  class Wraith::Spidering
8
+ include Logging
9
+
7
10
  def initialize(config)
8
11
  @wraith = Wraith::Wraith.new(config)
9
12
  end
@@ -11,10 +14,10 @@ class Wraith::Spidering
11
14
  def check_for_paths
12
15
  if @wraith.paths.nil?
13
16
  unless @wraith.sitemap.nil?
14
- puts "no paths defined in config, loading paths from sitemap"
17
+ logger.info "no paths defined in config, loading paths from sitemap"
15
18
  spider = Wraith::Sitemap.new(@wraith)
16
19
  else
17
- puts "no paths defined in config, crawling from site root"
20
+ logger.info "no paths defined in config, crawling from site root"
18
21
  spider = Wraith::Crawler.new(@wraith)
19
22
  end
20
23
  spider.determine_paths
@@ -48,6 +51,8 @@ class Wraith::Spider
48
51
  end
49
52
 
50
53
  class Wraith::Crawler < Wraith::Spider
54
+ include Logging
55
+
51
56
  EXT = %w(flv swf png jpg gif asx zip rar tar 7z \
52
57
  gz jar js css dtd xsd ico raw mp3 mp4 \
53
58
  wav wmv ape aac ac3 wma aiff mpg mpeg \
@@ -56,10 +61,10 @@ class Wraith::Crawler < Wraith::Spider
56
61
 
57
62
  def spider
58
63
  if File.exist?(@wraith.spider_file) && modified_since(@wraith.spider_file, @wraith.spider_days[0])
59
- puts "using existing spider file"
64
+ logger.info "using existing spider file"
60
65
  @paths = eval(File.read(@wraith.spider_file))
61
66
  else
62
- puts "creating new spider file"
67
+ logger.info "creating new spider file"
63
68
  spider_list = []
64
69
  Anemone.crawl(@wraith.base_domain) do |anemone|
65
70
  anemone.skip_links_like(/\.(#{EXT.join('|')})$/)
@@ -76,9 +81,11 @@ class Wraith::Crawler < Wraith::Spider
76
81
  end
77
82
 
78
83
  class Wraith::Sitemap < Wraith::Spider
84
+ include Logging
85
+
79
86
  def spider
80
87
  unless @wraith.sitemap.nil?
81
- puts "reading sitemap.xml from #{@wraith.sitemap}"
88
+ logger.info "reading sitemap.xml from #{@wraith.sitemap}"
82
89
  if @wraith.sitemap =~ URI.regexp
83
90
  sitemap = Nokogiri::XML(open(@wraith.sitemap))
84
91
  else
@@ -11,8 +11,6 @@ class Wraith::Thumbnails
11
11
  end
12
12
 
13
13
  def generate_thumbnails
14
- puts "Generating thumbnails"
15
-
16
14
  files = Dir.glob("#{wraith.directory}/*/*.png")
17
15
 
18
16
  Parallel.each(files, :in_processes => Parallel.processor_count) do |filename|
@@ -0,0 +1,98 @@
1
+ require "wraith/wraith"
2
+ require "wraith/helpers/logger"
3
+ require "wraith/helpers/utilities"
4
+
5
+ class Wraith::Validate
6
+ include Logging
7
+
8
+ def initialize(config, yaml_passed = false)
9
+ @wraith = Wraith::Wraith.new(config, yaml_passed)
10
+ end
11
+
12
+ def validate(mode = false)
13
+ list_debug_information if @wraith.verbose
14
+ validate_basic_properties
15
+ validate_mode_properties(mode) if mode
16
+ # if we get this far, we've only had warnings at worst, not errors.
17
+ "Config validated. No serious issues found."
18
+ end
19
+
20
+ def validate_basic_properties
21
+ if @wraith.engine.nil?
22
+ raise MissingRequiredPropertyError, "You must specify a browser engine! #{docs_prompt}"
23
+ end
24
+ unless @wraith.domains
25
+ raise MissingRequiredPropertyError, "You must specify at least one domain for Wraith to do anything! #{docs_prompt}"
26
+ end
27
+ #@TODO validate fuzz is not nil, etc
28
+ end
29
+
30
+ def validate_mode_properties(mode)
31
+ case mode
32
+ when "capture"
33
+ validate_capture_mode
34
+ when "history"
35
+ validate_history_mode
36
+ when "latest"
37
+ validate_history_mode
38
+ validate_base_shots_exist
39
+ else
40
+ logger.warn "Wraith doesn't know how to validate mode '#{mode}'. Continuing..."
41
+ end
42
+ end
43
+
44
+ def validate_capture_mode
45
+ if @wraith.domains.length != 2
46
+ raise InvalidDomainsError, "`wraith capture` requires exactly two domains. #{docs_prompt}"
47
+ end
48
+ if @wraith.history_dir
49
+ logger.warn "You have specified a `history_dir` in your config, but this is used in `history` mode, NOT `capture` mode. #{docs_prompt}"
50
+ end
51
+ end
52
+
53
+ def validate_history_mode
54
+ unless @wraith.history_dir
55
+ raise MissingRequiredPropertyError, "You must specify a `history_dir` to run Wraith in history mode. #{docs_prompt}"
56
+ end
57
+ if @wraith.domains.length != 1
58
+ raise InvalidDomainsError, "History mode requires exactly one domain. #{docs_prompt}"
59
+ end
60
+ end
61
+
62
+ def validate_base_shots_exist
63
+ unless File.directory?(@wraith.history_dir)
64
+ logger.error "You need to run `wraith history` at least once before you can run `wraith latest`!"
65
+ end
66
+ end
67
+
68
+ def docs_prompt
69
+ "See the docs at http://bbc-news.github.io/wraith/"
70
+ end
71
+
72
+ def list_debug_information
73
+ wraith_version = Wraith::VERSION
74
+ ruby_version = run_command_safely('ruby -v') || 'Ruby not installed'
75
+ phantomjs_version = run_command_safely('phantomjs --version') || 'PhantomJS not installed'
76
+ casperjs_version = run_command_safely('casperjs --version') || 'CasperJS not installed'
77
+ imagemagick_version = run_command_safely('convert -version') || 'ImageMagick not installed'
78
+
79
+ logger.debug "#################################################"
80
+ logger.debug " Wraith version: #{wraith_version}"
81
+ logger.debug " Ruby version: #{ruby_version}"
82
+ logger.debug " ImageMagick: #{imagemagick_version}"
83
+ logger.debug " PhantomJS version: #{phantomjs_version}"
84
+ logger.debug " CasperJS version: #{casperjs_version}"
85
+ # @TODO - add a SlimerJS equivalent
86
+ logger.debug "#################################################"
87
+ logger.debug ""
88
+ end
89
+
90
+ def run_command_safely(command)
91
+ begin
92
+ output = `#{command}`
93
+ rescue Exception => e
94
+ return false
95
+ end
96
+ output.lines.first
97
+ end
98
+ end
@@ -1,3 +1,3 @@
1
1
  module Wraith
2
- VERSION = "3.0.4"
2
+ VERSION = "3.1.0"
3
3
  end
data/lib/wraith/wraith.rb CHANGED
@@ -1,14 +1,18 @@
1
1
  require "yaml"
2
- require "wraith/utilities"
2
+ require "wraith/helpers/logger"
3
+ require "wraith/helpers/utilities"
3
4
 
4
5
  class Wraith::Wraith
6
+ include Logging
5
7
  attr_accessor :config
6
8
 
7
9
  def initialize(config, yaml_passed = false)
8
- @config = yaml_passed ? config : open_config_file(config)
9
- rescue
10
- puts "unable to find config at #{config}"
11
- exit 1
10
+ begin
11
+ @config = yaml_passed ? config : open_config_file(config)
12
+ logger.level = verbose ? Logger::DEBUG : Logger::INFO
13
+ rescue
14
+ logger.error "unable to find config at #{config}"
15
+ end
12
16
  end
13
17
 
14
18
  def open_config_file(config_name)
@@ -26,15 +30,13 @@ class Wraith::Wraith
26
30
  end
27
31
 
28
32
  def history_dir
29
- @config["history_dir"]
33
+ @config["history_dir"] || false
30
34
  end
31
35
 
32
36
  def engine
33
37
  engine = @config["browser"]
34
38
  # Legacy support for those using the old style "browser: \n phantomjs: 'casperjs'" configs
35
- if engine.is_a? Hash
36
- engine = engine.values.first
37
- end
39
+ engine = engine.values.first if engine.is_a? Hash
38
40
  engine
39
41
  end
40
42
 
@@ -51,12 +53,12 @@ class Wraith::Wraith
51
53
  path_to_js_templates + "/casper.js"
52
54
  # @TODO - add a SlimerJS option
53
55
  else
54
- abort "Wraith does not recognise the browser engine '#{engine}'"
56
+ logger.error "Wraith does not recognise the browser engine '#{engine}'"
55
57
  end
56
58
  end
57
59
 
58
60
  def before_capture
59
- @config["before_capture"] ? convert_to_absolute(@config["before_capture"]) : "false"
61
+ @config["before_capture"] ? convert_to_absolute(@config["before_capture"]) : false
60
62
  end
61
63
 
62
64
  def widths
@@ -158,4 +160,9 @@ class Wraith::Wraith
158
160
  def phantomjs_options
159
161
  @config["phantomjs_options"]
160
162
  end
161
- end
163
+
164
+ def verbose
165
+ # @TODO - also add a `--verbose` CLI flag which overrides whatever you have set in the config
166
+ @config['verbose'] || false
167
+ end
168
+ end
data/spec/_helpers.rb CHANGED
@@ -2,8 +2,10 @@ require "rspec"
2
2
  require "./lib/wraith/cli"
3
3
 
4
4
  def create_diff_image
5
- saving.capture_page_image(wraith.engine, test_url1, 320, test_image1, selector, 'false', 'false')
6
- saving.capture_page_image(wraith.engine, test_url2, 320, test_image2, selector, 'false', 'false')
5
+ capture_image = saving.construct_command(320, test_url1, test_image1, selector, false, false)
6
+ `#{capture_image}`
7
+ capture_image = saving.construct_command(320, test_url2, test_image2, selector, false, false)
8
+ `#{capture_image}`
7
9
  end
8
10
 
9
11
  def crop_images
@@ -14,14 +16,6 @@ def compare_images
14
16
  Wraith::CompareImages.new(config_name).compare_task(test_image1, test_image2, diff_image, data_txt)
15
17
  end
16
18
 
17
- def run_js_then_capture(config)
18
- generated_image = 'shots/test/temporary_jsified_image.png'
19
- saving.capture_page_image(config[:engine], test_url1, 320, generated_image, selector, config[:global_js], config[:path_js])
20
- Wraith::CompareImages.new(config_name).compare_task(generated_image, config[:output_should_look_like], diff_image, data_txt)
21
- diff = File.open('shots/test/test.txt', "rb").read
22
- expect(diff).to eq '0.0'
23
- end
24
-
25
19
  def get_path_relative_to(current_file, file_to_find)
26
20
  File.expand_path(File.join(File.dirname(current_file), file_to_find))
27
21
  end
@@ -1,16 +1,27 @@
1
1
  require "_helpers"
2
2
 
3
+ def run_js_then_capture(config)
4
+ saving = Wraith::SaveImages.new(config_name)
5
+ generated_image = 'shots/test/temporary_jsified_image.png'
6
+ capture_image = saving.construct_command(320, "http://www.bbc.com/afrique", generated_image, selector, config[:global_js], config[:path_js])
7
+ `#{capture_image}`
8
+ Wraith::CompareImages.new(config_name).compare_task(generated_image, config[:output_should_look_like], "shots/test/test_diff.png", "shots/test/test.txt")
9
+ diff = File.open('shots/test/test.txt', "rb").read
10
+ expect(diff).to eq '0.0'
11
+ end
12
+
3
13
  describe Wraith do
4
14
  let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--casper.yaml" }
5
- let(:test_url1) { "http://www.bbc.com/afrique" }
6
- let(:diff_image) { "shots/test/test_diff.png" }
7
- let(:data_txt) { "shots/test/test.txt" }
8
- let(:saving) { Wraith::SaveImages.new(config_name) }
9
15
  let(:wraith) { Wraith::Wraith.new(config_name) }
10
16
  let(:selector) { "body" }
11
17
  let(:before_suite_js) { "spec/js/global.js" }
12
18
  let(:before_capture_js) { "spec/js/path.js" }
13
19
 
20
+ before(:each) do
21
+ Wraith::FolderManager.new(config_name).clear_shots_folder
22
+ Dir.mkdir("shots/test")
23
+ end
24
+
14
25
  describe "different ways of determining the before_capture file" do
15
26
  it "should allow users to specify the relative path to the before_capture file" do
16
27
  config = YAML.load '
@@ -19,7 +30,7 @@ describe Wraith do
19
30
  '
20
31
  wraith = Wraith::Wraith.new(config, true)
21
32
  # not sure about having code IN the test, but we want to get this right.
22
- expect(wraith.before_capture).to eq (`pwd`.chomp! + '/javascript/do_something.js')
33
+ expect(wraith.before_capture).to eq (Dir.pwd + '/javascript/do_something.js')
23
34
  end
24
35
 
25
36
  it "should allow users to specify the absolute path to the before_capture file" do
@@ -35,11 +46,10 @@ describe Wraith do
35
46
  # @TODO - we need tests determining the path to "path-level before_capture hooks"
36
47
 
37
48
  describe "When hooking into beforeCapture (CasperJS)" do
38
-
39
49
  it "Executes the global JS before capturing" do
40
50
  run_js_then_capture(
41
51
  global_js: before_suite_js,
42
- path_js: 'false',
52
+ path_js: false,
43
53
  output_should_look_like: 'spec/base/global.png',
44
54
  engine: 'casperjs'
45
55
  )
@@ -47,7 +57,7 @@ describe Wraith do
47
57
 
48
58
  it "Executes the path-level JS before capturing" do
49
59
  run_js_then_capture(
50
- global_js: 'false',
60
+ global_js: false,
51
61
  path_js: before_capture_js,
52
62
  output_should_look_like: 'spec/base/path.png',
53
63
  engine: 'casperjs'
@@ -100,5 +110,4 @@ describe Wraith do
100
110
  # )
101
111
  # end
102
112
  # end
103
-
104
- end
113
+ end
data/spec/config_spec.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "_helpers"
2
2
 
3
3
  describe "wraith config" do
4
-
5
4
  let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--phantom.yaml" }
6
5
  let(:wraith) { Wraith::Wraith.new(config_name) }
7
6
 
@@ -20,7 +19,6 @@ describe "wraith config" do
20
19
  end
21
20
 
22
21
  describe "When creating a wraith worker" do
23
-
24
22
  it "should have a browser engine defined" do
25
23
  expect(wraith.engine).to be_a String
26
24
  end
@@ -71,7 +69,6 @@ describe "wraith config" do
71
69
  end
72
70
 
73
71
  describe "different ways of initialising browser engine" do
74
-
75
72
  it "should let us directly specify the engine" do
76
73
  config = YAML.load 'browser: phantomjs'
77
74
  wraith = Wraith::Wraith.new(config, true)
@@ -90,7 +87,6 @@ describe "wraith config" do
90
87
  end
91
88
 
92
89
  describe "different ways of determining the snap file" do
93
-
94
90
  it "should calculate the snap file from the engine" do
95
91
  config = YAML.load 'browser: phantomjs'
96
92
  wraith = Wraith::Wraith.new(config, true)
@@ -117,7 +113,7 @@ describe "wraith config" do
117
113
  '
118
114
  wraith = Wraith::Wraith.new(config, true)
119
115
  # not sure about having code IN the test, but we want to get this right.
120
- expect(wraith.snap_file).to eq (`pwd`.chomp! + '/path/to/snap.js')
116
+ expect(wraith.snap_file).to eq (Dir.pwd + '/path/to/snap.js')
121
117
  end
122
118
 
123
119
  it "should allow users to specify the absolute path to their own snap file" do
@@ -131,7 +127,6 @@ describe "wraith config" do
131
127
  end
132
128
 
133
129
  describe "different modes of efficiency (resize or reload)" do
134
-
135
130
  it "should trigger efficient mode if resize was specified" do
136
131
  config = YAML.load 'resize_or_reload: "resize"'
137
132
  wraith = Wraith::Wraith.new(config, true)
@@ -143,7 +138,5 @@ describe "wraith config" do
143
138
  wraith = Wraith::Wraith.new(config, true)
144
139
  expect(wraith.resize).to eq false
145
140
  end
146
-
147
141
  end
148
-
149
- end
142
+ end
@@ -4,11 +4,7 @@
4
4
  ##########
5
5
 
6
6
  #Headless browser option
7
- browser:
8
- phantomjs: "casperjs"
9
- # slimerjs: "slimerjs"
10
-
11
- #overriding the snap file
7
+ browser: "casperjs"
12
8
  snap_file: "spec/js/custom_snap_file.js"
13
9
 
14
10
  # Type the name of the directory that shots will be stored in
@@ -0,0 +1,43 @@
1
+ require "_helpers"
2
+
3
+ describe "Wraith config to CLI argument mapping" do
4
+ describe "passing variables to construct_command" do
5
+ # set default variables we can override if necessary
6
+ let(:config_name) { get_path_relative_to __FILE__, "./configs/test_config--phantom.yaml" }
7
+ let(:saving) { Wraith::SaveImages.new(config_name) }
8
+ let(:width) { 320 }
9
+ let(:url) { 'http://example.com/my-page' }
10
+ let(:file_name) { 'wraith/my-page/320_phantomjs_latest.png' }
11
+ let(:selector) { '.my_selector' }
12
+ let(:global_bc) { 'javascript/before_capture.js' }
13
+ let(:path_bc) { false }
14
+
15
+ it "should take a load of variables and construct a command" do
16
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
17
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
18
+ expect(actual).to eq expected
19
+ end
20
+
21
+ it "should allow hashtags in selectors" do
22
+ selector = '#some-id'
23
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '\\#some-id' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
24
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
25
+ expect(actual).to eq expected
26
+ end
27
+
28
+ it "should be able to pass multiple widths at once" do
29
+ width = [320, 624, 976]
30
+ expected = "phantomjs '#{Dir.pwd}/lib/wraith/javascript/phantom.js' 'http://example.com/my-page' '320,624,976' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
31
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
32
+ expect(actual).to eq expected
33
+ end
34
+
35
+ it "should call casperjs when the config says so" do
36
+ config_name = get_path_relative_to(__FILE__, "./configs/test_config--casper.yaml")
37
+ saving = Wraith::SaveImages.new(config_name)
38
+ expected = "casperjs '#{Dir.pwd}/spec/js/custom_snap_file.js' 'http://example.com/my-page' '320' 'wraith/my-page/320_phantomjs_latest.png' '.my_selector' '#{Dir.pwd}/javascript/before_capture.js' 'false'"
39
+ actual = saving.construct_command(width, url, file_name, selector, global_bc, path_bc)
40
+ expect(actual).to eq expected
41
+ end
42
+ end
43
+ end