govuk-diff-pages 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +75 -0
  8. data/Rakefile +14 -0
  9. data/config/settings.yml +53 -0
  10. data/docs/screenshots/gallery.png +0 -0
  11. data/govuk-diff-pages.gemspec +30 -0
  12. data/lib/govuk/diff/pages.rb +30 -0
  13. data/lib/govuk/diff/pages/app_config.rb +61 -0
  14. data/lib/govuk/diff/pages/format_searcher.rb +37 -0
  15. data/lib/govuk/diff/pages/html_diff.rb +10 -0
  16. data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
  17. data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
  18. data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
  19. data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
  20. data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
  21. data/lib/govuk/diff/pages/link_checker.rb +47 -0
  22. data/lib/govuk/diff/pages/page_indexer.rb +27 -0
  23. data/lib/govuk/diff/pages/page_searcher.rb +46 -0
  24. data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
  25. data/lib/govuk/diff/pages/text_diff.rb +13 -0
  26. data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
  27. data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
  28. data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
  29. data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
  30. data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
  31. data/lib/govuk/diff/pages/url_checker.rb +36 -0
  32. data/lib/govuk/diff/pages/version.rb +7 -0
  33. data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
  34. metadata +189 -0
@@ -0,0 +1,31 @@
1
<!DOCTYPE html>
<html lang="en">
  <head>
    <%# Rendered with the caller's binding; reads base_path, diff_string, production_url and staging_url. %>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>HTML diff for <%= base_path %></title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <div class="container">

      <h1>HTML diff for <%= base_path %></h1>

      <div class="row">
        <div class="col-md-6">
          <%# target must be the keyword "_blank"; a bare "blank" names a window that is reused by every link. %>
          <h2 class='example left'>HTML on production (<a href='<%= production_url(base_path) %>' target='_blank' rel='noopener'>visit page</a>)</h2>
        </div>
        <div class="col-md-6">
          <h2 class='example right'>HTML on staging (<a href='<%= staging_url(base_path) %>' target='_blank' rel='noopener'>visit page</a>)</h2>
        </div>
      </div>

      <hr/>

      <%= diff_string %>
    </div>
  </body>
</html>
@@ -0,0 +1,84 @@
1
+ require 'nokogiri'
2
+ require 'diffy'
3
+
4
# ERB and FileUtils are used below; require them explicitly rather than
# relying on another library having loaded them first.
require 'erb'
require 'fileutils'

module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Fetches the same path from staging and production, normalises
        # environment-specific URLs in the <body>, and writes an HTML diff
        # page for every path whose bodies differ.
        class Differ
          # Strings (or patterns) rewritten in both bodies before diffing, so
          # that they do not show up as differences.
          REPLACEMENTS = {
            'https://www-origin.staging.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://www-origin.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://assets-origin.staging.publishing.service.gov.uk' => 'https://assets.digital.cabinet-office.gov.uk',
            /https:\/\/assets\.digital\.cabinet-office\.gov\.uk\/specialist-frontend\/application-[0-9a-f]{32}\.js/ => 'https://assets.digital.cabinet-office.gov.uk/specialist-frontend/application-7463fa64f198b6568dc121dae41d44b1.js',
          }.freeze

          # Hash of base_path => path of the diff page written for it.
          attr_reader :differing_pages

          # @param config [#html_diff, #domains] application configuration;
          #   reads html_diff.directory, domains.production and domains.staging.
          #
          # Reads the page template and empties the diff output directory.
          def initialize(config)
            @config = config
            @template = File.read "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/html_diff_template.erb"
            @diff_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            reset_html_diffs_dir
            @differing_pages = {}
          end

          # Compares staging against production for one path.
          #
          # @param base_path [String] path appended to each domain
          # @return [String, nil] the diff page's filename, or nil when the
          #   normalised bodies are identical
          def diff(base_path)
            staging_html = get_normalized_html(staging_url(base_path))
            production_html = get_normalized_html(production_url(base_path))
            diffy = Diffy::Diff.new(production_html, staging_html, context: 3)
            return if diffy.diff.empty?

            write_diff_page(base_path, diffy.to_s(:html))
            @differing_pages[base_path] = html_diff_filename(base_path)
          end

          private

          # Ensures the output directory exists (creating missing parents too)
          # and removes the files left over from a previous run.
          def reset_html_diffs_dir
            FileUtils.mkdir_p(@diff_dir)
            FileUtils.rm Dir.glob("#{@diff_dir}/*")
          end

          # Renders the template to the diff page for base_path. The template
          # is evaluated with this method's binding, so the parameter names
          # (base_path, diff_string) are part of its contract.
          def write_diff_page(base_path, diff_string)
            renderer = ERB.new(@template)
            File.open(html_diff_filename(base_path), "w") do |fp|
              fp.puts renderer.result(binding)
            end
          end

          def html_diff_filename(base_path)
            "#{@diff_dir}/#{safe_filename(base_path)}.html"
          end

          # "/a/b/" => "a.b"
          def safe_filename(base_path)
            remove_starting_and_trailing_slash(base_path).tr('/', '.')
          end

          # \A and \z anchor to the whole string rather than to each line.
          def remove_starting_and_trailing_slash(base_path)
            base_path.sub(/\A\//, '').sub(/\/\z/, '')
          end

          # Returns the page's <body> markup with every REPLACEMENTS entry applied.
          def get_normalized_html(url)
            body_html = Nokogiri::HTML(fetch_html(url)).css('body').to_s
            REPLACEMENTS.each do |original, replacement|
              body_html.gsub!(original, replacement)
            end
            body_html
          end

          # Runs curl with an argument vector, so no shell is involved and
          # characters such as "&", "?" or ";" in the URL are passed through
          # verbatim instead of being interpreted.
          def fetch_html(url)
            IO.popen(['curl', '-s', url], &:read)
          end

          def production_url(base_path)
            "#{@config.domains.production}#{base_path}"
          end

          def staging_url(base_path)
            "#{@config.domains.staging}#{base_path}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'yaml'
2
+
3
module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Runs the HTML differ over every page in the page list and then
        # writes a gallery page for the pages that differ.
        class Runner
          # Loads the configuration, the page list and the gallery template,
          # and builds the Differ that does the per-page work.
          def initialize
            @config = AppConfig.new
            @govuk_pages = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
            gallery_template_path =
              "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/gallery_template.erb"
            @gallery_template = File.read(gallery_template_path)
            @differ = Differ.new(@config)
          end

          # Diffs each page, then renders the gallery.
          def run
            @govuk_pages.each { |page| @differ.diff(page) }
            create_gallery_page
          end

          private

          # Writes gallery.html into the HTML diff directory. The template is
          # evaluated with this method's binding, so @result_hash and the
          # local variable names below are visible to it and are kept as-is.
          def create_gallery_page
            @result_hash = @differ.differing_pages
            shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            Dir.mkdir(shots_dir) unless Dir.exist?(shots_dir)
            renderer = ERB.new(@gallery_template)
            File.open("#{shots_dir}/gallery.html", "w") { |fp| fp.puts renderer.result(binding) }
            display_browser_message(shots_dir)
          end

          # Tells the user where to find the generated gallery.
          def display_browser_message(shots_dir)
            gallery_url = " file://#{shots_dir}/gallery.html"
            puts "View the gallery of HTML diffs in your browser:"
            puts gallery_url
          end
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'yaml'
2
+ require 'rest-client'
3
+
4
module Govuk
  module Diff
    module Pages
      # Requests every page in the page list from production and prints a
      # tally of the outcomes.
      class LinkChecker
        def initialize
          @urls = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
          @config = AppConfig.new
          # outcome (status code, or 'EX' for failures without one) => paths
          @results = Hash.new { |hash, key| hash[key] = [] }
          @num_links = 0
          @url_checker = UrlChecker.new(@config)
        end

        # Requests every URL, then prints the summary.
        def run
          @urls.each { |u| make_request(u) }
          print_results
        end

        private

        def print_results
          puts "Number of pages checked: #{@num_links}"
          puts " of which:"
          @results.each do |code, links|
            puts " #{links.size} responded with #{code}"
          end
        end

        # GETs one page and files it under its outcome. Any StandardError
        # raised by the request is reported and recorded, never propagated.
        def make_request(url)
          @num_links += 1
          normalized_url = @url_checker.production_url(url)
          begin
            print "GET #{normalized_url} "
            response = RestClient.get(normalized_url.to_s)
            @results[response.code] << url
            puts "Response #{response.code}"
          rescue => e
            puts "\nERROR GETTING #{normalized_url}"
            puts "#{e.class} ::: #{e.message}"
            @results[failure_key(e)] << url
          end
        end

        # Bucket for a failed request. Exceptions that expose an HTTP status
        # through #http_code (as rest-client's response errors do) are filed
        # under that status, so the summary shows e.g. 404s separately;
        # anything else is filed under 'EX'.
        def failure_key(error)
          code = error.http_code if error.respond_to?(:http_code)
          code || 'EX'
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'yaml'
2
+
3
module Govuk
  module Diff
    module Pages
      # Builds the list of pages to diff and saves it, sorted, as YAML in
      # the govuk pages file.
      class PageIndexer
        def initialize
          @pages = []
          @config = AppConfig.new("#{Govuk::Diff::Pages.root_dir}/config/settings.yml")
        end

        # Looks up the formats, collects the pages for them and writes the
        # sorted list to Govuk::Diff::Pages.govuk_pages_file.
        def run
          formats = FormatSearcher.new(@config).run
          # Array() guards against a nil result (for instance when nothing
          # was found), so an empty list is written instead of raising
          # NoMethodError on nil.sort.
          @pages = Array(PageSearcher.new(@config, formats).run)
          File.open(Govuk::Diff::Pages.govuk_pages_file, 'w') do |fp|
            fp.puts @pages.sort.to_yaml
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'rest-client'
2
+ require 'json'
3
+
4
module Govuk
  module Diff
    module Pages
      # Collects, for each format, the first valid links returned by the
      # production search API.
      class PageSearcher
        # @param config [#verbose, #domains, #page_indexer] application config
        # @param formats [Array] formats to query
        def initialize(config, formats)
          @config = config
          @formats = formats
          @pages = []
          @url_checker = UrlChecker.new(@config)
        end

        # Queries every format and returns the combined list of links.
        #
        # @return [Array] always an array (empty when there are no formats);
        #   the list is rebuilt on every call rather than appended to.
        def run
          @pages = @formats.flat_map do |format|
            puts "Getting top #{@config.page_indexer.max_pages_per_format} for format #{format}" if verbose?
            top_pages_for_format(format)
          end
        end

        def verbose?
          @config.verbose
        end

        private

        def top_pages_for_format(format)
          result_set = JSON.parse(result_set_for_format(format))
          extract_top_govuk_pages(result_set)
        end

        # Keeps the links accepted by the URL checker, capped at the
        # configured maximum per format.
        def extract_top_govuk_pages(result_set)
          links = result_set.fetch('results').map { |result| result['link'] }
          valid_links = links.select { |link| @url_checker.valid?(link) }
          valid_links.first(@config.page_indexer.max_pages_per_format)
        end

        def result_set_for_format(format)
          url = "#{@config.domains.production}/api/search.json?filter_format=#{format}"
          RestClient.get(url)
        end
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ require 'govuk/diff/pages'
2
+
3
# Tasks that produce the visual, HTML and text diffs.
namespace :diff do
  desc 'produce visual diffs'
  task visual: %w[config:pre_flight_check] do
    puts "---> Creating Visual Diffs"
    wraith_command = "wraith capture #{Govuk::Diff::Pages::WRAITH_CONFIG_FILE}"
    puts wraith_command
    system wraith_command
  end

  desc 'produce html diffs'
  task(:html) { Govuk::Diff::Pages::HtmlDiff::Runner.new.run }

  desc 'produce text diffs'
  task :text do
    # Drop the first command-line argument (presumably the task name);
    # whatever remains is treated as the YAML files to process.
    ARGV.shift
    abort "You must provide one or more YAML files containing the pages to diff" if ARGV.empty?

    require 'yaml'

    domains = {
      left_domain: ENV.fetch("LEFT", "www-origin.staging.publishing.service.gov.uk"),
      right_domain: ENV.fetch("RIGHT", "www-origin.publishing.service.gov.uk")
    }

    ARGV.each do |file|
      runner = Govuk::Diff::Pages::TextDiff::Runner.new(pages: YAML.load_file(file), **domains)
      runner.run
    end
  end
end
37
+
38
# Tasks that check prerequisites and generate the configuration files.
namespace :config do
  desc "Checks that dependencies are in place"
  task :pre_flight_check do
    puts "Checking required packages installed."
    required_binaries = { imagemagick: 'convert', phantomjs: 'phantomjs' }

    # Report every package in turn and keep the ones `which` cannot find.
    missing = required_binaries.select do |package, binary|
      print "#{package}..... "
      found = !%x[ which #{binary} ].empty?
      puts(found ? "OK" : "Not found")
      !found
    end

    if missing.any?
      puts "ERROR: A required dependency is not installed"
      exit 1
    end
  end

  desc 'merges settings.yml with govuk_pages.yml to produce merged config file for wraith'
  task :wraith do
    puts "---> Generating Wraith config"
    Govuk::Diff::Pages::WraithConfigGenerator.new.tap do |generator|
      generator.run
      generator.save
    end
  end

  desc 'update config files with list of pages to diff'
  task :update_page_list do
    puts "---> Updating page list"
    indexer = Govuk::Diff::Pages::PageIndexer.new
    indexer.run
  end
end
73
+
74
# Housekeeping for the generated output directories.
namespace :shots do
  desc "clears the screen shots directory"
  task :clear do
    puts "---> Clearing shots directory"
    require 'fileutils'
    config = Govuk::Diff::Pages::AppConfig.new
    [config.wraith.directory, config.html_diff.directory].each do |directory|
      shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{directory}"
      # remove_dir raises Errno::ENOENT for a missing path, which would
      # abort the task (and skip the second directory) before any diff has
      # ever been produced; a directory that is already gone is fine.
      FileUtils.remove_dir shots_dir if Dir.exist?(shots_dir)
    end
  end
end
86
+
87
# Umbrella task: refresh the page list, regenerate the wraith config, then
# run the visual and HTML diffs, in that order.
desc 'Generate config files and run diffs'
task diff: %w[config:update_page_list config:wraith diff:visual diff:html]

desc 'checks all URLs are accessible'
task(:check_urls) { Govuk::Diff::Pages::LinkChecker.new.run }
@@ -0,0 +1,13 @@
1
module Govuk
  module Diff
    module Pages
      # Namespace for the text-diff classes. Each constant is registered for
      # autoloading, so its file is only required on first reference.
      module TextDiff
        {
          Differ: 'differ',
          Formatter: 'formatter',
          Renderer: 'renderer',
          Retriever: 'retriever',
          Runner: 'runner'
        }.each do |constant_name, file_name|
          autoload constant_name, "govuk/diff/pages/text_diff/#{file_name}"
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'diffy'
2
+
3
module Govuk
  module Diff
    module Pages
      module TextDiff
        # Thin wrapper around Diffy for comparing two pieces of text.
        class Differ
          # @param left [String] first text
          # @param right [String] second text
          # @return [String] Diffy's string rendering of the comparison
          def diff(left, right)
            comparison = Diffy::Diff.new(left, right)
            comparison.to_s
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'nokogiri'
2
+
3
module Govuk
  module Diff
    module Pages
      module TextDiff
        # Reduces an HTML document to the text of its text nodes.
        class Formatter
          # @param html [String] markup to parse
          # @return [String] the text of every node matched by //text()
          def call(html)
            document = Nokogiri::HTML(html)
            text_nodes = document.xpath("//text()")
            text_nodes.text
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Govuk
  module Diff
    module Pages
      module TextDiff
        # Reports the outcome of a text-diff run: prints "OK!" when every
        # response is empty, otherwise aborts with the joined responses.
        class Renderer
          SEPARATOR = "\n\n".freeze

          # @param kernel [#abort] object used for process-level output;
          #   injectable so callers can substitute their own.
          def initialize(kernel = Kernel)
            @kernel = kernel
          end

          # @param responses [Array<String>] one diff per page
          def call(responses)
            if responses.all?(&:empty?)
              report_success
            else
              @kernel.abort responses.join(SEPARATOR)
            end
          end

          private

          # Sends the success message through the injected kernel, as the
          # failure path does. Collaborators that only implement #abort keep
          # working: for them the message goes to the global puts.
          def report_success
            if @kernel.respond_to?(:puts)
              @kernel.puts 'OK!'
            else
              puts 'OK!'
            end
          end
        end
      end
    end
  end
end