govuk-diff-pages 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +75 -0
  8. data/Rakefile +14 -0
  9. data/config/settings.yml +53 -0
  10. data/docs/screenshots/gallery.png +0 -0
  11. data/govuk-diff-pages.gemspec +30 -0
  12. data/lib/govuk/diff/pages.rb +30 -0
  13. data/lib/govuk/diff/pages/app_config.rb +61 -0
  14. data/lib/govuk/diff/pages/format_searcher.rb +37 -0
  15. data/lib/govuk/diff/pages/html_diff.rb +10 -0
  16. data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
  17. data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
  18. data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
  19. data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
  20. data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
  21. data/lib/govuk/diff/pages/link_checker.rb +47 -0
  22. data/lib/govuk/diff/pages/page_indexer.rb +27 -0
  23. data/lib/govuk/diff/pages/page_searcher.rb +46 -0
  24. data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
  25. data/lib/govuk/diff/pages/text_diff.rb +13 -0
  26. data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
  27. data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
  28. data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
  29. data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
  30. data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
  31. data/lib/govuk/diff/pages/url_checker.rb +36 -0
  32. data/lib/govuk/diff/pages/version.rb +7 -0
  33. data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
  34. metadata +189 -0
@@ -0,0 +1,31 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <title>HTML diff for <%= base_path %></title>
8
+
9
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
10
+ <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
11
+ </head>
12
+ <body>
13
+ <div class="container">
14
+
15
+ <h1>HTML diff for <%= base_path %></h1>
16
+
17
+ <div class="row">
18
+ <div class="col-md-6">
19
+ <h2 class='example left'>HTML on production (<a href='<%= production_url(base_path) %>' target='_blank'>visit page</a>)</h2>
20
+ </div>
21
+ <div class="col-md-6">
22
+ <h2 class='example right'>HTML on staging (<a href='<%= staging_url(base_path) %>' target='_blank'>visit page</a>)</h2>
23
+ </div>
24
+ </div>
25
+
26
+ <hr/>
27
+
28
+ <%= diff_string %>
29
+ </div>
30
+ </body>
31
+ </html>
@@ -0,0 +1,84 @@
1
+ require 'nokogiri'
2
+ require 'diffy'
3
+
4
+ module Govuk
5
+ module Diff
6
+ module Pages
7
+ module HtmlDiff
8
# Compares the <body> markup of a page as served by the production and
# staging domains and writes an HTML diff page for every base path whose
# normalised markup differs.
class Differ
  # Substitutions applied, in insertion order, to the fetched markup before
  # diffing. Keys are literal strings or regexps; values are the replacement
  # text. Frozen so the shared constant cannot be mutated at runtime.
  REPLACEMENTS = {
    'https://www-origin.staging.publishing.service.gov.uk' => 'https://www.gov.uk',
    'https://www-origin.publishing.service.gov.uk' => 'https://www.gov.uk',
    'https://assets-origin.staging.publishing.service.gov.uk' => 'https://assets.digital.cabinet-office.gov.uk',
    /https:\/\/assets\.digital\.cabinet-office\.gov\.uk\/specialist-frontend\/application-[0-9a-f]{32}\.js/ => 'https://assets.digital.cabinet-office.gov.uk/specialist-frontend/application-7463fa64f198b6568dc121dae41d44b1.js',
  }.freeze

  # Hash of base path => path of the diff page written for it.
  attr_reader :differing_pages

  # @param config [Object] must respond to +html_diff.directory+,
  #   +domains.production+ and +domains.staging+
  #
  # Reads the diff page template and empties (creating if necessary) the
  # output directory.
  def initialize(config)
    @config = config
    @template = File.read "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/html_diff_template.erb"
    @diff_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
    reset_html_diffs_dir
    @differing_pages = {}
  end

  # Diffs one page; when the two environments differ, writes a diff page and
  # records it in #differing_pages.
  #
  # @param base_path [String] path appended to each configured domain
  # @return [String, nil] the diff page filename, or nil when there is no diff
  def diff(base_path)
    staging_html = get_normalized_html(staging_url(base_path))
    production_html = get_normalized_html(production_url(base_path))
    diffy = Diffy::Diff.new(production_html, staging_html, context: 3)
    return if diffy.diff == ""

    write_diff_page(base_path, diffy.to_s(:html))
    @differing_pages[base_path] = html_diff_filename(base_path)
  end

  private

  # Ensures the output directory exists and removes the files in it.
  # mkdir_p (rather than Dir.mkdir) so a nested configured directory whose
  # parents do not exist yet is created instead of raising Errno::ENOENT.
  def reset_html_diffs_dir
    FileUtils.mkdir_p(@diff_dir)
    FileUtils.rm Dir.glob("#{@diff_dir}/*")
  end

  # Renders the template and writes it to the page's diff file.
  # The template is evaluated with this method's binding, so the local names
  # +base_path+ and +diff_string+ are part of the template's contract.
  def write_diff_page(base_path, diff_string)
    renderer = ERB.new(@template)
    File.open(html_diff_filename(base_path), "w") do |fp|
      fp.puts renderer.result(binding)
    end
  end

  # Full path of the diff page for +base_path+ inside the output directory.
  def html_diff_filename(base_path)
    "#{@diff_dir}/#{safe_filename(base_path)}.html"
  end

  # Turns "/a/b/" into "a.b" so the path can be used as a flat file name.
  def safe_filename(base_path)
    remove_starting_and_trailing_slash(base_path).tr('/', '.')
  end

  # Strips one leading and one trailing slash from the whole string.
  def remove_starting_and_trailing_slash(base_path)
    base_path.sub(%r{\A/}, '').sub(%r{/\z}, '')
  end

  # Fetches +url+, keeps only the <body> element and applies REPLACEMENTS.
  def get_normalized_html(url)
    body_html = Nokogiri::HTML(fetch_html(url)).css('body').to_s
    REPLACEMENTS.reduce(body_html) do |html, (original, replacement)|
      html.gsub(original, replacement)
    end
  end

  # Downloads +url+ with curl and returns the response body.
  # The argument-array form runs curl without a shell, so characters such as
  # "&", "?" or ";" in the URL are passed to curl literally instead of being
  # interpreted by the shell. Raises Errno::ENOENT if curl is not installed.
  def fetch_html(url)
    IO.popen(['curl', '-s', url], &:read)
  end

  def production_url(base_path)
    "#{@config.domains.production}#{base_path}"
  end

  def staging_url(base_path)
    "#{@config.domains.staging}#{base_path}"
  end
end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,44 @@
1
+ require 'yaml'
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
+ module HtmlDiff
7
# Runs the HTML differ over every page in the govuk pages file and then
# writes a gallery page for the results.
class Runner
  def initialize
    @config = AppConfig.new
    @govuk_pages = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
    template_path = "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/gallery_template.erb"
    @gallery_template = File.read(template_path)
    @differ = Differ.new(@config)
  end

  # Diffs each listed page, then renders the gallery.
  def run
    @govuk_pages.each { |page| @differ.diff(page) }
    create_gallery_page
  end

  private

  # Renders the gallery template into gallery.html in the HTML diff directory.
  # The template is evaluated with this method's binding, so @result_hash and
  # the local variable names below are deliberately left as they are.
  def create_gallery_page
    @result_hash = @differ.differing_pages
    shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
    Dir.exist?(shots_dir) || Dir.mkdir(shots_dir)
    renderer = ERB.new(@gallery_template)
    File.open("#{shots_dir}/gallery.html", "w") { |fp| fp.puts renderer.result(binding) }
    display_browser_message(shots_dir)
  end

  # Tells the user where the generated gallery can be opened.
  def display_browser_message(shots_dir)
    puts "View the gallery of HTML diffs in your browser:",
         " file://#{shots_dir}/gallery.html"
  end
end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,47 @@
1
+ require 'yaml'
2
+ require 'rest-client'
3
+
4
+ module Govuk
5
+ module Diff
6
+ module Pages
7
# Requests every page listed in the govuk pages file and prints a summary of
# the response codes received.
class LinkChecker
  def initialize
    @urls = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
    @config = AppConfig.new
    # Response code (or 'EX' for a raised error) => list of urls.
    @results = Hash.new { |by_code, code| by_code[code] = [] }
    @num_links = 0
    @url_checker = UrlChecker.new(@config)
  end

  # Requests each url in turn, then prints the summary.
  def run
    @urls.each do |url|
      make_request(url)
    end
    print_results
  end

  private

  # Prints the number of pages checked and a count per response code.
  def print_results
    puts "Number of pages checked: #{@num_links}", " of which:"
    @results.each { |code, links| puts " #{links.size} responded with #{code}" }
  end

  # GETs the url (as rewritten by the url checker) and files it under its
  # response code; any error raised by the request is reported and filed
  # under 'EX'.
  def make_request(url)
    @num_links += 1
    normalized_url = @url_checker.production_url(url)
    begin
      print "GET #{normalized_url} "
      code = RestClient.get(normalized_url.to_s).code
      @results[code] << url
      puts "Response #{code}"
    rescue => e
      puts "\nERROR GETTING #{normalized_url}", "#{e.class} ::: #{e.message}"
      @results['EX'] << url
    end
  end
end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,27 @@
1
+ require 'yaml'
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
# Builds the list of pages to diff: looks up the formats, finds pages for
# those formats, and writes the sorted list as YAML to the govuk pages file.
class PageIndexer
  def initialize
    @pages = []
    @config = AppConfig.new("#{Govuk::Diff::Pages.root_dir}/config/settings.yml")
  end

  # Regenerates the govuk pages file.
  # Array() guards against the page search returning nil (nothing found), so
  # an empty list is written instead of failing on nil.sort.
  def run
    @pages = Array(PageSearcher.new(@config, get_formats).run)
    File.open(Govuk::Diff::Pages.govuk_pages_file, 'w') do |fp|
      fp.puts @pages.sort.to_yaml
    end
  end

  private

  # Formats returned by the format searcher for the current configuration.
  def get_formats
    @formats = FormatSearcher.new(@config).run
  end
end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,46 @@
1
+ require 'rest-client'
2
+ require 'json'
3
+
4
+ module Govuk
5
+ module Diff
6
+ module Pages
7
# Collects, for each given format, the first valid pages returned by the
# production search API.
class PageSearcher
  # @param config [Object] must respond to +domains.production+,
  #   +page_indexer.max_pages_per_format+ and +verbose+
  # @param formats [Array<String>] formats to search for
  def initialize(config, formats)
    @config = config
    @formats = formats
    @pages = []
    @url_checker = UrlChecker.new(@config)
  end

  # Searches every format and accumulates the links found.
  #
  # Always returns the (flat) array of links. The previous implementation
  # returned the result of Array#flatten!, which is nil when nothing needed
  # flattening, e.g. when +formats+ is empty.
  #
  # @return [Array<String>]
  def run
    @formats.each do |format|
      puts "Getting top #{@config.page_indexer.max_pages_per_format} for format #{format}" if verbose?
      @pages.concat(top_pages_for_format(format))
    end
    @pages
  end

  def verbose?
    @config.verbose
  end

  private

  # Parses the search response for +format+ and picks its top pages.
  def top_pages_for_format(format)
    result_set = JSON.parse(result_set_for_format(format))
    extract_top_govuk_pages(result_set)
  end

  # Takes the 'link' of every result, keeps those the url checker accepts,
  # and returns at most max_pages_per_format of them, in result order.
  def extract_top_govuk_pages(result_set)
    links = result_set.fetch('results').map { |result| result['link'] }
    links.select { |link| @url_checker.valid?(link) }
         .first(@config.page_indexer.max_pages_per_format)
  end

  # Raw search API response for pages of the given format.
  def result_set_for_format(format)
    url = "#{@config.domains.production}/api/search.json?filter_format=#{format}"
    RestClient.get(url)
  end
end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,93 @@
1
+ require 'govuk/diff/pages'
2
+
3
namespace :diff do
  desc 'produce visual diffs'
  task visual: ['config:pre_flight_check'] do
    puts "---> Creating Visual Diffs"
    wraith_command = "wraith capture #{Govuk::Diff::Pages::WRAITH_CONFIG_FILE}"
    puts wraith_command
    system wraith_command
  end

  desc 'produce html diffs'
  task :html do
    Govuk::Diff::Pages::HtmlDiff::Runner.new.run
  end

  desc 'produce text diffs'
  task :text do
    # Drop the first command-line argument; whatever remains is treated as
    # the list of YAML files to diff.
    ARGV.shift
    abort "You must provide one or more YAML files containing the pages to diff" if ARGV.empty?

    left_domain = ENV.fetch("LEFT", "www-origin.staging.publishing.service.gov.uk")
    right_domain = ENV.fetch("RIGHT", "www-origin.publishing.service.gov.uk")

    require 'yaml'

    ARGV.each do |pages_file|
      runner = Govuk::Diff::Pages::TextDiff::Runner.new(
        pages: YAML.load_file(pages_file),
        left_domain: left_domain,
        right_domain: right_domain
      )
      runner.run
    end
  end
end
37
+
38
namespace :config do
  desc "Checks that dependencies are in place"
  task :pre_flight_check do
    puts "Checking required packages installed."
    # map (not all?) so every package is checked and gets its own status
    # line even after one has been found missing.
    statuses = { imagemagick: 'convert', phantomjs: 'phantomjs' }.map do |package, binary|
      print "#{package}..... "
      found = !`which #{binary}`.empty?
      puts(found ? "OK" : "Not found")
      found
    end

    unless statuses.all?
      puts "ERROR: A required dependency is not installed"
      exit 1
    end
  end

  desc 'merges settings.yml with govuk_pages.yml to produce merged config file for wraith'
  task :wraith do
    puts "---> Generating Wraith config"
    Govuk::Diff::Pages::WraithConfigGenerator.new.tap(&:run).save
  end

  desc 'update config files with list of pages to diff'
  task :update_page_list do
    puts "---> Updating page list"
    indexer = Govuk::Diff::Pages::PageIndexer.new
    indexer.run
  end
end
73
+
74
namespace :shots do
  desc "clears the screen shots directory"
  task :clear do
    puts "---> Clearing shots directory"
    require 'fileutils'
    config = Govuk::Diff::Pages::AppConfig.new
    [config.wraith.directory, config.html_diff.directory].each do |directory|
      shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{directory}"
      # rm_rf instead of remove_dir: remove_dir raises Errno::ENOENT when the
      # directory does not exist, which made this task fail when run twice
      # or before any diffs had been produced.
      FileUtils.rm_rf shots_dir
    end
  end
end
86
+
87
# Umbrella task: refresh the page list, regenerate the wraith config, then
# run the visual and HTML diffs, in that order.
desc 'Generate config files and run diffs'
task diff: %w[config:update_page_list config:wraith diff:visual diff:html]

desc 'checks all URLs are accessible'
task :check_urls do
  link_checker = Govuk::Diff::Pages::LinkChecker.new
  link_checker.run
end
@@ -0,0 +1,13 @@
1
module Govuk
  module Diff
    module Pages
      # Namespace for the text diff classes; each constant is registered for
      # autoloading from its file under govuk/diff/pages/text_diff/.
      module TextDiff
        {
          Differ: 'differ',
          Formatter: 'formatter',
          Renderer: 'renderer',
          Retriever: 'retriever',
          Runner: 'runner'
        }.each do |constant, file|
          autoload constant, "govuk/diff/pages/text_diff/#{file}"
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'diffy'
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
+ module TextDiff
7
# Thin wrapper around Diffy for comparing two pieces of text.
class Differ
  # @param left [String] first text
  # @param right [String] second text
  # @return [String] Diffy's default string rendering of the diff
  def diff(left, right)
    comparison = Diffy::Diff.new(left, right)
    comparison.to_s
  end
end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'nokogiri'
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
+ module TextDiff
7
# Reduces an HTML document to the text of its text nodes.
class Formatter
  # @param html [String] markup to parse
  # @return [String] the text of every node matched by //text()
  def call(html)
    document = Nokogiri::HTML(html)
    text_nodes = document.xpath("//text()")
    text_nodes.text
  end
end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,23 @@
1
+ module Govuk
2
+ module Diff
3
+ module Pages
4
+ module TextDiff
5
# Reports the outcome of a set of text diffs.
class Renderer
  # Placed between the individual responses in the abort message.
  SEPARATOR = "\n\n".freeze

  # @param kernel [#abort] object that receives +abort+ when differences are
  #   reported; defaults to Kernel
  def initialize(kernel = Kernel)
    @kernel = kernel
  end

  # Prints 'OK!' when every response is empty; otherwise aborts via the
  # injected kernel with all responses joined by SEPARATOR.
  #
  # @param responses [Array<String>]
  def call(responses)
    if responses.any? { |response| !response.empty? }
      @kernel.abort responses.join(SEPARATOR)
    else
      puts 'OK!'
    end
  end
end
20
+ end
21
+ end
22
+ end
23
+ end