govuk-diff-pages 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +14 -0
- data/config/settings.yml +53 -0
- data/docs/screenshots/gallery.png +0 -0
- data/govuk-diff-pages.gemspec +30 -0
- data/lib/govuk/diff/pages.rb +30 -0
- data/lib/govuk/diff/pages/app_config.rb +61 -0
- data/lib/govuk/diff/pages/format_searcher.rb +37 -0
- data/lib/govuk/diff/pages/html_diff.rb +10 -0
- data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
- data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
- data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
- data/lib/govuk/diff/pages/link_checker.rb +47 -0
- data/lib/govuk/diff/pages/page_indexer.rb +27 -0
- data/lib/govuk/diff/pages/page_searcher.rb +46 -0
- data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
- data/lib/govuk/diff/pages/text_diff.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
- data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
- data/lib/govuk/diff/pages/url_checker.rb +36 -0
- data/lib/govuk/diff/pages/version.rb +7 -0
- data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
- metadata +189 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
<!DOCTYPE html>
<html lang="en">
  <head>
    <%# Rendered against a binding that provides base_path, diff_string, production_url and staging_url. %>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>HTML diff for <%= base_path %></title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <div class="container">

      <h1>HTML diff for <%= base_path %></h1>

      <div class="row">
        <div class="col-md-6">
          <%# target must be the reserved name "_blank"; plain "blank" names a single shared window. %>
          <h2 class='example left'>HTML on production (<a href='<%= production_url(base_path) %>' target='_blank' rel='noopener'>visit page</a>)</h2>
        </div>
        <div class="col-md-6">
          <h2 class='example right'>HTML on staging (<a href='<%= staging_url(base_path) %>' target='_blank' rel='noopener'>visit page</a>)</h2>
        </div>
      </div>

      <hr/>

      <%# diff_string is inserted as-is (unescaped). %>
      <%= diff_string %>
    </div>
  </body>
</html>
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'diffy'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Fetches the <body> of a page from the staging and production domains,
        # applies REPLACEMENTS to both, and writes an HTML diff page for every
        # base path whose normalised bodies differ.
        class Differ
          # Substitutions applied to each fetched body before diffing. Keys are
          # String or Regexp patterns, values are their replacement text.
          REPLACEMENTS = {
            'https://www-origin.staging.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://www-origin.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://assets-origin.staging.publishing.service.gov.uk' => 'https://assets.digital.cabinet-office.gov.uk',
            /https:\/\/assets\.digital\.cabinet-office\.gov\.uk\/specialist-frontend\/application-[0-9a-f]{32}\.js/ => 'https://assets.digital.cabinet-office.gov.uk/specialist-frontend/application-7463fa64f198b6568dc121dae41d44b1.js',
          }.freeze

          # Hash of base path => diff page filename for every page that differed.
          attr_reader :differing_pages

          # @param config [Object] application config; must respond to
          #   html_diff.directory, domains.production and domains.staging
          def initialize(config)
            @config = config
            @template = File.read "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/html_diff_template.erb"
            @diff_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            reset_html_diffs_dir
            @differing_pages = {}
          end

          # Diffs one page between production and staging. When the bodies
          # differ, writes a diff page and records it in differing_pages.
          #
          # @param base_path [String] path appended to each configured domain
          # @return [String, nil] the diff page filename, or nil when identical
          def diff(base_path)
            staging_html = get_normalized_html(staging_url(base_path))
            production_html = get_normalized_html(production_url(base_path))
            diffy = Diffy::Diff.new(production_html, staging_html, context: 3)
            return if diffy.diff.empty?

            write_diff_page(base_path, diffy.to_s(:html))
            @differing_pages[base_path] = html_diff_filename(base_path)
          end

          private

          # Ensures the diff directory exists (creating parents as needed) and
          # removes the files directly inside it.
          def reset_html_diffs_dir
            FileUtils.mkdir_p(@diff_dir)
            FileUtils.rm Dir.glob("#{@diff_dir}/*")
          end

          # Renders the template against this method's binding. The parameter
          # names base_path and diff_string are read by the template, so they
          # must not be renamed.
          def write_diff_page(base_path, diff_string)
            renderer = ERB.new(@template)
            File.open(html_diff_filename(base_path), "w") do |fp|
              fp.puts renderer.result(binding)
            end
          end

          # @return [String] absolute path of the diff page for base_path
          def html_diff_filename(base_path)
            "#{@diff_dir}/#{safe_filename(base_path)}.html"
          end

          # Turns "/a/b/" into "a.b" so the path can be used as a flat filename.
          def safe_filename(base_path)
            remove_starting_and_trailing_slash(base_path).tr('/', '.')
          end

          # Strips one leading and one trailing slash, anchored to the whole string.
          def remove_starting_and_trailing_slash(base_path)
            base_path.sub(%r{\A/}, '').sub(%r{/\z}, '')
          end

          # @return [String] the page's <body> markup with REPLACEMENTS applied
          def get_normalized_html(url)
            body_html = Nokogiri::HTML(fetch_html(url)).css('body').to_s
            REPLACEMENTS.reduce(body_html) do |html, (original, replacement)|
              html.gsub(original, replacement)
            end
          end

          # Runs curl with the URL as a discrete argument, so no shell ever
          # interprets characters such as "&", ";" or spaces inside the URL.
          #
          # @return [String] whatever curl wrote to stdout
          def fetch_html(url)
            IO.popen(['curl', '-s', url], &:read)
          end

          def production_url(base_path)
            "#{@config.domains.production}#{base_path}"
          end

          def staging_url(base_path)
            "#{@config.domains.staging}#{base_path}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Diffs every page listed in the govuk pages file and then writes a
        # gallery page into the configured HTML diff directory.
        class Runner
          def initialize
            @config = AppConfig.new
            @govuk_pages = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
            template_path = "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/gallery_template.erb"
            @gallery_template = File.read(template_path)
            @differ = Differ.new(@config)
          end

          # Runs the differ over each page, then builds the gallery.
          def run
            @govuk_pages.each { |page| @differ.diff(page) }
            create_gallery_page
          end

          private

          # Renders the gallery template against this method's binding, so the
          # instance variable and local names below are left untouched in case
          # the template reads them.
          def create_gallery_page
            @result_hash = @differ.differing_pages
            shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            Dir.mkdir(shots_dir) unless Dir.exist?(shots_dir)
            renderer = ERB.new(@gallery_template)
            File.open("#{shots_dir}/gallery.html", "w") { |fp| fp.puts renderer.result(binding) }
            display_browser_message(shots_dir)
          end

          # Prints the file:// location of the generated gallery.
          def display_browser_message(shots_dir)
            puts "View the gallery of HTML diffs in your browser:",
                 " file://#{shots_dir}/gallery.html"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Requests the production URL of every page in the govuk pages file and
      # prints how many pages fell under each response code.
      class LinkChecker
        def initialize
          @urls = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
          @config = AppConfig.new
          # Response code (or 'EX' for non-HTTP failures) => pages with that outcome.
          @results = Hash.new { |hash, key| hash[key] = [] }
          @num_links = 0
          @url_checker = UrlChecker.new(@config)
        end

        # Checks every page, then prints the summary.
        def run
          @urls.each { |u| make_request(u) }
          print_results
        end

        private

        # Prints the total number of pages and a per-outcome count.
        def print_results
          puts "Number of pages checked: #{@num_links}"
          puts " of which:"
          @results.each do |code, links|
            puts " #{links.size} responded with #{code}"
          end
        end

        # Requests one page and files it under its outcome.
        #
        # RestClient raises for HTTP error statuses; those are recorded under
        # their status code so the summary reflects what the server answered.
        # Any other failure is recorded under 'EX'.
        def make_request(url)
          @num_links += 1
          normalized_url = @url_checker.production_url(url)
          begin
            print "GET #{normalized_url} "
            response = RestClient.get(normalized_url.to_s)
            @results[response.code] << url
            puts "Response #{response.code}"
          rescue RestClient::ExceptionWithResponse => e
            @results[e.http_code] << url
            puts "Response #{e.http_code}"
          rescue => e
            puts "\nERROR GETTING #{normalized_url}"
            puts "#{e.class} ::: #{e.message}"
            @results['EX'] << url
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    module Pages
      # Looks up the formats and their pages, then writes the sorted page list
      # as YAML to the govuk pages file.
      class PageIndexer
        def initialize
          @pages = []
          settings_path = "#{Govuk::Diff::Pages.root_dir}/config/settings.yml"
          @config = AppConfig.new(settings_path)
        end

        # Rebuilds the page list and overwrites the govuk pages file with it.
        def run
          formats = FormatSearcher.new(@config).run
          searcher = PageSearcher.new(@config, formats)
          @pages = searcher.run
          File.open(Govuk::Diff::Pages.govuk_pages_file, 'w') { |fp| fp.puts @pages.sort.to_yaml }
        end

        private

        # Stores the result of the format search in @formats.
        def get_formats
          searcher = FormatSearcher.new(@config)
          @formats = searcher.run
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Queries the production search API once per format and collects the
      # first valid links of each result set.
      class PageSearcher
        # @param config [Object] must respond to verbose, domains.production
        #   and page_indexer.max_pages_per_format
        # @param formats [Array] formats to query, one API request each
        def initialize(config, formats)
          @config = config
          @formats = formats
          @pages = []
          @url_checker = UrlChecker.new(@config)
        end

        # Collects the top pages of every format into one flat list.
        #
        # Always returns an Array (empty when there are no formats), and each
        # call rebuilds the list rather than appending to the previous one.
        #
        # @return [Array] links across all formats, in format order
        def run
          @pages = @formats.flat_map do |format|
            puts "Getting top #{@config.page_indexer.max_pages_per_format} for format #{format}" if verbose?
            top_pages_for_format(format)
          end
        end

        def verbose?
          @config.verbose
        end

        private

        # Fetches and parses the search result set for one format.
        def top_pages_for_format(format)
          result_set = JSON.parse(result_set_for_format(format))
          extract_top_govuk_pages(result_set)
        end

        # Keeps links accepted by the URL checker, capped at
        # page_indexer.max_pages_per_format.
        #
        # @param result_set [Hash] parsed API response with a 'results' array
        #   whose entries carry a 'link' key
        # @raise [KeyError] when 'results' is missing
        def extract_top_govuk_pages(result_set)
          links = result_set.fetch('results').map { |result| result['link'] }
          valid_links = links.select { |link| @url_checker.valid?(link) }
          valid_links.first(@config.page_indexer.max_pages_per_format)
        end

        # Performs the search API request for one format.
        def result_set_for_format(format)
          url = "#{@config.domains.production}/api/search.json?filter_format=#{format}"
          RestClient.get(url)
        end
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'govuk/diff/pages'
|
2
|
+
|
3
|
+
namespace :diff do
  desc 'produce visual diffs'
  task visual: ['config:pre_flight_check'] do
    puts "---> Creating Visual Diffs"
    cmd = "wraith capture #{Govuk::Diff::Pages::WRAITH_CONFIG_FILE}"
    puts cmd
    system cmd
  end

  desc 'produce html diffs'
  task :html do
    Govuk::Diff::Pages::HtmlDiff::Runner.new.run
  end

  desc 'produce text diffs'
  task :text do
    # Drops the first ARGV entry (presumably the task name - confirm against
    # how the task is invoked); what remains is treated as YAML file paths.
    if ARGV.tap(&:shift).empty?
      abort "You must provide one or more YAML files containing the pages to diff"
    end

    left = ENV.fetch("LEFT", "www-origin.staging.publishing.service.gov.uk")
    right = ENV.fetch("RIGHT", "www-origin.publishing.service.gov.uk")

    require 'yaml'

    ARGV.each do |file|
      Govuk::Diff::Pages::TextDiff::Runner.new(
        pages: YAML.load_file(file),
        left_domain: left,
        right_domain: right
      ).run
    end
  end
end

namespace :config do
  desc "Checks that dependencies are in place"
  task :pre_flight_check do
    puts "Checking required packages installed."
    dependencies_present = true
    # Every binary is checked before exiting so all missing packages are reported.
    { imagemagick: 'convert', phantomjs: 'phantomjs' }.each do |package, binary|
      print "#{package}..... "
      result = %x[ which #{binary} ]
      if result.empty?
        puts "Not found"
        dependencies_present = false
      else
        puts "OK"
      end
    end
    unless dependencies_present
      puts "ERROR: A required dependency is not installed"
      exit 1
    end
  end

  desc 'merges settings.yml with govuk_pages.yml to produce merged config file for wraith'
  task :wraith do
    puts "---> Generating Wraith config"
    generator = Govuk::Diff::Pages::WraithConfigGenerator.new
    generator.run
    generator.save
  end

  desc 'update config files with list of pages to diff'
  task :update_page_list do
    puts "---> Updating page list"
    Govuk::Diff::Pages::PageIndexer.new.run
  end
end

namespace :shots do
  desc "clears the screen shots directory"
  task :clear do
    puts "---> Clearing shots directory"
    require 'fileutils'
    config = Govuk::Diff::Pages::AppConfig.new
    [config.wraith.directory, config.html_diff.directory].each do |directory|
      shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{directory}"
      # rm_rf tolerates a directory that does not exist, so one missing
      # directory no longer stops the remaining ones from being cleared.
      FileUtils.rm_rf shots_dir
    end
  end
end

desc 'Generate config files and run diffs'
task diff: ['config:update_page_list', 'config:wraith', 'diff:visual', 'diff:html']

desc 'checks all URLs are accessible'
task :check_urls do
  Govuk::Diff::Pages::LinkChecker.new.run
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Govuk
  module Diff
    module Pages
      # Namespace for the text diff components. Each constant is registered
      # for autoloading, so its file is only required on first reference.
      module TextDiff
        {
          Differ: 'govuk/diff/pages/text_diff/differ',
          Formatter: 'govuk/diff/pages/text_diff/formatter',
          Renderer: 'govuk/diff/pages/text_diff/renderer',
          Retriever: 'govuk/diff/pages/text_diff/retriever',
          Runner: 'govuk/diff/pages/text_diff/runner'
        }.each do |constant_name, feature|
          autoload constant_name, feature
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Govuk
  module Diff
    module Pages
      module TextDiff
        # Reports the outcome of a text diff run: prints 'OK!' when every
        # response is empty, otherwise aborts with the joined responses.
        class Renderer
          SEPARATOR = "\n\n".freeze

          # @param kernel [#abort] receiver of the abort call; defaults to Kernel
          def initialize(kernel = Kernel)
            @kernel = kernel
          end

          # @param responses [Enumerable] items responding to empty?
          def call(responses)
            has_differences = responses.any? { |response| !response.empty? }
            return @kernel.abort(responses.join(SEPARATOR)) if has_differences

            puts 'OK!'
          end
        end
      end
    end
  end
end
|