govuk-diff-pages 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +14 -0
- data/config/settings.yml +53 -0
- data/docs/screenshots/gallery.png +0 -0
- data/govuk-diff-pages.gemspec +30 -0
- data/lib/govuk/diff/pages.rb +30 -0
- data/lib/govuk/diff/pages/app_config.rb +61 -0
- data/lib/govuk/diff/pages/format_searcher.rb +37 -0
- data/lib/govuk/diff/pages/html_diff.rb +10 -0
- data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
- data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
- data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
- data/lib/govuk/diff/pages/link_checker.rb +47 -0
- data/lib/govuk/diff/pages/page_indexer.rb +27 -0
- data/lib/govuk/diff/pages/page_searcher.rb +46 -0
- data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
- data/lib/govuk/diff/pages/text_diff.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
- data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
- data/lib/govuk/diff/pages/url_checker.rb +36 -0
- data/lib/govuk/diff/pages/version.rb +7 -0
- data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
- metadata +189 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
<!DOCTYPE html>
<html lang="en">
  <head>
    <%#
      Report page for a single HTML diff.

      Rendered through ERB with a binding that must provide:
        base_path              - path of the page being compared
        diff_string            - ready-made HTML for the diff itself
        production_url(path)   - full production URL for a path
        staging_url(path)      - full staging URL for a path
    %>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>HTML diff for <%= ERB::Util.html_escape(base_path) %></title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <%# NOTE(review): this path is relative to wherever the report is written - confirm it still resolves to the stylesheet. %>
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <div class="container">

      <h1>HTML diff for <%= ERB::Util.html_escape(base_path) %></h1>

      <div class="row">
        <div class="col-md-6">
          <%# target='_blank' opens a new tab; the bare word 'blank' would only name a shared window. %>
          <h2 class='example left'>HTML on production (<a href='<%= ERB::Util.html_escape(production_url(base_path)) %>' target='_blank' rel='noopener noreferrer'>visit page</a>)</h2>
        </div>
        <div class="col-md-6">
          <h2 class='example right'>HTML on staging (<a href='<%= ERB::Util.html_escape(staging_url(base_path)) %>' target='_blank' rel='noopener noreferrer'>visit page</a>)</h2>
        </div>
      </div>

      <hr/>

      <%# diff_string is already HTML markup, so it is deliberately not escaped. %>
      <%= diff_string %>
    </div>
  </body>
</html>
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'diffy'
require 'erb'
require 'fileutils'
require 'nokogiri'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Compares the <body> HTML of a page on production with the same page
        # on staging and writes an HTML report for every page that differs.
        #
        # Environment-specific host names are rewritten first (see
        # REPLACEMENTS) so that they do not show up as differences.
        class Differ
          # Substitutions applied to both documents before diffing, in the
          # order listed. Order matters: the staging asset host is rewritten
          # before the fingerprinted application.js pattern is matched.
          REPLACEMENTS = {
            'https://www-origin.staging.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://www-origin.publishing.service.gov.uk' => 'https://www.gov.uk',
            'https://assets-origin.staging.publishing.service.gov.uk' => 'https://assets.digital.cabinet-office.gov.uk',
            /https:\/\/assets\.digital\.cabinet-office\.gov\.uk\/specialist-frontend\/application-[0-9a-f]{32}\.js/ => 'https://assets.digital.cabinet-office.gov.uk/specialist-frontend/application-7463fa64f198b6568dc121dae41d44b1.js',
          }.freeze

          # Hash of base path => report file path, one entry per page whose
          # production and staging HTML differed.
          attr_reader :differing_pages

          # Reads the report template and empties the report directory.
          #
          # @param config [Object] must respond to +html_diff.directory+,
          #   +domains.production+ and +domains.staging+
          def initialize(config)
            @config = config
            @template = File.read "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/html_diff_template.erb"
            @diff_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            reset_html_diffs_dir
            @differing_pages = {}
          end

          # Fetches +base_path+ from staging and production and, when the
          # normalized bodies differ, writes a report and records it in
          # #differing_pages.
          #
          # @param base_path [String] path of the page, e.g. "/vat-rates"
          # @return [String, nil] the report file path, or nil when the pages match
          def diff(base_path)
            staging_html = get_normalized_html(staging_url(base_path))
            production_html = get_normalized_html(production_url(base_path))
            diffy = Diffy::Diff.new(production_html, staging_html, context: 3)
            return if diffy.diff == ""

            write_diff_page(base_path, diffy.to_s(:html))
            @differing_pages[base_path] = html_diff_filename(base_path)
          end

          private

          # Ensures the report directory exists and removes the files left in
          # it by a previous run.
          def reset_html_diffs_dir
            # mkdir_p also creates missing parents, so a nested directory
            # setting does not fail on a fresh checkout.
            FileUtils.mkdir_p(@diff_dir)
            # Only plain files are deleted: FileUtils.rm raises on directories.
            stale_files = Dir.glob("#{@diff_dir}/*").select { |path| File.file?(path) }
            FileUtils.rm(stale_files)
          end

          # Renders the report template for one page. The template reads
          # +base_path+ and +diff_string+ and calls #production_url and
          # #staging_url through the binding, so those names must not change.
          def write_diff_page(base_path, diff_string)
            renderer = ERB.new(@template)
            File.open(html_diff_filename(base_path), "w") do |fp|
              fp.puts renderer.result(binding)
            end
          end

          # Absolute path of the report file for +base_path+.
          def html_diff_filename(base_path)
            "#{@diff_dir}/#{safe_filename(base_path)}.html"
          end

          # Flattens a URL path into a single file name: "/a/b/" => "a.b".
          def safe_filename(base_path)
            remove_starting_and_trailing_slash(base_path).tr('/', '.')
          end

          # Strips one leading and one trailing slash. \A and \z anchor to the
          # whole string; ^ and $ would also match at embedded line breaks.
          def remove_starting_and_trailing_slash(base_path)
            base_path.sub(/\A\//, '').sub(/\/\z/, '')
          end

          # Returns the <body> markup served at +url+ with REPLACEMENTS applied.
          def get_normalized_html(url)
            apply_replacements(Nokogiri::HTML(fetch_html(url)).css('body').to_s)
          end

          # Applies every REPLACEMENTS entry, in order, and returns the result.
          def apply_replacements(html)
            REPLACEMENTS.reduce(html) do |text, (original, replacement)|
              text.gsub(original, replacement)
            end
          end

          # Downloads +url+ with curl and returns the response body.
          #
          # The command is passed as an argument list, so no shell is involved
          # and characters such as "&" or ";" in a URL are not interpreted.
          def fetch_html(url)
            IO.popen(['curl', '-s', url], &:read)
          end

          def production_url(base_path)
            "#{@config.domains.production}#{base_path}"
          end

          def staging_url(base_path)
            "#{@config.domains.staging}#{base_path}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    module Pages
      module HtmlDiff
        # Runs the HTML differ over every page in the pages file and then
        # writes a gallery page for the results.
        class Runner
          # Loads configuration, the page list and the gallery template, then
          # builds the Differ that #run drives.
          def initialize
            @config = AppConfig.new
            @govuk_pages = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
            template_path = "#{Govuk::Diff::Pages.root_dir}/diff/pages/html_diff/assets/gallery_template.erb"
            @gallery_template = File.read(template_path)
            @differ = Differ.new(@config)
          end

          # Diffs each page in turn, then produces the gallery.
          def run
            @govuk_pages.each { |page| @differ.diff(page) }
            create_gallery_page
          end

          private

          # Renders the gallery template into gallery.html inside the HTML diff
          # directory. The template is evaluated with this method's binding, so
          # @result_hash and the local names below are left untouched.
          # NOTE(review): the gallery template is not visible here - confirm
          # which of these names it actually reads.
          def create_gallery_page
            @result_hash = @differ.differing_pages
            shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{@config.html_diff.directory}"
            Dir.exist?(shots_dir) || Dir.mkdir(shots_dir)
            renderer = ERB.new(@gallery_template)
            File.open("#{shots_dir}/gallery.html", "w") { |fp| fp.puts renderer.result(binding) }
            display_browser_message(shots_dir)
          end

          # Prints the file:// location of the generated gallery.
          def display_browser_message(shots_dir)
            puts "View the gallery of HTML diffs in your browser:",
                 " file://#{shots_dir}/gallery.html"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'rest-client'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Requests every page in the pages file from production and prints a
      # summary of how many pages returned each HTTP status.
      class LinkChecker
        # Bucket used for requests that failed without any HTTP status
        # (connection errors and similar).
        NO_RESPONSE = 'EX'.freeze

        def initialize
          @urls = YAML.load_file(Govuk::Diff::Pages.govuk_pages_file)
          @config = AppConfig.new
          # status (Integer, or NO_RESPONSE) => list of paths with that outcome
          @results = Hash.new { |hash, key| hash[key] = [] }
          @num_links = 0
          @url_checker = UrlChecker.new(@config)
        end

        # Checks every URL, then prints the summary.
        def run
          @urls.each { |u| make_request(u) }
          print_results
        end

        private

        def print_results
          puts "Number of pages checked: #{@num_links}"
          puts " of which:"
          @results.each do |code, links|
            puts " #{links.size} responded with #{code}"
          end
        end

        # Requests one page and files it under the status it returned.
        #
        # rest-client raises for non-success statuses; those exceptions carry
        # the status, so such pages are counted under their real code instead
        # of being lumped together with genuine failures.
        def make_request(url)
          @num_links += 1
          normalized_url = @url_checker.production_url(url)
          begin
            print "GET #{normalized_url} "
            response = RestClient.get(normalized_url)
            record_response(response.code, url)
          rescue RestClient::ExceptionWithResponse => e
            if e.http_code
              record_response(e.http_code, url)
            else
              record_failure(e, normalized_url, url)
            end
          rescue => e
            record_failure(e, normalized_url, url)
          end
        end

        # Files +url+ under the HTTP status +code+ and reports it.
        def record_response(code, url)
          @results[code] << url
          puts "Response #{code}"
        end

        # Files +url+ under NO_RESPONSE and reports the exception.
        def record_failure(error, normalized_url, url)
          puts "\nERROR GETTING #{normalized_url}"
          puts "#{error.class} ::: #{error.message}"
          @results[NO_RESPONSE] << url
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    module Pages
      # Builds the list of pages to diff and stores it, sorted, as YAML in
      # the pages file.
      class PageIndexer
        def initialize
          @pages = []
          settings_path = "#{Govuk::Diff::Pages.root_dir}/config/settings.yml"
          @config = AppConfig.new(settings_path)
        end

        # Looks up the formats, collects the pages for them and overwrites
        # the pages file with the sorted result.
        def run
          available_formats = FormatSearcher.new(@config).run
          @pages = PageSearcher.new(@config, available_formats).run
          File.open(Govuk::Diff::Pages.govuk_pages_file, 'w') { |file| file.puts(@pages.sort.to_yaml) }
        end

        private

        # Stores the formats reported by FormatSearcher in @formats.
        # Not called from within this class.
        def get_formats
          searcher = FormatSearcher.new(@config)
          @formats = searcher.run
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Collects, for each format, the first valid links returned by the
      # production search API.
      class PageSearcher
        # @param config [Object] must respond to +verbose+,
        #   +page_indexer.max_pages_per_format+ and +domains.production+
        # @param formats [Array<String>] formats to query
        def initialize(config, formats)
          @config = config
          @formats = formats
          @pages = []
          @url_checker = UrlChecker.new(@config)
        end

        # Queries every format and returns all links gathered so far.
        #
        # Always returns an Array. The earlier `@pages.flatten!` returned nil
        # when there was nothing to flatten (an empty format list), which
        # broke callers that sort the result.
        #
        # @return [Array<String>]
        def run
          @formats.each do |format|
            puts "Getting top #{@config.page_indexer.max_pages_per_format} for format #{format}" if verbose?
            @pages.concat(top_pages_for_format(format))
          end
          @pages
        end

        def verbose?
          @config.verbose
        end

        private

        # Fetches and parses the search results for one format.
        def top_pages_for_format(format)
          result_set = JSON.parse(result_set_for_format(format))
          extract_top_govuk_pages(result_set)
        end

        # Picks the 'link' of every result, keeps those the URL checker
        # accepts and returns at most max_pages_per_format of them.
        def extract_top_govuk_pages(result_set)
          links = result_set.fetch('results').map { |result| result['link'] }
          valid_links = links.select { |link| @url_checker.valid?(link) }
          valid_links.slice(0, @config.page_indexer.max_pages_per_format)
        end

        # Raw search API response for +format+.
        def result_set_for_format(format)
          url = "#{@config.domains.production}/api/search.json?filter_format=#{format}"
          RestClient.get(url)
        end
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'govuk/diff/pages'
|
2
|
+
|
3
|
+
# Tasks that produce the diffs themselves.
namespace :diff do
  desc 'produce visual diffs'
  task visual: ['config:pre_flight_check'] do
    puts "---> Creating Visual Diffs"
    wraith_command = "wraith capture #{Govuk::Diff::Pages::WRAITH_CONFIG_FILE}"
    puts wraith_command
    system wraith_command
  end

  desc 'produce html diffs'
  task :html do
    html_runner = Govuk::Diff::Pages::HtmlDiff::Runner.new
    html_runner.run
  end

  desc 'produce text diffs'
  task :text do
    # Drop the first ARGV entry; whatever remains is treated as the list of
    # YAML files to process.
    ARGV.shift
    abort "You must provide one or more YAML files containing the pages to diff" if ARGV.empty?

    left_domain = ENV.fetch("LEFT", "www-origin.staging.publishing.service.gov.uk")
    right_domain = ENV.fetch("RIGHT", "www-origin.publishing.service.gov.uk")

    require 'yaml'

    ARGV.each do |yaml_file|
      text_runner = Govuk::Diff::Pages::TextDiff::Runner.new(
        pages: YAML.load_file(yaml_file),
        left_domain: left_domain,
        right_domain: right_domain
      )
      text_runner.run
    end
  end
end
|
37
|
+
|
38
|
+
# Tasks that check the environment and generate configuration files.
namespace :config do
  desc "Checks that dependencies are in place"
  task :pre_flight_check do
    puts "Checking required packages installed."
    required_binaries = { imagemagick: 'convert', phantomjs: 'phantomjs' }

    # One status line is printed per package; the overall verdict comes after.
    found_flags = required_binaries.map do |package, binary|
      print "#{package}..... "
      located = !%x[ which #{binary} ].empty?
      puts(located ? "OK" : "Not found")
      located
    end

    unless found_flags.all?
      puts "ERROR: A required dependency is not installed"
      exit 1
    end
  end

  desc 'merges settings.yml with govuk_pages.yml to produce merged config file for wraith'
  task :wraith do
    puts "---> Generating Wraith config"
    Govuk::Diff::Pages::WraithConfigGenerator.new.tap do |wraith_config|
      wraith_config.run
      wraith_config.save
    end
  end

  desc 'update config files with list of pages to diff'
  task :update_page_list do
    puts "---> Updating page list"
    indexer = Govuk::Diff::Pages::PageIndexer.new
    indexer.run
  end
end
|
73
|
+
|
74
|
+
namespace :shots do
  desc "clears the screen shots directory"
  task :clear do
    puts "---> Clearing shots directory"
    require 'fileutils'
    config = Govuk::Diff::Pages::AppConfig.new
    [config.wraith.directory, config.html_diff.directory].each do |directory|
      shots_dir = "#{Govuk::Diff::Pages.root_dir}/#{directory}"
      # rm_rf is a no-op for a directory that does not exist, so clearing an
      # already-clean checkout succeeds; remove_dir raised Errno::ENOENT.
      FileUtils.rm_rf shots_dir
    end
  end
end
|
86
|
+
|
87
|
+
# Full pipeline: refresh the page list, build the wraith config, then run
# the visual and HTML diffs in that order.
desc 'Generate config files and run diffs'
task diff: %w[config:update_page_list config:wraith diff:visual diff:html]

desc 'checks all URLs are accessible'
task :check_urls do
  link_checker = Govuk::Diff::Pages::LinkChecker.new
  link_checker.run
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Govuk
  module Diff
    module Pages
      # Namespace for the text diff components. Each constant is registered
      # for autoloading, so its file is only required on first reference.
      module TextDiff
        %w[Differ Formatter Renderer Retriever Runner].each do |component|
          autoload component.to_sym, "govuk/diff/pages/text_diff/#{component.downcase}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Govuk
  module Diff
    module Pages
      module TextDiff
        # Reports the outcome of a text diff run: prints "OK!" when every
        # response is empty, otherwise aborts with all responses joined.
        class Renderer
          # Text placed between individual responses in the abort message.
          SEPARATOR = "\n\n".freeze

          # @param kernel [#abort] receiver of the abort call; defaults to Kernel
          def initialize(kernel = Kernel)
            @kernel = kernel
          end

          # @param responses [Array<#empty?>] one entry per compared page
          # @return whatever the kernel's abort returns, or nil after printing "OK!"
          def call(responses)
            return @kernel.abort(responses.join(SEPARATOR)) unless responses.all?(&:empty?)

            puts 'OK!'
          end
        end
      end
    end
  end
end
|