govuk-diff-pages 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +14 -0
- data/config/settings.yml +53 -0
- data/docs/screenshots/gallery.png +0 -0
- data/govuk-diff-pages.gemspec +30 -0
- data/lib/govuk/diff/pages.rb +30 -0
- data/lib/govuk/diff/pages/app_config.rb +61 -0
- data/lib/govuk/diff/pages/format_searcher.rb +37 -0
- data/lib/govuk/diff/pages/html_diff.rb +10 -0
- data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
- data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
- data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
- data/lib/govuk/diff/pages/link_checker.rb +47 -0
- data/lib/govuk/diff/pages/page_indexer.rb +27 -0
- data/lib/govuk/diff/pages/page_searcher.rb +46 -0
- data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
- data/lib/govuk/diff/pages/text_diff.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
- data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
- data/lib/govuk/diff/pages/url_checker.rb +36 -0
- data/lib/govuk/diff/pages/version.rb +7 -0
- data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
- metadata +189 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7461d5624d6438b64b89ee14db3a51a49c6789d
|
4
|
+
data.tar.gz: 5575a5c4d1a74062a4d50c72dd6d29b2e2e05293
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9363e7d7ef669768b36d6acea6c8a2301c3dbe0d32dfc51d83f872a803d0a66003e28ceeb13759a4ac07dba798dcc58dece2a0c394ed933a8d7c1e59e6267e41
|
7
|
+
data.tar.gz: abfc0131220eb9ad866edfd83f2b36e7ea958f86cbc4608be01483a9d7c2471ec46a4971c5a8e6d9057c3cdd40b725f2887c056688aad565a55c2e1434227fc3
|
data/.gitignore
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# See https://help.github.com/articles/ignoring-files for more about ignoring files.
|
2
|
+
#
|
3
|
+
# If you find yourself ignoring temporary files generated by your text editor
|
4
|
+
# or operating system, you probably want to add a global ignore instead:
|
5
|
+
# git config --global core.excludesfile '~/.gitignore_global'
|
6
|
+
|
7
|
+
# Ignore bundler config.
|
8
|
+
/.bundle
|
9
|
+
|
10
|
+
# Ignore the default SQLite database.
|
11
|
+
/db/*.sqlite3
|
12
|
+
/db/*.sqlite3-journal
|
13
|
+
|
14
|
+
# Ignore all logfiles and tempfiles.
|
15
|
+
/log/*.log
|
16
|
+
/tmp
|
17
|
+
|
18
|
+
/coverage
|
19
|
+
/spec/reports
|
20
|
+
|
21
|
+
|
22
|
+
# OS-generated files
|
23
|
+
.DS_Store
|
24
|
+
|
25
|
+
# shots generated
|
26
|
+
/shots
|
27
|
+
|
28
|
+
# generated config files
|
29
|
+
config/govuk_pages.yml
|
30
|
+
config/wraith.yaml
|
31
|
+
|
32
|
+
# This is a gem
|
33
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.3
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Ben Lovell
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# govuk-diff-pages
|
2
|
+
|
3
|
+
This app provides a rake task to produce visual diffs as screenshots, HTML
|
4
|
+
diffs and textual diffs of the production GOVUK website as compared with
|
5
|
+
staging. Viewable as browser pages or directly in the terminal. It looks at the
|
6
|
+
10 most popular pages (this is configurable) of each document format.
|
7
|
+
|
8
|
+
## Screenshots
|
9
|
+
|
10
|
+

|
12
|
+
|
13
|
+
|
14
|
+
## Technical documentation for visual and HTML diffs
|
15
|
+
|
16
|
+
It uses a fork of the BBC's wraith gem (The fork is at
|
17
|
+
https://github.com/alphagov/wraith, the main gem at
|
18
|
+
https://github.com/BBC-News/wraith). The fork adds two extra configuration
|
19
|
+
variables, allowing the user to specify the number of threads to use, and the
|
20
|
+
maximum timeout when loading pages. The output is written to an html file
|
21
|
+
which can be viewed in a browser.
|
22
|
+
|
23
|
+
When `bundle exec rake diff` is run, a list of all the document formats on
|
24
|
+
govuk is obtained using the search api, and then the top n pages for each
|
25
|
+
format (n being a configuration variable). Diffs are produced for each of
|
26
|
+
these pages.
|
27
|
+
|
28
|
+
|
29
|
+
### Dependencies
|
30
|
+
|
31
|
+
- [ImageMagick](http://www.imagemagick.org/script/index.php)
|
32
|
+
- [phantomjs](http://phantomjs.org/) - preferably 1.9 rather than 2.0
|
33
|
+
|
34
|
+
|
35
|
+
## How to run
|
36
|
+
|
37
|
+
### Running the application locally
|
38
|
+
|
39
|
+
bundle exec rake diff
|
40
|
+
|
41
|
+
### Checking plain-text diffs
|
42
|
+
|
43
|
+
bundle exec rake diff:text pages.yml
|
44
|
+
|
45
|
+
Where `pages.yml` is a YAML array of paths to compare. For example:
|
46
|
+
|
47
|
+
- government/organisations/prime-ministers-office-10-downing-street
|
48
|
+
- government/topical-events/budget-2016
|
49
|
+
- topic/competition/regulatory-appeals-references
|
50
|
+
|
51
|
+
Text diffs can also specify the domains to compare using the `LEFT` and `RIGHT`
|
52
|
+
environment variables. Defaulting to our `www-origin.staging` and
|
53
|
+
`www-origin.publishing` domains respectively.
|
54
|
+
|
55
|
+
Plain-text diffing can be parallelised by starting multiple processes with
|
56
|
+
individual page files.
|
57
|
+
|
58
|
+
### Using the gem from an existing project
|
59
|
+
|
60
|
+
# Gemfile
|
61
|
+
gem 'govuk-diff-pages'
|
62
|
+
|
63
|
+
# Rakefile
|
64
|
+
load 'govuk/diff/pages/tasks/rakefile.rake'
|
65
|
+
|
66
|
+
# Shell
|
67
|
+
bundle exec rake -T
|
68
|
+
|
69
|
+
### Running the test suite
|
70
|
+
|
71
|
+
bundle exec rake
|
72
|
+
|
73
|
+
## Licence
|
74
|
+
|
75
|
+
[MIT License](LICENSE.txt)
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "rspec/core/rake_task"
require "gem_publisher"

# Pull in the diff tasks that ship with the gem.
load File.dirname(__FILE__) + "/lib/govuk/diff/pages/tasks/rakefile.rake"

# Define the :spec task exactly once. Rake appends an action each time a task
# with an existing name is defined again, so a second
# RSpec::Core::RakeTask.new(:spec) would register the spec run twice.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# Publishes the gem described by govuk-diff-pages.gemspec via GemPublisher and
# reports the published gem when GemPublisher returns one.
task :publish_gem do
  gem = GemPublisher.publish_if_updated("govuk-diff-pages.gemspec", :rubygems)
  puts "Published #{gem}" if gem
end
|
data/config/settings.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
# base url for production and staging domains - WITHOUT TRAILING slash
|
3
|
+
domains:
|
4
|
+
production: https://www-origin.publishing.service.gov.uk
|
5
|
+
staging: https://www-origin.staging.publishing.service.gov.uk
|
6
|
+
|
7
|
+
# Enter a list of pages that you want to diff, but are not in the list of pages returned by the
|
8
|
+
# page indexer
|
9
|
+
hard_coded_pages:
|
10
|
+
|
11
|
+
verbose: true
|
12
|
+
|
13
|
+
html_diff:
|
14
|
+
directory: "shots/html"
|
15
|
+
|
16
|
+
page_indexer:
|
17
|
+
# The maximum number of formats to search for.
|
18
|
+
# There are fewer than 100, so a value of 100 will ensure we capture them all
|
19
|
+
# max_formats: 100 # use this value in production
|
20
|
+
max_formats: 6 # use this value for a quicker test
|
21
|
+
|
22
|
+
# The maximum number of pages for each format - we will capture the <n> most popular pages
|
23
|
+
# max_pages_per_format: 10 # use this value in production
|
24
|
+
max_pages_per_format: 3 # use this value for a quicker test
|
25
|
+
|
26
|
+
# Configuration options for wraith
|
27
|
+
wraith:
|
28
|
+
# Headless browser option
|
29
|
+
browser:
|
30
|
+
phantomjs: phantomjs
|
31
|
+
|
32
|
+
# name of the directory where the screenshots will be stored
|
33
|
+
directory: "shots/visual"
|
34
|
+
|
35
|
+
# Screen widths
|
36
|
+
screen_widths:
|
37
|
+
- 1024
|
38
|
+
|
39
|
+
# Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
|
40
|
+
# alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
|
41
|
+
# diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
|
42
|
+
# diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
|
43
|
+
mode: diffs_only
|
44
|
+
|
45
|
+
# Amount of fuzz ImageMagick will use
|
46
|
+
fuzz: "5%"
|
47
|
+
threshold: 5
|
48
|
+
|
49
|
+
# number of threads to use (reduce this number if there are timeouts getting page captures)
|
50
|
+
num_threads: 4
|
51
|
+
|
52
|
+
# timeout in milliseconds for browser to wait for pages to load
|
53
|
+
timeout_ms: 2000
|
Binary file
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for govuk-diff-pages.

# Make sure this checkout's lib/ directory is on the load path so the version
# constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'govuk/diff/pages/version'

Gem::Specification.new do |spec|
  spec.name        = "govuk-diff-pages"
  spec.version     = Govuk::Diff::Pages::VERSION
  spec.authors     = ["Ben Lovell"]
  spec.email       = ["benjamin.lovell@gmail.com"]

  spec.summary     = "Visual and textual page diffing."
  spec.description = "Diffs web pages both visually and textually."
  spec.homepage    = "https://github.com/alphagov/govuk-diff-pages"
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Executables are the basenames of any packaged files under exe/.
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "diffy", "~> 3.1"
  spec.add_dependency "rest-client", "~> 1.8"
  spec.add_dependency "nokogiri"

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "gem_publisher", "~> 1.1"
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "govuk/diff/pages/version"
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    # Top-level namespace of the gem: registers lazy loading for its
    # components and exposes the locations of the generated config files.
    module Pages
      # Constant name => file to require the first time the constant is used.
      {
        AppConfig: 'govuk/diff/pages/app_config',
        FormatSearcher: 'govuk/diff/pages/format_searcher',
        LinkChecker: 'govuk/diff/pages/link_checker',
        PageIndexer: 'govuk/diff/pages/page_indexer',
        PageSearcher: 'govuk/diff/pages/page_searcher',
        UrlChecker: 'govuk/diff/pages/url_checker',
        WraithConfigGenerator: 'govuk/diff/pages/wraith_config_generator',
        HtmlDiff: 'govuk/diff/pages/html_diff',
        TextDiff: 'govuk/diff/pages/text_diff'
      }.each do |const_name, feature|
        autoload const_name, feature
      end

      class << self
        # Parent of the directory that contains this file.
        def root_dir
          File.dirname(__dir__)
        end

        # Absolute path of config/govuk_pages.yml, two levels above root_dir.
        def govuk_pages_file
          File.expand_path('../../config/govuk_pages.yml', root_dir)
        end

        # Absolute path of config/wraith.yaml, two levels above root_dir.
        def wraith_config_file
          File.expand_path('../../config/wraith.yaml', root_dir)
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Read-only view over a (possibly nested) configuration hash.
      #
      # Each top-level key is readable as a method (config.verbose); nested
      # hashes are wrapped in their own AppConfig (config.wraith.fuzz).
      # Reading a key that does not exist raises MissingKeyError.
      class AppConfig
        class MissingKeyError < RuntimeError; end

        # Stored in place of nil so that "key present but null" can be told
        # apart from "key absent", which is what a nil lookup result means.
        NIL_VALUE = :nil_value

        # param can be one of the following:
        # * a file path - the config will be loaded from the specified yaml file
        # * a Hash - the config will be loaded directly from the hash,
        # * nil - the config will be loaded from the default yaml file (config/settings.yml)
        #
        def initialize(path_or_hash = nil)
          hash =
            if path_or_hash.is_a?(Hash)
              path_or_hash
            else
              # An empty YAML document does not load as a Hash; treat it as
              # "no settings" rather than failing inside populate_config.
              YAML.load_file(path_or_hash || default_settings_path) || {}
            end
          @config = populate_config(hash)
        end

        # Returns the value stored under the key named after the called method.
        #
        # Raises MissingKeyError when no such key exists. A key whose
        # configured value is null returns nil.
        def method_missing(method, *_params)
          result = @config.public_send(method)
          raise MissingKeyError.new "No such config key '#{method}'" if result.nil?
          result == NIL_VALUE ? nil : result
        end

        # Keeps respond_to? consistent with method_missing: true for every
        # stored key (including keys whose value is null), false otherwise.
        def respond_to_missing?(method, include_private = false)
          !@config[method].nil? || super
        end

        # Returns the configuration as a plain Hash with String keys; nested
        # AppConfig values are converted recursively and null values are
        # restored to nil.
        def to_h
          @config.to_h.each_with_object({}) do |(key, value), result|
            result[key.to_s] =
              if value == NIL_VALUE
                nil
              elsif value.is_a?(AppConfig)
                value.to_h
              else
                value
              end
          end
        end

      private

        # Location of the default settings file. Pages.root_dir is the parent
        # of the directory holding pages.rb, so config/ is reached with
        # "../../" exactly as Pages.govuk_pages_file and
        # Pages.wraith_config_file do.
        def default_settings_path
          File.expand_path('../../config/settings.yml', Govuk::Diff::Pages.root_dir)
        end

        # Builds the backing OpenStruct: nested hashes become AppConfig
        # instances and nil values are replaced by the NIL_VALUE sentinel.
        def populate_config(hash)
          hash.each_with_object(OpenStruct.new) do |(key, value), config|
            config[key] =
              case value
              when Hash then AppConfig.new(value)
              when nil  then NIL_VALUE
              else value
              end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Govuk
  module Diff
    module Pages
      # Asks the search API of the configured production domain for its
      # format facet and returns the format slugs found in the response.
      class FormatSearcher
        # config must answer #verbose, #domains.production and
        # #page_indexer.max_formats.
        def initialize(config)
          @config = config
        end

        # Fetches the facets and returns an Array of format slugs.
        def run
          puts "Getting list of formats in GOVUK" if verbose?
          extract_formats(get_facets)
        end

        # Whether progress messages should be printed.
        def verbose?
          @config.verbose
        end

      private

        # Walks facets -> format -> options in the parsed JSON (raising
        # KeyError if any level is absent) and collects each option's slug.
        def extract_formats(response)
          format_options = %w(facets format options).reduce(JSON.parse(response)) do |node, key|
            node.fetch(key)
          end
          format_options.map { |option| option['value']['slug'] }
        end

        # Performs the search request, passing max_formats as facet_format.
        def get_facets
          base_url = @config.domains.production
          max_formats = @config.page_indexer.max_formats
          RestClient.get("#{base_url}/api/search.json?facet_format=#{max_formats}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Gallery of HTML Diffs</title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <%# Reads @result_hash (base path => diff file name, one entry per listed page) and @govuk_pages (all pages compared). %>
    <div class="container">

      <h1>List of HTML Diffs</h1>
      <h2><%= @result_hash.size %> pages out of <%= @govuk_pages.size %> compared have differences</h2>

      <div class="row">
        <div class="col-md-12">
          <table>
            <% @result_hash.each do |base_path, filename| %>
              <tr>
                <td><%= base_path %></td>
                <td><a href='file://<%= filename %>' target='_blank'>HTML Diff</a></td>
              </tr>
            <% end %>
          </table>
        </div>
      </div>
    </div>
  </body>
</html>
|
@@ -0,0 +1,59 @@
|
|
1
|
+
/* Styles for the HTML diff pages. */

.diff {
  overflow: auto;
}

/* The diff is a list laid out as a table: one row per line. */
.diff ul {
  background: #fff;
  display: table;
  font-size: 13px;
  list-style: none;
  margin: 0;
  overflow: auto;
  padding: 0;
  width: 100%;
}

.diff del, .diff ins {
  display: block;
  text-decoration: none;
}

.diff li {
  display: table-row;
  height: 1em;
  margin: 0;
  padding: 0;
}

.example {
  padding: 10px;
}

/* .left shares its colour with deleted rows; .right is a darker shade of the inserted-row colour. */
.left {
  background: #ddefff;
}

.right {
  background: #af8ed0;
}

.diff li.ins {
  background: #d8b6fa;
}

.diff li.del {
  background: #ddefff;
}

/* Generic monospace fallback keeps diff columns aligned when Courier is not installed. */
.diff del, .diff ins, .diff span {
  font-family: courier, monospace;
  white-space: pre-wrap;
}

.diff li.diff-comment {
  display: none;
}

.diff li.diff-block-info {
  background: none repeat scroll 0 0 gray;
}
|