govuk-diff-pages 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +75 -0
- data/Rakefile +14 -0
- data/config/settings.yml +53 -0
- data/docs/screenshots/gallery.png +0 -0
- data/govuk-diff-pages.gemspec +30 -0
- data/lib/govuk/diff/pages.rb +30 -0
- data/lib/govuk/diff/pages/app_config.rb +61 -0
- data/lib/govuk/diff/pages/format_searcher.rb +37 -0
- data/lib/govuk/diff/pages/html_diff.rb +10 -0
- data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
- data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
- data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
- data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
- data/lib/govuk/diff/pages/link_checker.rb +47 -0
- data/lib/govuk/diff/pages/page_indexer.rb +27 -0
- data/lib/govuk/diff/pages/page_searcher.rb +46 -0
- data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
- data/lib/govuk/diff/pages/text_diff.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
- data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
- data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
- data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
- data/lib/govuk/diff/pages/url_checker.rb +36 -0
- data/lib/govuk/diff/pages/version.rb +7 -0
- data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
- metadata +189 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7461d5624d6438b64b89ee14db3a51a49c6789d
|
4
|
+
data.tar.gz: 5575a5c4d1a74062a4d50c72dd6d29b2e2e05293
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9363e7d7ef669768b36d6acea6c8a2301c3dbe0d32dfc51d83f872a803d0a66003e28ceeb13759a4ac07dba798dcc58dece2a0c394ed933a8d7c1e59e6267e41
|
7
|
+
data.tar.gz: abfc0131220eb9ad866edfd83f2b36e7ea958f86cbc4608be01483a9d7c2471ec46a4971c5a8e6d9057c3cdd40b725f2887c056688aad565a55c2e1434227fc3
|
data/.gitignore
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# See https://help.github.com/articles/ignoring-files for more about ignoring files.
|
2
|
+
#
|
3
|
+
# If you find yourself ignoring temporary files generated by your text editor
|
4
|
+
# or operating system, you probably want to add a global ignore instead:
|
5
|
+
# git config --global core.excludesfile '~/.gitignore_global'
|
6
|
+
|
7
|
+
# Ignore bundler config.
|
8
|
+
/.bundle
|
9
|
+
|
10
|
+
# Ignore the default SQLite database.
|
11
|
+
/db/*.sqlite3
|
12
|
+
/db/*.sqlite3-journal
|
13
|
+
|
14
|
+
# Ignore all logfiles and tempfiles.
|
15
|
+
/log/*.log
|
16
|
+
/tmp
|
17
|
+
|
18
|
+
/coverage
|
19
|
+
/spec/reports
|
20
|
+
|
21
|
+
|
22
|
+
# OS-generated files
|
23
|
+
.DS_Store
|
24
|
+
|
25
|
+
# shots generated
|
26
|
+
/shots
|
27
|
+
|
28
|
+
# generated config files
|
29
|
+
config/govuk_pages.yml
|
30
|
+
config/wraith.yaml
|
31
|
+
|
32
|
+
# This is a gem
|
33
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.3
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Ben Lovell
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# govuk-diff-pages
|
2
|
+
|
3
|
+
This app provides a rake task to produce visual diffs as screenshots, HTML
|
4
|
+
diffs and textual diffs of the production GOVUK website as compared with
|
5
|
+
staging. Viewable as browser pages or directly in the terminal. It looks at the
|
6
|
+
10 most popular pages (this is configurable) of each document format.
|
7
|
+
|
8
|
+
## Screenshots
|
9
|
+
|
10
|
+
![Example output](docs/screenshots/gallery.png?raw=true "Example gallery of
|
11
|
+
differing pages")
|
12
|
+
|
13
|
+
|
14
|
+
## Technical documentation for visual and HTML diffs
|
15
|
+
|
16
|
+
It uses a fork of the BBC's wraith gem (The fork is at
|
17
|
+
https://github.com/alphagov/wraith, the main gem at
|
18
|
+
https://github.com/BBC-News/wraith). The fork adds two extra configuration
|
19
|
+
variables, allowing the user to specify the number of threads to use, and the
|
20
|
+
maximum timeout when loading pages. The output is written to an html file
|
21
|
+
which can be viewed in a browser.
|
22
|
+
|
23
|
+
When `bundle exec rake diff` is run, a list of all the document formats on
|
24
|
+
govuk is obtained using the search api, and then the top n pages for each
|
25
|
+
format (n being a configuration variable). Diffs are produced for each of
|
26
|
+
these pages.
|
27
|
+
|
28
|
+
|
29
|
+
### Dependencies
|
30
|
+
|
31
|
+
- [ImageMagick](http://www.imagemagick.org/script/index.php)
|
32
|
+
- [phantomjs](http://phantomjs.org/) - preferably 1.9 rather than 2.0
|
33
|
+
|
34
|
+
|
35
|
+
## How to run
|
36
|
+
|
37
|
+
### Running the application locally
|
38
|
+
|
39
|
+
bundle exec rake diff
|
40
|
+
|
41
|
+
### Checking plain-text diffs
|
42
|
+
|
43
|
+
bundle exec rake diff:text pages.yml
|
44
|
+
|
45
|
+
Where `pages.yml` is a YAML array of paths to compare. For example:
|
46
|
+
|
47
|
+
- government/organisations/prime-ministers-office-10-downing-street
|
48
|
+
- government/topical-events/budget-2016
|
49
|
+
- topic/competition/regulatory-appeals-references
|
50
|
+
|
51
|
+
Text diffs can also specify the domains to compare using the `LEFT` and `RIGHT`
|
52
|
+
environment variables. These default to our `www-origin.staging` and
|
53
|
+
`www-origin.publishing` domains respectively.
|
54
|
+
|
55
|
+
Plain-text diffing can be parallelised by starting multiple processes with
|
56
|
+
individual page files.
|
57
|
+
|
58
|
+
### Using the gem from an existing project
|
59
|
+
|
60
|
+
# Gemfile
|
61
|
+
gem 'govuk-diff-pages'
|
62
|
+
|
63
|
+
# Rakefile
|
64
|
+
load 'govuk/diff/pages/tasks/rakefile.rake'
|
65
|
+
|
66
|
+
# Shell
|
67
|
+
bundle exec rake -T
|
68
|
+
|
69
|
+
### Running the test suite
|
70
|
+
|
71
|
+
bundle exec rake
|
72
|
+
|
73
|
+
## Licence
|
74
|
+
|
75
|
+
[MIT License](LICENSE.txt)
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Development Rakefile for the govuk-diff-pages gem.
require "rspec/core/rake_task"
require "gem_publisher"

# Load the gem's own rake tasks so they are available from a checkout.
load File.dirname(__FILE__) + "/lib/govuk/diff/pages/tasks/rakefile.rake"

# Register the :spec task exactly once. Declaring the same Rake task a second
# time does not replace it; it appends another action to the existing task.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# Calls GemPublisher.publish_if_updated for this gem's gemspec and prints the
# returned value when it is truthy.
task :publish_gem do
  gem = GemPublisher.publish_if_updated("govuk-diff-pages.gemspec", :rubygems)
  puts "Published #{gem}" if gem
end
|
data/config/settings.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
|
2
|
+
# base url for production and staging domains - WITHOUT TRAILING slash
|
3
|
+
domains:
|
4
|
+
production: https://www-origin.publishing.service.gov.uk
|
5
|
+
staging: https://www-origin.staging.publishing.service.gov.uk
|
6
|
+
|
7
|
+
# Enter a list of pages that you want to diff, but are not in the list of pages returned by the
|
8
|
+
# page indexer
|
9
|
+
hard_coded_pages:
|
10
|
+
|
11
|
+
verbose: true
|
12
|
+
|
13
|
+
html_diff:
|
14
|
+
directory: "shots/html"
|
15
|
+
|
16
|
+
page_indexer:
|
17
|
+
# The maximum number of formats to search for.
|
18
|
+
# There are fewer than 100, so a value of 100 will ensure we capture them all
|
19
|
+
# max_formats: 100 # use this value in production
|
20
|
+
max_formats: 6 # use this value for a quicker test
|
21
|
+
|
22
|
+
# The maximum number of pages for each format - we will capture the <n> most popular pages
|
23
|
+
# max_pages_per_format: 10 # use this value in production
|
24
|
+
max_pages_per_format: 3 # use this value for a quicker test
|
25
|
+
|
26
|
+
# Configuration options for wraith
|
27
|
+
wraith:
|
28
|
+
# Headless browser option
|
29
|
+
browser:
|
30
|
+
phantomjs: phantomjs
|
31
|
+
|
32
|
+
# name of the directory where the screenshots will be stored
|
33
|
+
directory: "shots/visual"
|
34
|
+
|
35
|
+
# Screen widths
|
36
|
+
screen_widths:
|
37
|
+
- 1024
|
38
|
+
|
39
|
+
# Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
|
40
|
+
# alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
|
41
|
+
# diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
|
42
|
+
# diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
|
43
|
+
mode: diffs_only
|
44
|
+
|
45
|
+
# Amount of fuzz ImageMagick will use
|
46
|
+
fuzz: "5%"
|
47
|
+
threshold: 5
|
48
|
+
|
49
|
+
# number of threads to use (reduce this number if there are timeouts getting page captures)
|
50
|
+
num_threads: 4
|
51
|
+
|
52
|
+
# timeout in milliseconds for browser to wait for pages to load
|
53
|
+
timeout_ms: 2000
|
Binary file
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put the gem's lib directory on the load path so the version constant can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'govuk/diff/pages/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name    = "govuk-diff-pages"
  spec.version = Govuk::Diff::Pages::VERSION
  spec.authors = ["Ben Lovell"]
  spec.email   = ["benjamin.lovell@gmail.com"]

  # Description
  spec.summary     = "Visual and textual page diffing."
  spec.description = "Diffs web pages both visually and textually."
  spec.homepage    = "https://github.com/alphagov/govuk-diff-pages"
  spec.license     = "MIT"

  # Package every file tracked by git except the test directories.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "diffy", "~> 3.1"
  spec.add_dependency "rest-client", "~> 1.8"
  spec.add_dependency "nokogiri"

  # Development dependencies
  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "gem_publisher", "~> 1.1"
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "govuk/diff/pages/version"
|
2
|
+
|
3
|
+
module Govuk
  module Diff
    # Top-level namespace of the gem. Components are autoloaded on first use,
    # and a few helpers expose the locations of generated config files.
    module Pages
      autoload :AppConfig, 'govuk/diff/pages/app_config'
      autoload :FormatSearcher, 'govuk/diff/pages/format_searcher'
      autoload :LinkChecker, 'govuk/diff/pages/link_checker'
      autoload :PageIndexer, 'govuk/diff/pages/page_indexer'
      autoload :PageSearcher, 'govuk/diff/pages/page_searcher'
      autoload :UrlChecker, 'govuk/diff/pages/url_checker'
      autoload :WraithConfigGenerator, 'govuk/diff/pages/wraith_config_generator'

      autoload :HtmlDiff, 'govuk/diff/pages/html_diff'
      autoload :TextDiff, 'govuk/diff/pages/text_diff'

      class << self
        # Parent of the directory that contains this file.
        def root_dir
          File.dirname(__dir__)
        end

        # Absolute path of config/govuk_pages.yml, two levels above root_dir.
        def govuk_pages_file
          File.expand_path("#{root_dir}/../../config/govuk_pages.yml")
        end

        # Absolute path of config/wraith.yaml, two levels above root_dir.
        def wraith_config_file
          File.expand_path("#{root_dir}/../../config/wraith.yaml")
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Govuk
|
5
|
+
module Diff
|
6
|
+
module Pages
|
7
|
+
# Dot-accessible view over the application settings.
#
# Nested hashes become nested AppConfig instances, so values are read as
# `config.wraith.num_threads`. Reading a key that is not present raises
# MissingKeyError rather than returning nil. Keys whose value is nil in the
# source are stored as the :nil_value marker internally, so they can be told
# apart from missing keys, and are read back as nil.
class AppConfig
  class MissingKeyError < RuntimeError; end

  # param can be one of the following:
  # * a file path - the config will be loaded from the specified yaml file
  # * a Hash      - the config will be loaded directly from the hash,
  # * nil         - the config will be loaded from the default yaml file (config/settings.yml)
  #
  def initialize(path_or_hash = nil)
    hash =
      if path_or_hash.is_a?(Hash)
        path_or_hash
      else
        # An empty YAML document does not load as a Hash; treat it as an
        # empty configuration so later lookups raise MissingKeyError.
        YAML.load_file(path_or_hash || default_settings_path) || {}
      end
    @config = populate_config(hash)
  end

  # Resolves `config.some_key` against the stored settings.
  #
  # Raises MissingKeyError when the lookup yields nil (the key is absent).
  # Any arguments passed with the call are ignored.
  def method_missing(method, *_params)
    result = @config.public_send(method)
    raise MissingKeyError.new "No such config key '#{method}'" if result.nil?
    result == :nil_value ? nil : result
  end

  # Reports configured keys through respond_to?, matching what
  # method_missing can answer for them. Deliberately limited to keys: hooks
  # such as marshal_dump or encode_with must not appear to be implemented
  # here, because method_missing drops their arguments.
  def respond_to_missing?(method, include_private = false)
    (!@config.nil? && @config.to_h.key?(method)) || super
  end

  # Returns the settings as a plain Hash with String keys, converting nested
  # AppConfig instances recursively and restoring nil values.
  def to_h
    @config.to_h.each_with_object({}) do |(key, value), result|
      value = nil if value == :nil_value
      result[key.to_s] = value.is_a?(AppConfig) ? value.to_h : value
    end
  end

private

  # Location of the bundled settings file. root_dir is reached through
  # '/../../config', the same way the govuk_pages.yml and wraith.yaml paths
  # are built in Govuk::Diff::Pages.
  def default_settings_path
    File.expand_path("#{Govuk::Diff::Pages.root_dir}/../../config/settings.yml")
  end

  # Builds the OpenStruct backing this instance from a Hash.
  def populate_config(hash)
    hash.each_with_object(OpenStruct.new) do |(key, value), config|
      config[key] =
        if value.is_a?(Hash)
          AppConfig.new(value)
        elsif value.nil?
          :nil_value
        else
          value
        end
    end
  end
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rest-client'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Govuk
|
5
|
+
module Diff
|
6
|
+
module Pages
|
7
|
+
# Asks the production search API for its format facet and returns the slug
# of every format option it reports.
class FormatSearcher
  # config must expose verbose, domains.production and page_indexer.max_formats.
  def initialize(config)
    @config = config
  end

  # Returns an Array of format slugs taken from the search API response.
  def run
    puts "Getting list of formats in GOVUK" if verbose?
    extract_formats(get_facets)
  end

  def verbose?
    @config.verbose
  end

private

  # Performs the HTTP request; max_formats is passed as the facet_format value.
  def get_facets
    base_url = @config.domains.production
    limit = @config.page_indexer.max_formats
    RestClient.get("#{base_url}/api/search.json?facet_format=#{limit}")
  end

  # Extracts facets -> format -> options[*] -> value -> slug from the JSON body.
  def extract_formats(response)
    facets = JSON.parse(response).fetch('facets')
    format_options = facets.fetch('format').fetch('options')
    format_options.map { |option| option['value']['slug'] }
  end
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Gallery of HTML Diffs</title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <%# Expects @result_hash (base_path => diff filename pairs) and @govuk_pages (the collection of compared pages). %>
    <div class="container">

      <h1>List of HTML Diffs</h1>
      <h2><%= @result_hash.size %> pages out of <%= @govuk_pages.size %> compared have differences</h2>

      <div class="row">
        <div class="col-md-12">
          <table>
            <%# One row per entry, linking to the generated diff file on disk. %>
            <% @result_hash.each do |base_path, filename| %>
              <tr>
                <td><%= base_path %></td>
                <td><a href='file://<%= filename %>' target='_blank'>HTML Diff</a></td>
              </tr>
            <% end %>
          </table>
        </div>
      </div>
    </div>
  </body>
</html>
|
@@ -0,0 +1,59 @@
|
|
1
|
+
/* Styles for the HTML diff pages. */

/* Diff wrapper: scroll when the content overflows. */
.diff {
  overflow: auto;
}

/* The diff list is laid out as a full-width table without bullets. */
.diff ul {
  background: #fff;
  display: table;
  font-size: 13px;
  list-style: none;
  margin: 0;
  overflow: auto;
  padding: 0;
  width: 100%;
}

/* Show deletions and insertions as plain blocks, without strike-through or underline. */
.diff del, .diff ins {
  display: block;
  text-decoration: none;
}

/* Each list item is one table row. */
.diff li {
  display: table-row;
  height: 1em;
  margin: 0;
  padding: 0;
}

.example {
  padding: 10px;
}

/* Background colours for the two sides being compared. */
.left {
  background: #ddefff;
}

.right {
  background: #af8ed0;
}

/* Inserted rows. */
.diff li.ins {
  background: #d8b6fa;
}

/* Deleted rows share the colour used by .left. */
.diff li.del {
  background: #ddefff;
}

/* Monospaced text that keeps its whitespace but may wrap. */
.diff del, .diff ins, .diff span {
  font-family: courier;
  white-space: pre-wrap;
}

/* Comment rows are hidden. */
.diff li.diff-comment {
  display: none;
}

/* Block-info rows get a gray background. */
.diff li.diff-block-info {
  background: none repeat scroll 0 0 gray;
}
|