govuk-diff-pages 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +75 -0
  8. data/Rakefile +14 -0
  9. data/config/settings.yml +53 -0
  10. data/docs/screenshots/gallery.png +0 -0
  11. data/govuk-diff-pages.gemspec +30 -0
  12. data/lib/govuk/diff/pages.rb +30 -0
  13. data/lib/govuk/diff/pages/app_config.rb +61 -0
  14. data/lib/govuk/diff/pages/format_searcher.rb +37 -0
  15. data/lib/govuk/diff/pages/html_diff.rb +10 -0
  16. data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
  17. data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
  18. data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
  19. data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
  20. data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
  21. data/lib/govuk/diff/pages/link_checker.rb +47 -0
  22. data/lib/govuk/diff/pages/page_indexer.rb +27 -0
  23. data/lib/govuk/diff/pages/page_searcher.rb +46 -0
  24. data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
  25. data/lib/govuk/diff/pages/text_diff.rb +13 -0
  26. data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
  27. data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
  28. data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
  29. data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
  30. data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
  31. data/lib/govuk/diff/pages/url_checker.rb +36 -0
  32. data/lib/govuk/diff/pages/version.rb +7 -0
  33. data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
  34. metadata +189 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d7461d5624d6438b64b89ee14db3a51a49c6789d
4
+ data.tar.gz: 5575a5c4d1a74062a4d50c72dd6d29b2e2e05293
5
+ SHA512:
6
+ metadata.gz: 9363e7d7ef669768b36d6acea6c8a2301c3dbe0d32dfc51d83f872a803d0a66003e28ceeb13759a4ac07dba798dcc58dece2a0c394ed933a8d7c1e59e6267e41
7
+ data.tar.gz: abfc0131220eb9ad866edfd83f2b36e7ea958f86cbc4608be01483a9d7c2471ec46a4971c5a8e6d9057c3cdd40b725f2887c056688aad565a55c2e1434227fc3
data/.gitignore ADDED
@@ -0,0 +1,33 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring files.
2
+ #
3
+ # If you find yourself ignoring temporary files generated by your text editor
4
+ # or operating system, you probably want to add a global ignore instead:
5
+ # git config --global core.excludesfile '~/.gitignore_global'
6
+
7
+ # Ignore bundler config.
8
+ /.bundle
9
+
10
+ # Ignore the default SQLite database.
11
+ /db/*.sqlite3
12
+ /db/*.sqlite3-journal
13
+
14
+ # Ignore all logfiles and tempfiles.
15
+ /log/*.log
16
+ /tmp
17
+
18
+ /coverage
19
+ /spec/reports
20
+
21
+
22
+ # OS-generated files
23
+ .DS_Store
24
+
25
+ # shots generated
26
+ /shots
27
+
28
+ # generated config files
29
+ config/govuk_pages.yml
30
+ config/wraith.yaml
31
+
32
+ # This is a gem
33
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.3
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'wraith', git: 'git@github.com:alphagov/wraith.git', branch: 'psr'
4
+
5
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Ben Lovell
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,75 @@
1
+ # govuk-diff-pages
2
+
3
+ This app provides a rake task to produce visual diffs as screenshots, HTML
4
+ diffs and textual diffs of the production GOVUK website as compared with
5
+ staging. Viewable as browser pages or directly in the terminal. It looks at the
6
+ 10 most popular pages (this is configurable) of each document format.
7
+
8
+ ## Screenshots
9
+
10
+ ![Example output](docs/screenshots/gallery.png?raw=true "Example gallery of
11
+ differing pages")
12
+
13
+
14
+ ## Technical documentation for visual and HTML diffs
15
+
16
+ It uses a fork of the BBC's wraith gem (The fork is at
17
+ https://github.com/alphagov/wraith, the main gem at
18
+ https://github.com/BBC-News/wraith). The fork adds two extra configuration
19
+ variables, allowing the user to specify the number of threads to use, and the
20
+ maximum timeout when loading pages. The output is written to an html file
21
+ which can be viewed in a browser.
22
+
23
+ When `bundle exec rake diff` is run, a list of all the document formats on
24
+ govuk is obtained using the search api, and then the top n pages for each
25
+ format (n being a configuration variable). Diffs are produced for each of
26
+ these pages.
27
+
28
+
29
+ ### Dependencies
30
+
31
+ - [ImageMagick](http://www.imagemagick.org/script/index.php)
32
+ - [phantomjs](http://phantomjs.org/) - preferably 1.9 rather than 2.0
33
+
34
+
35
+ ## How to run
36
+
37
+ ### Running the application locally
38
+
39
+ bundle exec rake diff
40
+
41
+ ### Checking plain-text diffs
42
+
43
+ bundle exec rake diff:text pages.yml
44
+
45
+ Where `pages.yml` is a YAML array of paths to compare. For example:
46
+
47
+ - government/organisations/prime-ministers-office-10-downing-street
48
+ - government/topical-events/budget-2016
49
+ - topic/competition/regulatory-appeals-references
50
+
51
+ Text diffs can also specify the domains to compare using the `LEFT` and `RIGHT`
52
+ environment variables. These default to our `www-origin.staging` and
53
+ `www-origin.publishing` domains respectively.
54
+
55
+ Plain-text diffing can be parallelised by starting multiple processes with
56
+ individual page files.
57
+
58
+ ### Using the gem from an existing project
59
+
60
+ # Gemfile
61
+ gem 'govuk-diff-pages'
62
+
63
+ # Rakefile
64
+ load 'govuk/diff/pages/tasks/rakefile.rake'
65
+
66
+ # Shell
67
+ bundle exec rake -T
68
+
69
+ ### Running the test suite
70
+
71
+ bundle exec rake
72
+
73
+ ## Licence
74
+
75
+ [MIT License](LICENSE.txt)
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ require "rspec/core/rake_task"
2
+ require "gem_publisher"
3
+
4
+ load File.dirname(__FILE__) + "/lib/govuk/diff/pages/tasks/rakefile.rake"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+ task default: :spec
8
+
9
+ RSpec::Core::RakeTask.new(:spec)
10
+
11
+ task :publish_gem do
12
+ gem = GemPublisher.publish_if_updated("govuk-diff-pages.gemspec", :rubygems)
13
+ puts "Published #{gem}" if gem
14
+ end
data/config/settings.yml ADDED
@@ -0,0 +1,53 @@
1
+
2
+ # base url for production and staging domains - WITHOUT TRAILING slash
3
+ domains:
4
+ production: https://www-origin.publishing.service.gov.uk
5
+ staging: https://www-origin.staging.publishing.service.gov.uk
6
+
7
+ # Enter a list of pages that you want to diff, but are not in the list of pages returned by the
8
+ # page indexer
9
+ hard_coded_pages:
10
+
11
+ verbose: true
12
+
13
+ html_diff:
14
+ directory: "shots/html"
15
+
16
+ page_indexer:
17
+ # The maximum number of formats to search for.
18
+ # There are fewer than 100, so a value of 100 will ensure we capture them all
19
+ # max_formats: 100 # use this value in production
20
+ max_formats: 6 # use this value for a quicker test
21
+
22
+ # The maximum number of pages for each format - we will capture the <n> most popular pages
23
+ # max_pages_per_format: 10 # use this value in production
24
+ max_pages_per_format: 3 # use this value for a quicker test
25
+
26
+ # Configuration options for wraith
27
+ wraith:
28
+ # Headless browser option
29
+ browser:
30
+ phantomjs: phantomjs
31
+
32
+ # name of the directory where the screenshots will be stored
33
+ directory: "shots/visual"
34
+
35
+ # Screen widths
36
+ screen_widths:
37
+ - 1024
38
+
39
+ # Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
40
+ # alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
41
+ # diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
42
+ # diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
43
+ mode: diffs_only
44
+
45
+ # Amount of fuzz ImageMagick will use
46
+ fuzz: "5%"
47
+ threshold: 5
48
+
49
+ # number of threads to use (reduce this number if there are timeouts getting page captures)
50
+ num_threads: 4
51
+
52
+ # timeout in milliseconds for browser to wait for pages to load
53
+ timeout_ms: 2000
data/docs/screenshots/gallery.png ADDED
Binary file
data/govuk-diff-pages.gemspec ADDED
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'govuk/diff/pages/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "govuk-diff-pages"
8
+ spec.version = Govuk::Diff::Pages::VERSION
9
+ spec.authors = ["Ben Lovell"]
10
+ spec.email = ["benjamin.lovell@gmail.com"]
11
+
12
+ spec.summary = %q{Visual and textual page diffing.}
13
+ spec.description = %q{Diffs web pages both visually and textually.}
14
+ spec.homepage = "https://github.com/alphagov/govuk-diff-pages"
15
+ spec.license = "MIT"
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "diffy", "~> 3.1"
22
+ spec.add_dependency "rest-client", "~> 1.8"
23
+ spec.add_dependency "nokogiri"
24
+
25
+ spec.add_development_dependency "bundler", "~> 1.10"
26
+ spec.add_development_dependency "rake", "~> 10.0"
27
+ spec.add_development_dependency "rspec", "~> 3.0"
28
+ spec.add_development_dependency "pry-byebug"
29
+ spec.add_development_dependency "gem_publisher", "~> 1.1"
30
+ end
data/lib/govuk/diff/pages.rb ADDED
@@ -0,0 +1,30 @@
1
+ require "govuk/diff/pages/version"
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
+ autoload :AppConfig, 'govuk/diff/pages/app_config'
7
+ autoload :FormatSearcher, 'govuk/diff/pages/format_searcher'
8
+ autoload :LinkChecker, 'govuk/diff/pages/link_checker'
9
+ autoload :PageIndexer, 'govuk/diff/pages/page_indexer'
10
+ autoload :PageSearcher, 'govuk/diff/pages/page_searcher'
11
+ autoload :UrlChecker, 'govuk/diff/pages/url_checker'
12
+ autoload :WraithConfigGenerator, 'govuk/diff/pages/wraith_config_generator'
13
+
14
+ autoload :HtmlDiff, 'govuk/diff/pages/html_diff'
15
+ autoload :TextDiff, 'govuk/diff/pages/text_diff'
16
+
17
+ def self.root_dir
18
+ File.dirname __dir__
19
+ end
20
+
21
+ def self.govuk_pages_file
22
+ File.expand_path(root_dir + '/../../config/govuk_pages.yml')
23
+ end
24
+
25
+ def self.wraith_config_file
26
+ File.expand_path(root_dir + '/../../config/wraith.yaml')
27
+ end
28
+ end
29
+ end
30
+ end
data/lib/govuk/diff/pages/app_config.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'yaml'
2
+ require 'ostruct'
3
+
4
+ module Govuk
5
+ module Diff
6
+ module Pages
7
+ class AppConfig
8
+ class MissingKeyError < RuntimeError; end
9
+
10
+ # param can be one of the following:
11
+ # * a file path - the config will be loaded from the specified yaml file
12
+ # * a Hash - the config will be loaded directly from the hash,
13
+ # * nil - the config will be loaded from the default yaml file (config/settings.yml)
14
+ #
15
+ def initialize(path_or_hash = nil)
16
+ if path_or_hash.is_a?(Hash)
17
+ @config = populate_config(path_or_hash)
18
+ else
19
+ path_or_hash ||= "#{Govuk::Diff::Pages.root_dir}/config/settings.yml"
20
+ hash = YAML.load_file(path_or_hash)
21
+ @config = populate_config(hash)
22
+ end
23
+ end
24
+
25
+ def method_missing(method, *_params)
26
+ result = @config.public_send(method)
27
+ raise MissingKeyError.new "No such config key '#{method}'" if result.nil?
28
+ result = nil if result == :nil_value
29
+ result
30
+ end
31
+
32
+ def to_h
33
+ result = {}
34
+ @config.to_h.each do |key, value|
35
+ value = nil if value == :nil_value
36
+ if value.is_a?(AppConfig)
37
+ result[key.to_s] = value.to_h
38
+ else
39
+ result[key.to_s] = value
40
+ end
41
+ end
42
+ result
43
+ end
44
+
45
+ private
46
+ def populate_config(hash)
47
+ config = OpenStruct.new
48
+ hash.each do |key, value|
49
+ if value.is_a?(Hash)
50
+ config[key] = AppConfig.new(value)
51
+ else
52
+ value = :nil_value if value.nil?
53
+ config[key] = value
54
+ end
55
+ end
56
+ config
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
data/lib/govuk/diff/pages/format_searcher.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'rest-client'
2
+ require 'json'
3
+
4
+ module Govuk
5
+ module Diff
6
+ module Pages
7
+ class FormatSearcher
8
+ def initialize(config)
9
+ @config = config
10
+ end
11
+
12
+ def run
13
+ puts "Getting list of formats in GOVUK" if verbose?
14
+ response = get_facets
15
+ extract_formats(response)
16
+ end
17
+
18
+ def verbose?
19
+ @config.verbose
20
+ end
21
+
22
+ private
23
+
24
+ def extract_formats(response)
25
+ parsed_response = JSON.parse(response)
26
+ options = parsed_response.fetch('facets').fetch('format').fetch('options')
27
+ options.map { |o| o['value']['slug'] }
28
+ end
29
+
30
+ def get_facets
31
+ url = "#{@config.domains.production}/api/search.json?facet_format=#{@config.page_indexer.max_formats}"
32
+ RestClient.get(url)
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
data/lib/govuk/diff/pages/html_diff.rb ADDED
@@ -0,0 +1,10 @@
1
+ module Govuk
2
+ module Diff
3
+ module Pages
4
+ module HtmlDiff
5
+ autoload :Differ, 'govuk/diff/pages/html_diff/differ'
6
+ autoload :Runner, 'govuk/diff/pages/html_diff/runner'
7
+ end
8
+ end
9
+ end
10
+ end
data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb ADDED
@@ -0,0 +1,30 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <title>Gallery of HTML Diffs</title>
8
+
9
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
10
+ <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
11
+ </head>
12
+ <body>
13
+ <div class="container">
14
+
15
+ <h1>List of HTML Diffs</h1>
16
+ <h2><%= @result_hash.size %> pages out of <%= @govuk_pages.size %> compared have differences</h2>
17
+
18
+ <div class="row">
19
+ <div class="col-md-12">
20
+ <table>
21
+ <% @result_hash.each do |base_path, filename| %>
22
+ <tr>
23
+ <td><%= base_path %></td>
24
+ <td><a href= 'file://<%= filename %>' target='_blank'>HTML Diff</a></td>
25
+ </tr>
26
+ <% end %>
27
+ </table>
28
+ </div>
29
+ </body>
30
+ </html>
data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css ADDED
@@ -0,0 +1,59 @@
1
+ .diff {
2
+ overflow: auto;
3
+ }
4
+
5
+ .diff ul {
6
+ background: #fff;
7
+ display: table;
8
+ font-size: 13px;
9
+ list-style: none;
10
+ margin: 0;
11
+ overflow: auto;
12
+ padding: 0;
13
+ width: 100%;
14
+ }
15
+
16
+ .diff del, .diff ins {
17
+ display: block;
18
+ text-decoration: none;
19
+ }
20
+
21
+ .diff li {
22
+ display: table-row;
23
+ height: 1em;
24
+ margin: 0;
25
+ padding: 0;
26
+ }
27
+
28
+ .example {
29
+ padding: 10px;
30
+ }
31
+
32
+ .left {
33
+ background: #ddefff;
34
+ }
35
+
36
+ .right {
37
+ background: #af8ed0;
38
+ }
39
+
40
+ .diff li.ins {
41
+ background: #d8b6fa;
42
+ }
43
+
44
+ .diff li.del {
45
+ background: #ddefff;
46
+ }
47
+
48
+ .diff del, .diff ins, .diff span {
49
+ font-family: courier;
50
+ white-space: pre-wrap;
51
+ }
52
+
53
+ .diff li.diff-comment {
54
+ display: none;
55
+ }
56
+
57
+ .diff li.diff-block-info {
58
+ background: none repeat scroll 0 0 gray;
59
+ }