govuk-diff-pages 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +75 -0
  8. data/Rakefile +14 -0
  9. data/config/settings.yml +53 -0
  10. data/docs/screenshots/gallery.png +0 -0
  11. data/govuk-diff-pages.gemspec +30 -0
  12. data/lib/govuk/diff/pages.rb +30 -0
  13. data/lib/govuk/diff/pages/app_config.rb +61 -0
  14. data/lib/govuk/diff/pages/format_searcher.rb +37 -0
  15. data/lib/govuk/diff/pages/html_diff.rb +10 -0
  16. data/lib/govuk/diff/pages/html_diff/assets/gallery_template.erb +30 -0
  17. data/lib/govuk/diff/pages/html_diff/assets/html_diff_styles.css +59 -0
  18. data/lib/govuk/diff/pages/html_diff/assets/html_diff_template.erb +31 -0
  19. data/lib/govuk/diff/pages/html_diff/differ.rb +84 -0
  20. data/lib/govuk/diff/pages/html_diff/runner.rb +44 -0
  21. data/lib/govuk/diff/pages/link_checker.rb +47 -0
  22. data/lib/govuk/diff/pages/page_indexer.rb +27 -0
  23. data/lib/govuk/diff/pages/page_searcher.rb +46 -0
  24. data/lib/govuk/diff/pages/tasks/rakefile.rake +93 -0
  25. data/lib/govuk/diff/pages/text_diff.rb +13 -0
  26. data/lib/govuk/diff/pages/text_diff/differ.rb +15 -0
  27. data/lib/govuk/diff/pages/text_diff/formatter.rb +15 -0
  28. data/lib/govuk/diff/pages/text_diff/renderer.rb +23 -0
  29. data/lib/govuk/diff/pages/text_diff/retriever.rb +13 -0
  30. data/lib/govuk/diff/pages/text_diff/runner.rb +41 -0
  31. data/lib/govuk/diff/pages/url_checker.rb +36 -0
  32. data/lib/govuk/diff/pages/version.rb +7 -0
  33. data/lib/govuk/diff/pages/wraith_config_generator.rb +79 -0
  34. metadata +189 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d7461d5624d6438b64b89ee14db3a51a49c6789d
4
+ data.tar.gz: 5575a5c4d1a74062a4d50c72dd6d29b2e2e05293
5
+ SHA512:
6
+ metadata.gz: 9363e7d7ef669768b36d6acea6c8a2301c3dbe0d32dfc51d83f872a803d0a66003e28ceeb13759a4ac07dba798dcc58dece2a0c394ed933a8d7c1e59e6267e41
7
+ data.tar.gz: abfc0131220eb9ad866edfd83f2b36e7ea958f86cbc4608be01483a9d7c2471ec46a4971c5a8e6d9057c3cdd40b725f2887c056688aad565a55c2e1434227fc3
data/.gitignore ADDED
@@ -0,0 +1,33 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring files.
2
+ #
3
+ # If you find yourself ignoring temporary files generated by your text editor
4
+ # or operating system, you probably want to add a global ignore instead:
5
+ # git config --global core.excludesfile '~/.gitignore_global'
6
+
7
+ # Ignore bundler config.
8
+ /.bundle
9
+
10
+ # Ignore the default SQLite database.
11
+ /db/*.sqlite3
12
+ /db/*.sqlite3-journal
13
+
14
+ # Ignore all logfiles and tempfiles.
15
+ /log/*.log
16
+ /tmp
17
+
18
+ /coverage
19
+ /spec/reports
20
+
21
+
22
+ # OS-generated files
23
+ .DS_Store
24
+
25
+ # shots generated
26
+ /shots
27
+
28
+ # generated config files
29
+ config/govuk_pages.yml
30
+ config/wraith.yaml
31
+
32
+ # This is a gem
33
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.3
data/Gemfile ADDED
@@ -0,0 +1,5 @@
source 'https://rubygems.org'

# Use the alphagov fork of wraith rather than the released gem.
# The fork is fetched over HTTPS so that `bundle install` works without a
# GitHub SSH key (the SSH form git@github.com:... requires one).
gem 'wraith', git: 'https://github.com/alphagov/wraith.git', branch: 'psr'

# All other dependencies are declared in the gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Ben Lovell
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,75 @@
1
+ # govuk-diff-pages
2
+
3
+ This app provides a rake task to produce visual diffs as screenshots, HTML
4
+ diffs and textual diffs of the production GOVUK website as compared with
5
+ staging. Viewable as browser pages or directly in the terminal. It looks at the
6
+ 10 most popular pages (this is configurable) of each document format.
7
+
8
+ ## Screenshots
9
+
10
+ ![Example output](docs/screenshots/gallery.png?raw=true "Example gallery of
11
+ differing pages")
12
+
13
+
14
+ ## Technical documentation for visual and HTML diffs
15
+
16
+ It uses a fork of the BBC's wraith gem (The fork is at
17
+ https://github.com/alphagov/wraith, the main gem at
18
+ https://github.com/BBC-News/wraith). The fork adds two extra configuration
19
+ variables, allowing the user to specify the number of threads to use, and the
20
+ maximum timeout when loading pages. The output is written to an html file
21
+ which can be viewed in a browser.
22
+
23
+ When `bundle exec rake diff` is run, a list of all the document formats on
24
+ govuk is obtained using the search api, and then the top n pages for each
25
+ format (n being a configuration variable). Diffs are produced for each of
26
+ these pages.
27
+
28
+
29
+ ### Dependencies
30
+
31
+ - [ImageMagick](http://www.imagemagick.org/script/index.php)
32
+ - [phantomjs](http://phantomjs.org/) - preferably 1.9 rather than 2.0
33
+
34
+
35
+ ## How to run
36
+
37
+ ### Running the application locally
38
+
39
+ bundle exec rake diff
40
+
41
+ ### Checking plain-text diffs
42
+
43
+ bundle exec rake diff:text pages.yml
44
+
45
+ Where `pages.yml` is a YAML array of paths to compare. For example:
46
+
47
+ - government/organisations/prime-ministers-office-10-downing-street
48
+ - government/topical-events/budget-2016
49
+ - topic/competition/regulatory-appeals-references
50
+
51
+ Text diffs can also specify the domains to compare using the `LEFT` and `RIGHT`
52
+ environment variables, which default to our `www-origin.staging` and
53
+ `www-origin.publishing` domains respectively.
54
+
55
+ Plain-text diffing can be parallelised by starting multiple processes with
56
+ individual page files.
57
+
58
+ ### Using the gem from an existing project
59
+
60
+ # Gemfile
61
+ gem 'govuk-diff-pages'
62
+
63
+ # Rakefile
64
+ load 'govuk/diff/pages/tasks/rakefile.rake'
65
+
66
+ # Shell
67
+ bundle exec rake -T
68
+
69
+ ### Running the test suite
70
+
71
+ bundle exec rake
72
+
73
+ ## Licence
74
+
75
+ [MIT License](LICENSE.txt)
data/Rakefile ADDED
@@ -0,0 +1,14 @@
require "rspec/core/rake_task"
require "gem_publisher"

# Pull in the tasks that ship with the gem.
load File.dirname(__FILE__) + "/lib/govuk/diff/pages/tasks/rakefile.rake"

# Register the spec task exactly once. Rake task definitions are additive, so
# registering it a second time would append a second action to the same task
# and run the whole suite twice.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# Publishes the gem described by the gemspec; a message is printed only when
# publish_if_updated returns a truthy value.
task :publish_gem do
  gem = GemPublisher.publish_if_updated("govuk-diff-pages.gemspec", :rubygems)
  puts "Published #{gem}" if gem
end
@@ -0,0 +1,53 @@
1
+
2
+ # base url for production and staging domains - WITHOUT TRAILING slash
3
+ domains:
4
+ production: https://www-origin.publishing.service.gov.uk
5
+ staging: https://www-origin.staging.publishing.service.gov.uk
6
+
7
+ # Enter a list of pages that you want to diff, but are not in the list of pages returned by the
8
+ # page indexer
9
+ hard_coded_pages:
10
+
11
+ verbose: true
12
+
13
+ html_diff:
14
+ directory: "shots/html"
15
+
16
+ page_indexer:
17
+ # The maximum number of formats to search for.
18
+ # There are fewer than 100, so a value of 100 will ensure we capture them all
19
+ # max_formats: 100 # use this value in production
20
+ max_formats: 6 # use this value for a quicker test
21
+
22
+ # The maximum number of pages for each format - we will capture the <n> most popular pages
23
+ # max_pages_per_format: 10 # use this value in production
24
+ max_pages_per_format: 3 # use this value for a quicker test
25
+
26
+ # Configuration options for wraith
27
+ wraith:
28
+ # Headless browser option
29
+ browser:
30
+ phantomjs: phantomjs
31
+
32
+ # name of the directory where the screenshots will be stored
33
+ directory: "shots/visual"
34
+
35
+ # Screen widths
36
+ screen_widths:
37
+ - 1024
38
+
39
+ # Choose how results are displayed, by default alphanumeric. Different screen widths are always grouped.
40
+ # alphanumeric - all paths (with, and without, a difference) are shown, sorted by path
41
+ # diffs_first - all paths (with, and without, a difference) are shown, sorted by difference size (largest first)
42
+ # diffs_only - only paths with a difference are shown, sorted by difference size (largest first)
43
+ mode: diffs_only
44
+
45
+ # Amount of fuzz ImageMagick will use
46
+ fuzz: "5%"
47
+ threshold: 5
48
+
49
+ # number of threads to use (reduce this number if there are timeouts getting page captures)
50
+ num_threads: 4
51
+
52
+ # timeout in milliseconds for browser to wait for pages to load
53
+ timeout_ms: 2000
Binary file
@@ -0,0 +1,30 @@
# coding: utf-8

# Make lib/ loadable so the version constant can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'govuk/diff/pages/version'

Gem::Specification.new do |spec|
  spec.name          = "govuk-diff-pages"
  spec.version       = Govuk::Diff::Pages::VERSION
  spec.authors       = ["Ben Lovell"]
  spec.email         = ["benjamin.lovell@gmail.com"]

  spec.summary       = "Visual and textual page diffing."
  spec.description   = "Diffs web pages both visually and textually."
  spec.homepage      = "https://github.com/alphagov/govuk-diff-pages"
  spec.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Anything packaged under exe/ is exposed as an executable.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "diffy", "~> 3.1"
  spec.add_dependency "rest-client", "~> 1.8"
  spec.add_dependency "nokogiri"

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "gem_publisher", "~> 1.1"
end
@@ -0,0 +1,30 @@
1
+ require "govuk/diff/pages/version"
2
+
3
+ module Govuk
4
+ module Diff
5
+ module Pages
6
+ autoload :AppConfig, 'govuk/diff/pages/app_config'
7
+ autoload :FormatSearcher, 'govuk/diff/pages/format_searcher'
8
+ autoload :LinkChecker, 'govuk/diff/pages/link_checker'
9
+ autoload :PageIndexer, 'govuk/diff/pages/page_indexer'
10
+ autoload :PageSearcher, 'govuk/diff/pages/page_searcher'
11
+ autoload :UrlChecker, 'govuk/diff/pages/url_checker'
12
+ autoload :WraithConfigGenerator, 'govuk/diff/pages/wraith_config_generator'
13
+
14
+ autoload :HtmlDiff, 'govuk/diff/pages/html_diff'
15
+ autoload :TextDiff, 'govuk/diff/pages/text_diff'
16
+
17
+ def self.root_dir
18
+ File.dirname __dir__
19
+ end
20
+
21
+ def self.govuk_pages_file
22
+ File.expand_path(root_dir + '/../../config/govuk_pages.yml')
23
+ end
24
+
25
+ def self.wraith_config_file
26
+ File.expand_path(root_dir + '/../../config/wraith.yaml')
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,61 @@
1
+ require 'yaml'
2
+ require 'ostruct'
3
+
module Govuk
  module Diff
    module Pages
      # Read-only view of the application settings. Each top-level key is
      # exposed as a method; nested hashes become nested AppConfig objects, so
      # values are read as e.g. `config.wraith.fuzz`.
      class AppConfig
        # Raised when a key that is not present in the config is requested.
        class MissingKeyError < RuntimeError; end

        # param can be one of the following:
        # * a file path - the config will be loaded from the specified yaml file
        # * a Hash - the config will be loaded directly from the hash,
        # * nil - the config will be loaded from the default yaml file (config/settings.yml)
        #
        def initialize(path_or_hash = nil)
          settings =
            if path_or_hash.is_a?(Hash)
              path_or_hash
            else
              YAML.load_file(path_or_hash || default_settings_file)
            end
          @config = populate_config(settings)
        end

        # Returns the value stored under the key named after the called method.
        # Raises MissingKeyError when the key is absent. A key that is present
        # with a nil value (stored internally as :nil_value) returns nil.
        def method_missing(method, *_params)
          result = @config.public_send(method)
          raise MissingKeyError.new "No such config key '#{method}'" if result.nil?
          result = nil if result == :nil_value
          result
        end

        # Keeps respond_to? in step with method_missing: true for every key
        # that is present in the config (including keys whose value is nil).
        def respond_to_missing?(method, include_private = false)
          !@config[method].nil? || super
        end

        # Converts the config back into a plain Hash with String keys,
        # recursing into nested AppConfig objects and restoring nil values.
        def to_h
          @config.to_h.each_with_object({}) do |(key, value), result|
            value = nil if value == :nil_value
            result[key.to_s] = value.is_a?(AppConfig) ? value.to_h : value
          end
        end

      private

        # Location of the bundled settings file. The config directory sits two
        # levels above Pages.root_dir, mirroring how Pages.govuk_pages_file and
        # Pages.wraith_config_file build their paths.
        def default_settings_file
          File.expand_path('../../config/settings.yml', Govuk::Diff::Pages.root_dir)
        end

        # Builds the OpenStruct backing store. Nested hashes are wrapped in
        # their own AppConfig. nil values are swapped for :nil_value so that
        # method_missing can tell "present but empty" from "missing".
        def populate_config(hash)
          config = OpenStruct.new
          hash.each do |key, value|
            if value.is_a?(Hash)
              config[key] = AppConfig.new(value)
            else
              value = :nil_value if value.nil?
              config[key] = value
            end
          end
          config
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'rest-client'
2
+ require 'json'
3
+
module Govuk
  module Diff
    module Pages
      # Asks the search API on the production domain for its format facet and
      # returns the slug of every format option in the response.
      class FormatSearcher
        # config must respond to verbose, domains.production and
        # page_indexer.max_formats.
        def initialize(config)
          @config = config
        end

        # Fetches the facets and returns the format slugs as an Array.
        def run
          puts "Getting list of formats in GOVUK" if verbose?
          extract_formats(get_facets)
        end

        def verbose?
          @config.verbose
        end

      private

        # Walks facets -> format -> options in the parsed JSON (a missing key
        # raises KeyError) and collects each option's value slug.
        def extract_formats(response)
          format_options = %w(facets format options).inject(JSON.parse(response)) do |node, key|
            node.fetch(key)
          end
          format_options.map { |option| option['value']['slug'] }
        end

        # Performs the HTTP GET against the search API; max_formats is passed
        # as the facet_format query parameter.
        def get_facets
          base_url = @config.domains.production
          max_formats = @config.page_indexer.max_formats
          RestClient.get("#{base_url}/api/search.json?facet_format=#{max_formats}")
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
module Govuk
  module Diff
    module Pages
      # Namespace for the HTML diffing classes. Each constant is registered for
      # autoloading, so its file is required on first reference.
      module HtmlDiff
        %w(Differ Runner).each do |const_name|
          autoload const_name.to_sym, "govuk/diff/pages/html_diff/#{const_name.downcase}"
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Gallery of HTML Diffs</title>

    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
    <link rel="stylesheet" href="../../lib/html_diff/assets/html_diff_styles.css">
  </head>
  <body>
    <%# Reads @result_hash (iterated as base_path/filename pairs) and @govuk_pages (only its size is used). %>
    <div class="container">

      <h1>List of HTML Diffs</h1>
      <h2><%= @result_hash.size %> pages out of <%= @govuk_pages.size %> compared have differences</h2>

      <div class="row">
        <div class="col-md-12">
          <table>
            <% @result_hash.each do |base_path, filename| %>
              <tr>
                <td><%= base_path %></td>
                <%# filename is linked with a file:// URL, so it opens from the local disk %>
                <td><a href="file://<%= filename %>" target="_blank">HTML Diff</a></td>
              </tr>
            <% end %>
          </table>
        </div>
      </div>
    </div>
  </body>
</html>
@@ -0,0 +1,59 @@
/*
 * Styles for the HTML diff pages.
 * NOTE(review): the .diff ul/li/ins/del selectors look like they target the
 * markup produced by the diffy gem's HTML output - confirm against the differ.
 */

/* Diff container and its list of lines, laid out as a full-width table */
.diff {
  overflow: auto;
}

.diff ul {
  background: #fff;
  display: table;
  font-size: 13px;
  list-style: none;
  margin: 0;
  overflow: auto;
  padding: 0;
  width: 100%;
}

/* Changes are shown by background colour only, not underline/strike-through */
.diff del,
.diff ins {
  display: block;
  text-decoration: none;
}

/* Each list item is one table row */
.diff li {
  display: table-row;
  height: 1em;
  margin: 0;
  padding: 0;
}

.example {
  padding: 10px;
}

/* Side colours: .left shares its colour with deleted lines */
.left {
  background: #ddefff;
}

.right {
  background: #af8ed0;
}

/* Inserted and deleted lines */
.diff li.ins {
  background: #d8b6fa;
}

.diff li.del {
  background: #ddefff;
}

/* Monospaced text that keeps its whitespace but still wraps */
.diff del,
.diff ins,
.diff span {
  font-family: courier;
  white-space: pre-wrap;
}

/* Comment rows are hidden; block-info rows get a gray background */
.diff li.diff-comment {
  display: none;
}

.diff li.diff-block-info {
  background: gray;
}