sitediff 0.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/bin/sitediff +9 -2
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +276 -0
- data/lib/sitediff/cache.rb +57 -8
- data/lib/sitediff/cli.rb +156 -176
- data/lib/sitediff/config/creator.rb +61 -77
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/config.rb +436 -31
- data/lib/sitediff/crawler.rb +27 -21
- data/lib/sitediff/diff.rb +32 -9
- data/lib/sitediff/fetch.rb +10 -3
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +50 -20
- data/lib/sitediff/sanitize/dom_transform.rb +47 -8
- data/lib/sitediff/sanitize/regexp.rb +24 -3
- data/lib/sitediff/sanitize.rb +81 -12
- data/lib/sitediff/uriwrapper.rb +65 -23
- data/lib/sitediff/webserver/resultserver.rb +30 -33
- data/lib/sitediff/webserver.rb +15 -3
- data/lib/sitediff.rb +130 -83
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +91 -29
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
    
        data/lib/sitediff.rb
    CHANGED
    
    | @@ -2,63 +2,85 @@ | |
| 2 2 | 
             
            # frozen_string_literal: true
         | 
| 3 3 |  | 
| 4 4 | 
             
            require 'sitediff/config'
         | 
| 5 | 
            +
            require 'sitediff/diff'
         | 
| 5 6 | 
             
            require 'sitediff/fetch'
         | 
| 6 7 | 
             
            require 'sitediff/result'
         | 
| 8 | 
            +
            require 'sitediff/report'
         | 
| 7 9 | 
             
            require 'pathname'
         | 
| 8 10 | 
             
            require 'rainbow'
         | 
| 11 | 
            +
            require 'rubygems'
         | 
| 9 12 | 
             
            require 'yaml'
         | 
| 10 13 |  | 
| 14 | 
            +
            # SiteDiff Object.
         | 
| 11 15 | 
             
            class SiteDiff
         | 
| 12 | 
            -
               | 
| 16 | 
            +
              attr_reader :config, :results
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              # SiteDiff installation directory.
         | 
| 19 | 
            +
              ROOT_DIR = File.dirname(File.dirname(__FILE__))
         | 
| 20 | 
            +
             | 
| 21 | 
            +
              # Path to misc files. Ex: *.erb, *.css.
         | 
| 13 22 | 
             
              FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
         | 
| 14 23 |  | 
| 15 | 
            -
              #  | 
| 16 | 
            -
               | 
| 17 | 
            -
             | 
| 18 | 
            -
              #  | 
| 19 | 
            -
               | 
| 20 | 
            -
               | 
| 21 | 
            -
               | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
                bg = fg = nil
         | 
| 28 | 
            -
                case type
         | 
| 29 | 
            -
                when :info
         | 
| 30 | 
            -
                  bg = fg = nil
         | 
| 31 | 
            -
                when :diff_success
         | 
| 32 | 
            -
                  bg = :green
         | 
| 24 | 
            +
              # Logs a message.
         | 
| 25 | 
            +
              #
         | 
| 26 | 
            +
              # Label will be colorized and message will not.
         | 
| 27 | 
            +
              # Type dictates the color: can be :info, :success, :error, or :warning.
         | 
| 28 | 
            +
              #
         | 
| 29 | 
            +
              # TODO: Only print :debug messages in debug mode.
         | 
| 30 | 
            +
              def self.log(message, type = :info, label = nil)
         | 
| 31 | 
            +
                # Prepare label.
         | 
| 32 | 
            +
                label ||= type unless type == :info
         | 
| 33 | 
            +
                label = label.to_s
         | 
| 34 | 
            +
                unless label.empty?
         | 
| 35 | 
            +
                  # Colorize label.
         | 
| 33 36 | 
             
                  fg = :black
         | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
                   | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 37 | 
            +
                  bg = :blue
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                  case type
         | 
| 40 | 
            +
                  when :info
         | 
| 41 | 
            +
                    bg = :cyan
         | 
| 42 | 
            +
                  when :success
         | 
| 43 | 
            +
                    bg = :green
         | 
| 44 | 
            +
                  when :error
         | 
| 45 | 
            +
                    bg = :red
         | 
| 46 | 
            +
                  when :warning
         | 
| 47 | 
            +
                    bg = :yellow
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                  label = "[#{label}]"
         | 
| 51 | 
            +
                  label = Rainbow(label)
         | 
| 52 | 
            +
                  label = label.bg(bg) if bg
         | 
| 53 | 
            +
                  label = label.fg(fg) if fg
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                  # Add a space after the label.
         | 
| 56 | 
            +
                  label += ' '
         | 
| 41 57 | 
             
                end
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                 | 
| 44 | 
            -
                label = label.fg(fg) if fg
         | 
| 45 | 
            -
                puts label + ' ' + str
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                puts label + message
         | 
| 46 60 | 
             
              end
         | 
| 47 61 |  | 
| 48 | 
            -
               | 
| 62 | 
            +
              ##
         | 
| 63 | 
            +
              # Returns the "before" site's URL.
         | 
| 64 | 
            +
              #
         | 
| 65 | 
            +
              # TODO: Remove in favor of config.before_url.
         | 
| 49 66 | 
             
              def before
         | 
| 50 67 | 
             
                @config.before['url']
         | 
| 51 68 | 
             
              end
         | 
| 52 69 |  | 
| 70 | 
            +
              ##
         | 
| 71 | 
            +
              # Returns the "after" site's URL.
         | 
| 72 | 
            +
              #
         | 
| 73 | 
            +
              # TODO: Remove in favor of config.after_url.
         | 
| 53 74 | 
             
              def after
         | 
| 54 75 | 
             
                @config.after['url']
         | 
| 55 76 | 
             
              end
         | 
| 56 77 |  | 
| 57 | 
            -
               | 
| 78 | 
            +
              # Initialize SiteDiff.
         | 
| 79 | 
            +
              def initialize(config, cache, verbose: true, debug: false)
         | 
| 58 80 | 
             
                @cache = cache
         | 
| 59 81 | 
             
                @verbose = verbose
         | 
| 60 82 | 
             
                @debug = debug
         | 
| 61 | 
            -
             | 
| 83 | 
            +
             | 
| 62 84 | 
             
                # Check for single-site mode
         | 
| 63 85 | 
             
                validate_opts = {}
         | 
| 64 86 | 
             
                if !config.before['url'] && @cache.tag?(:before)
         | 
| @@ -69,37 +91,50 @@ class SiteDiff | |
| 69 91 | 
             
                  validate_opts[:need_before] = false
         | 
| 70 92 | 
             
                end
         | 
| 71 93 | 
             
                config.validate(validate_opts)
         | 
| 72 | 
            -
             | 
| 73 | 
            -
                 | 
| 94 | 
            +
                # Configure diff.
         | 
| 95 | 
            +
                Diff.diff_config(config)
         | 
| 74 96 | 
             
                @config = config
         | 
| 75 97 | 
             
              end
         | 
| 76 98 |  | 
| 77 | 
            -
              # Sanitize HTML
         | 
| 78 | 
            -
              def sanitize( | 
| 99 | 
            +
              # Sanitize HTML.
         | 
| 100 | 
            +
              def sanitize(path_passed, read_results)
         | 
| 79 101 | 
             
                %i[before after].map do |tag|
         | 
| 80 102 | 
             
                  html = read_results[tag].content
         | 
| 103 | 
            +
                  # TODO: See why encoding is empty while running tests.
         | 
| 104 | 
            +
                  #
         | 
| 105 | 
            +
                  # The presence of an "encoding" value was previously used to determine
         | 
| 106 | 
            +
                  # if the sanitizer would be called. However, encoding turns up blank
         | 
| 107 | 
            +
                  # during rspec tests for some reason.
         | 
| 81 108 | 
             
                  encoding = read_results[tag].encoding
         | 
| 82 | 
            -
                  if encoding
         | 
| 83 | 
            -
                     | 
| 84 | 
            -
                     | 
| 109 | 
            +
                  if encoding || html.length.positive?
         | 
| 110 | 
            +
                    section = @config.send(tag, apply_preset: true)
         | 
| 111 | 
            +
                    opts = { path: path_passed }
         | 
| 112 | 
            +
                    opts[:output] = @config.output if @config.output
         | 
| 113 | 
            +
                    Sanitizer.new(html, section, opts).sanitize
         | 
| 85 114 | 
             
                  else
         | 
| 86 115 | 
             
                    html
         | 
| 87 116 | 
             
                  end
         | 
| 88 117 | 
             
                end
         | 
| 89 118 | 
             
              end
         | 
| 90 119 |  | 
| 91 | 
            -
               | 
| 120 | 
            +
              ##
         | 
| 121 | 
            +
              # Process a set of read results.
         | 
| 122 | 
            +
              #
         | 
| 123 | 
            +
              # This is the callback that processes items fetched by the Fetcher.
         | 
| 92 124 | 
             
              def process_results(path, read_results)
         | 
| 93 | 
            -
                 | 
| 125 | 
            +
                error = (read_results[:before].error || read_results[:after].error)
         | 
| 126 | 
            +
                if error
         | 
| 94 127 | 
             
                  diff = Result.new(path, nil, nil, nil, nil, error)
         | 
| 95 128 | 
             
                else
         | 
| 96 129 | 
             
                  begin
         | 
| 97 | 
            -
                    diff = Result.new( | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 130 | 
            +
                    diff = Result.new(
         | 
| 131 | 
            +
                      path,
         | 
| 132 | 
            +
                      *sanitize(path, read_results),
         | 
| 133 | 
            +
                      read_results[:before].encoding,
         | 
| 134 | 
            +
                      read_results[:after].encoding,
         | 
| 135 | 
            +
                      nil
         | 
| 136 | 
            +
                    )
         | 
| 137 | 
            +
                  rescue StandardError => e
         | 
| 103 138 | 
             
                    raise if @debug
         | 
| 104 139 |  | 
| 105 140 | 
             
                    Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
         | 
| @@ -109,65 +144,77 @@ class SiteDiff | |
| 109 144 |  | 
| 110 145 | 
             
                # Print results in order!
         | 
| 111 146 | 
             
                while (next_diff = @results[@ordered.first])
         | 
| 112 | 
            -
                  next_diff.log(@verbose)
         | 
| 147 | 
            +
                  next_diff.log(verbose: @verbose)
         | 
| 113 148 | 
             
                  @ordered.shift
         | 
| 114 149 | 
             
                end
         | 
| 115 150 | 
             
              end
         | 
| 116 151 |  | 
| 117 | 
            -
               | 
| 118 | 
            -
              #  | 
| 119 | 
            -
               | 
| 152 | 
            +
              ##
         | 
| 153 | 
            +
              # Compute diff as per config.
         | 
| 154 | 
            +
              #
         | 
| 155 | 
            +
              # @return [Integer]
         | 
| 156 | 
            +
              #   Number of paths which have diffs.
         | 
| 157 | 
            +
              def run
         | 
| 120 158 | 
             
                # Map of path -> Result object, populated by process_results
         | 
| 121 159 | 
             
                @results = {}
         | 
| 122 160 | 
             
                @ordered = @config.paths.dup
         | 
| 123 161 |  | 
| 124 162 | 
             
                unless @cache.read_tags.empty?
         | 
| 125 | 
            -
                  SiteDiff.log( | 
| 126 | 
            -
                    @cache.read_tags.sort.join(', '))
         | 
| 163 | 
            +
                  SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
         | 
| 127 164 | 
             
                end
         | 
| 128 165 |  | 
| 129 166 | 
             
                # TODO: Fix this after config merge refactor!
         | 
| 130 167 | 
             
                # Not quite right. We are not passing @config.before or @config.after
         | 
| 131 168 | 
             
                # so passing this instead but @config.after['curl_opts'] is ignored.
         | 
| 169 | 
            +
                curl_opts = @config.setting :curl_opts
         | 
| 132 170 | 
             
                config_curl_opts = @config.before['curl_opts']
         | 
| 133 171 | 
             
                curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
         | 
| 134 | 
            -
                fetcher = Fetch.new( | 
| 135 | 
            -
             | 
| 172 | 
            +
                fetcher = Fetch.new(
         | 
| 173 | 
            +
                  @cache,
         | 
| 174 | 
            +
                  @config.paths,
         | 
| 175 | 
            +
                  @config.setting(:interval),
         | 
| 176 | 
            +
                  @config.setting(:concurrency),
         | 
| 177 | 
            +
                  curl_opts,
         | 
| 178 | 
            +
                  debug: @debug,
         | 
| 179 | 
            +
                  before: @config.before_url,
         | 
| 180 | 
            +
                  after: @config.after_url
         | 
| 181 | 
            +
                )
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                # Run the Fetcher with "process results" as a callback.
         | 
| 136 184 | 
             
                fetcher.run(&method(:process_results))
         | 
| 137 185 |  | 
| 138 186 | 
             
                # Order by original path order
         | 
| 139 | 
            -
                @results = @config.paths.map { | | 
| 187 | 
            +
                @results = @config.paths.map { |path| @results[path] }
         | 
| 140 188 | 
             
                results.map { |r| r unless r.success? }.compact.length
         | 
| 141 189 | 
             
              end
         | 
| 142 190 |  | 
| 143 | 
            -
               | 
| 144 | 
            -
               | 
| 145 | 
            -
             | 
| 146 | 
            -
                 | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
                # store diffs of each failing case, first wipe out existing diffs
         | 
| 151 | 
            -
                diff_dir = dir + DIFFS_DIR
         | 
| 152 | 
            -
                diff_dir.rmtree if diff_dir.exist?
         | 
| 153 | 
            -
                results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
         | 
| 154 | 
            -
                SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
         | 
| 155 | 
            -
             | 
| 156 | 
            -
                # store failing paths
         | 
| 157 | 
            -
                failures = dir + FAILURES_FILE
         | 
| 158 | 
            -
                SiteDiff.log "Writing failures to #{failures.expand_path}"
         | 
| 159 | 
            -
                failures.open('w') do |f|
         | 
| 160 | 
            -
                  results.each { |r| f.puts r.path unless r.success? }
         | 
| 191 | 
            +
              ##
         | 
| 192 | 
            +
              # Get a reporter object to help with report generation.
         | 
| 193 | 
            +
              def report
         | 
| 194 | 
            +
                if @results.nil?
         | 
| 195 | 
            +
                  raise SiteDiffException(
         | 
| 196 | 
            +
                    'No results detected. Run SiteDiff.run before SiteDiff.report.'
         | 
| 197 | 
            +
                  )
         | 
| 161 198 | 
             
                end
         | 
| 162 199 |  | 
| 163 | 
            -
                 | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 200 | 
            +
                Report.new(@config, @cache, @results)
         | 
| 201 | 
            +
              end
         | 
| 202 | 
            +
             | 
| 203 | 
            +
              ##
         | 
| 204 | 
            +
              # Get SiteDiff gemspec.
         | 
| 205 | 
            +
              def self.gemspec
         | 
| 206 | 
            +
                file = "#{ROOT_DIR}/sitediff.gemspec"
         | 
| 207 | 
            +
                Gem::Specification.load(file)
         | 
| 208 | 
            +
              end
         | 
| 167 209 |  | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 210 | 
            +
              ##
         | 
| 211 | 
            +
              # Ensures that a directory exists and returns a Pathname for it.
         | 
| 212 | 
            +
              #
         | 
| 213 | 
            +
              # @param [String] dir
         | 
| 214 | 
            +
              #   path/to/directory
         | 
| 215 | 
            +
              def self.ensure_dir(dir)
         | 
| 216 | 
            +
                dir = Pathname.new(dir) unless dir.is_a? Pathname
         | 
| 217 | 
            +
                dir.mkpath unless dir.directory?
         | 
| 218 | 
            +
                dir
         | 
| 172 219 | 
             
              end
         | 
| 173 220 | 
             
            end
         | 
| Binary file | 
| Binary file |