RubyGems - sitediff - Versions diffs - 0.0.6 → 1.0.0 - Mend

sitediff 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

checksums.yaml +5 -5
data/bin/sitediff +9 -2
data/lib/sitediff.rb +126 -81
data/lib/sitediff/cache.rb +35 -6
data/lib/sitediff/cli.rb +254 -119
data/lib/sitediff/config.rb +362 -29
data/lib/sitediff/config/creator.rb +53 -71
data/lib/sitediff/config/preset.rb +75 -0
data/lib/sitediff/crawler.rb +11 -15
data/lib/sitediff/diff.rb +28 -9
data/lib/sitediff/fetch.rb +9 -2
data/lib/sitediff/files/diff.html.erb +20 -2
data/lib/sitediff/files/jquery.min.js +2 -0
data/lib/sitediff/files/normalize.css +349 -0
data/lib/sitediff/files/report.html.erb +144 -0
data/lib/sitediff/files/sidebyside.html.erb +5 -2
data/lib/sitediff/files/sitediff.css +226 -30
data/lib/sitediff/files/sitediff.js +176 -0
data/lib/sitediff/report.rb +238 -0
data/lib/sitediff/result.rb +47 -19
data/lib/sitediff/sanitize.rb +29 -8
data/lib/sitediff/sanitize/dom_transform.rb +45 -6
data/lib/sitediff/sanitize/regexp.rb +23 -2
data/lib/sitediff/uriwrapper.rb +56 -15
data/lib/sitediff/webserver.rb +12 -3
data/lib/sitediff/webserver/resultserver.rb +28 -33
metadata +33 -16
data/lib/sitediff/files/html_report.html.erb +0 -66
data/lib/sitediff/files/rules/drupal.yaml +0 -63
data/lib/sitediff/rules.rb +0 -65

data/lib/sitediff/files/html_report.html.erb DELETED

@@ -1,66 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <!-- important: otherwise chrome will choke on non-ascii characters -->
-    <meta charset="utf-8" />
-    <style>
-      <%= SiteDiff::Diff.css %>
-    </style>
-    <title> SiteDiff Report </title>
-  </head>
-  <body>
-    <div class="sitediff">
-      <div class="legend">
-        <%
-           tags = %w[before after]
-           tags.each do |tag| %>
-             <% if tags.first != tag %> | <% end %>
-             <% notes = ['base url']
-                notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
-             <strong><%= tag %></strong> (<%= notes.join(', ') %>):
-               <a href="<%= eval(tag) %>"><%= eval(tag) %></a>
-        <% end %>
-      </div>
-      <div class="run">
-        <a href="../run/diff">Rerun diff</a>
-      </div>
-      <table class="results">
-        <colgroup>
-          <col class="before-col">
-          <col class="after-col">
-          <col class="both-col">
-          <col class="path-col">
-          <col class="diff-stat-col">
-        </colgroup>
-        <thead>
-          <tr>
-            <th> Before </th>
-            <th> After </th>
-            <th> Both </th>
-            <th> Path </th>
-            <th> Status </th>
-          </tr>
-        </thead>
-        <% results.each do |result| %>
-        <tr class="<%= result.status_text %>">
-          <td class="before">
-            <a href="<%= result.url(:before, before, cache) %>">[before]</a>
-          </td>
-          <td class="after">
-            <a href="<%= result.url(:after, after, cache) %>">[after]</a>
-          </td>
-          <td class="both">
-            <a href="/sidebyside<%= result.path %>">[both]</a>
-          </td>
-          <td class="path"><%= result.path %></td>
-          <td class="status"><%= result.link %></td>
-        </tr>
-        <% end %>
-      </table>
-    </div>
-  </body>
-</html>

data/lib/sitediff/files/rules/drupal.yaml DELETED

@@ -1,63 +0,0 @@
-sanitization:
-- title: Strip Drupal.settings
-  selector: script
-  pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
-- title: Strip IE CSS/JS cache IDs
-  pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
-  substitute: '\1'
-- title: Strip form build ID
-  selector: input
-  pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
-  substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
-- title: Strip view DOM ID
-  pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
-  substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
-- title: Strip CSS aggregation filenames
-  selector: link[rel=stylesheet]
-  pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
-  substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
-- title: Strip JS aggregation filenames
-  selector: script
-  pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
-  substitute: '\1DRUPAL_AGGREGATED_JS.js"'
-- title: Strip CSS/JS cache IDs
-  selector: style, script
-  pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
-  substitute: '\1'
-- title: Strip Drupal JS version tags
-  selector: script
-  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
-  substitute: '\1'
-- title: Strip domain names from absolute URLs
-  pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
-  substitute: '__domain__'
-- title: Strip form build ID
-  selector: input
-  pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
-  substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
-- title: Strip form build ID 2
-  selector: input
-  pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
-  substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
-- title: Strip Drupal CSS link queries
-  selector: link
-  pattern: '\.css\?(\w*)'
-  substitute: '\.css'
-- title: Strip Drupal JS link queries
-  selector: script
-  pattern: '\.js\?(\w*)'
-  substitute: '\.js'
-- title: Strip Drupal View-DOM ID
-  pattern: 'view-dom-id-\w*'
-  substitute: 'view-dom-id-_ID_'
-- title: Strip Drupal View-DOM ID 2
-  pattern: '(views?_dom_id"?:"?)\w*'
-  substitute: '\1_ID_'
-- title: Ignore Drupal CSS file names
-  selector: link
-  pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
-  substitute: 'css__ID__.css'
-- title: Ignore Drupal JS file names
-  selector: script
-  pattern: 'js_[-\w]{40,43}\\?\.js'
-  substitute: 'js__ID__.js'

data/lib/sitediff/rules.rb DELETED

@@ -1,65 +0,0 @@
-# frozen_string_literal: true
-require 'sitediff/sanitize/regexp'
-require 'pathname'
-require 'set'
-class SiteDiff
-  # Find appropriate rules for a given site
-  class Rules
-    def initialize(config, disabled = false)
-      @disabled = disabled
-      @config = config
-      find_sanitization_candidates
-      @rules = Hash.new { |h, k| h[k] = Set.new }
-    end
-    def find_sanitization_candidates
-      @candidates = Set.new
-      rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
-      rules_dir.children.each do |f|
-        next unless f.file? && f.extname == '.yaml'
-        conf = YAML.load_file(f)
-        @candidates.merge(conf['sanitization'])
-      end
-    end
-    def handle_page(tag, html, doc)
-      found = find_rules(html, doc)
-      @rules[tag].merge(found)
-    end
-    # Yield a set of rules that seem reasonable for this HTML
-    # assumption: the YAML file is a list of regexp rules only
-    def find_rules(html, doc)
-      @candidates.select do |rule|
-        re = SiteDiff::Sanitizer::Regexp.create(rule)
-        re.applies?(html, doc)
-      end
-    end
-    # Find all rules from all rulesets that apply for all pages
-    def add_config
-      have_both = @rules.include?(:before)
-      r1, r2 = *@rules.values_at(:before, :after)
-      if have_both
-        add_section('before', r1 - r2)
-        add_section('after', r2 - r1)
-        add_section(nil, r1 & r2)
-      else
-        add_section(nil, r2)
-      end
-    end
-    def add_section(name, rules)
-      return if rules.empty?
-      conf = name ? @config[name] : @config
-      rules.each { |r| r['disabled'] = true } if @disabled
-      conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
-    end
-  end
-end