sitediff 0.0.2 → 1.1.1
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- checksums.yaml +7 -0
- data/bin/sitediff +9 -3
- data/lib/sitediff.rb +153 -79
- data/lib/sitediff/api.rb +265 -0
- data/lib/sitediff/cache.rb +110 -47
- data/lib/sitediff/cli.rb +219 -165
- data/lib/sitediff/config.rb +439 -58
- data/lib/sitediff/config/creator.rb +93 -99
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +108 -72
- data/lib/sitediff/diff.rb +60 -12
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +62 -41
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +59 -23
- data/lib/sitediff/sanitize.rb +222 -150
- data/lib/sitediff/sanitize/dom_transform.rb +111 -73
- data/lib/sitediff/sanitize/regexp.rb +69 -43
- data/lib/sitediff/uriwrapper.rb +104 -34
- data/lib/sitediff/webserver.rb +89 -77
- data/lib/sitediff/webserver/resultserver.rb +113 -77
- metadata +92 -76
- data/lib/sitediff/files/html_report.html.erb +0 -63
- data/lib/sitediff/files/rules/drupal.yaml +0 -33
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/files/html_report.html.erb
DELETED

| @@ -1,63 +0,0 @@ | |
| 1 | 
            -
            <!DOCTYPE html>
         | 
| 2 | 
            -
            <html>
         | 
| 3 | 
            -
              <head>
         | 
| 4 | 
            -
                <!-- important: otherwise chrome will choke on non-ascii characters -->
         | 
| 5 | 
            -
                <meta charset="utf-8" />
         | 
| 6 | 
            -
                <style>
         | 
| 7 | 
            -
                  <%= SiteDiff::Diff.css %>
         | 
| 8 | 
            -
                </style>
         | 
| 9 | 
            -
                <title> SiteDiff Report </title>
         | 
| 10 | 
            -
              </head>
         | 
| 11 | 
            -
              <body>
         | 
| 12 | 
            -
                <div class="sitediff">
         | 
| 13 | 
            -
                  <div class="legend">
         | 
| 14 | 
            -
                    <%
         | 
| 15 | 
            -
                       tags = %w[before after]
         | 
| 16 | 
            -
                       tags.each do |tag| %>
         | 
| 17 | 
            -
                         <% if tags.first != tag %> | <% end %>
         | 
| 18 | 
            -
                         <% notes = ['base url']
         | 
| 19 | 
            -
                            notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
         | 
| 20 | 
            -
                         <strong><%= tag %></strong> (<%= notes.join(', ') %>):
         | 
| 21 | 
            -
                           <a href="<%= eval(tag) %>"><%= eval(tag) %></a>
         | 
| 22 | 
            -
                    <% end %>
         | 
| 23 | 
            -
                  </div>
         | 
| 24 | 
            -
                  <table class="results">
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                    <colgroup>
         | 
| 27 | 
            -
                      <col class="before-col">
         | 
| 28 | 
            -
                      <col class="after-col">
         | 
| 29 | 
            -
                      <col class="both-col">
         | 
| 30 | 
            -
                      <col class="path-col">
         | 
| 31 | 
            -
                      <col class="diff-stat-col">
         | 
| 32 | 
            -
                    </colgroup>
         | 
| 33 | 
            -
             | 
| 34 | 
            -
                    <thead>
         | 
| 35 | 
            -
                      <tr>
         | 
| 36 | 
            -
                        <th> Before </th>
         | 
| 37 | 
            -
                        <th> After </th>
         | 
| 38 | 
            -
                        <th> Both </th>
         | 
| 39 | 
            -
                        <th> Path </th>
         | 
| 40 | 
            -
                        <th> Status </th>
         | 
| 41 | 
            -
                      </tr>
         | 
| 42 | 
            -
                    </thead>
         | 
| 43 | 
            -
             | 
| 44 | 
            -
                    <% results.each do |result| %>
         | 
| 45 | 
            -
                    <tr class="<%= result.status_text %>">
         | 
| 46 | 
            -
                      <td class="before">
         | 
| 47 | 
            -
                        <a href="<%= result.url(:before, before, cache) %>">[before]</a>
         | 
| 48 | 
            -
                      </td>
         | 
| 49 | 
            -
                      <td class="after">
         | 
| 50 | 
            -
                        <a href="<%= result.url(:after, after, cache) %>">[after]</a>
         | 
| 51 | 
            -
                      </td>
         | 
| 52 | 
            -
                      <td class="both">
         | 
| 53 | 
            -
                        <a href="/sidebyside<%= result.path %>">[both]</a>
         | 
| 54 | 
            -
                      </td>
         | 
| 55 | 
            -
                      <td class="path"><%= result.path %></td>
         | 
| 56 | 
            -
                      <td class="status"><%= result.link %></td>
         | 
| 57 | 
            -
                    </tr>
         | 
| 58 | 
            -
                    <% end %>
         | 
| 59 | 
            -
             | 
| 60 | 
            -
                  </table>
         | 
| 61 | 
            -
                </div>
         | 
| 62 | 
            -
              </body>
         | 
| 63 | 
            -
            </html>
         | 
data/lib/sitediff/files/rules/drupal.yaml
DELETED

| @@ -1,33 +0,0 @@ | |
| 1 | 
            -
            sanitization:
         | 
| 2 | 
            -
            - title: Strip Drupal.settings
         | 
| 3 | 
            -
              selector: script
         | 
| 4 | 
            -
              pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
         | 
| 5 | 
            -
            - title: Strip form build ID
         | 
| 6 | 
            -
              selector: input
         | 
| 7 | 
            -
              pattern: 'name="form_build_id" value="form-[-\w]{43}"'
         | 
| 8 | 
            -
              substitution: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
         | 
| 9 | 
            -
            - title: Strip view DOM ID
         | 
| 10 | 
            -
              pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
         | 
| 11 | 
            -
              substitution: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
         | 
| 12 | 
            -
            - title: Strip CSS aggregation filenames
         | 
| 13 | 
            -
              selector: link[rel=stylesheet]
         | 
| 14 | 
            -
              pattern: '(href="[^"]*/files/css/css_)[-\w]{43}\.css"'
         | 
| 15 | 
            -
              substitution: '\1DRUPAL_AGGREGATED_CSS.css"'
         | 
| 16 | 
            -
            - title: Strip JS aggregation filenames
         | 
| 17 | 
            -
              selector: script
         | 
| 18 | 
            -
              pattern: '(src="[^"]*/files/js/js_)[-\w]{43}\.js"'
         | 
| 19 | 
            -
              substitution: '\1DRUPAL_AGGREGATED_JS.js"'
         | 
| 20 | 
            -
            - title: Strip CSS/JS cache IDs
         | 
| 21 | 
            -
              selector: style, script
         | 
| 22 | 
            -
              pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
         | 
| 23 | 
            -
              substitution: '\1'
         | 
| 24 | 
            -
            - title: Strip IE CSS/JS cache IDs
         | 
| 25 | 
            -
              pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
         | 
| 26 | 
            -
              substitution: '\1'
         | 
| 27 | 
            -
            - title: Strip Drupal JS version tags
         | 
| 28 | 
            -
              selector: script
         | 
| 29 | 
            -
              pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
         | 
| 30 | 
            -
              substitution: '\1'
         | 
| 31 | 
            -
            - title: Strip domain names from absolute URLs
         | 
| 32 | 
            -
              pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
         | 
| 33 | 
            -
              substitute: '__domain__'
         | 
    
        data/lib/sitediff/rules.rb
    DELETED
    
    | @@ -1,65 +0,0 @@ | |
| 1 | 
            -
            require 'sitediff/sanitize/regexp'
         | 
| 2 | 
            -
            require 'pathname'
         | 
| 3 | 
            -
            require 'set'
         | 
| 4 | 
            -
             | 
| 5 | 
            -
            class SiteDiff
         | 
| 6 | 
            -
            # Find appropriate rules for a given site
         | 
| 7 | 
            -
            class Rules
         | 
| 8 | 
            -
              def initialize(config, disabled = false)
         | 
| 9 | 
            -
                @disabled = disabled
         | 
| 10 | 
            -
                @config = config
         | 
| 11 | 
            -
                find_sanitization_candidates
         | 
| 12 | 
            -
                @rules = Hash.new { |h, k| h[k] = Set.new }
         | 
| 13 | 
            -
              end
         | 
| 14 | 
            -
             | 
| 15 | 
            -
              def find_sanitization_candidates
         | 
| 16 | 
            -
                @candidates = Set.new
         | 
| 17 | 
            -
             | 
| 18 | 
            -
                rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
         | 
| 19 | 
            -
                rules_dir.children.each do |f|
         | 
| 20 | 
            -
                  next unless f.file? && f.extname == '.yaml'
         | 
| 21 | 
            -
                  conf = YAML.load_file(f)
         | 
| 22 | 
            -
                  @candidates.merge(conf['sanitization'])
         | 
| 23 | 
            -
                end
         | 
| 24 | 
            -
              end
         | 
| 25 | 
            -
             | 
| 26 | 
            -
              def handle_page(tag, html, doc)
         | 
| 27 | 
            -
                found = find_rules(html, doc)
         | 
| 28 | 
            -
                @rules[tag].merge(found)
         | 
| 29 | 
            -
              end
         | 
| 30 | 
            -
             | 
| 31 | 
            -
              # Yield a set of rules that seem reasonable for this HTML
         | 
| 32 | 
            -
              # assumption: the YAML file is a list of regexp rules only
         | 
| 33 | 
            -
              def find_rules(html, doc)
         | 
| 34 | 
            -
                rules = []
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                return @candidates.select do |rule|
         | 
| 37 | 
            -
                  re = SiteDiff::Sanitizer::Regexp.create(rule)
         | 
| 38 | 
            -
                  re.applies?(html, doc)
         | 
| 39 | 
            -
                end
         | 
| 40 | 
            -
              end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
              # Find all rules from all rulesets that apply for all pages
         | 
| 43 | 
            -
              def add_config
         | 
| 44 | 
            -
                have_both = @rules.include?(:before)
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                r1, r2 = *@rules.values_at(:before, :after)
         | 
| 47 | 
            -
                if have_both
         | 
| 48 | 
            -
                  add_section('before', r1 - r2)
         | 
| 49 | 
            -
                  add_section('after', r2 - r1)
         | 
| 50 | 
            -
                  add_section(nil, r1 & r2)
         | 
| 51 | 
            -
                else
         | 
| 52 | 
            -
                  add_section(nil, r2)
         | 
| 53 | 
            -
                end
         | 
| 54 | 
            -
              end
         | 
| 55 | 
            -
             | 
| 56 | 
            -
              def add_section(name, rules)
         | 
| 57 | 
            -
                return if rules.empty?
         | 
| 58 | 
            -
                conf = name ? @config[name] : @config
         | 
| 59 | 
            -
                if @disabled
         | 
| 60 | 
            -
                  rules.each { |r| r['disabled'] = true }
         | 
| 61 | 
            -
                end
         | 
| 62 | 
            -
                conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
         | 
| 63 | 
            -
              end
         | 
| 64 | 
            -
            end
         | 
| 65 | 
            -
            end
         |