sitediff 0.0.6 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/sitediff +9 -2
- data/lib/sitediff.rb +126 -81
- data/lib/sitediff/cache.rb +35 -6
- data/lib/sitediff/cli.rb +254 -119
- data/lib/sitediff/config.rb +362 -29
- data/lib/sitediff/config/creator.rb +53 -71
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +11 -15
- data/lib/sitediff/diff.rb +28 -9
- data/lib/sitediff/fetch.rb +9 -2
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +226 -30
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +47 -19
- data/lib/sitediff/sanitize.rb +29 -8
- data/lib/sitediff/sanitize/dom_transform.rb +45 -6
- data/lib/sitediff/sanitize/regexp.rb +23 -2
- data/lib/sitediff/uriwrapper.rb +56 -15
- data/lib/sitediff/webserver.rb +12 -3
- data/lib/sitediff/webserver/resultserver.rb +28 -33
- metadata +33 -16
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/files/html_report.html.erb
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<!-- important: otherwise chrome will choke on non-ascii characters -->
|
5
|
-
<meta charset="utf-8" />
|
6
|
-
<style>
|
7
|
-
<%= SiteDiff::Diff.css %>
|
8
|
-
</style>
|
9
|
-
<title> SiteDiff Report </title>
|
10
|
-
</head>
|
11
|
-
<body>
|
12
|
-
<div class="sitediff">
|
13
|
-
<div class="legend">
|
14
|
-
<%
|
15
|
-
tags = %w[before after]
|
16
|
-
tags.each do |tag| %>
|
17
|
-
<% if tags.first != tag %> | <% end %>
|
18
|
-
<% notes = ['base url']
|
19
|
-
notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
|
20
|
-
<strong><%= tag %></strong> (<%= notes.join(', ') %>):
|
21
|
-
<a href="<%= eval(tag) %>"><%= eval(tag) %></a>
|
22
|
-
<% end %>
|
23
|
-
</div>
|
24
|
-
<div class="run">
|
25
|
-
<a href="../run/diff">Rerun diff</a>
|
26
|
-
</div>
|
27
|
-
<table class="results">
|
28
|
-
|
29
|
-
<colgroup>
|
30
|
-
<col class="before-col">
|
31
|
-
<col class="after-col">
|
32
|
-
<col class="both-col">
|
33
|
-
<col class="path-col">
|
34
|
-
<col class="diff-stat-col">
|
35
|
-
</colgroup>
|
36
|
-
|
37
|
-
<thead>
|
38
|
-
<tr>
|
39
|
-
<th> Before </th>
|
40
|
-
<th> After </th>
|
41
|
-
<th> Both </th>
|
42
|
-
<th> Path </th>
|
43
|
-
<th> Status </th>
|
44
|
-
</tr>
|
45
|
-
</thead>
|
46
|
-
|
47
|
-
<% results.each do |result| %>
|
48
|
-
<tr class="<%= result.status_text %>">
|
49
|
-
<td class="before">
|
50
|
-
<a href="<%= result.url(:before, before, cache) %>">[before]</a>
|
51
|
-
</td>
|
52
|
-
<td class="after">
|
53
|
-
<a href="<%= result.url(:after, after, cache) %>">[after]</a>
|
54
|
-
</td>
|
55
|
-
<td class="both">
|
56
|
-
<a href="/sidebyside<%= result.path %>">[both]</a>
|
57
|
-
</td>
|
58
|
-
<td class="path"><%= result.path %></td>
|
59
|
-
<td class="status"><%= result.link %></td>
|
60
|
-
</tr>
|
61
|
-
<% end %>
|
62
|
-
|
63
|
-
</table>
|
64
|
-
</div>
|
65
|
-
</body>
|
66
|
-
</html>
|
data/lib/sitediff/files/rules/drupal.yaml
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
sanitization:
|
2
|
-
- title: Strip Drupal.settings
|
3
|
-
selector: script
|
4
|
-
pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
|
5
|
-
- title: Strip IE CSS/JS cache IDs
|
6
|
-
pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
|
7
|
-
substitute: '\1'
|
8
|
-
- title: Strip form build ID
|
9
|
-
selector: input
|
10
|
-
pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
|
11
|
-
substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
|
12
|
-
- title: Strip view DOM ID
|
13
|
-
pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
|
14
|
-
substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
|
15
|
-
- title: Strip CSS aggregation filenames
|
16
|
-
selector: link[rel=stylesheet]
|
17
|
-
pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
|
18
|
-
substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
|
19
|
-
- title: Strip JS aggregation filenames
|
20
|
-
selector: script
|
21
|
-
pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
|
22
|
-
substitute: '\1DRUPAL_AGGREGATED_JS.js"'
|
23
|
-
- title: Strip CSS/JS cache IDs
|
24
|
-
selector: style, script
|
25
|
-
pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
|
26
|
-
substitute: '\1'
|
27
|
-
- title: Strip Drupal JS version tags
|
28
|
-
selector: script
|
29
|
-
pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
|
30
|
-
substitute: '\1'
|
31
|
-
- title: Strip domain names from absolute URLs
|
32
|
-
pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
|
33
|
-
substitute: '__domain__'
|
34
|
-
- title: Strip form build ID
|
35
|
-
selector: input
|
36
|
-
pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
|
37
|
-
substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
|
38
|
-
- title: Strip form build ID 2
|
39
|
-
selector: input
|
40
|
-
pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
|
41
|
-
substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
|
42
|
-
- title: Strip Drupal CSS link queries
|
43
|
-
selector: link
|
44
|
-
pattern: '\.css\?(\w*)'
|
45
|
-
substitute: '\.css'
|
46
|
-
- title: Strip Drupal JS link queries
|
47
|
-
selector: script
|
48
|
-
pattern: '\.js\?(\w*)'
|
49
|
-
substitute: '\.js'
|
50
|
-
- title: Strip Drupal View-DOM ID
|
51
|
-
pattern: 'view-dom-id-\w*'
|
52
|
-
substitute: 'view-dom-id-_ID_'
|
53
|
-
- title: Strip Drupal View-DOM ID 2
|
54
|
-
pattern: '(views?_dom_id"?:"?)\w*'
|
55
|
-
substitute: '\1_ID_'
|
56
|
-
- title: Ignore Drupal CSS file names
|
57
|
-
selector: link
|
58
|
-
pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
|
59
|
-
substitute: 'css__ID__.css'
|
60
|
-
- title: Ignore Drupal JS file names
|
61
|
-
selector: script
|
62
|
-
pattern: 'js_[-\w]{40,43}\\?\.js'
|
63
|
-
substitute: 'js__ID__.js'
|
data/lib/sitediff/rules.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'sitediff/sanitize/regexp'
|
4
|
-
require 'pathname'
|
5
|
-
require 'set'
|
6
|
-
|
7
|
-
class SiteDiff
|
8
|
-
# Find appropriate rules for a given site
|
9
|
-
class Rules
|
10
|
-
def initialize(config, disabled = false)
|
11
|
-
@disabled = disabled
|
12
|
-
@config = config
|
13
|
-
find_sanitization_candidates
|
14
|
-
@rules = Hash.new { |h, k| h[k] = Set.new }
|
15
|
-
end
|
16
|
-
|
17
|
-
def find_sanitization_candidates
|
18
|
-
@candidates = Set.new
|
19
|
-
|
20
|
-
rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
|
21
|
-
rules_dir.children.each do |f|
|
22
|
-
next unless f.file? && f.extname == '.yaml'
|
23
|
-
|
24
|
-
conf = YAML.load_file(f)
|
25
|
-
@candidates.merge(conf['sanitization'])
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def handle_page(tag, html, doc)
|
30
|
-
found = find_rules(html, doc)
|
31
|
-
@rules[tag].merge(found)
|
32
|
-
end
|
33
|
-
|
34
|
-
# Yield a set of rules that seem reasonable for this HTML
|
35
|
-
# assumption: the YAML file is a list of regexp rules only
|
36
|
-
def find_rules(html, doc)
|
37
|
-
@candidates.select do |rule|
|
38
|
-
re = SiteDiff::Sanitizer::Regexp.create(rule)
|
39
|
-
re.applies?(html, doc)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
# Find all rules from all rulesets that apply for all pages
|
44
|
-
def add_config
|
45
|
-
have_both = @rules.include?(:before)
|
46
|
-
|
47
|
-
r1, r2 = *@rules.values_at(:before, :after)
|
48
|
-
if have_both
|
49
|
-
add_section('before', r1 - r2)
|
50
|
-
add_section('after', r2 - r1)
|
51
|
-
add_section(nil, r1 & r2)
|
52
|
-
else
|
53
|
-
add_section(nil, r2)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def add_section(name, rules)
|
58
|
-
return if rules.empty?
|
59
|
-
|
60
|
-
conf = name ? @config[name] : @config
|
61
|
-
rules.each { |r| r['disabled'] = true } if @disabled
|
62
|
-
conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|