sitediff 0.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/sitediff +9 -3
- data/lib/sitediff.rb +153 -79
- data/lib/sitediff/api.rb +265 -0
- data/lib/sitediff/cache.rb +110 -47
- data/lib/sitediff/cli.rb +219 -165
- data/lib/sitediff/config.rb +439 -58
- data/lib/sitediff/config/creator.rb +93 -99
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +108 -72
- data/lib/sitediff/diff.rb +60 -12
- data/lib/sitediff/exception.rb +3 -1
- data/lib/sitediff/fetch.rb +62 -41
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +59 -23
- data/lib/sitediff/sanitize.rb +222 -150
- data/lib/sitediff/sanitize/dom_transform.rb +111 -73
- data/lib/sitediff/sanitize/regexp.rb +69 -43
- data/lib/sitediff/uriwrapper.rb +104 -34
- data/lib/sitediff/webserver.rb +89 -77
- data/lib/sitediff/webserver/resultserver.rb +113 -77
- metadata +92 -76
- data/lib/sitediff/files/html_report.html.erb +0 -63
- data/lib/sitediff/files/rules/drupal.yaml +0 -33
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff/files/html_report.html.erb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<!-- important: otherwise chrome will choke on non-ascii characters -->
|
5
|
-
<meta charset="utf-8" />
|
6
|
-
<style>
|
7
|
-
<%= SiteDiff::Diff.css %>
|
8
|
-
</style>
|
9
|
-
<title> SiteDiff Report </title>
|
10
|
-
</head>
|
11
|
-
<body>
|
12
|
-
<div class="sitediff">
|
13
|
-
<div class="legend">
|
14
|
-
<%
|
15
|
-
tags = %w[before after]
|
16
|
-
tags.each do |tag| %>
|
17
|
-
<% if tags.first != tag %> | <% end %>
|
18
|
-
<% notes = ['base url']
|
19
|
-
notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
|
20
|
-
<strong><%= tag %></strong> (<%= notes.join(', ') %>):
|
21
|
-
<a href="<%= eval(tag) %>"><%= eval(tag) %></a>
|
22
|
-
<% end %>
|
23
|
-
</div>
|
24
|
-
<table class="results">
|
25
|
-
|
26
|
-
<colgroup>
|
27
|
-
<col class="before-col">
|
28
|
-
<col class="after-col">
|
29
|
-
<col class="both-col">
|
30
|
-
<col class="path-col">
|
31
|
-
<col class="diff-stat-col">
|
32
|
-
</colgroup>
|
33
|
-
|
34
|
-
<thead>
|
35
|
-
<tr>
|
36
|
-
<th> Before </th>
|
37
|
-
<th> After </th>
|
38
|
-
<th> Both </th>
|
39
|
-
<th> Path </th>
|
40
|
-
<th> Status </th>
|
41
|
-
</tr>
|
42
|
-
</thead>
|
43
|
-
|
44
|
-
<% results.each do |result| %>
|
45
|
-
<tr class="<%= result.status_text %>">
|
46
|
-
<td class="before">
|
47
|
-
<a href="<%= result.url(:before, before, cache) %>">[before]</a>
|
48
|
-
</td>
|
49
|
-
<td class="after">
|
50
|
-
<a href="<%= result.url(:after, after, cache) %>">[after]</a>
|
51
|
-
</td>
|
52
|
-
<td class="both">
|
53
|
-
<a href="/sidebyside<%= result.path %>">[both]</a>
|
54
|
-
</td>
|
55
|
-
<td class="path"><%= result.path %></td>
|
56
|
-
<td class="status"><%= result.link %></td>
|
57
|
-
</tr>
|
58
|
-
<% end %>
|
59
|
-
|
60
|
-
</table>
|
61
|
-
</div>
|
62
|
-
</body>
|
63
|
-
</html>
|
data/lib/sitediff/files/rules/drupal.yaml
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
sanitization:
|
2
|
-
- title: Strip Drupal.settings
|
3
|
-
selector: script
|
4
|
-
pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
|
5
|
-
- title: Strip form build ID
|
6
|
-
selector: input
|
7
|
-
pattern: 'name="form_build_id" value="form-[-\w]{43}"'
|
8
|
-
substitution: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
|
9
|
-
- title: Strip view DOM ID
|
10
|
-
pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
|
11
|
-
substitution: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
|
12
|
-
- title: Strip CSS aggregation filenames
|
13
|
-
selector: link[rel=stylesheet]
|
14
|
-
pattern: '(href="[^"]*/files/css/css_)[-\w]{43}\.css"'
|
15
|
-
substitution: '\1DRUPAL_AGGREGATED_CSS.css"'
|
16
|
-
- title: Strip JS aggregation filenames
|
17
|
-
selector: script
|
18
|
-
pattern: '(src="[^"]*/files/js/js_)[-\w]{43}\.js"'
|
19
|
-
substitution: '\1DRUPAL_AGGREGATED_JS.js"'
|
20
|
-
- title: Strip CSS/JS cache IDs
|
21
|
-
selector: style, script
|
22
|
-
pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
|
23
|
-
substitution: '\1'
|
24
|
-
- title: Strip IE CSS/JS cache IDs
|
25
|
-
pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
|
26
|
-
substitution: '\1'
|
27
|
-
- title: Strip Drupal JS version tags
|
28
|
-
selector: script
|
29
|
-
pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
|
30
|
-
substitution: '\1'
|
31
|
-
- title: Strip domain names from absolute URLs
|
32
|
-
pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
|
33
|
-
substitute: '__domain__'
|
data/lib/sitediff/rules.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
require 'sitediff/sanitize/regexp'
|
2
|
-
require 'pathname'
|
3
|
-
require 'set'
|
4
|
-
|
5
|
-
class SiteDiff
|
6
|
-
# Find appropriate rules for a given site
|
7
|
-
class Rules
|
8
|
-
def initialize(config, disabled = false)
|
9
|
-
@disabled = disabled
|
10
|
-
@config = config
|
11
|
-
find_sanitization_candidates
|
12
|
-
@rules = Hash.new { |h, k| h[k] = Set.new }
|
13
|
-
end
|
14
|
-
|
15
|
-
def find_sanitization_candidates
|
16
|
-
@candidates = Set.new
|
17
|
-
|
18
|
-
rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
|
19
|
-
rules_dir.children.each do |f|
|
20
|
-
next unless f.file? && f.extname == '.yaml'
|
21
|
-
conf = YAML.load_file(f)
|
22
|
-
@candidates.merge(conf['sanitization'])
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def handle_page(tag, html, doc)
|
27
|
-
found = find_rules(html, doc)
|
28
|
-
@rules[tag].merge(found)
|
29
|
-
end
|
30
|
-
|
31
|
-
# Yield a set of rules that seem reasonable for this HTML
|
32
|
-
# assumption: the YAML file is a list of regexp rules only
|
33
|
-
def find_rules(html, doc)
|
34
|
-
rules = []
|
35
|
-
|
36
|
-
return @candidates.select do |rule|
|
37
|
-
re = SiteDiff::Sanitizer::Regexp.create(rule)
|
38
|
-
re.applies?(html, doc)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
# Find all rules from all rulesets that apply for all pages
|
43
|
-
def add_config
|
44
|
-
have_both = @rules.include?(:before)
|
45
|
-
|
46
|
-
r1, r2 = *@rules.values_at(:before, :after)
|
47
|
-
if have_both
|
48
|
-
add_section('before', r1 - r2)
|
49
|
-
add_section('after', r2 - r1)
|
50
|
-
add_section(nil, r1 & r2)
|
51
|
-
else
|
52
|
-
add_section(nil, r2)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def add_section(name, rules)
|
57
|
-
return if rules.empty?
|
58
|
-
conf = name ? @config[name] : @config
|
59
|
-
if @disabled
|
60
|
-
rules.each { |r| r['disabled'] = true }
|
61
|
-
end
|
62
|
-
conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|