sitediff 0.0.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,66 +0,0 @@
1
<!DOCTYPE html>
<html>
  <head>
    <%# SiteDiff HTML report template.
        Reads these names from the rendering binding:
          before, after - base URLs of the two site versions
          cache         - object whose read_tags lists the tags served from cache
          results       - collection of per-path results (status_text, url, path, link) %>
    <!-- important: otherwise chrome will choke on non-ascii characters -->
    <meta charset="utf-8" />
    <style>
      <%= SiteDiff::Diff.css %>
    </style>
    <title> SiteDiff Report </title>
  </head>
  <body>
    <div class="sitediff">
      <div class="legend">
        <%
          # Explicit label => base URL lookup; avoids calling eval on the tag name.
          base_urls = { 'before' => before, 'after' => after }
          tags = base_urls.keys
          tags.each do |tag| %>
          <% if tags.first != tag %> | <% end %>
          <% notes = ['base url']
             notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
          <strong><%= tag %></strong> (<%= notes.join(', ') %>):
          <a href="<%= base_urls[tag] %>"><%= base_urls[tag] %></a>
        <% end %>
      </div>
      <div class="run">
        <a href="../run/diff">Rerun diff</a>
      </div>
      <table class="results">

        <colgroup>
          <col class="before-col">
          <col class="after-col">
          <col class="both-col">
          <col class="path-col">
          <col class="diff-stat-col">
        </colgroup>

        <thead>
          <tr>
            <th> Before </th>
            <th> After </th>
            <th> Both </th>
            <th> Path </th>
            <th> Status </th>
          </tr>
        </thead>

        <%# One row per compared path; the row class is the result's status text. %>
        <% results.each do |result| %>
          <tr class="<%= result.status_text %>">
            <td class="before">
              <a href="<%= result.url(:before, before, cache) %>">[before]</a>
            </td>
            <td class="after">
              <a href="<%= result.url(:after, after, cache) %>">[after]</a>
            </td>
            <td class="both">
              <a href="/sidebyside<%= result.path %>">[both]</a>
            </td>
            <td class="path"><%= result.path %></td>
            <td class="status"><%= result.link %></td>
          </tr>
        <% end %>

      </table>
    </div>
  </body>
</html>
@@ -1,63 +0,0 @@
1
# Candidate sanitization rules for Drupal sites.
#
# Each rule has:
#   title      - human-readable name of the rule
#   selector   - (optional) CSS selector restricting where the rule applies
#   pattern    - regular expression to look for
#   substitute - (optional) replacement text; \1 refers to the first capture group
sanitization:
  - title: Strip Drupal.settings
    selector: script
    pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
  - title: Strip IE CSS/JS cache IDs
    pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
    substitute: '\1'
  - title: Strip form build ID
    selector: input
    pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
    substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
  - title: Strip view DOM ID
    pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
    substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
  - title: Strip CSS aggregation filenames
    selector: link[rel=stylesheet]
    pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
    substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
  - title: Strip JS aggregation filenames
    selector: script
    pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
    substitute: '\1DRUPAL_AGGREGATED_JS.js"'
  - title: Strip CSS/JS cache IDs
    selector: style, script
    pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
    substitute: '\1'
  - title: Strip Drupal JS version tags
    selector: script
    # The "?" that starts the query string is escaped so it is matched
    # literally instead of making the capture group optional.
    pattern: '(src="[^"]*/misc/\w+\.js)\?v=\d+\.\d+"'
    substitute: '\1'
  - title: Strip domain names from absolute URLs
    pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
    substitute: '__domain__'
  - title: Strip form build ID
    selector: input
    pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
    substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
  # NOTE(review): same pattern and substitute as the first "Strip form build ID"
  # rule above; kept so existing references to this title still resolve.
  - title: Strip form build ID 2
    selector: input
    pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
    substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
  - title: Strip Drupal CSS link queries
    selector: link
    pattern: '\.css\?(\w*)'
    # Plain ".css": a backslash in the replacement text would be emitted
    # literally into the sanitized output.
    substitute: '.css'
  - title: Strip Drupal JS link queries
    selector: script
    pattern: '\.js\?(\w*)'
    # Plain ".js" for the same reason as the CSS rule above.
    substitute: '.js'
  - title: Strip Drupal View-DOM ID
    pattern: 'view-dom-id-\w*'
    substitute: 'view-dom-id-_ID_'
  - title: Strip Drupal View-DOM ID 2
    pattern: '(views?_dom_id"?:"?)\w*'
    substitute: '\1_ID_'
  - title: Ignore Drupal CSS file names
    selector: link
    # The optional backslash / %5C before ".css" is still tolerated.
    pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
    substitute: 'css__ID__.css'
  - title: Ignore Drupal JS file names
    selector: script
    pattern: 'js_[-\w]{40,43}\\?\.js'
    substitute: 'js__ID__.js'
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'sitediff/sanitize/regexp'
4
- require 'pathname'
5
- require 'set'
6
-
7
- class SiteDiff
8
- # Find appropriate rules for a given site
9
- class Rules
10
- def initialize(config, disabled = false)
11
- @disabled = disabled
12
- @config = config
13
- find_sanitization_candidates
14
- @rules = Hash.new { |h, k| h[k] = Set.new }
15
- end
16
-
17
- def find_sanitization_candidates
18
- @candidates = Set.new
19
-
20
- rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
21
- rules_dir.children.each do |f|
22
- next unless f.file? && f.extname == '.yaml'
23
-
24
- conf = YAML.load_file(f)
25
- @candidates.merge(conf['sanitization'])
26
- end
27
- end
28
-
29
- def handle_page(tag, html, doc)
30
- found = find_rules(html, doc)
31
- @rules[tag].merge(found)
32
- end
33
-
34
- # Yield a set of rules that seem reasonable for this HTML
35
- # assumption: the YAML file is a list of regexp rules only
36
- def find_rules(html, doc)
37
- @candidates.select do |rule|
38
- re = SiteDiff::Sanitizer::Regexp.create(rule)
39
- re.applies?(html, doc)
40
- end
41
- end
42
-
43
- # Find all rules from all rulesets that apply for all pages
44
- def add_config
45
- have_both = @rules.include?(:before)
46
-
47
- r1, r2 = *@rules.values_at(:before, :after)
48
- if have_both
49
- add_section('before', r1 - r2)
50
- add_section('after', r2 - r1)
51
- add_section(nil, r1 & r2)
52
- else
53
- add_section(nil, r2)
54
- end
55
- end
56
-
57
- def add_section(name, rules)
58
- return if rules.empty?
59
-
60
- conf = name ? @config[name] : @config
61
- rules.each { |r| r['disabled'] = true } if @disabled
62
- conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
63
- end
64
- end
65
- end