sitediff 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sitediff/diff.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sitediff'
2
4
  require 'diffy'
3
5
  require 'erb'
@@ -9,26 +11,26 @@ class SiteDiff
9
11
 
10
12
  def html_diffy(before_html, after_html)
11
13
  diff = Diffy::Diff.new(before_html, after_html)
12
- diff.first ? # Is it non-empty?
14
+ diff.first ? # Is it non-empty?
13
15
  diff.to_s(:html) : nil
14
16
  end
15
17
 
16
18
  def terminal_diffy(before_html, after_html)
17
19
  args = []
18
20
  args << :color if Rainbow.enabled
19
- return Diffy::Diff.new(before_html, after_html, :context => 3).
20
- to_s(*args)
21
+ Diffy::Diff.new(before_html, after_html, context: 3)
22
+ .to_s(*args)
21
23
  end
22
24
 
23
25
  def generate_html_report(results, before, after, cache)
24
26
  erb_path = File.join(SiteDiff::FILES_DIR, 'html_report.html.erb')
25
27
  report_html = ERB.new(File.read(erb_path)).result(binding)
26
- return report_html
28
+ report_html
27
29
  end
28
30
 
29
31
  def generate_diff_output(result)
30
32
  erb_path = File.join(SiteDiff::FILES_DIR, 'diff.html.erb')
31
- return ERB.new(File.read(erb_path)).result(binding)
33
+ ERB.new(File.read(erb_path)).result(binding)
32
34
  end
33
35
 
34
36
  def css
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteDiff
2
- class SiteDiffException < Exception; end
4
+ class SiteDiffException < RuntimeError; end
3
5
  end
@@ -1,55 +1,61 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sitediff/uriwrapper'
2
4
  require 'typhoeus'
3
5
 
4
6
  class SiteDiff
5
- class Fetch
6
- # Cache is a cache object, see sitediff/cache
7
- # Paths is a list of sub-paths
8
- # Tags is a hash of tag names => base URLs.
9
- def initialize(cache, paths, tags)
10
- @cache = cache
11
- @paths = paths
12
- @tags = tags
13
- end
7
+ class Fetch
8
+ # Cache is a cache object, see sitediff/cache
9
+ # Paths is a list of sub-paths
10
+ # Tags is a hash of tag names => base URLs.
11
+ def initialize(cache, paths, concurrency = 3, curl_opts = nil, **tags)
12
+ @cache = cache
13
+ @paths = paths
14
+ @tags = tags
15
+ @curl_opts = curl_opts || UriWrapper::DEFAULT_CURL_OPTS
16
+ @concurrency = concurrency
17
+ end
14
18
 
15
- # Fetch all the paths, once per tag.
16
- # When a path has been fetched for every tag, block will be called with the
17
- # path, and a hash of tag => UriWrapper::ReadResult objects.
18
- def run(&block)
19
- @callback = block
20
- @hydra = Typhoeus::Hydra.new(max_concurrency: 3)
21
- @paths.each { |path| queue_path(path) }
22
- @hydra.run
23
- end
19
+ # Fetch all the paths, once per tag.
20
+ # When a path has been fetched for every tag, block will be called with the
21
+ # path, and a hash of tag => UriWrapper::ReadResult objects.
22
+ def run(&block)
23
+ @callback = block
24
+ @hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)
25
+ @paths.each { |path| queue_path(path) }
26
+ @hydra.run
27
+ end
28
+
29
+ private
24
30
 
25
- private
26
- # Queue a path for fetching
27
- def queue_path(path)
28
- results = {}
29
-
30
- @tags.each do |tag, base|
31
- if res = @cache.get(tag, path)
32
- results[tag] = res
33
- process_results(path, results)
34
- elsif !base
35
- # We only have the cache, but this item isn't cached!
36
- results[tag] = UriWrapper::ReadResult.error("Not cached")
37
- process_results(path, results)
38
- else
39
- uri = UriWrapper.new(base + path)
40
- uri.queue(@hydra) do |res|
41
- @cache.set(tag, path, res)
31
+ # Queue a path for fetching
32
+ def queue_path(path)
33
+ results = {}
34
+
35
+ @tags.each do |tag, base|
36
+ if (res = @cache.get(tag, path))
42
37
  results[tag] = res
43
38
  process_results(path, results)
39
+ elsif !base
40
+ # We only have the cache, but this item isn't cached!
41
+ results[tag] = UriWrapper::ReadResult.error('Not cached')
42
+ process_results(path, results)
43
+ else
44
+ uri = UriWrapper.new(base + path, @curl_opts)
45
+ uri.queue(@hydra) do |resl|
46
+ @cache.set(tag, path, resl)
47
+ results[tag] = resl
48
+ process_results(path, results)
49
+ end
44
50
  end
45
51
  end
46
52
  end
47
- end
48
53
 
49
- # Process fetch results
50
- def process_results(path, results)
51
- return unless results.size == @tags.size
52
- @callback[path, results]
54
+ # Process fetch results
55
+ def process_results(path, results)
56
+ return unless results.size == @tags.size
57
+
58
+ @callback[path, results]
59
+ end
53
60
  end
54
61
  end
55
- end
@@ -21,6 +21,9 @@
21
21
  <a href="<%= eval(tag) %>"><%= eval(tag) %></a>
22
22
  <% end %>
23
23
  </div>
24
+ <div class="run">
25
+ <a href="../run/diff">Rerun diff</a>
26
+ </div>
24
27
  <table class="results">
25
28
 
26
29
  <colgroup>
@@ -2,28 +2,28 @@ sanitization:
2
2
  - title: Strip Drupal.settings
3
3
  selector: script
4
4
  pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
5
+ - title: Strip IE CSS/JS cache IDs
6
+ pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
7
+ substitute: '\1'
5
8
  - title: Strip form build ID
6
9
  selector: input
7
- pattern: 'name="form_build_id" value="form-[-\w]{43}"'
10
+ pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
8
11
  substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
9
12
  - title: Strip view DOM ID
10
13
  pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
11
14
  substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
12
15
  - title: Strip CSS aggregation filenames
13
16
  selector: link[rel=stylesheet]
14
- pattern: '(href="[^"]*/files/css/css_)[-\w]{43}\.css"'
17
+ pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
15
18
  substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
16
19
  - title: Strip JS aggregation filenames
17
20
  selector: script
18
- pattern: '(src="[^"]*/files/js/js_)[-\w]{43}\.js"'
21
+ pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
19
22
  substitute: '\1DRUPAL_AGGREGATED_JS.js"'
20
23
  - title: Strip CSS/JS cache IDs
21
24
  selector: style, script
22
25
  pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
23
26
  substitute: '\1'
24
- - title: Strip IE CSS/JS cache IDs
25
- pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
26
- substitute: '\1'
27
27
  - title: Strip Drupal JS version tags
28
28
  selector: script
29
29
  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
@@ -31,3 +31,33 @@ sanitization:
31
31
  - title: Strip domain names from absolute URLs
32
32
  pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
33
33
  substitute: '__domain__'
34
+ - title: Strip form build ID
35
+ selector: input
36
+ pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
37
+ substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
38
+ - title: Strip form build ID 2
39
+ selector: input
40
+ pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
41
+ substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
42
+ - title: Strip Drupal CSS link queries
43
+ selector: link
44
+ pattern: '\.css\?(\w*)'
45
+ substitute: '\.css'
46
+ - title: Strip Drupal JS link queries
47
+ selector: script
48
+ pattern: '\.js\?(\w*)'
49
+ substitute: '\.js'
50
+ - title: Strip Drupal View-DOM ID
51
+ pattern: 'view-dom-id-\w*'
52
+ substitute: 'view-dom-id-_ID_'
53
+ - title: Strip Drupal View-DOM ID 2
54
+ pattern: '(views?_dom_id"?:"?)\w*'
55
+ substitute: '\1_ID_'
56
+ - title: Ignore Drupal CSS file names
57
+ selector: link
58
+ pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
59
+ substitute: 'css__ID__.css'
60
+ - title: Ignore Drupal JS file names
61
+ selector: script
62
+ pattern: 'js_[-\w]{40,43}\\?\.js'
63
+ substitute: 'js__ID__.js'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sitediff'
2
4
  require 'sitediff/diff'
3
5
  require 'digest/sha1'
@@ -8,7 +10,7 @@ class SiteDiff
8
10
  STATUS_SUCCESS = 0 # Identical before and after
9
11
  STATUS_FAILURE = 1 # Different before and after
10
12
  STATUS_ERROR = 2 # Couldn't fetch page
11
- STATUS_TEXT = %w[success failure error]
13
+ STATUS_TEXT = %w[success failure error].freeze
12
14
 
13
15
  attr_reader :status, :diff
14
16
 
@@ -17,7 +19,7 @@ class SiteDiff
17
19
  if error
18
20
  @status = STATUS_ERROR
19
21
  else
20
- @diff = Diff::html_diffy(before, after)
22
+ @diff = Diff.html_diffy(before, after)
21
23
  @status = @diff ? STATUS_FAILURE : STATUS_SUCCESS
22
24
  end
23
25
  end
@@ -28,7 +30,7 @@ class SiteDiff
28
30
 
29
31
  # Textual representation of the status
30
32
  def status_text
31
- return STATUS_TEXT[status]
33
+ STATUS_TEXT[status]
32
34
  end
33
35
 
34
36
  # Printable URL
@@ -39,7 +41,7 @@ class SiteDiff
39
41
 
40
42
  # Filename to store diff
41
43
  def filename
42
- File.join(SiteDiff::DIFFS_DIR, Digest::SHA1.hexdigest(self.path) + '.html')
44
+ File.join(SiteDiff::DIFFS_DIR, Digest::SHA1.hexdigest(path) + '.html')
43
45
  end
44
46
 
45
47
  # Text of the link in the HTML report
@@ -52,15 +54,15 @@ class SiteDiff
52
54
  end
53
55
 
54
56
  # Log the result to the terminal
55
- def log(verbose=true)
57
+ def log(verbose = true)
56
58
  case status
57
59
  when STATUS_SUCCESS then
58
- SiteDiff::log path, :diff_success, 'SUCCESS'
60
+ SiteDiff.log path, :diff_success, 'UNCHANGED'
59
61
  when STATUS_ERROR then
60
- SiteDiff::log path, :warn, "ERROR (#{error})"
62
+ SiteDiff.log path, :warn, "ERROR (#{error})"
61
63
  when STATUS_FAILURE then
62
- SiteDiff::log path, :diff_failure, "FAILURE"
63
- puts Diff::terminal_diffy(before, after) if verbose
64
+ SiteDiff.log path, :diff_failure, 'CHANGED'
65
+ puts Diff.terminal_diffy(before, after) if verbose
64
66
  end
65
67
  end
66
68
 
@@ -68,9 +70,9 @@ class SiteDiff
68
70
  def dump(dir)
69
71
  dump_path = File.join(dir, filename)
70
72
  base = File.dirname(dump_path)
71
- FileUtils::mkdir_p(base) unless File.exists?(base)
73
+ FileUtils.mkdir_p(base) unless File.exist?(base)
72
74
  File.open(dump_path, 'w') do |f|
73
- f.write(Diff::generate_diff_output(self))
75
+ f.write(Diff.generate_diff_output(self))
74
76
  end
75
77
  end
76
78
  end
@@ -1,65 +1,65 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sitediff/sanitize/regexp'
2
4
  require 'pathname'
3
5
  require 'set'
4
6
 
5
7
  class SiteDiff
6
- # Find appropriate rules for a given site
7
- class Rules
8
- def initialize(config, disabled = false)
9
- @disabled = disabled
10
- @config = config
11
- find_sanitization_candidates
12
- @rules = Hash.new { |h, k| h[k] = Set.new }
13
- end
8
+ # Find appropriate rules for a given site
9
+ class Rules
10
+ def initialize(config, disabled = false)
11
+ @disabled = disabled
12
+ @config = config
13
+ find_sanitization_candidates
14
+ @rules = Hash.new { |h, k| h[k] = Set.new }
15
+ end
14
16
 
15
- def find_sanitization_candidates
16
- @candidates = Set.new
17
+ def find_sanitization_candidates
18
+ @candidates = Set.new
17
19
 
18
- rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
19
- rules_dir.children.each do |f|
20
- next unless f.file? && f.extname == '.yaml'
21
- conf = YAML.load_file(f)
22
- @candidates.merge(conf['sanitization'])
23
- end
24
- end
20
+ rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
21
+ rules_dir.children.each do |f|
22
+ next unless f.file? && f.extname == '.yaml'
25
23
 
26
- def handle_page(tag, html, doc)
27
- found = find_rules(html, doc)
28
- @rules[tag].merge(found)
29
- end
24
+ conf = YAML.load_file(f)
25
+ @candidates.merge(conf['sanitization'])
26
+ end
27
+ end
30
28
 
31
- # Yield a set of rules that seem reasonable for this HTML
32
- # assumption: the YAML file is a list of regexp rules only
33
- def find_rules(html, doc)
34
- rules = []
29
+ def handle_page(tag, html, doc)
30
+ found = find_rules(html, doc)
31
+ @rules[tag].merge(found)
32
+ end
35
33
 
36
- return @candidates.select do |rule|
37
- re = SiteDiff::Sanitizer::Regexp.create(rule)
38
- re.applies?(html, doc)
34
+ # Return the candidate rules whose regexp applies to this HTML
35
+ # assumption: the YAML file is a list of regexp rules only
36
+ def find_rules(html, doc)
37
+ @candidates.select do |rule|
38
+ re = SiteDiff::Sanitizer::Regexp.create(rule)
39
+ re.applies?(html, doc)
40
+ end
39
41
  end
40
- end
41
42
 
42
- # Find all rules from all rulesets that apply for all pages
43
- def add_config
44
- have_both = @rules.include?(:before)
43
+ # Find all rules from all rulesets that apply for all pages
44
+ def add_config
45
+ have_both = @rules.include?(:before)
45
46
 
46
- r1, r2 = *@rules.values_at(:before, :after)
47
- if have_both
48
- add_section('before', r1 - r2)
49
- add_section('after', r2 - r1)
50
- add_section(nil, r1 & r2)
51
- else
52
- add_section(nil, r2)
47
+ r1, r2 = *@rules.values_at(:before, :after)
48
+ if have_both
49
+ add_section('before', r1 - r2)
50
+ add_section('after', r2 - r1)
51
+ add_section(nil, r1 & r2)
52
+ else
53
+ add_section(nil, r2)
54
+ end
53
55
  end
54
- end
55
56
 
56
- def add_section(name, rules)
57
- return if rules.empty?
58
- conf = name ? @config[name] : @config
59
- if @disabled
60
- rules.each { |r| r['disabled'] = true }
57
+ def add_section(name, rules)
58
+ return if rules.empty?
59
+
60
+ conf = name ? @config[name] : @config
61
+ rules.each { |r| r['disabled'] = true } if @disabled
62
+ conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
61
63
  end
62
- conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
63
64
  end
64
65
  end
65
- end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'sitediff'
2
4
  require 'sitediff/exception'
3
5
  require 'sitediff/sanitize/dom_transform'
@@ -6,183 +8,176 @@ require 'nokogiri'
6
8
  require 'set'
7
9
 
8
10
  class SiteDiff
9
- class Sanitizer
10
- class InvalidSanitization < SiteDiffException; end
11
-
12
- TOOLS = {
13
- :array => %w[dom_transform sanitization],
14
- :scalar => %w[selector remove_spacing],
15
- }
16
- DOM_TRANSFORMS = Set.new(%w[remove unwrap_root unwrap remove_class])
17
-
18
- def initialize(html, config, opts = {})
19
- @html = html
20
- @config = config
21
- @opts = opts
22
- end
23
-
24
- def sanitize
25
- return '' if @html == '' # Quick return on empty input
11
+ class Sanitizer
12
+ class InvalidSanitization < SiteDiffException; end
13
+
14
+ TOOLS = {
15
+ array: %w[dom_transform sanitization],
16
+ scalar: %w[selector remove_spacing]
17
+ }.freeze
18
+ DOM_TRANSFORMS = Set.new(%w[remove unwrap_root unwrap remove_class])
19
+
20
+ def initialize(html, config, opts = {})
21
+ @html = html
22
+ @config = config
23
+ @opts = opts
24
+ end
26
25
 
27
- @node, @html = Sanitizer.domify(@html), nil
26
+ def sanitize
27
+ return '' if @html == '' # Quick return on empty input
28
28
 
29
- remove_spacing
30
- selector
31
- dom_transforms
32
- regexps
29
+ @node = Sanitizer.domify(@html)
30
+ @html = nil
33
31
 
34
- return @html || Sanitizer.prettify(@node)
35
- end
32
+ remove_spacing
33
+ selector
34
+ dom_transforms
35
+ regexps
36
36
 
37
- # Return whether or not we want to keep a rule
38
- def want_rule(rule)
39
- return false unless rule
40
- return false if rule['disabled']
37
+ @html || Sanitizer.prettify(@node)
38
+ end
41
39
 
42
- # Filter out if path regexp doesn't match
43
- if (pathre = rule['path']) and (path = @opts[:path])
44
- return ::Regexp.new(pathre).match(path)
45
- end
40
+ # Return whether or not we want to keep a rule
41
+ def want_rule(rule)
42
+ return false unless rule
43
+ return false if rule['disabled']
46
44
 
47
- return true
48
- end
45
+ # Filter out if path regexp doesn't match
46
+ if (pathre = rule['path']) && (path = @opts[:path])
47
+ return ::Regexp.new(pathre).match(path)
48
+ end
49
49
 
50
- # Canonicalize a simple rule, eg: 'remove_spacing' or 'selector'.
51
- # It may be a simple value, or a hash, or an array of hashes.
52
- # Turn it into an array of hashes.
53
- def canonicalize_rule(name)
54
- rules = @config[name] or return nil
55
-
56
- if rules[0] && rules[0].respond_to?(:[]) && rules[0]['value']
57
- # Already an array
58
- elsif rules['value']
59
- # Hash, put it in an array
60
- rules = [rules]
61
- else
62
- # Scalar, put it in a hash
63
- rules = [{ 'value' => rules }]
64
- end
50
+ true
51
+ end
65
52
 
66
- want = rules.select { |r| want_rule(r) }
67
- return nil if want.empty?
68
- raise "Too many matching rules of type #{name}" if want.size > 1
69
- return want.first
70
- end
53
+ # Canonicalize a simple rule, eg: 'remove_spacing' or 'selector'.
54
+ # It may be a simple value, or a hash, or an array of hashes.
55
+ # Turn it into an array of hashes.
56
+ def canonicalize_rule(name)
57
+ (rules = @config[name]) || (return nil)
58
+
59
+ if rules[0]&.respond_to?(:[]) && rules[0]['value']
60
+ # Already an array
61
+ elsif rules['value']
62
+ # Hash, put it in an array
63
+ rules = [rules]
64
+ else
65
+ # Scalar, put it in a hash
66
+ rules = [{ 'value' => rules }]
67
+ end
68
+
69
+ want = rules.select { |r| want_rule(r) }
70
+ return nil if want.empty?
71
+ raise "Too many matching rules of type #{name}" if want.size > 1
72
+
73
+ want.first
74
+ end
71
75
 
72
- # Perform 'remove_spacing' action
73
- def remove_spacing
74
- rule = canonicalize_rule('remove_spacing') or return
75
- Sanitizer.remove_node_spacing(@node) if rule['value']
76
- end
76
+ # Perform 'remove_spacing' action
77
+ def remove_spacing
78
+ (rule = canonicalize_rule('remove_spacing')) || return
79
+ Sanitizer.remove_node_spacing(@node) if rule['value']
80
+ end
77
81
 
78
- # Perform 'selector' action, to choose a new root
79
- def selector
80
- rule = canonicalize_rule('selector') or return
81
- @node = Sanitizer.select_fragments(@node, rule['value'])
82
- end
82
+ # Perform 'selector' action, to choose a new root
83
+ def selector
84
+ (rule = canonicalize_rule('selector')) || return
85
+ @node = Sanitizer.select_fragments(@node, rule['value'])
86
+ end
83
87
 
84
- # Applies regexps. Also
85
- def regexps
86
- rules = @config['sanitization'] or return
87
- rules = rules.select { |r| want_rule(r) }
88
+ # Applies regexps. Also prettifies the node into HTML so that global (non-selector) rules can run on the text.
89
+ def regexps
90
+ (rules = @config['sanitization']) || return
91
+ rules = rules.select { |r| want_rule(r) }
88
92
 
89
- rules.map! { |r| Regexp.create(r) }
90
- selector, global = rules.partition { |r| r.selector? }
93
+ rules.map! { |r| Regexp.create(r) }
94
+ selector, global = rules.partition(&:selector?)
91
95
 
92
- selector.each { |r| r.apply(@node) }
93
- @html, @node = Sanitizer.prettify(@node), nil
94
- global.each { |r| r.apply(@html) }
95
- end
96
+ selector.each { |r| r.apply(@node) }
97
+ @html = Sanitizer.prettify(@node)
98
+ @node = nil
99
+ global.each { |r| r.apply(@html) }
100
+ end
96
101
 
97
- # Perform DOM transforms
98
- def dom_transforms
99
- rules = @config['dom_transform'] or return
100
- rules = rules.select { |r| want_rule(r) }
102
+ # Perform DOM transforms
103
+ def dom_transforms
104
+ (rules = @config['dom_transform']) || return
105
+ rules = rules.select { |r| want_rule(r) }
101
106
 
102
- rules.each do |rule|
103
- transform = DomTransform.create(rule)
104
- transform.apply(@node)
105
- end
106
- end
107
+ rules.each do |rule|
108
+ transform = DomTransform.create(rule)
109
+ transform.apply(@node)
110
+ end
111
+ end
107
112
 
108
- ##### Implementations of actions #####
113
+ ##### Implementations of actions #####
109
114
 
110
- # Remove double-spacing inside text nodes
111
- def self.remove_node_spacing(node)
112
- # remove double spacing, but only inside text nodes (eg not attributes)
113
- node.xpath('//text()').each do |el|
114
- el.content = el.content.gsub(/ +/, ' ')
115
- end
116
- end
115
+ # Remove double-spacing inside text nodes
116
+ def self.remove_node_spacing(node)
117
+ # remove double spacing, but only inside text nodes (eg not attributes)
118
+ node.xpath('//text()').each do |el|
119
+ el.content = el.content.gsub(/ +/, ' ')
120
+ end
121
+ end
117
122
 
118
- # Get a fragment consisting of the elements matching the selector(s)
119
- def self.select_fragments(node, sel)
120
- # When we choose a new root, we always become a DocumentFragment,
121
- # and lose any DOCTYPE and such.
122
- ns = node.css(sel)
123
- unless node.fragment?
124
- node = Nokogiri::HTML.fragment('')
125
- end
126
- node.children = ns
127
- return node
128
- end
123
+ # Get a fragment consisting of the elements matching the selector(s)
124
+ def self.select_fragments(node, sel)
125
+ # When we choose a new root, we always become a DocumentFragment,
126
+ # and lose any DOCTYPE and such.
127
+ ns = node.css(sel)
128
+ node = Nokogiri::HTML.fragment('') unless node.fragment?
129
+ node.children = ns
130
+ node
131
+ end
129
132
 
130
- # Pretty-print some HTML
131
- def self.prettify(obj)
132
- @stylesheet ||= begin
133
- stylesheet_path = File.join(SiteDiff::FILES_DIR, 'pretty_print.xsl')
134
- Nokogiri::XSLT(File.read(stylesheet_path))
135
- end
133
+ # Pretty-print some HTML
134
+ def self.prettify(obj)
135
+ @stylesheet ||= begin
136
+ stylesheet_path = File.join(SiteDiff::FILES_DIR, 'pretty_print.xsl')
137
+ Nokogiri::XSLT(File.read(stylesheet_path))
138
+ end
136
139
 
137
- # Pull out the html element's children
138
- # The obvious way to do this is to iterate over pretty.css('html'),
139
- # but that tends to segfault Nokogiri
140
- str = @stylesheet.apply_to(to_document(obj))
140
+ # Pull out the html element's children
141
+ # The obvious way to do this is to iterate over pretty.css('html'),
142
+ # but that tends to segfault Nokogiri
143
+ str = @stylesheet.apply_to(to_document(obj))
141
144
 
142
- # There's a lot of cruft left over,that we don't want
145
+ # There's a lot of cruft left over that we don't want
143
146
 
144
- # Remove xml declaration and <html> tags
145
- str.sub!(/\A<\?xml.*$\n/, '')
146
- str.sub!(/\A^<html>$\n/, '')
147
- str.sub!(%r[</html>\n\Z], '')
147
+ # Remove xml declaration and <html> tags
148
+ str.sub!(/\A<\?xml.*$\n/, '')
149
+ str.sub!(/\A^<html>$\n/, '')
150
+ str.sub!(%r{</html>\n\Z}, '')
148
151
 
149
- # Remove top-level indentation
150
- indent = /\A(\s*)/.match(str)[1].size
151
- str.gsub!(/^\s{,#{indent}}/, '')
152
+ # Remove top-level indentation
153
+ indent = /\A(\s*)/.match(str)[1].size
154
+ str.gsub!(/^\s{,#{indent}}/, '')
152
155
 
153
- # Remove blank lines
154
- str.gsub!(/^\s*$\n/, '')
156
+ # Remove blank lines
157
+ str.gsub!(/^\s*$\n/, '')
155
158
 
156
- return str
157
- end
159
+ str
160
+ end
158
161
 
159
- # Parse HTML into a node
160
- def self.domify(str, force_doc = false)
161
- if force_doc || /<!DOCTYPE/.match(str[0, 512])
162
- return Nokogiri::HTML(str)
163
- else
164
- return Nokogiri::HTML.fragment(str)
165
- end
166
- end
162
+ # Parse HTML into a node
163
+ def self.domify(str, force_doc = false)
164
+ if force_doc || /<!DOCTYPE/.match(str[0, 512])
165
+ Nokogiri::HTML(str)
166
+ else
167
+ Nokogiri::HTML.fragment(str)
168
+ end
169
+ end
167
170
 
168
- # Force this object to be a document, so we can apply a stylesheet
169
- def self.to_document(obj)
170
- if Nokogiri::XML::Document === obj
171
- return obj
172
- elsif Nokogiri::XML::Node === obj # node or fragment
173
- return domify(obj.to_s, true)
174
-
175
- # This ought to work, and would be faster,
176
- # but seems to segfault Nokogiri
177
- if false
178
- doc = Nokogiri::HTML('<html><body>')
179
- doc.at('body').children = obj.children
180
- return doc
171
+ # Force this object to be a document, so we can apply a stylesheet
172
+ def self.to_document(obj)
173
+ if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
174
+ obj
175
+ # node or fragment
176
+ elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
177
+ domify(obj.to_s, true)
178
+ else
179
+ to_document(domify(obj, false))
180
+ end
181
181
  end
182
- else
183
- return to_document(domify(obj))
184
182
  end
185
183
  end
186
-
187
- end
188
- end