sitediff 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require 'minitar'
6
+ require 'sitediff'
7
+ require 'sitediff/config'
8
+ require 'zlib'
9
+
10
+ class SiteDiff
11
+ ##
12
+ # SiteDiff Report Helper.
13
+ class Report
14
+ attr_reader :results, :cache
15
+
16
+ ##
17
+ # Directory where diffs will be generated.
18
+ DIFFS_DIR = 'diffs'
19
+
20
+ ##
21
+ # Name of file containing a list of pages with diffs.
22
+ FAILURES_FILE = 'failures.txt'
23
+
24
+ ##
25
+ # Name of file containing HTML report of diffs.
26
+ REPORT_FILE_HTML = 'report.html'
27
+
28
+ ##
29
+ # Name of file containing JSON report of diffs.
30
+ REPORT_FILE_JSON = 'report.json'
31
+
32
+ ##
33
+ # Name of file containing exported file archive.
34
+ REPORT_FILE_TAR = 'report.tgz'
35
+
36
+ ##
37
+ # Name of directory in which to build the portable report.
38
+ REPORT_BUILD_DIR = '_tmp_report'
39
+
40
+ ##
41
+ # Name of the portable report directory.
42
+ REPORT_DIR = 'report'
43
+
44
+ ##
45
+ # Path to settings used for report.
46
+ SETTINGS_FILE = 'settings.yaml'
47
+
48
##
# Builds a report helper around the outcome of a comparison run.
#
# @param [Config] config
#   Configuration object; the report generators read its before/after URLs
#   and export flag.
# @param [Cache] cache
#   Cache that is handed through to the HTML report generator.
# @param [Array] results
#   Result objects, one per compared path.
def initialize(config, cache, results)
  @config, @cache, @results = config, cache, results
end
59
+
60
##
# Generates an HTML report.
#
# Writes the per-path diff files and the failures list, renders the HTML
# report, stores the settings file next to it and finally either packages
# everything for export or logs where the report was written.
#
# @param [String] dir
#   The directory in which the report is to be generated.
# @param [String, nil] report_before
#   Label/URL for the "before" side; defaults to the configured before URL.
# @param [String, nil] report_after
#   Label/URL for the "after" side; defaults to the configured after URL.
def generate_html(dir, report_before = nil, report_after = nil)
  report_before ||= @config.before_url
  report_after ||= @config.after_url

  # The helpers below require a Pathname; ensure_dir presumably returns
  # one -- TODO confirm.
  dir = SiteDiff.ensure_dir dir

  write_diffs dir
  write_failures dir

  # Render the report body.
  html = Diff.generate_html(@results, report_before, report_after, @cache, @config.export)

  # Replace any previous report file with the freshly rendered one.
  target = dir + REPORT_FILE_HTML
  target.unlink if target.file?
  target.open('w') { |io| io.write(html) }

  write_settings dir, report_before, report_after

  return package_report(dir) if @config.export

  SiteDiff.log 'Report generated to ' + target.expand_path.to_s
end
100
+
101
##
# Generates a JSON report.
#
# The report holds the number of compared paths, the number of paths that
# are not a success, and one entry (path, status, message) per path.
#
# @param dir
#   The directory in which the report is to be generated.
def generate_json(dir)
  dir = SiteDiff.ensure_dir dir
  write_diffs dir
  write_failures dir

  # One entry per path; a path that occurs twice keeps its last result.
  per_path = @results.each_with_object({}) do |result, memo|
    memo[result.path] = {
      path: result.path,
      status: result.status,
      message: result.error
    }
  end
  summary = {
    paths_compared: @results.length,
    paths_diffs: @results.count { |result| !result.success? },
    paths: per_path
  }
  json = JSON(summary)

  # Replace any previous report file with the new one.
  target = dir + REPORT_FILE_JSON
  target.unlink if target.file?
  target.open('w') { |io| io.write(json) }

  write_settings dir

  SiteDiff.log 'Report generated to ' + target.expand_path.to_s
end
138
+
139
##
# Packages the HTML report and its diff files into a gzipped tar archive.
#
# The archive is assembled in a scratch directory below +dir+, moved into
# +dir+ and the scratch directory is removed afterwards.
#
# @param [Pathname] dir
#   Directory that already contains the generated HTML report.
def package_report(dir)
  # Start from an empty build directory.
  build_dir = dir + REPORT_BUILD_DIR
  build_dir.rmtree if build_dir.directory?
  build_dir.mkpath
  portable_dir = (build_dir + REPORT_DIR).tap(&:mkpath)
  assets_dir = (portable_dir + 'files').tap(&:mkpath)
  diffs_dir = dir + DIFFS_DIR

  # Move the report and (if present) the diffs into the portable layout.
  FileUtils.move(dir + REPORT_FILE_HTML, portable_dir)
  FileUtils.move(diffs_dir, assets_dir) if diffs_dir.directory?

  # Pack from inside the build directory so archive paths stay relative.
  Dir.chdir(build_dir) do
    # NOTE(review): relies on Minitar.pack closing the stream -- confirm.
    archive = Zlib::GzipWriter.new(File.open(REPORT_FILE_TAR, 'wb'))
    Minitar.pack(REPORT_DIR, archive)
  end
  FileUtils.move(build_dir + REPORT_FILE_TAR, dir)
  build_dir.rmtree
  SiteDiff.log 'Archived report generated to ' + dir.join(REPORT_FILE_TAR).to_s
end
167
+
168
##
# Creates diff files in a directory named "diffs".
#
# If "dir" is /foo/bar, then diffs will be placed in /foo/bar/diffs.
# An existing "diffs" directory is removed first, then every result whose
# status is Result::STATUS_FAILURE is dumped.
#
# @param [Pathname] dir
#   The directory in which a "diffs" directory is to be generated.
# @raise [ArgumentError]
#   If dir is not a Pathname.
def write_diffs(dir)
  # `raise Exception 'msg'` would call an undefined Exception() method and
  # surface as NoMethodError; raise a proper error class with the message.
  raise ArgumentError, 'dir must be a Pathname' unless dir.is_a? Pathname

  # Delete existing "diffs" dir, if exists.
  diff_dir = dir + DIFFS_DIR
  diff_dir.rmtree if diff_dir.exist?

  # Write diffs to the diff directory.
  @results.each { |r| r.dump(dir, @config.export) if r.status == Result::STATUS_FAILURE }
  SiteDiff.log "All diff files written to #{diff_dir.expand_path}" unless @config.export
end
186
+
187
##
# Writes paths with diffs into a file.
#
# One line is written for every result that is not a success.
#
# @param [Pathname] dir
#   The directory in which the report is to be generated.
# @raise [ArgumentError]
#   If dir is not a Pathname.
def write_failures(dir)
  # `raise Exception 'msg'` would call an undefined Exception() method and
  # surface as NoMethodError; raise a proper error class with the message.
  raise ArgumentError, 'dir must be a Pathname' unless dir.is_a? Pathname

  failures = dir + FAILURES_FILE
  SiteDiff.log "All failures written to #{failures.expand_path}"
  failures.open('w') do |f|
    @results.each { |r| f.puts r.path unless r.success? }
  end
end
201
+
202
##
# Creates report settings.yaml file.
#
# TODO: Find a way to avoid having to create this file.
#
# @param [Pathname] dir
#   The directory in which the report is to be generated.
# @param report_before
#   Value stored under the 'before' key (nil when omitted).
# @param report_after
#   Value stored under the 'after' key (nil when omitted).
# @raise [ArgumentError]
#   If dir is not a Pathname.
def write_settings(dir, report_before = nil, report_after = nil)
  # `raise Exception 'msg'` would call an undefined Exception() method and
  # surface as NoMethodError; raise a proper error class with the message.
  raise ArgumentError, 'dir must be a Pathname' unless dir.is_a? Pathname

  settings = {
    'before' => report_before,
    'after' => report_after,
    'cached' => %w[before after]
  }
  (dir + SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
end
219
+
220
##
# Returns CSS for HTML report.
#
# The result is normalize.css followed by sitediff.css, both read from
# SiteDiff::FILES_DIR.
def self.css
  %w[normalize.css sitediff.css].reduce('') do |output, name|
    output + File.read(File.join(SiteDiff::FILES_DIR, name))
  end
end
228
+
229
##
# Returns JS for HTML report.
#
# The result is jquery.min.js followed by sitediff.js, both read from
# SiteDiff::FILES_DIR.
def self.js
  %w[jquery.min.js sitediff.js].reduce('') do |output, name|
    output + File.read(File.join(SiteDiff::FILES_DIR, name))
  end
end
237
+ end
238
+ end
@@ -1,73 +1,110 @@
1
- require 'fileutils'
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/diff'
5
+ require 'sitediff/report'
2
6
  require 'digest/sha1'
7
+ require 'fileutils'
3
8
 
4
9
  class SiteDiff
5
- class Result < Struct.new(:path, :before, :after, :error)
10
+ # SiteDiff Result Object.
11
+ class Result < Struct.new(
12
+ :path,
13
+ :before,
14
+ :after,
15
+ :before_encoding,
16
+ :after_encoding,
17
+ :error,
18
+ :verbose
19
+ )
6
20
  STATUS_SUCCESS = 0 # Identical before and after
7
21
  STATUS_FAILURE = 1 # Different before and after
8
22
  STATUS_ERROR = 2 # Couldn't fetch page
9
- STATUS_TEXT = %w[success failure error]
23
+ STATUS_TEXT = %w[unchanged changed error].freeze
10
24
 
11
25
  attr_reader :status, :diff
12
26
 
27
##
# Creates a Result.
#
# A result carrying an error gets STATUS_ERROR and no diff. Otherwise the
# diff is computed (binary diff when either encoding is missing, HTML diff
# when both are present) and the status reflects whether a diff exists.
def initialize(*args)
  super
  if error
    @status = STATUS_ERROR
    return
  end

  @diff = if before_encoding && after_encoding
            Diff.html_diffy(before, after)
          else
            Diff.binary_diffy(before, after, before_encoding, after_encoding)
          end
  @status = @diff ? STATUS_FAILURE : STATUS_SUCCESS
end
22
47
 
48
##
# Whether the result has no diff.
#
# True only when the status is STATUS_SUCCESS; a result with a diff or an
# error is not a success.
#
# TODO: Change "Success" to unchanged.
def success?
  STATUS_SUCCESS == status
end
26
57
 
58
##
# Whether the result has an error.
#
# True only when the status is STATUS_ERROR.
def error?
  STATUS_ERROR == status
end
63
+
27
64
# Human-readable label for the current status, looked up in STATUS_TEXT.
def status_text
  label = STATUS_TEXT[status]
  label
end
31
68
 
32
69
# Printable URL for this result's path.
#
# When the cache reports +tag+ among its tags, the URL points below
# "/cache/<tag>"; otherwise +prefix+ is used as the base.
def url(tag, prefix, cache)
  base = if cache.read_tags.include?(tag)
           "/cache/#{tag}"
         else
           prefix
         end
  base.to_s + path
end
36
74
 
37
75
# Relative filename under which this result's diff is stored: the diffs
# directory plus the SHA1 hex digest of the path with an ".html" suffix.
def filename
  digest = Digest::SHA1.hexdigest(path)
  File.join(Report::DIFFS_DIR, "#{digest}.html")
end
41
79
 
42
- # Text of the link in the HTML report
43
- def link
44
- case status
45
- when STATUS_ERROR then error
46
- when STATUS_SUCCESS then status_text
47
- when STATUS_FAILURE then "<a href='#{filename}'>DIFF</a>"
48
- end
80
# Returns a URL to the result diff.
#
# The URL is "files/<filename>" when +relative+ is truthy and
# "/files/<filename>" otherwise.
#
# Returns nil if the result has no diffs.
def diff_url(relative = false)
  return nil unless status == STATUS_FAILURE

  (relative ? 'files/' : '/files/') + filename
end
50
87
 
51
88
  # Log the result to the terminal
52
- def log
89
+ def log(verbose = true)
53
90
  case status
54
- when STATUS_SUCCESS then
55
- SiteDiff::log path, :success, 'SUCCESS'
56
- when STATUS_ERROR then
57
- SiteDiff::log path, :error, "ERROR (#{error})"
58
- when STATUS_FAILURE then
59
- SiteDiff::log path, :failure, "FAILURE"
60
- puts Diff::terminal_diffy(before, after)
91
+ when STATUS_SUCCESS
92
+ SiteDiff.log path, :success, 'UNCHANGED'
93
+ when STATUS_ERROR
94
+ SiteDiff.log path + " (#{error})", :warning, 'ERROR'
95
+ when STATUS_FAILURE
96
+ SiteDiff.log path, :error, 'CHANGED'
97
+ puts Diff.terminal_diffy(before, after) if verbose
61
98
  end
62
99
  end
63
100
 
64
101
# Dump the result to a file.
#
# The file lands at +dir+ joined with #filename; missing parent
# directories are created. +relative+ is passed through to the diff
# output generator.
def dump(dir, relative = false)
  target = File.join(dir, filename)
  parent = File.dirname(target)
  FileUtils.mkdir_p(parent) unless File.exist?(parent)
  File.open(target, 'w') { |io| io.write(Diff.generate_diff_output(self, relative)) }
end
73
110
  end
@@ -1,104 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/exception'
5
+ require 'sitediff/sanitize/dom_transform'
6
+ require 'sitediff/sanitize/regexp'
1
7
  require 'nokogiri'
2
8
  require 'set'
3
9
 
4
10
  class SiteDiff
5
- module Sanitize
6
- class InvalidSanitization < Exception; end
11
+ # SiteDiff Sanitizer.
12
+ class Sanitizer
13
+ class InvalidSanitization < SiteDiffException; end
7
14
 
8
15
  TOOLS = {
9
- :array => %w[dom_transform sanitization],
10
- :scalar => %w[selector remove_spacing],
11
- }
12
- DOM_TRANSFORMS = Set.new(%w[remove unwrap_root unwrap remove_class])
13
-
14
- module_function
15
-
16
- # Performs dom transformations.
17
- #
18
- # Currently supported transforms:
19
- #
20
- # * { :type => "unwrap_root" }
21
- # * { :type => "unwrap", :selector => "div.field-item" }
22
- # * { :type => "remove", :selector => "div.extra-stuff" }
23
- #
24
- # @arg node - Nokogiri document or Node
25
- # @arg rules - array of dom_transform rules
26
- # @return - transformed Nokogiri document node
27
- def perform_dom_transforms(node, rules)
28
- rules.each do |rule|
29
- type = rule['type'] or
30
- raise InvalidSanitization, "DOM transform needs a type"
31
- DOM_TRANSFORMS.include?(type) or
32
- raise InvalidSanitization, "No DOM transform named #{type}"
33
-
34
- meth = 'transform_' + type
35
-
36
- if sels = rule['selector']
37
- sels = [sels].flatten # Either array or scalar is fine
38
- # Call method for each node the selectors find
39
- sels.each do |sel|
40
- node.css(sel).each { |e| send(meth, rule, e) }
41
- end
42
- else
43
- send(meth, rule, node)
44
- end
45
- end
16
+ array: %w[dom_transform sanitization],
17
+ scalar: %w[selector remove_spacing ignore_whitespace]
18
+ }.freeze
19
+ DOM_TRANSFORMS = Set.new(%w[remove strip unwrap_root unwrap remove_class])
20
+
21
##
# Creates a Sanitizer.
#
# @param html
#   Markup to sanitize.
# @param config
#   Sanitization rules, looked up by rule name.
# @param [Hash] opts
#   Extra options; the :path entry is used when filtering rules by path.
def initialize(html, config, opts = {})
  @html, @config, @opts = html, config, opts
end
47
28
 
48
- def transform_remove(rule, el)
49
- el.remove
50
- end
51
- def transform_unwrap(rule, el)
52
- el.add_next_sibling(el.children)
53
- el.remove
29
##
# Performs sanitization.
#
# Parses the markup, then runs the spacing, selector, DOM-transform and
# regexp steps in that order. Returns the string left behind by the regexp
# step if there is one, otherwise the pretty-printed node.
def sanitize
  # Quick return on empty input
  return '' if @html == ''

  @node = Sanitizer.domify(@html)
  @html = nil

  remove_spacing
  selector
  dom_transforms
  regexps

  return @html if @html

  Sanitizer.prettify(@node)
end
55
- def transform_remove_class(rule, el)
56
- # Must call remove_class on a NodeSet!
57
- ns = Nokogiri::XML::NodeSet.new(el.document, [el])
58
- [rule['class']].flatten.each do |class_name|
59
- ns.remove_class(class_name)
44
+
45
# Return whether or not we want to keep a rule.
#
# A rule is dropped when it is nil/false, when it is flagged 'disabled',
# or when it has a 'path' pattern that does not match the path given in
# the options. The path filter only applies when both the rule pattern and
# the :path option are present.
#
# @param rule
#   Rule hash (or nil).
# @return [Boolean]
#   Always true or false; the path branch no longer leaks MatchData/nil.
def want_rule(rule)
  return false unless rule
  return false if rule['disabled']

  # Filter out if path regexp doesn't match
  if (pathre = rule['path']) && (path = @opts[:path])
    return !::Regexp.new(pathre).match(path).nil?
  end

  true
end
67
57
 
68
- def parse(str, force_doc = false, log_errors = false)
69
- if force_doc || /<!DOCTYPE/.match(str[0, 512])
70
- doc = Nokogiri::HTML(str)
71
- doc
58
# Canonicalize a simple rule, eg: 'remove_spacing' or 'selector'.
# It may be a simple value, or a hash, or an array of hashes.
# Turn it into an array of hashes, then pick the single wanted one.
#
# The shape is decided by type rather than by probing with [] so that
# scalars such as true, strings that contain "value", empty arrays and
# hashes whose 'value' is false are all handled.
#
# @param [String] name
#   Name of the rule in the configuration.
# @return [Hash, nil]
#   The one matching rule hash, or nil when the rule is absent/falsy or no
#   entry is wanted.
# @raise [RuntimeError]
#   If more than one entry is wanted.
def canonicalize_rule(name)
  rules = @config[name]
  return nil unless rules

  candidates = rules.is_a?(Array) ? rules : [rules]
  # Anything that is not already a hash with a 'value' key is treated as
  # the bare value of the rule.
  candidates = candidates.map do |rule|
    rule.is_a?(Hash) && rule.key?('value') ? rule : { 'value' => rule }
  end

  want = candidates.select { |r| want_rule(r) }
  return nil if want.empty?
  raise "Too many matching rules of type #{name}" if want.size > 1

  want.first
end
80
+
81
# Perform 'remove_spacing' action: collapse spacing in the current node
# when a matching rule exists and its value is truthy.
def remove_spacing
  rule = canonicalize_rule('remove_spacing')
  return unless rule

  Sanitizer.remove_node_spacing(@node) if rule['value']
end
86
+
87
# Perform 'selector' action, to choose a new root: when a matching rule
# exists, the current node is replaced by the fragments it selects.
def selector
  rule = canonicalize_rule('selector')
  return unless rule

  @node = Sanitizer.select_fragments(@node, rule['value'])
end
92
+
93
# Applies regexp sanitization rules.
#
# Rules with a selector are applied to the DOM node first. The node is
# then pretty-printed into a string (after which @node is cleared) and the
# remaining, global rules are applied to that string.
def regexps
  rules = @config['sanitization']
  return unless rules

  rules = rules.select { |r| want_rule(r) }.map { |r| Regexp.create(r) }
  selector, global = rules.partition(&:selector?)

  selector.each { |r| r.apply(@node) }
  @html = Sanitizer.prettify(@node)
  @node = nil
  # Prevent potential UTF-8 encoding errors by removing invalid bytes.
  # Transcoding from 'binary' would drop every non-ASCII byte, including
  # valid multi-byte characters; scrubbing only removes invalid sequences.
  @html = @html.dup.force_encoding(Encoding::UTF_8).scrub('')
  global.each { |r| r.apply(@html) }
end
116
+
117
# Perform DOM transforms: every wanted 'dom_transform' rule is turned into
# a transform and applied to the current node, in configuration order.
def dom_transforms
  rules = @config['dom_transform']
  return unless rules

  rules.select { |rule| want_rule(rule) }
       .each { |rule| DomTransform.create(rule).apply(@node) }
end
82
127
 
83
- # Force this object to be a document, so we can apply a stylesheet
84
- def to_document(obj)
85
- if Nokogiri::XML::Document === obj
86
- return obj
87
- elsif Nokogiri::XML::Node === obj # or fragment
88
- return parse(obj.to_s, true)
89
-
90
- # This ought to work, and would be faster,
91
- # but seems to segfault Nokogiri
92
- # doc = Nokogiri::HTML('<html><body>')
93
- # doc.at('body').children = obj.children
94
- # return doc
95
- else
96
- return to_document(parse(obj))
128
+ ##### Implementations of actions #####
129
+
130
# Remove double-spacing inside text nodes.
#
# Only text nodes are touched (eg not attributes); each run of spaces is
# collapsed to a single space.
def self.remove_node_spacing(node)
  node.xpath('//text()').each do |text|
    text.content = text.content.squeeze(' ')
  end
end
99
137
 
100
- # Pretty-print the HTML
101
- def prettify(obj)
138
# Get a fragment consisting of the elements matching the selector(s).
#
# When we choose a new root, we always become a DocumentFragment,
# and lose any DOCTYPE and such.
def self.select_fragments(node, sel)
  matches = node.css(sel)
  root = node.fragment? ? node : Nokogiri::HTML.fragment('')
  root.children = matches
  root
end
147
+
148
+ # Pretty-print some HTML
149
+ def self.prettify(obj)
102
150
  @stylesheet ||= begin
103
151
  stylesheet_path = File.join(SiteDiff::FILES_DIR, 'pretty_print.xsl')
104
152
  Nokogiri::XSLT(File.read(stylesheet_path))
@@ -109,10 +157,22 @@ class SiteDiff
109
157
  # but that tends to segfault Nokogiri
110
158
  str = @stylesheet.apply_to(to_document(obj))
111
159
 
160
+ # There's a lot of cruft left over that we don't want.
161
+
162
+ # Prevent potential UTF-8 encoding errors by removing invalid bytes.
163
+ # Not the only solution.
164
+ # An alternative is to return the string unmodified.
165
+ str = str.encode(
166
+ 'UTF-8',
167
+ 'binary',
168
+ invalid: :replace,
169
+ undef: :replace,
170
+ replace: ''
171
+ )
112
172
  # Remove xml declaration and <html> tags
113
173
  str.sub!(/\A<\?xml.*$\n/, '')
114
174
  str.sub!(/\A^<html>$\n/, '')
115
- str.sub!(%r[</html>\n\Z], '')
175
+ str.sub!(%r{</html>\n\Z}, '')
116
176
 
117
177
  # Remove top-level indentation
118
178
  indent = /\A(\s*)/.match(str)[1].size
@@ -121,73 +181,32 @@ class SiteDiff
121
181
  # Remove blank lines
122
182
  str.gsub!(/^\s*$\n/, '')
123
183
 
124
- return str
125
- end
184
+ # Remove DOS newlines
185
+ str.gsub!(/\x0D$/, '')
186
+ str.gsub!(/&#13;$/, '')
126
187
 
127
- def remove_spacing(doc)
128
- # remove double spacing, but only inside text nodes (eg not attributes)
129
- doc.xpath('//text()').each do |node|
130
- node.content = node.content.gsub(/ +/, ' ')
131
- end
132
- end
133
-
134
- # Do one regexp transformation on a string
135
- def substitute(str, rule)
136
- #FIXME escape forward slashes, right now we are escaping them in YAML!
137
- str.gsub!(/#{rule['pattern']}/, rule['substitute'] || '' )
138
188
  str
139
189
  end
140
190
 
141
- # Do all regexp sanitization rules
142
- def perform_regexps(node, rules)
143
- rules ||= []
144
-
145
- # First do rules with a selector
146
- rules.each do |rule|
147
- if sel = rule['selector']
148
- node.css(sel).each do |e|
149
- e.replace(substitute(e.to_html, rule))
150
- end
151
- end
152
- end
153
-
154
- # If needed, do rules without a selector. We'd rather not convert to
155
- # a string unless necessary.
156
- global_rules = rules.reject { |r| r['selector'] }
157
- return node if global_rules.empty?
158
-
159
- str = node.to_html # Convert to string
160
- global_rules.each { |r| substitute(str, r) }
161
- return str
162
- end
163
-
164
- def select_root(node, sel)
165
- return node unless sel
166
-
167
- # When we choose a new root, we always become a DocumentFragment,
168
- # and lose any DOCTYPE and such.
169
- ns = node.css(sel)
170
- unless node.fragment?
171
- node = Nokogiri::HTML.fragment('')
191
# Parse HTML into a node.
#
# A full document is produced when +force_doc+ is set or when "<!DOCTYPE"
# appears within the first 512 characters; otherwise a fragment.
# NOTE(review): the DOCTYPE check is case-sensitive, so "<!doctype html>"
# is parsed as a fragment -- confirm this is intended.
def self.domify(str, force_doc = false)
  wants_document = force_doc || /<!DOCTYPE/.match(str[0, 512])
  return Nokogiri::HTML(str) if wants_document

  Nokogiri::HTML.fragment(str)
end
176
199
 
177
- def sanitize(str, config)
178
- return '' if str == ''
179
-
180
- node = parse(str)
181
-
182
- remove_spacing(node) if config['remove_spacing']
183
- node = select_root(node, config['selector'])
184
- if transform = config['dom_transform']
185
- perform_dom_transforms(node, transform)
200
# Force this object to be a document, so we can apply a stylesheet.
#
# Documents are returned as they are, other nodes (including fragments)
# are re-parsed from their string form as a full document, and anything
# else is parsed first and then converted.
#
# Matching is done with is_a? semantics (case/when) instead of comparing
# obj.class for equality, so subclasses such as elements are recognised
# as nodes rather than falling through to the string branch.
#
# @param obj
#   A Nokogiri document, node or fragment, or a string of markup.
# @return
#   A document node.
def self.to_document(obj)
  case obj
  # Documents are nodes too, so this branch must come first.
  when Nokogiri::XML::Document
    obj
  # node or fragment
  when Nokogiri::XML::Node
    domify(obj.to_s, true)
  else
    to_document(domify(obj, false))
  end
end
192
211
  end
193
212
  end