sitediff 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require 'minitar'
6
+ require 'sitediff'
7
+ require 'sitediff/config'
8
+ require 'zlib'
9
+
10
+ class SiteDiff
11
+ ##
12
+ # SiteDiff Report Helper.
13
+ class Report
14
+ attr_reader :results, :cache
15
+
16
+ ##
17
+ # Directory where diffs will be generated.
18
+ DIFFS_DIR = 'diffs'
19
+
20
+ ##
21
+ # Name of file containing a list of pages with diffs.
22
+ FAILURES_FILE = 'failures.txt'
23
+
24
+ ##
25
+ # Name of file containing HTML report of diffs.
26
+ REPORT_FILE_HTML = 'report.html'
27
+
28
+ ##
29
+ # Name of file containing JSON report of diffs.
30
+ REPORT_FILE_JSON = 'report.json'
31
+
32
+ ##
33
+ # Name of file containing exported file archive.
34
+ REPORT_FILE_TAR = 'report.tgz'
35
+
36
+ ##
37
+ # Name of directory in which to build the portable report.
38
+ REPORT_BUILD_DIR = '_tmp_report'
39
+
40
+ ##
41
+ # Name of the portable report directory.
42
+ REPORT_DIR = 'report'
43
+
44
+ ##
45
+ # Path to settings used for report.
46
+ SETTINGS_FILE = 'settings.yaml'
47
+
48
+ ##
49
+ # Creates a Report object.
50
+ #
51
+ # @param [Config] config.
52
+ # @param [Cache] cache.
53
+ # @param [Array] results.
54
+ def initialize(config, cache, results)
55
+ @config = config
56
+ @cache = cache
57
+ @results = results
58
+ end
59
+
60
+ ##
61
+ # Generates an HTML report.
62
+ #
63
+ # @param [String] dir
64
+ # The directory in which the report is to be generated.
65
+ def generate_html(
66
+ dir,
67
+ report_before = nil,
68
+ report_after = nil
69
+ )
70
+ report_before ||= @config.before_url
71
+ report_after ||= @config.after_url
72
+
73
+ dir = SiteDiff.ensure_dir dir
74
+
75
+ write_diffs dir
76
+ write_failures dir
77
+
78
+ # Prepare report.
79
+ report = Diff.generate_html(
80
+ @results,
81
+ report_before,
82
+ report_after,
83
+ @cache,
84
+ @config.export
85
+ )
86
+
87
+ # Write report.
88
+ report_file = dir + REPORT_FILE_HTML
89
+ report_file.unlink if report_file.file?
90
+ report_file.open('w') { |f| f.write(report) }
91
+
92
+ write_settings dir, report_before, report_after
93
+
94
+ if @config.export
95
+ package_report(dir)
96
+ else
97
+ SiteDiff.log 'Report generated to ' + report_file.expand_path.to_s
98
+ end
99
+ end
100
+
101
+ ##
102
+ # Generates a JSON report.
103
+ #
104
+ # @param dir
105
+ # The directory in which the report is to be generated.
106
+ def generate_json(dir)
107
+ dir = SiteDiff.ensure_dir dir
108
+ write_diffs dir
109
+ write_failures dir
110
+
111
+ # Prepare report.
112
+ report = {
113
+ paths_compared: @results.length,
114
+ paths_diffs: 0,
115
+ paths: {}
116
+ }
117
+ @results.each do |item|
118
+ report[:paths_diffs] += 1 unless item.success?
119
+
120
+ item_report = {
121
+ path: item.path,
122
+ status: item.status,
123
+ message: item.error
124
+ }
125
+ report[:paths][item.path] = item_report
126
+ end
127
+ report = JSON report
128
+
129
+ # Write report.
130
+ report_file = dir + REPORT_FILE_JSON
131
+ report_file.unlink if report_file.file?
132
+ report_file.open('w') { |f| f.write(report) }
133
+
134
+ write_settings dir
135
+
136
+ SiteDiff.log 'Report generated to ' + report_file.expand_path.to_s
137
+ end
138
+
139
+ ##
140
+ # Package report for export.
141
+ def package_report(dir)
142
+ # Create temporary report directories.
143
+ temp_path = dir + REPORT_BUILD_DIR
144
+ temp_path.rmtree if temp_path.directory?
145
+ temp_path.mkpath
146
+ report_path = temp_path + REPORT_DIR
147
+ report_path.mkpath
148
+ files_path = report_path + 'files'
149
+ files_path.mkpath
150
+ diffs_path = dir + DIFFS_DIR
151
+
152
+ # Move files to place.
153
+ FileUtils.move(dir + REPORT_FILE_HTML, report_path)
154
+ FileUtils.move(diffs_path, files_path) if diffs_path.directory?
155
+
156
+ # Make tar file.
157
+ Dir.chdir(temp_path) do
158
+ Minitar.pack(
159
+ REPORT_DIR,
160
+ Zlib::GzipWriter.new(File.open(REPORT_FILE_TAR, 'wb'))
161
+ )
162
+ end
163
+ FileUtils.move(temp_path + REPORT_FILE_TAR, dir)
164
+ temp_path.rmtree
165
+ SiteDiff.log 'Archived report generated to ' + dir.join(REPORT_FILE_TAR).to_s
166
+ end
167
+
168
+ ##
169
+ # Creates diff files in a directory named "diffs".
170
+ #
171
+ # If "dir" is /foo/bar, then diffs will be placed in /foo/bar/diffs.
172
+ #
173
+ # @param [Pathname] dir
174
+ # The directory in which a "diffs" directory is to be generated.
175
+ def write_diffs(dir)
176
+ raise Exception 'dir must be a Pathname' unless dir.is_a? Pathname
177
+
178
+ # Delete existing "diffs" dir, if exists.
179
+ diff_dir = dir + DIFFS_DIR
180
+ diff_dir.rmtree if diff_dir.exist?
181
+
182
+ # Write diffs to the diff directory.
183
+ @results.each { |r| r.dump(dir, @config.export) if r.status == Result::STATUS_FAILURE }
184
+ SiteDiff.log "All diff files written to #{diff_dir.expand_path}" unless @config.export
185
+ end
186
+
187
+ ##
188
+ # Writes paths with diffs into a file.
189
+ #
190
+ # @param [Pathname] dir
191
+ # The directory in which the report is to be generated.
192
+ def write_failures(dir)
193
+ raise Exception 'dir must be a Pathname' unless dir.is_a? Pathname
194
+
195
+ failures = dir + FAILURES_FILE
196
+ SiteDiff.log "All failures written to #{failures.expand_path}"
197
+ failures.open('w') do |f|
198
+ @results.each { |r| f.puts r.path unless r.success? }
199
+ end
200
+ end
201
+
202
+ ##
203
+ # Creates report settings.yaml file.
204
+ #
205
+ # TODO: Find a way to avoid having to create this file.
206
+ #
207
+ # @param [Pathname] dir
208
+ # The directory in which the report is to be generated.
209
+ def write_settings(dir, report_before = nil, report_after = nil)
210
+ raise Exception 'dir must be a Pathname' unless dir.is_a? Pathname
211
+
212
+ settings = {
213
+ 'before' => report_before,
214
+ 'after' => report_after,
215
+ 'cached' => %w[before after]
216
+ }
217
+ dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
218
+ end
219
+
220
+ ##
221
+ # Returns CSS for HTML report.
222
+ def self.css
223
+ output = ''
224
+ output += File.read(File.join(SiteDiff::FILES_DIR, 'normalize.css'))
225
+ output += File.read(File.join(SiteDiff::FILES_DIR, 'sitediff.css'))
226
+ output
227
+ end
228
+
229
+ ##
230
+ # Returns JS for HTML report.
231
+ def self.js
232
+ output = ''
233
+ output += File.read(File.join(SiteDiff::FILES_DIR, 'jquery.min.js'))
234
+ output += File.read(File.join(SiteDiff::FILES_DIR, 'sitediff.js'))
235
+ output
236
+ end
237
+ end
238
+ end
@@ -1,73 +1,110 @@
1
- require 'fileutils'
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/diff'
5
+ require 'sitediff/report'
2
6
  require 'digest/sha1'
7
+ require 'fileutils'
3
8
 
4
9
  class SiteDiff
5
- class Result < Struct.new(:path, :before, :after, :error)
10
+ # SiteDiff Result Object.
11
+ class Result < Struct.new(
12
+ :path,
13
+ :before,
14
+ :after,
15
+ :before_encoding,
16
+ :after_encoding,
17
+ :error,
18
+ :verbose
19
+ )
6
20
  STATUS_SUCCESS = 0 # Identical before and after
7
21
  STATUS_FAILURE = 1 # Different before and after
8
22
  STATUS_ERROR = 2 # Couldn't fetch page
9
- STATUS_TEXT = %w[success failure error]
23
+ STATUS_TEXT = %w[unchanged changed error].freeze
10
24
 
11
25
  attr_reader :status, :diff
12
26
 
27
+ ##
28
+ # Creates a Result.
13
29
  def initialize(*args)
14
30
  super
15
31
  if error
16
32
  @status = STATUS_ERROR
17
33
  else
18
- @diff = Diff::html_diffy(before, after)
34
+ if !before_encoding || !after_encoding
35
+ @diff = Diff.binary_diffy(
36
+ before,
37
+ after,
38
+ before_encoding,
39
+ after_encoding
40
+ )
41
+ else
42
+ @diff = Diff.html_diffy(before, after)
43
+ end
19
44
  @status = @diff ? STATUS_FAILURE : STATUS_SUCCESS
20
45
  end
21
46
  end
22
47
 
48
+ ##
49
+ # Whether the result has no diff.
50
+ #
51
+ # If there is a diff, it is not a success.
52
+ #
53
+ # TODO: Change "Success" to unchanged.
23
54
  def success?
24
55
  status == STATUS_SUCCESS
25
56
  end
26
57
 
58
+ ##
59
+ # Whether the result has an error.
60
+ def error?
61
+ status == STATUS_ERROR
62
+ end
63
+
27
64
  # Textual representation of the status
28
65
  def status_text
29
- return STATUS_TEXT[status]
66
+ STATUS_TEXT[status]
30
67
  end
31
68
 
32
69
  # Printable URL
33
- def url(prefix)
34
- prefix.to_s + path
70
+ def url(tag, prefix, cache)
71
+ base = cache.read_tags.include?(tag) ? "/cache/#{tag}" : prefix
72
+ base.to_s + path
35
73
  end
36
74
 
37
75
  # Filename to store diff
38
76
  def filename
39
- File.join(SiteDiff::DIFFS_DIR, Digest::SHA1.hexdigest(self.path) + '.html')
77
+ File.join(Report::DIFFS_DIR, Digest::SHA1.hexdigest(path) + '.html')
40
78
  end
41
79
 
42
- # Text of the link in the HTML report
43
- def link
44
- case status
45
- when STATUS_ERROR then error
46
- when STATUS_SUCCESS then status_text
47
- when STATUS_FAILURE then "<a href='#{filename}'>DIFF</a>"
48
- end
80
+ # Returns a URL to the result diff.
81
+ #
82
+ # Returns nil if the result has no diffs.
83
+ def diff_url(relative = false)
84
+ prefix = relative ? 'files/' : '/files/'
85
+ return prefix + filename if status == STATUS_FAILURE
49
86
  end
50
87
 
51
88
  # Log the result to the terminal
52
- def log
89
+ def log(verbose = true)
53
90
  case status
54
- when STATUS_SUCCESS then
55
- SiteDiff::log path, :success, 'SUCCESS'
56
- when STATUS_ERROR then
57
- SiteDiff::log path, :error, "ERROR (#{error})"
58
- when STATUS_FAILURE then
59
- SiteDiff::log path, :failure, "FAILURE"
60
- puts Diff::terminal_diffy(before, after)
91
+ when STATUS_SUCCESS
92
+ SiteDiff.log path, :success, 'UNCHANGED'
93
+ when STATUS_ERROR
94
+ SiteDiff.log path + " (#{error})", :warning, 'ERROR'
95
+ when STATUS_FAILURE
96
+ SiteDiff.log path, :error, 'CHANGED'
97
+ puts Diff.terminal_diffy(before, after) if verbose
61
98
  end
62
99
  end
63
100
 
64
101
  # Dump the result to a file
65
- def dump(dir)
102
+ def dump(dir, relative = false)
66
103
  dump_path = File.join(dir, filename)
67
104
  base = File.dirname(dump_path)
68
- FileUtils::mkdir_p(base) unless File.exists?(base)
105
+ FileUtils.mkdir_p(base) unless File.exist?(base)
69
106
  File.open(dump_path, 'w') do |f|
70
- f.write(Diff::generate_diff_output(self))
107
+ f.write(Diff.generate_diff_output(self, relative))
71
108
  end
72
109
  end
73
110
  end
@@ -1,104 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sitediff'
4
+ require 'sitediff/exception'
5
+ require 'sitediff/sanitize/dom_transform'
6
+ require 'sitediff/sanitize/regexp'
1
7
  require 'nokogiri'
2
8
  require 'set'
3
9
 
4
10
  class SiteDiff
5
- module Sanitize
6
- class InvalidSanitization < Exception; end
11
+ # SiteDiff Sanitizer.
12
+ class Sanitizer
13
+ class InvalidSanitization < SiteDiffException; end
7
14
 
8
15
  TOOLS = {
9
- :array => %w[dom_transform sanitization],
10
- :scalar => %w[selector remove_spacing],
11
- }
12
- DOM_TRANSFORMS = Set.new(%w[remove unwrap_root unwrap remove_class])
13
-
14
- module_function
15
-
16
- # Performs dom transformations.
17
- #
18
- # Currently supported transforms:
19
- #
20
- # * { :type => "unwrap_root" }
21
- # * { :type => "unwrap", :selector => "div.field-item" }
22
- # * { :type => "remove", :selector => "div.extra-stuff" }
23
- #
24
- # @arg node - Nokogiri document or Node
25
- # @arg rules - array of dom_transform rules
26
- # @return - transformed Nokogiri document node
27
- def perform_dom_transforms(node, rules)
28
- rules.each do |rule|
29
- type = rule['type'] or
30
- raise InvalidSanitization, "DOM transform needs a type"
31
- DOM_TRANSFORMS.include?(type) or
32
- raise InvalidSanitization, "No DOM transform named #{type}"
33
-
34
- meth = 'transform_' + type
35
-
36
- if sels = rule['selector']
37
- sels = [sels].flatten # Either array or scalar is fine
38
- # Call method for each node the selectors find
39
- sels.each do |sel|
40
- node.css(sel).each { |e| send(meth, rule, e) }
41
- end
42
- else
43
- send(meth, rule, node)
44
- end
45
- end
16
+ array: %w[dom_transform sanitization],
17
+ scalar: %w[selector remove_spacing ignore_whitespace]
18
+ }.freeze
19
+ DOM_TRANSFORMS = Set.new(%w[remove strip unwrap_root unwrap remove_class])
20
+
21
+ ##
22
+ # Creates a Sanitizer.
23
+ def initialize(html, config, opts = {})
24
+ @html = html
25
+ @config = config
26
+ @opts = opts
46
27
  end
47
28
 
48
- def transform_remove(rule, el)
49
- el.remove
50
- end
51
- def transform_unwrap(rule, el)
52
- el.add_next_sibling(el.children)
53
- el.remove
29
+ ##
30
+ # Performs sanitization.
31
+ def sanitize
32
+ return '' if @html == '' # Quick return on empty input
33
+
34
+ @node = Sanitizer.domify(@html)
35
+ @html = nil
36
+
37
+ remove_spacing
38
+ selector
39
+ dom_transforms
40
+ regexps
41
+
42
+ @html || Sanitizer.prettify(@node)
54
43
  end
55
- def transform_remove_class(rule, el)
56
- # Must call remove_class on a NodeSet!
57
- ns = Nokogiri::XML::NodeSet.new(el.document, [el])
58
- [rule['class']].flatten.each do |class_name|
59
- ns.remove_class(class_name)
44
+
45
+ # Return whether or not we want to keep a rule
46
+ def want_rule(rule)
47
+ return false unless rule
48
+ return false if rule['disabled']
49
+
50
+ # Filter out if path regexp doesn't match
51
+ if (pathre = rule['path']) && (path = @opts[:path])
52
+ return ::Regexp.new(pathre).match(path)
60
53
  end
61
- end
62
- def transform_unwrap_root(rule, node)
63
- node.children.size == 1 or
64
- raise InvalidSanitization, "Multiple root elements in unwrap_root"
65
- node.children = node.children[0].children
54
+
55
+ true
66
56
  end
67
57
 
68
- def parse(str, force_doc = false, log_errors = false)
69
- if force_doc || /<!DOCTYPE/.match(str[0, 512])
70
- doc = Nokogiri::HTML(str)
71
- doc
58
+ # Canonicalize a simple rule, eg: 'remove_spacing' or 'selector'.
59
+ # It may be a simple value, or a hash, or an array of hashes.
60
+ # Turn it into an array of hashes.
61
+ def canonicalize_rule(name)
62
+ (rules = @config[name]) || (return nil)
63
+
64
+ # Already an array? Do nothing.
65
+ if rules[0]&.respond_to?('each') && rules[0]&.fetch('value')
66
+ # If it is a hash, put it in an array.
67
+ elsif rules['value']
68
+ rules = [rules]
69
+ # If it is a scalar value, put it in an array.
72
70
  else
73
- doc = Nokogiri::HTML.fragment(str)
71
+ rules = [{ 'value' => rules }]
74
72
  end
75
- if log_errors
76
- doc.errors.each do |e|
77
- SiteDiff::log "Error in parsing HTML document: #{e}", :error
78
- end
73
+
74
+ want = rules.select { |r| want_rule(r) }
75
+ return nil if want.empty?
76
+ raise "Too many matching rules of type #{name}" if want.size > 1
77
+
78
+ want.first
79
+ end
80
+
81
+ # Perform 'remove_spacing' action
82
+ def remove_spacing
83
+ (rule = canonicalize_rule('remove_spacing')) || return
84
+ Sanitizer.remove_node_spacing(@node) if rule['value']
85
+ end
86
+
87
+ # Perform 'selector' action, to choose a new root
88
+ def selector
89
+ (rule = canonicalize_rule('selector')) || return
90
+ @node = Sanitizer.select_fragments(@node, rule['value'])
91
+ end
92
+
93
+ # Applies regexps. Selector rules run on the DOM; global rules run on the prettified HTML.
94
+ def regexps
95
+ (rules = @config['sanitization']) || return
96
+ rules = rules.select { |r| want_rule(r) }
97
+
98
+ rules.map! { |r| Regexp.create(r) }
99
+ selector, global = rules.partition(&:selector?)
100
+
101
+ selector.each { |r| r.apply(@node) }
102
+ @html = Sanitizer.prettify(@node)
103
+ @node = nil
104
+ # Prevent potential UTF-8 encoding errors by removing invalid bytes.
105
+ # Not the only solution. An alternative is to return the
106
+ # string unmodified.
107
+ @html = @html.encode(
108
+ 'UTF-8',
109
+ 'binary',
110
+ invalid: :replace,
111
+ undef: :replace,
112
+ replace: ''
113
+ )
114
+ global.each { |r| r.apply(@html) }
115
+ end
116
+
117
+ # Perform DOM transforms
118
+ def dom_transforms
119
+ (rules = @config['dom_transform']) || return
120
+ rules = rules.select { |r| want_rule(r) }
121
+
122
+ rules.each do |rule|
123
+ transform = DomTransform.create(rule)
124
+ transform.apply(@node)
79
125
  end
80
- doc
81
126
  end
82
127
 
83
- # Force this object to be a document, so we can apply a stylesheet
84
- def to_document(obj)
85
- if Nokogiri::XML::Document === obj
86
- return obj
87
- elsif Nokogiri::XML::Node === obj # or fragment
88
- return parse(obj.to_s, true)
89
-
90
- # This ought to work, and would be faster,
91
- # but seems to segfault Nokogiri
92
- # doc = Nokogiri::HTML('<html><body>')
93
- # doc.at('body').children = obj.children
94
- # return doc
95
- else
96
- return to_document(parse(obj))
128
+ ##### Implementations of actions #####
129
+
130
+ # Remove double-spacing inside text nodes
131
+ def self.remove_node_spacing(node)
132
+ # remove double spacing, but only inside text nodes (eg not attributes)
133
+ node.xpath('//text()').each do |el|
134
+ el.content = el.content.gsub(/ +/, ' ')
97
135
  end
98
136
  end
99
137
 
100
- # Pretty-print the HTML
101
- def prettify(obj)
138
+ # Get a fragment consisting of the elements matching the selector(s)
139
+ def self.select_fragments(node, sel)
140
+ # When we choose a new root, we always become a DocumentFragment,
141
+ # and lose any DOCTYPE and such.
142
+ ns = node.css(sel)
143
+ node = Nokogiri::HTML.fragment('') unless node.fragment?
144
+ node.children = ns
145
+ node
146
+ end
147
+
148
+ # Pretty-print some HTML
149
+ def self.prettify(obj)
102
150
  @stylesheet ||= begin
103
151
  stylesheet_path = File.join(SiteDiff::FILES_DIR, 'pretty_print.xsl')
104
152
  Nokogiri::XSLT(File.read(stylesheet_path))
@@ -109,10 +157,22 @@ class SiteDiff
109
157
  # but that tends to segfault Nokogiri
110
158
  str = @stylesheet.apply_to(to_document(obj))
111
159
 
160
+ # There's a lot of cruft left over that we don't want.
161
+
162
+ # Prevent potential UTF-8 encoding errors by removing invalid bytes.
163
+ # Not the only solution.
164
+ # An alternative is to return the string unmodified.
165
+ str = str.encode(
166
+ 'UTF-8',
167
+ 'binary',
168
+ invalid: :replace,
169
+ undef: :replace,
170
+ replace: ''
171
+ )
112
172
  # Remove xml declaration and <html> tags
113
173
  str.sub!(/\A<\?xml.*$\n/, '')
114
174
  str.sub!(/\A^<html>$\n/, '')
115
- str.sub!(%r[</html>\n\Z], '')
175
+ str.sub!(%r{</html>\n\Z}, '')
116
176
 
117
177
  # Remove top-level indentation
118
178
  indent = /\A(\s*)/.match(str)[1].size
@@ -121,73 +181,32 @@ class SiteDiff
121
181
  # Remove blank lines
122
182
  str.gsub!(/^\s*$\n/, '')
123
183
 
124
- return str
125
- end
184
+ # Remove DOS newlines
185
+ str.gsub!(/\x0D$/, '')
186
+ str.gsub!(/&#13;$/, '')
126
187
 
127
- def remove_spacing(doc)
128
- # remove double spacing, but only inside text nodes (eg not attributes)
129
- doc.xpath('//text()').each do |node|
130
- node.content = node.content.gsub(/ +/, ' ')
131
- end
132
- end
133
-
134
- # Do one regexp transformation on a string
135
- def substitute(str, rule)
136
- #FIXME escape forward slashes, right now we are escaping them in YAML!
137
- str.gsub!(/#{rule['pattern']}/, rule['substitute'] || '' )
138
188
  str
139
189
  end
140
190
 
141
- # Do all regexp sanitization rules
142
- def perform_regexps(node, rules)
143
- rules ||= []
144
-
145
- # First do rules with a selector
146
- rules.each do |rule|
147
- if sel = rule['selector']
148
- node.css(sel).each do |e|
149
- e.replace(substitute(e.to_html, rule))
150
- end
151
- end
152
- end
153
-
154
- # If needed, do rules without a selector. We'd rather not convert to
155
- # a string unless necessary.
156
- global_rules = rules.reject { |r| r['selector'] }
157
- return node if global_rules.empty?
158
-
159
- str = node.to_html # Convert to string
160
- global_rules.each { |r| substitute(str, r) }
161
- return str
162
- end
163
-
164
- def select_root(node, sel)
165
- return node unless sel
166
-
167
- # When we choose a new root, we always become a DocumentFragment,
168
- # and lose any DOCTYPE and such.
169
- ns = node.css(sel)
170
- unless node.fragment?
171
- node = Nokogiri::HTML.fragment('')
191
+ # Parse HTML into a node
192
+ def self.domify(str, force_doc = false)
193
+ if force_doc || /<!DOCTYPE/.match(str[0, 512])
194
+ Nokogiri::HTML(str)
195
+ else
196
+ Nokogiri::HTML.fragment(str)
172
197
  end
173
- node.children = ns
174
- return node
175
198
  end
176
199
 
177
- def sanitize(str, config)
178
- return '' if str == ''
179
-
180
- node = parse(str)
181
-
182
- remove_spacing(node) if config['remove_spacing']
183
- node = select_root(node, config['selector'])
184
- if transform = config['dom_transform']
185
- perform_dom_transforms(node, transform)
200
+ # Force this object to be a document, so we can apply a stylesheet
201
+ def self.to_document(obj)
202
+ if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
203
+ obj
204
+ # node or fragment
205
+ elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
206
+ domify(obj.to_s, true)
207
+ else
208
+ to_document(domify(obj, false))
186
209
  end
187
-
188
- obj = perform_regexps(node, config['sanitization'])
189
-
190
- return prettify(obj)
191
210
  end
192
211
  end
193
212
  end