rails-deprecated_sanitizer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9570967c2f4f3fb0464be3a5de1ee7ccae1bd09a
4
+ data.tar.gz: bbb6628bc2532ef7cf95038b7d8a986845eceb3f
5
+ SHA512:
6
+ metadata.gz: 642ec60f5421e3cbc81ea35d0ddbd96fae302aef12655e1a07a66ffc4b625a01d604aecc09f535232cc1222a44b3ab20b5af52cec0dde353dbdf0537c63acfbb
7
+ data.tar.gz: d1b3465d6908c599fde76a3d58185f132937ca44ebc60912b29abeb0789a8816a447b8498ce24e9374bbfab18ff7d005ed745e655c7731f8e2f51801a3821a76
@@ -0,0 +1,3 @@
1
+ ## 1.0.0
2
+
3
+ * First release
@@ -0,0 +1,16 @@
1
+ # Rails::Deprecated::Sanitizer
2
+
3
+ In Rails 4.2 the sanitization implementation uses Loofah by default.
4
+ Previously html-scanner was used for this.
5
+ This gem includes that old behavior for easier migration and it will be supported until Rails 5.
6
+
7
+ If you need this behavior, add the gem to your application's Gemfile and run `bundle`; the deprecated behavior is then installed.
8
+
9
+ gem 'rails-deprecated_sanitizer'
10
+
11
+ You can read more about the new behavior here: [rails-html-sanitizer](https://github.com/rails/rails-html-sanitizer).
12
+
13
+ # Reporting XSS Security Issues
14
+
15
+ The code provided here deals with XSS attacks and is therefore a security concern.
16
+ So if you find a security issue please follow the [regular security reporting guidelines](http://rubyonrails.org/security/).
@@ -0,0 +1 @@
1
+ require 'rails/deprecated_sanitizer'
@@ -0,0 +1,152 @@
1
+ require "rails/deprecated_sanitizer/version"
2
+ require "rails/deprecated_sanitizer/html-scanner"
3
+
4
module Rails
  # Sanitizer vendor that hands out the html-scanner based sanitizer classes.
  # The module extends itself, so every method below can be called directly
  # on Rails::DeprecatedSanitizer. Each method returns a class, not an instance.
  module DeprecatedSanitizer
    extend self

    # Returns the HTML::FullSanitizer class.
    def full_sanitizer() HTML::FullSanitizer end

    # Returns the HTML::LinkSanitizer class.
    def link_sanitizer() HTML::LinkSanitizer end

    # Returns the HTML::WhiteListSanitizer class.
    def white_list_sanitizer() HTML::WhiteListSanitizer end
  end
end
21
+
22
module ActionView
  module Helpers
    # Configuration accessors for the deprecated html-scanner sanitizers.
    #
    # Readers delegate to +white_list_sanitizer+, which is not defined in this
    # module and must be provided elsewhere. Writers (other than the protocol
    # separator) call +merge+ on the matching collection of
    # HTML::WhiteListSanitizer, i.e. they add to the collection rather than
    # replace it.
    module SanitizeHelper
      extend self

      # The vendor module supplying the sanitizer classes.
      def sanitizer_vendor
        Rails::DeprecatedSanitizer
      end

      # --- protocol separator -------------------------------------------

      def sanitized_protocol_separator
        white_list_sanitizer.protocol_separator
      end

      # Replaces the protocol separator of +white_list_sanitizer+.
      def sanitized_protocol_separator=(value)
        white_list_sanitizer.protocol_separator = value
      end

      # --- URI attributes -----------------------------------------------

      def sanitized_uri_attributes
        white_list_sanitizer.uri_attributes
      end

      # Adds valid HTML attributes that the +sanitize+ helper checks for URIs.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_uri_attributes = 'lowsrc', 'target'
      #   end
      #
      def sanitized_uri_attributes=(attributes)
        HTML::WhiteListSanitizer.uri_attributes.merge(attributes)
      end

      # --- bad tags -----------------------------------------------------

      def sanitized_bad_tags
        white_list_sanitizer.bad_tags
      end

      # Adds to the Set of 'bad' tags for the +sanitize+ helper.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_bad_tags = 'embed', 'object'
      #   end
      #
      def sanitized_bad_tags=(attributes)
        HTML::WhiteListSanitizer.bad_tags.merge(attributes)
      end

      # --- allowed tags -------------------------------------------------

      def sanitized_allowed_tags
        white_list_sanitizer.allowed_tags
      end

      # Adds to the Set of allowed tags for the +sanitize+ helper.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
      #   end
      #
      def sanitized_allowed_tags=(attributes)
        HTML::WhiteListSanitizer.allowed_tags.merge(attributes)
      end

      # --- allowed attributes -------------------------------------------

      def sanitized_allowed_attributes
        white_list_sanitizer.allowed_attributes
      end

      # Adds to the Set of allowed HTML attributes for the +sanitize+ helper.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc']
      #   end
      #
      def sanitized_allowed_attributes=(attributes)
        HTML::WhiteListSanitizer.allowed_attributes.merge(attributes)
      end

      # --- allowed CSS properties ---------------------------------------

      def sanitized_allowed_css_properties
        white_list_sanitizer.allowed_css_properties
      end

      # Adds to the Set of allowed CSS properties for the +sanitize+ and
      # +sanitize_css+ helpers.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_allowed_css_properties = 'expression'
      #   end
      #
      def sanitized_allowed_css_properties=(attributes)
        HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes)
      end

      # --- allowed CSS keywords -----------------------------------------

      def sanitized_allowed_css_keywords
        white_list_sanitizer.allowed_css_keywords
      end

      # Adds to the Set of allowed CSS keywords for the +sanitize+ and
      # +sanitize_css+ helpers.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_allowed_css_keywords = 'expression'
      #   end
      #
      def sanitized_allowed_css_keywords=(attributes)
        HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes)
      end

      # --- shorthand CSS properties -------------------------------------

      def sanitized_shorthand_css_properties
        white_list_sanitizer.shorthand_css_properties
      end

      # Adds to the Set of allowed shorthand CSS properties for the
      # +sanitize+ and +sanitize_css+ helpers.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_shorthand_css_properties = 'expression'
      #   end
      #
      def sanitized_shorthand_css_properties=(attributes)
        HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes)
      end

      # --- allowed protocols --------------------------------------------

      def sanitized_allowed_protocols
        white_list_sanitizer.allowed_protocols
      end

      # Adds to the Set of allowed protocols for the +sanitize+ helper.
      #
      #   class Application < Rails::Application
      #     config.action_view.sanitized_allowed_protocols = 'ssh', 'feed'
      #   end
      #
      def sanitized_allowed_protocols=(attributes)
        HTML::WhiteListSanitizer.allowed_protocols.merge(attributes)
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ $LOAD_PATH.unshift "#{File.dirname(__FILE__)}/html-scanner"
2
+
3
module HTML
  extend ActiveSupport::Autoload

  # Registers every html-scanner constant together with the file that defines
  # it. The pairs are listed (and therefore registered) in the same order as
  # the original explicit +autoload+ calls.
  eager_autoload do
    [
      [:CDATA,              'html/node'],
      [:Document,           'html/document'],
      [:FullSanitizer,      'html/sanitizer'],
      [:LinkSanitizer,      'html/sanitizer'],
      [:Node,               'html/node'],
      [:Sanitizer,          'html/sanitizer'],
      [:Selector,           'html/selector'],
      [:Tag,                'html/node'],
      [:Text,               'html/node'],
      [:Tokenizer,          'html/tokenizer'],
      [:Version,            'html/version'],
      [:WhiteListSanitizer, 'html/sanitizer']
    ].each do |const_name, path|
      autoload const_name, path
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'html/tokenizer'
2
+ require 'html/node'
3
+ require 'html/selector'
4
+ require 'html/sanitizer'
5
+
6
+ module HTML #:nodoc:
7
+ # A top-level HTML document. You give it a body of text, and it will parse that
8
+ # text into a tree of nodes.
9
class Document #:nodoc:

  # The root of the parsed document.
  attr_reader :root

  # Builds the node tree for +text+.
  #
  # Tokens are read from a Tokenizer and turned into nodes with Node.parse.
  # A stack of currently open tags tracks where new nodes are attached.
  # When +strict+ is true a mismatched closing tag raises; otherwise a
  # warning is emitted and the closing tag is ignored. +xml+ is forwarded to
  # Tag#childless? when deciding whether a tag opens a new nesting level.
  def initialize(text, strict=false, xml=false)
    tokenizer = Tokenizer.new(text)
    @root = Node.new(nil)
    open_nodes = [ @root ]

    while token = tokenizer.next
      current = open_nodes.last
      node = Node.parse(current, tokenizer.line, tokenizer.position, token, strict)

      # Closing tags are never stored in the tree; everything else is
      # attached to the innermost open node.
      is_closer = node.tag? && node.closing == :close
      current.children << node unless is_closer
      next unless node.tag?

      if open_nodes.length > 1 && node.closing == :close
        if current.name == node.name
          # Give an element that was closed while still empty a single
          # empty text child before leaving it.
          if current.children.empty?
            current.children << Text.new(current, node.line, node.position, "")
          end
          open_nodes.pop
        else
          msg = mismatched_close_message(text, current, node)
          strict ? raise(msg) : warn(msg)
        end
      elsif !node.childless?(xml) && node.closing != :close
        open_nodes.push node
      end
    end
  end

  # Search the tree for (and return) the first node that matches the given
  # conditions. The conditions are interpreted differently for different node
  # types, see HTML::Text#find and HTML::Tag#find.
  def find(conditions)
    @root.find(conditions)
  end

  # Search the tree for (and return) all nodes that match the given
  # conditions. The conditions are interpreted differently for different node
  # types, see HTML::Text#find and HTML::Tag#find.
  def find_all(conditions)
    @root.find_all(conditions)
  end

  private

  # Describes an attempt to close +opened+ with the non-matching closing tag
  # +closer+, quoting up to 40 characters of +text+ starting 20 characters
  # before each tag (clamped to the start of the text).
  def mismatched_close_message(text, opened, closer)
    open_start = [opened.position - 20, 0].max
    close_start = [closer.position - 20, 0].max
    <<EOF.strip
ignoring attempt to close #{opened.name} with #{closer.name}
opened at byte #{opened.position}, line #{opened.line}
closed at byte #{closer.position}, line #{closer.line}
attributes at open: #{opened.attributes.inspect}
text around open: #{text[open_start,40].inspect}
text around close: #{text[close_start,40].inspect}
EOF
  end

end
67
+
68
+ end
@@ -0,0 +1,532 @@
1
+ require 'strscan'
2
+
3
+ module HTML #:nodoc:
4
+
5
# A Hash of normalized search conditions.
#
# Anything that is not a Hash is wrapped as <tt>{ :content => value }</tt>.
# Top-level keys are symbolized, <tt>:attributes</tt> keys are stringified,
# relational conditions (<tt>:parent</tt>, <tt>:child</tt>, ...) are wrapped
# recursively, and <tt>:children</tt> options are validated. Unknown keys
# raise a RuntimeError.
class Conditions < Hash #:nodoc:
  def initialize(hash)
    super()
    hash = { :content => hash } unless hash.is_a?(Hash)
    normalized = keys_to_symbols(hash)
    normalized.each_key do |key|
      normalized[key] = normalize_value(key, normalized[key])
    end
    update normalized
  end

  private

    # Returns the stored form of +value+ for the top-level condition +key+.
    def normalize_value(key, value)
      case key
      when :tag, :content
        # keys are valid, and require no further processing
        value
      when :attributes
        keys_to_strings(value)
      when :parent, :child, :ancestor, :descendant, :sibling, :before, :after
        Conditions.new(value)
      when :children
        normalize_children(value)
      else
        raise "illegal key #{key.inspect} => #{value.inspect}"
      end
    end

    # Symbolizes and validates the options of a <tt>:children</tt> condition,
    # wrapping the <tt>:only</tt> option in a Conditions object.
    def normalize_children(options)
      counts = keys_to_symbols(options)
      counts.each do |key, value|
        case key
        when :count, :greater_than, :less_than
          # keys are valid, and require no further processing
        when :only
          counts[key] = Conditions.new(value)
        else
          raise "illegal key #{key.inspect} => #{value.inspect}"
        end
      end
      counts
    end

    # Copy of +hash+ whose keys have been converted with +to_s+.
    def keys_to_strings(hash)
      hash.keys.each_with_object({}) do |k, out|
        out[k.to_s] = hash[k]
      end
    end

    # Copy of +hash+ whose keys have been converted with +to_sym+; raises if
    # a key cannot be converted.
    def keys_to_symbols(hash)
      hash.keys.each_with_object({}) do |k, out|
        raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
        out[k.to_sym] = hash[k]
      end
    end
end
51
+
52
+ # The base class of all nodes, textual and otherwise, in an HTML document.
53
class Node #:nodoc:
  # The array of children of this node. Not all nodes have children.
  attr_reader :children

  # The parent node of this node. All nodes have a parent, except for the
  # root node.
  attr_reader :parent

  # The line number of the input where this node was begun
  attr_reader :line

  # The byte position in the input where this node was begun
  attr_reader :position

  # Create a new, childless node attached to +parent+.
  def initialize(parent, line=0, pos=0)
    @parent = parent
    @children = []
    @line = line
    @position = pos
  end

  # The concatenated string forms of all children.
  def to_s
    @children.join
  end

  # A plain node never matches; subclasses override this with real matching
  # behavior. +conditions+ may be of any type.
  def match(conditions)
    false
  end

  # Asks each child in turn to #find the conditions and returns the first
  # truthy answer, or +nil+ when no child produces one.
  def find(conditions)
    conditions = validate_conditions(conditions)
    @children.each do |child|
      found = child.find(conditions)
      return found if found
    end
    nil
  end

  # Collects this node (if it matches) followed by every matching node found
  # below it, and returns them as an array.
  def find_all(conditions)
    conditions = validate_conditions(conditions)

    collected = match(conditions) ? [self] : []
    @children.each_with_object(collected) do |child, acc|
      acc.concat child.find_all(conditions)
    end
  end

  # Returns +false+. Subclasses may override this if they define a kind of
  # tag.
  def tag?
    false
  end

  # Returns +conditions+ unchanged when it already is a Conditions object,
  # otherwise wraps it in one.
  def validate_conditions(conditions)
    return conditions if conditions.is_a?(Conditions)
    Conditions.new(conditions)
  end

  # Two nodes are equal when they have the same class, the same number of
  # children, and pairwise equal children.
  def ==(node)
    return false unless self.class == node.class && children.size == node.children.size

    children.each_with_index.all? { |child, i| child == node.children[i] }
  end

  class <<self
    # Turns one token (+content+) into a Text, CDATA or Tag node.
    #
    # Content that does not start a line with "<" followed by a non-space
    # character becomes a Text node. In +strict+ mode malformed markup
    # raises a RuntimeError; otherwise the parser recovers as well as it can.
    def parse(parent, line, pos, content, strict=true)
      return Text.new(parent, line, pos, content) if content !~ /^<\S/

      scanner = StringScanner.new(content)

      unless scanner.skip(/</)
        raise "expected <" if strict
        return Text.new(parent, line, pos, content)
      end

      if scanner.skip(/!\[CDATA\[/)
        unless scanner.skip_until(/\]\]>/)
          raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" if strict
          # unterminated section: consume the remainder of the token
          scanner.skip_until(/\Z/)
        end

        return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
      end

      closing = scanner.scan(/\//) ? :close : nil
      name = scanner.scan(/[^\s!>\/]+/)
      return Text.new(parent, line, pos, content) unless name
      name.downcase!

      # Closing tags carry no attributes (nil); opening tags always get a hash.
      attributes = nil
      unless closing
        scanner.skip(/\s*/)
        attributes = scan_attributes(scanner)
        closing = scanner.scan(/\//) ? :self : nil
      end

      unless scanner.scan(/\s*>/)
        if strict
          raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
        else
          # throw away all text until we find what we're looking for
          scanner.skip_until(/>/) or scanner.terminate
        end
      end

      Tag.new(parent, line, pos, name, attributes, closing)
    end

    private

    # Reads attributes from the scanner's current position into a hash keyed
    # by the downcased attribute name. Attributes without a value map to
    # +true+.
    def scan_attributes(scanner)
      attributes = {}
      while attr = scanner.scan(/[-\w:]+/)
        value = true
        if scanner.scan(/\s*=\s*/)
          delim = scanner.scan(/['"]/)
          value = delim ? scan_quoted_value(scanner, delim) : scanner.scan(/[^\s>\/]+/)
        end
        attributes[attr.downcase] = value
        scanner.skip(/\s*/)
      end
      attributes
    end

    # Reads a quoted attribute value up to the closing +delim+. A backslash
    # and the character following it are both kept verbatim.
    def scan_quoted_value(scanner, delim)
      value = +""
      while piece = scanner.scan(/[^#{delim}\\]+|./)
        if piece == "\\"
          value << piece
          break if scanner.eos?
          value << scanner.getch
        elsif piece == delim
          break
        else
          value << piece
        end
      end
      value
    end
  end
end
206
+
207
+ # A node that represents text, rather than markup.
208
class Text < Node #:nodoc:

  # The raw text held by this node.
  attr_reader :content

  # Creates a new text node as a child of the given parent, with the given
  # content.
  def initialize(parent, line, pos, content)
    super(parent, line, pos)
    @content = content
  end

  # Returns the content of this node.
  def to_s
    @content
  end

  # Returns +self+ if this node meets the given conditions, and the falsy
  # result of #match otherwise. Text nodes support conditions of the
  # following kinds:
  #
  # * if +conditions+ is a string, it is compared to the node's content
  #   with <tt>==</tt>
  # * if +conditions+ is a regular expression, it must match the node's
  #   content
  # * if +conditions+ is a hash, its <tt>:content</tt> entry is interpreted
  #   as described above.
  def find(conditions)
    match(conditions) && self
  end

  # Returns a truthy value if this node meets the given conditions, or a
  # falsy one otherwise. See the discussion of #find for the valid
  # conditions.
  def match(conditions)
    case conditions
    when String then @content == conditions
    when Regexp then @content =~ conditions
    when Hash
      normalized = validate_conditions(conditions)

      # Text nodes only have :content, :parent, :ancestor
      unsupported = normalized.keys - [:content, :parent, :ancestor]
      if unsupported.empty?
        match(normalized[:content])
      else
        false
      end
    end
  end

  # Text nodes are equal when Node#== holds and their content is equal.
  def ==(node)
    super && content == node.content
  end
end
265
+
266
+ # A CDATA node is simply a text node with a specialized way of displaying
267
+ # itself.
268
class CDATA < Text #:nodoc:
  # Returns the node's text wrapped in a CDATA section marker.
  def to_s
    inner = super
    "<![CDATA[#{inner}]]>"
  end
end
273
+
274
+ # A Tag is any node that represents markup. It may be an opening tag, a
275
+ # closing tag, or a self-closing tag. It has a name, and may have a hash of
276
+ # attributes.
277
class Tag < Node #:nodoc:

  # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
  attr_reader :closing

  # Either +nil+, or a hash of attributes for this node.
  attr_reader :attributes

  # The name of this tag.
  attr_reader :name

  # Create a new tag node as a child of the given parent. +name+,
  # +attributes+ and +closing+ are stored as given (Node.parse supplies them
  # already parsed).
  def initialize(parent, line, pos, name, attributes, closing)
    super(parent, line, pos)
    @name = name
    @attributes = attributes
    @closing = closing
  end

  # A convenience for obtaining an attribute of the node. Returns +nil+ if
  # the node has no attributes.
  def [](attr)
    @attributes ? @attributes[attr] : nil
  end

  # Returns a truthy value if this tag can NOT contain child nodes: it is a
  # closing or self-closing tag, or its name is one of the known void
  # elements. With +xml+ set, a tag with no closing marker is never
  # considered childless.
  def childless?(xml = false)
    return false if xml && @closing.nil?
    !@closing.nil? ||
      @name =~ /^(img|br|hr|link|meta|area|base|basefont|
                  col|frame|input|isindex|param)$/ox
  end

  # Returns a textual representation of the node
  def to_s
    return "</#{@name}>" if @closing == :close

    s = "<#{@name}"
    # A tag built without attributes (nil) is rendered like one with none.
    (@attributes || {}).each do |k,v|
      s << " #{k}"
      s << "=\"#{v}\"" if String === v
    end
    s << " /" if @closing == :self
    s << ">"
    @children.each { |child| s << child.to_s }
    s << "</#{@name}>" if @closing != :self && !@children.empty?
    s
  end

  # If either the node or any of its children meet the given conditions, the
  # matching node is returned. Otherwise, +nil+ is returned. (See the
  # description of the valid conditions in the +match+ method.)
  def find(conditions)
    match(conditions) && self || super
  end

  # Returns +true+, indicating that this node represents an HTML tag.
  def tag?
    true
  end

  # Returns +true+ if the node meets all of the given conditions. The
  # +conditions+ parameter must be a hash of any of the following keys
  # (all are optional):
  #
  # * <tt>:tag</tt>: the node name must match the corresponding value
  # * <tt>:attributes</tt>: a hash. The node's values must match the
  #   corresponding values in the hash.
  # * <tt>:parent</tt>: a hash. The node's parent must match the
  #   corresponding hash. A node without a parent never matches.
  # * <tt>:child</tt>: a hash. At least one of the node's immediate children
  #   must meet the criteria described by the hash.
  # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
  #   meet the criteria described by the hash.
  # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
  #   must meet the criteria described by the hash.
  # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
  #   meet the criteria described by the hash.
  # * <tt>:after</tt>: a hash. At least one sibling preceding the node must
  #   meet the criteria described by the hash.
  # * <tt>:before</tt>: a hash. At least one sibling following the node must
  #   meet the criteria described by the hash.
  # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
  #   keys:
  #   ** <tt>:count</tt>: either a number or a range which must equal (or
  #      include) the number of children that match.
  #   ** <tt>:less_than</tt>: the number of matching children must be less than
  #      this number.
  #   ** <tt>:greater_than</tt>: the number of matching children must be
  #      greater than this number.
  #   ** <tt>:only</tt>: another hash consisting of the keys to use
  #      to match on the children, and only matching children will be
  #      counted.
  #
  # Siblings are told apart by object identity, so a sibling that merely
  # looks the same as this node (same name, attributes and children) still
  # counts as a sibling.
  #
  # Conditions are matched using the following algorithm:
  #
  # * if the condition is a string, the value must be equal to it.
  # * if the condition is a regexp, it must match the value.
  # * if the condition is a number, the value must match number.to_s.
  # * if the condition is +true+, the value must not be +nil+.
  # * if the condition is +false+ or +nil+, the value must be +nil+.
  #
  # Usage:
  #
  #   # test if the node is a "span" tag
  #   node.match tag: "span"
  #
  #   # test if the node's parent is a "div"
  #   node.match parent: { tag: "div" }
  #
  #   # test if any of the node's ancestors are "table" tags
  #   node.match ancestor: { tag: "table" }
  #
  #   # test if any of the node's immediate children are "em" tags
  #   node.match child: { tag: "em" }
  #
  #   # test if any of the node's descendants are "strong" tags
  #   node.match descendant: { tag: "strong" }
  #
  #   # test if the node has between 2 and 4 span tags as immediate children
  #   node.match children: { count: 2..4, only: { tag: "span" } }
  #
  #   # get funky: test to see if the node is a "div", has a "ul" ancestor
  #   # and an "li" parent (with "class" = "enum"), and whether or not it has
  #   # a "span" descendant that contains # text matching /hello world/:
  #   node.match tag: "div",
  #              ancestor: { tag: "ul" },
  #              parent: { tag: "li",
  #                        attributes: { class: "enum" } },
  #              descendant: { tag: "span",
  #                            child: /hello world/ }
  def match(conditions)
    conditions = validate_conditions(conditions)

    # check content of child nodes
    if conditions[:content]
      if children.empty?
        return false unless match_condition("", conditions[:content])
      else
        return false unless children.find { |child| child.match(conditions[:content]) }
      end
    end

    # test the name
    return false if conditions[:tag] && !match_condition(@name, conditions[:tag])

    # test attributes
    (conditions[:attributes] || {}).each do |key, value|
      return false unless match_condition(self[key], value)
    end

    # test parent (a detached tag has no parent and therefore cannot match)
    if conditions[:parent]
      return false unless parent && parent.match(conditions[:parent])
    end

    # test children
    if conditions[:child]
      return false unless children.find { |child| child.match(conditions[:child]) }
    end

    # test ancestors: walk up until one matches or the chain ends
    if conditions[:ancestor]
      ancestor = parent
      ancestor = ancestor.parent until ancestor.nil? || ancestor.match(conditions[:ancestor])
      return false unless ancestor
    end

    # test descendants
    if conditions[:descendant]
      found = children.find do |child|
        # test the child
        child.match(conditions[:descendant]) ||
        # test the child's descendants
        child.match(:descendant => conditions[:descendant])
      end
      return false unless found
    end

    # count children
    if opts = conditions[:children]
      matches = children.select do |c|
        c.kind_of?(HTML::Tag) && (c.closing == :self || !c.childless?)
      end

      matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
      opts.each do |key, value|
        next if key == :only
        case key
        when :count
          if Integer === value
            return false if matches.length != value
          else
            return false unless value.include?(matches.length)
          end
        when :less_than
          return false unless matches.length < value
        when :greater_than
          return false unless matches.length > value
        else raise "unknown count condition #{key}"
        end
      end
    end

    # test siblings
    if conditions[:sibling] || conditions[:before] || conditions[:after]
      siblings = parent ? parent.children : []
      # Locate this exact object. Comparing with == would confuse the node
      # with any structurally identical sibling that precedes it.
      self_index = siblings.index { |s| s.equal?(self) }

      if conditions[:sibling]
        return false unless siblings.any? do |s|
          !s.equal?(self) && s.match(conditions[:sibling])
        end
      end

      if conditions[:before]
        # No position among the siblings (no parent, or detached) means
        # there is nothing after this node to compare against.
        following = self_index ? siblings[(self_index + 1)..-1] : []
        return false unless following.any? { |s| s.match(conditions[:before]) }
      end

      if conditions[:after]
        preceding = self_index ? siblings[0, self_index] : []
        return false unless preceding.any? { |s| s.match(conditions[:after]) }
      end
    end

    true
  end

  # Tags are equal when Node#== holds and their closing marker, name and
  # attributes are all equal.
  def ==(node)
    return false unless super
    return false unless closing == node.closing && self.name == node.name
    attributes == node.attributes
  end

  private
    # Match the given value to the given condition (see the algorithm
    # described on #match). Unsupported condition types never match.
    def match_condition(value, condition)
      case condition
        when String
          value && value == condition
        when Regexp
          value && value.match(condition)
        when Numeric
          value == condition.to_s
        when true
          !value.nil?
        when false, nil
          value.nil?
        else
          false
      end
    end
end
532
+ end