tdreyno-staticmatic 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/LICENSE +21 -0
  2. data/Rakefile +12 -0
  3. data/bin/staticmatic +12 -0
  4. data/lib/staticmatic/actionpack_support/mime.rb +5 -0
  5. data/lib/staticmatic/actionpack_support/remove_partial_benchmark.rb +6 -0
  6. data/lib/staticmatic/autoload.rb +18 -0
  7. data/lib/staticmatic/base.rb +171 -0
  8. data/lib/staticmatic/builder.rb +102 -0
  9. data/lib/staticmatic/config.rb +47 -0
  10. data/lib/staticmatic/creator.rb +18 -0
  11. data/lib/staticmatic/deprecation.rb +26 -0
  12. data/lib/staticmatic/helpers/asset_tag_helper.rb +37 -0
  13. data/lib/staticmatic/helpers/deprecated_helpers.rb +48 -0
  14. data/lib/staticmatic/helpers/page_helper.rb +9 -0
  15. data/lib/staticmatic/helpers/url_helper.rb +19 -0
  16. data/lib/staticmatic/previewer.rb +65 -0
  17. data/lib/staticmatic/rescue.rb +14 -0
  18. data/lib/staticmatic/template_handlers/haml.rb +19 -0
  19. data/lib/staticmatic/template_handlers/liquid.rb +13 -0
  20. data/lib/staticmatic/template_handlers/markdown.rb +13 -0
  21. data/lib/staticmatic/template_handlers/sass.rb +13 -0
  22. data/lib/staticmatic/template_handlers/textile.rb +13 -0
  23. data/lib/staticmatic/templates/default/Rakefile +3 -0
  24. data/lib/staticmatic/templates/default/config.rb +3 -0
  25. data/lib/staticmatic/templates/default/src/helpers/site_helper.rb +5 -0
  26. data/lib/staticmatic/templates/default/src/layouts/site.html.haml +6 -0
  27. data/lib/staticmatic/templates/default/src/pages/index.html.haml +1 -0
  28. data/lib/staticmatic/templates/default/src/stylesheets/site.css.sass +3 -0
  29. data/lib/staticmatic/templates/rescues/default_error.html.erb +2 -0
  30. data/lib/staticmatic/templates/rescues/template_error.html.erb +19 -0
  31. data/lib/staticmatic.rb +28 -0
  32. data/lib/tasks/staticmatic.rb +9 -0
  33. data/staticmatic.gemspec +53 -0
  34. data/vendor/html-scanner/html/document.rb +68 -0
  35. data/vendor/html-scanner/html/node.rb +530 -0
  36. data/vendor/html-scanner/html/sanitizer.rb +173 -0
  37. data/vendor/html-scanner/html/selector.rb +828 -0
  38. data/vendor/html-scanner/html/tokenizer.rb +105 -0
  39. data/vendor/html-scanner/html/version.rb +11 -0
  40. metadata +127 -0
@@ -0,0 +1,530 @@
1
+ require 'strscan'
2
+
3
+ module HTML #:nodoc:
4
+
5
# A normalised set of search criteria, as accepted by the +find+, +find_all+
# and +match+ methods of the node classes.
#
# Anything that is not a Hash is treated as a <tt>:content</tt> condition.
# Top-level keys are converted to symbols, <tt>:attributes</tt> keys are
# converted to strings, and the relational keys (<tt>:parent</tt>,
# <tt>:child</tt>, ...) are wrapped recursively in Conditions. Unknown keys
# raise a RuntimeError.
class Conditions < Hash #:nodoc:
  def initialize(hash)
    super()
    criteria = Hash === hash ? hash : { :content => hash }
    keys_to_symbols(criteria).each do |name, spec|
      store(name, normalize_condition(name, spec))
    end
  end

  private

    # Returns the normalised value for a single top-level condition.
    def normalize_condition(name, spec)
      case name
      when :tag, :content
        # already in their final form
        spec
      when :attributes
        keys_to_strings(spec)
      when :parent, :child, :ancestor, :descendant, :sibling, :before, :after
        Conditions.new(spec)
      when :children
        normalize_child_counts(spec)
      else
        raise "illegal key #{name.inspect} => #{spec.inspect}"
      end
    end

    # Returns the normalised <tt>:children</tt> options; only <tt>:only</tt>
    # carries nested conditions, the counters are passed through untouched.
    def normalize_child_counts(spec)
      counts = keys_to_symbols(spec)
      counts.each do |option, setting|
        case option
        when :count, :greater_than, :less_than
          # plain numbers or ranges
        when :only
          counts[option] = Conditions.new(setting)
        else
          raise "illegal key #{option.inspect} => #{setting.inspect}"
        end
      end
      counts
    end

    # Copy of +hash+ whose keys have been converted with +to_s+.
    def keys_to_strings(hash)
      hash.keys.each_with_object({}) do |key, converted|
        converted[key.to_s] = hash[key]
      end
    end

    # Copy of +hash+ whose keys have been converted with +to_sym+. Keys that
    # cannot be converted are rejected.
    def keys_to_symbols(hash)
      hash.keys.each_with_object({}) do |key, converted|
        raise "illegal key #{key.inspect}" unless key.respond_to?(:to_sym)
        converted[key.to_sym] = hash[key]
      end
    end
end
55
+
56
# The base class of all nodes, textual and otherwise, in an HTML document.
class Node #:nodoc:
  # The array of children of this node. Not all nodes have children.
  attr_reader :children

  # The parent node of this node. All nodes have a parent, except for the
  # root node.
  attr_reader :parent

  # The line number of the input where this node was begun
  attr_reader :line

  # The byte position in the input where this node was begun
  attr_reader :position

  # Create a new node as a child of the given parent.
  def initialize(parent, line=0, pos=0)
    @parent = parent
    @children = []
    @line, @position = line, pos
  end

  # Return a textual representation of the node: the concatenation of the
  # textual representation of every child.
  def to_s
    @children.map { |child| child.to_s }.join
  end

  # Return false (subclasses must override this to provide specific matching
  # behavior.) +conditions+ may be of any type.
  def match(conditions)
    false
  end

  # Search the children of this node for the first node for which #find
  # returns non +nil+. Returns the result of the #find call that succeeded,
  # or +nil+ when no child matched.
  def find(conditions)
    conditions = validate_conditions(conditions)
    @children.each do |child|
      node = child.find(conditions)
      return node if node
    end
    nil
  end

  # Search for all nodes that match the given conditions, and return them
  # as an array. The node itself is included when it matches.
  def find_all(conditions)
    conditions = validate_conditions(conditions)

    matches = []
    matches << self if match(conditions)
    @children.each do |child|
      matches.concat child.find_all(conditions)
    end
    matches
  end

  # Returns +false+. Subclasses may override this if they define a kind of
  # tag.
  def tag?
    false
  end

  # Returns +conditions+ unchanged when it already is a Conditions instance,
  # otherwise wraps it in one.
  def validate_conditions(conditions)
    Conditions === conditions ? conditions : Conditions.new(conditions)
  end

  # Structural equality: two nodes are equal when they are of the same class
  # and their children are pairwise equal.
  def ==(node)
    return false unless self.class == node.class && children.size == node.children.size

    children.zip(node.children).all? { |mine, theirs| mine == theirs }
  end

  class <<self
    # Builds the node described by a single token.
    #
    # * +parent+, +line+, +pos+ are handed to the new node unchanged.
    # * +content+ is the raw token text.
    # * +strict+ selects the reaction to malformed markup: a RuntimeError
    #   when true, a best-effort node when false.
    #
    # Returns a Text node when +content+ does not look like markup, a CDATA
    # node for a CDATA section, and a Tag otherwise.
    def parse(parent, line, pos, content, strict=true)
      return Text.new(parent, line, pos, content) if content !~ /^<\S/

      scanner = StringScanner.new(content)

      unless scanner.skip(/</)
        raise "expected <" if strict
        return Text.new(parent, line, pos, content)
      end

      if scanner.skip(/!\[CDATA\[/)
        unless scanner.skip_until(/\]\]>/)
          raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" if strict
          # Unterminated section: consume the rest of the token so that
          # pre_match below is the whole input instead of nil.
          scanner.skip_until(/\z/)
        end
        # pre_match starts at the beginning of the input, so the opening
        # marker has to be stripped again.
        return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
      end

      closing = ( scanner.scan(/\//) ? :close : nil )
      return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:-]+/)
      name.downcase!

      # Closing tags carry no attributes; they keep +nil+ here.
      attributes = nil
      unless closing
        attributes = parse_attributes(scanner)
        closing = :self if scanner.scan(/\//)
      end

      unless scanner.scan(/\s*>/)
        if strict
          raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
        else
          # throw away all text until we find what we're looking for
          scanner.skip_until(/>/) or scanner.terminate
        end
      end

      Tag.new(parent, line, pos, name, attributes, closing)
    end

    private

      # Reads <tt>name</tt>, <tt>name=value</tt> and <tt>name="value"</tt>
      # pairs from +scanner+ and returns them as a hash keyed by the
      # downcased attribute name. Attributes without a value map to +true+.
      def parse_attributes(scanner)
        attributes = {}
        scanner.skip(/\s*/)
        while attr = scanner.scan(/[-\w:]+/)
          value = true
          if scanner.scan(/\s*=\s*/)
            delim = scanner.scan(/['"]/)
            value = delim ? parse_quoted_value(scanner, delim) : scanner.scan(/[^\s>\/]+/)
          end
          attributes[attr.downcase] = value
          scanner.skip(/\s*/)
        end
        attributes
      end

      # Reads a quoted attribute value up to the closing +delim+. A backslash
      # is copied together with the character that follows it, so an escaped
      # delimiter does not end the value.
      def parse_quoted_value(scanner, delim)
        value = "".dup
        chunk = /[^#{delim}\\]+|./
        while text = scanner.scan(chunk)
          case text
          when "\\"
            value << text
            # A trailing backslash has nothing left to escape.
            break if scanner.eos?
            value << scanner.getch
          when delim
            break
          else
            value << text
          end
        end
        value
      end
  end
end
204
+
205
# A node that represents text, rather than markup.
class Text < Node #:nodoc:

  # The conditions keys a text node knows how to deal with.
  attr_reader :content

  # Creates a new text node as a child of the given parent, with the given
  # content.
  def initialize(parent, line, pos, content)
    super(parent, line, pos)
    @content = content
  end

  # Returns the content of this node.
  def to_s
    @content
  end

  # Returns +self+ if this node meets the given conditions, and the falsy
  # result of #match otherwise. See #match for the supported conditions.
  def find(conditions)
    outcome = match(conditions)
    outcome && self
  end

  # Tests this node against +conditions+:
  #
  # * a String must be equal to the node's content
  # * a Regexp must match the node's content
  # * a Hash may only use the keys <tt>:content</tt>, <tt>:parent</tt> and
  #   <tt>:ancestor</tt> (any other key makes the match fail); its
  #   <tt>:content</tt> value is then tested as described above
  #
  # Any other kind of condition yields +nil+.
  def match(conditions)
    if String === conditions
      @content == conditions
    elsif Regexp === conditions
      @content =~ conditions
    elsif Hash === conditions
      criteria = validate_conditions(conditions)
      understood = criteria.keys.all? { |key| [:content, :parent, :ancestor].include?(key) }
      return false unless understood

      match(criteria[:content])
    else
      nil
    end
  end

  # Two text nodes are equal when they are equal as nodes and carry the same
  # content.
  def ==(node)
    super ? content == node.content : false
  end
end
263
+
264
# A CDATA node is simply a text node with a specialized way of displaying
# itself.
class CDATA < Text #:nodoc:
  # Returns the content wrapped in a CDATA section. The section is closed
  # with the full <tt>]]></tt> terminator so the output can be parsed again.
  def to_s
    "<![CDATA[#{super}]]>"
  end
end
271
+
272
# A Tag is any node that represents markup. It may be an opening tag, a
# closing tag, or a self-closing tag. It has a name, and may have a hash of
# attributes.
class Tag < Node #:nodoc:

  # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
  attr_reader :closing

  # Either +nil+, or a hash of attributes for this node.
  attr_reader :attributes

  # The name of this tag.
  attr_reader :name

  # Create a new node as a child of the given parent, from the already
  # parsed +name+, +attributes+ and +closing+ status.
  def initialize(parent, line, pos, name, attributes, closing)
    super(parent, line, pos)
    @name = name
    @attributes = attributes
    @closing = closing
  end

  # A convenience for obtaining an attribute of the node. Returns +nil+ if
  # the node has no attributes.
  def [](attr)
    @attributes ? @attributes[attr] : nil
  end

  # Returns a true value if this tag cannot contain child nodes: closing
  # and self-closing tags, and the HTML elements that are always empty.
  # With +xml+ set, a tag that is neither closing nor self-closing is
  # never considered childless.
  def childless?(xml = false)
    return false if xml && @closing.nil?
    !@closing.nil? ||
      @name =~ /^(img|br|hr|link|meta|area|base|basefont|
                  col|frame|input|isindex|param)$/ox
  end

  # Returns a textual representation of the node
  def to_s
    return "</#{@name}>" if @closing == :close

    s = "<#{@name}"
    # Tolerate a tag that was built without an attribute hash.
    (@attributes || {}).each do |k,v|
      s << " #{k}"
      # Valueless attributes (stored as +true+) are written without "=".
      s << "=\"#{v}\"" if String === v
    end
    s << " /" if @closing == :self
    s << ">"
    @children.each { |child| s << child.to_s }
    s << "</#{@name}>" if @closing != :self && !@children.empty?
    s
  end

  # If either the node or any of its children meet the given conditions, the
  # matching node is returned. Otherwise, +nil+ is returned. (See the
  # description of the valid conditions in the +match+ method.)
  def find(conditions)
    match(conditions) && self || super
  end

  # Returns +true+, indicating that this node represents an HTML tag.
  def tag?
    true
  end

  # Returns +true+ if the node meets all of the given conditions. The
  # +conditions+ parameter must be a hash of any of the following keys
  # (all are optional):
  #
  # * <tt>:tag</tt>: the node name must match the corresponding value
  # * <tt>:attributes</tt>: a hash. The node's values must match the
  #   corresponding values in the hash.
  # * <tt>:parent</tt>: a hash. The node's parent must match the
  #   corresponding hash.
  # * <tt>:child</tt>: a hash. At least one of the node's immediate children
  #   must meet the criteria described by the hash.
  # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
  #   meet the criteria described by the hash.
  # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
  #   must meet the criteria described by the hash.
  # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
  #   meet the criteria described by the hash.
  # * <tt>:after</tt>: a hash. The node must be after any sibling meeting
  #   the criteria described by the hash, and at least one sibling must match.
  # * <tt>:before</tt>: a hash. The node must be before any sibling meeting
  #   the criteria described by the hash, and at least one sibling must match.
  # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
  #   keys:
  # ** <tt>:count</tt>: either a number or a range which must equal (or
  #    include) the number of children that match.
  # ** <tt>:less_than</tt>: the number of matching children must be less than
  #    this number.
  # ** <tt>:greater_than</tt>: the number of matching children must be
  #    greater than this number.
  # ** <tt>:only</tt>: another hash consisting of the keys to use
  #    to match on the children, and only matching children will be
  #    counted.
  #
  # Conditions are matched using the following algorithm:
  #
  # * if the condition is a string, it must be equal to the value.
  # * if the condition is a regexp, it must match the value.
  # * if the condition is a number, the value must match number.to_s.
  # * if the condition is +true+, the value must not be +nil+.
  # * if the condition is +false+ or +nil+, the value must be +nil+.
  #
  # A node without a parent never satisfies <tt>:parent</tt>,
  # <tt>:ancestor</tt>, <tt>:sibling</tt>, <tt>:before</tt> or
  # <tt>:after</tt>.
  #
  # Usage:
  #
  #   # test if the node is a "span" tag
  #   node.match :tag => "span"
  #
  #   # test if the node's parent is a "div"
  #   node.match :parent => { :tag => "div" }
  #
  #   # test if any of the node's ancestors are "table" tags
  #   node.match :ancestor => { :tag => "table" }
  #
  #   # test if any of the node's immediate children are "em" tags
  #   node.match :child => { :tag => "em" }
  #
  #   # test if any of the node's descendants are "strong" tags
  #   node.match :descendant => { :tag => "strong" }
  #
  #   # test if the node has between 2 and 4 span tags as immediate children
  #   node.match :children => { :count => 2..4, :only => { :tag => "span" } }
  #
  #   # get funky: test to see if the node is a "div", has a "ul" ancestor
  #   # and an "li" parent (with "class" = "enum"), and whether or not it has
  #   # a "span" descendant that contains text matching /hello world/:
  #   node.match :tag => "div",
  #              :ancestor => { :tag => "ul" },
  #              :parent => { :tag => "li",
  #                           :attributes => { :class => "enum" } },
  #              :descendant => { :tag => "span",
  #                               :child => /hello world/ }
  def match(conditions)
    conditions = validate_conditions(conditions)
    # check content of child nodes
    if conditions[:content]
      if children.empty?
        return false unless match_condition("", conditions[:content])
      else
        return false unless children.find { |child| child.match(conditions[:content]) }
      end
    end

    # test the name
    return false if conditions[:tag] && !match_condition(@name, conditions[:tag])

    # test attributes
    (conditions[:attributes] || {}).each do |key, value|
      return false unless match_condition(self[key], value)
    end

    # test parent; a root node has no parent to test
    if conditions[:parent]
      return false unless parent && parent.match(conditions[:parent])
    end

    # test children
    if conditions[:child]
      return false unless children.find { |child| child.match(conditions[:child]) }
    end

    # test ancestors: climb until one matches or the root is passed
    if conditions[:ancestor]
      ancestor = parent
      ancestor = ancestor.parent until ancestor.nil? || ancestor.match(conditions[:ancestor])
      return false unless ancestor
    end

    # test descendants
    if conditions[:descendant]
      return false unless children.find do |child|
        # test the child
        child.match(conditions[:descendant]) ||
        # test the child's descendants
        child.match(:descendant => conditions[:descendant])
      end
    end

    # count children
    if opts = conditions[:children]
      # Only tags that open an element or are self-closing are counted.
      matches = children.select do |c|
        (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
      end

      matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
      opts.each do |key, value|
        next if key == :only
        case key
          when :count
            if Integer === value
              return false if matches.length != value
            else
              return false unless value.include?(matches.length)
            end
          when :less_than
            return false unless matches.length < value
          when :greater_than
            return false unless matches.length > value
          else raise "unknown count condition #{key}"
        end
      end
    end

    # test siblings
    if conditions[:sibling] || conditions[:before] || conditions[:after]
      siblings = parent ? parent.children : []
      # Locate this very object: #== is structural, so Array#index(self)
      # would stop at the first sibling that merely looks the same.
      self_index = siblings.index { |s| s.equal?(self) }

      if conditions[:sibling]
        return false unless siblings.detect do |s|
          !s.equal?(self) && s.match(conditions[:sibling])
        end
      end

      if conditions[:before]
        return false unless self_index
        return false unless siblings[(self_index + 1)..-1].detect do |s|
          s.match(conditions[:before])
        end
      end

      if conditions[:after]
        return false unless self_index
        return false unless siblings[0, self_index].detect do |s|
          s.match(conditions[:after])
        end
      end
    end

    true
  end

  # Two tags are equal when they are equal as nodes and have the same name,
  # closing status and attributes.
  def ==(node)
    return false unless super
    return false unless closing == node.closing && self.name == node.name
    attributes == node.attributes
  end

  private
    # Match the given value to the given condition. Returns a true value
    # when the condition holds.
    def match_condition(value, condition)
      case condition
        when String
          value && value == condition
        when Regexp
          # Valueless attributes are stored as +true+, which cannot be
          # matched against a pattern.
          value.respond_to?(:match) && value.match(condition)
        when Numeric
          value == condition.to_s
        when true
          !value.nil?
        when false, nil
          value.nil?
        else
          false
      end
    end
end
530
+ end
@@ -0,0 +1,173 @@
1
+ module HTML
2
# Base sanitizer: splits the text into tokens, turns every token into a node
# and lets #process_node decide what ends up in the output. This class keeps
# every node; subclasses override #process_node to filter.
class Sanitizer
  # Returns +text+ untouched when there is nothing to sanitize, otherwise
  # the concatenation of everything #tokenize collected.
  def sanitize(text, options = {})
    sanitizeable?(text) ? tokenize(text, options).join : text
  end

  # True only for non-empty text that contains a "<".
  def sanitizeable?(text)
    return false if text.nil? || text.empty?
    !text.index("<").nil?
  end

protected
  # Feeds every token of +text+ through #process_node, parsed leniently,
  # and returns the collected output fragments.
  def tokenize(text, options)
    tokenizer = HTML::Tokenizer.new(text)
    fragments = []
    token = tokenizer.next
    while token
      process_node(Node.parse(nil, 0, 0, token, false), fragments, options)
      token = tokenizer.next
    end
    fragments
  end

  # Appends the textual form of +node+ to +result+.
  def process_node(node, result, options)
    result.push(node.to_s)
  end
end
27
+
28
# Sanitizer that removes all markup: only text nodes survive, and HTML
# comments are stripped from the result.
class FullSanitizer < Sanitizer
  # Returns +text+ with all tags and comments removed. The input string is
  # never modified, so frozen strings are accepted.
  def sanitize(text, options = {})
    result = super
    # strip any comments, and if they have a newline at the end (ie. line with
    # only a comment) strip that too. When the text needed no sanitizing,
    # +result+ is the caller's own string, hence the non-destructive gsub.
    result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if result && result =~ /<!--(.*?)-->[\n]?/m
    # Recurse - handle all dirty nested tags
    result == text ? result : sanitize(result, options)
  end

  # Keeps plain text nodes only; tags and CDATA sections are dropped.
  def process_node(node, result, options)
    result << node.to_s if node.class == HTML::Text
  end
end
42
+
43
# Sanitizer that removes link markup and leaves every other node in place.
class LinkSanitizer < FullSanitizer
  # Names of the tags that are removed from the output.
  cattr_accessor :included_tags, :instance_writer => false
  self.included_tags = Set.new(%w(a href))

  # True only for non-empty text that contains "<a" or "<href" as well as
  # a ">".
  def sanitizeable?(text)
    return false if text.nil? || text.empty?

    link_start = text.index("<a") || text.index("<href")
    link_start && text.index(">") ? true : false
  end

protected
  # Appends every node to +result+, except tags named in +included_tags+.
  def process_node(node, result, options)
    link_tag = node.is_a?(HTML::Tag) && included_tags.include?(node.name)
    result << node.to_s unless link_tag
  end
end
56
+
57
# Sanitizer that keeps an allowed set of tags and attributes, removes
# attributes that carry a disallowed URI protocol, and cleans style
# attributes with #sanitize_css.
class WhiteListSanitizer < Sanitizer
  [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
   :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
    class_inheritable_accessor attr, :instance_writer => false
  end

  # A regular expression of the valid characters used to separate protocols like
  # the ':' in 'http://foo.com'. Besides the literal colon it recognises the
  # decimal (&#58), hexadecimal (&#x3a) and percent-encoded (%3A) spellings,
  # in either case.
  self.protocol_separator = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i

  # Specifies a Set of HTML attributes that can have URIs.
  self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))

  # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
  # to just escaping harmless tags like &lt;font&gt;
  self.bad_tags = Set.new(%w(script))

  # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
  self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
    sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr
    acronym a img blockquote del ins))

  # Specifies the default Set of html attributes that the #sanitize helper will leave
  # in the allowed tag.
  self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))

  # Specifies the default Set of URI protocols that are accepted in the attributes
  # listed in +uri_attributes+.
  self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
    feed svn urn aim rsync tag ssh sftp rtsp afs))

  # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
  self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
    border-color border-left-color border-right-color border-top-color clear color cursor direction display
    elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
    overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
    speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
    width))

  # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
  self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
    collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
    nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))

  # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
  self.shorthand_css_properties = Set.new(%w(background border margin padding))

  # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute.
  # Returns the accepted declarations joined by a space, or an empty string
  # when the style as a whole looks suspicious.
  def sanitize_css(style)
    # disallow urls
    style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

    # gauntlet. Anchored with \A and \z so that the WHOLE style has to pass;
    # with ^ and $ a single harmless line would be enough.
    if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ ||
        style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
      return ''
    end

    clean = []
    style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
      if allowed_css_properties.include?(prop.downcase)
        clean << prop + ': ' + val + ';'
      elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
        # A shorthand declaration is kept only if every word of its value is
        # an allowed keyword, a colour or a measurement.
        unless val.split().any? do |keyword|
          !allowed_css_keywords.include?(keyword) &&
            keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
        end
          clean << prop + ': ' + val + ';'
        end
      end
    end
    clean.join(' ')
  end

protected
  # Prepares +options+ (fresh open-tag stack, default allowed attributes and
  # tags) and tokenizes +text+. Note that +options+ is modified in place.
  def tokenize(text, options)
    options[:parent] = []
    options[:attributes] ||= allowed_attributes
    options[:tags] ||= allowed_tags
    super
  end

  # Appends the sanitized form of +node+ to +result+: allowed tags with
  # cleaned attributes, +nil+ for other tags and for text inside a bad
  # tag, and "<"-escaped text otherwise.
  def process_node(node, result, options)
    result << case node
      when HTML::Tag
        # options[:parent] is a stack of the currently open tag names,
        # innermost first.
        if node.closing == :close
          options[:parent].shift
        else
          options[:parent].unshift node.name
        end

        process_attributes_for node, options

        options[:tags].include?(node.name) ? node : nil
      else
        bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
    end
  end

  # Removes attributes that are not allowed or that carry a bad protocol,
  # and cleans the value of those that stay.
  def process_attributes_for(node, options)
    return unless node.attributes
    node.attributes.keys.each do |attr_name|
      value = node.attributes[attr_name].to_s

      if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
        node.attributes.delete(attr_name)
      else
        node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value)
      end
    end
  end

  # Returns a true value when +attr_name+ is a URI attribute whose +value+
  # names a protocol that is not in +allowed_protocols+. The protocol is
  # compared without regard to case or surrounding whitespace.
  def contains_bad_protocols?(attr_name, value)
    uri_attributes.include?(attr_name) &&
      (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i &&
        !allowed_protocols.include?(value.split(protocol_separator).first.to_s.downcase.strip))
  end
end
173
+ end