antisamy 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/README.rdoc +6 -1
  2. data/lib/antisamy/css/css_filter.rb +187 -0
  3. data/lib/antisamy/css/css_scanner.rb +84 -0
  4. data/lib/antisamy/css/css_validator.rb +129 -0
  5. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
  6. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
  7. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
  8. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
  9. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
  10. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
  11. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
  12. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
  13. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
  14. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
  15. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
  16. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
  17. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
  18. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
  19. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
  20. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
  21. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
  22. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
  23. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
  24. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
  25. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
  26. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
  27. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
  28. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
  29. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
  30. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
  31. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
  32. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
  33. data/lib/antisamy/csspool/rsac/sac.rb +14 -0
  34. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
  35. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
  36. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
  37. data/lib/antisamy/csspool/rsac.rb +1 -0
  38. data/lib/antisamy/html/handler.rb +4 -0
  39. data/lib/antisamy/html/sax_filter.rb +49 -33
  40. data/lib/antisamy/html/scanner.rb +1 -43
  41. data/lib/antisamy/policy.rb +8 -3
  42. data/lib/antisamy/scan_results.rb +68 -0
  43. data/lib/antisamy.rb +4 -0
  44. data/spec/antisamy_spec.rb +111 -3
  45. metadata +39 -3
@@ -0,0 +1,185 @@
1
+ require "antisamy/csspool/rsac/sac/lexeme"
2
+ require "antisamy/csspool/rsac/sac/token"
3
+
4
module RSAC
  # Regex-driven lexer for CSS text.
  #
  # The constructor registers a set of named regex fragments ("macros",
  # following the CSS 2.1 lexical grammar) and then an ordered list of
  # lexemes built from them. #tokenize turns a CSS string into a flat
  # array of Token / DelimiterToken objects.
  #
  # Lexeme, Token and DelimiterToken are defined in sibling files; this
  # class only relies on Lexeme#name, Lexeme#pattern and Token#value.
  class Tokenizer
    # Builds the macro table and the lexeme list.
    #
    # Lexemes are tried in registration order by #tokenize, so the more
    # specific patterns (units, at-keywords) must be registered before the
    # generic ones (DIMENSION, NUMBER).
    #
    # Yields the new tokenizer when a block is given.
    def initialize(&block)
      @lexemes = []
      @macros = {}

      # http://www.w3.org/TR/CSS21/syndata.html
      macro(:h, /([0-9a-f])/ )
      # Any non-ASCII character. Written as a negated ASCII range because
      # the octal form [\200-\377] is rejected by Ruby >= 1.9 as an invalid
      # multibyte escape in a UTF-8 regexp.
      macro(:nonascii, /([^\x00-\x7f])/ )
      macro(:nl, /(\n|\r\n|\r|\f)/ )
      macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
      macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
      macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
      macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
      macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
      macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
      macro(:name, /(#{m(:nmchar)}+)/ )
      # Decimal form first: Ruby alternation is first-match, not
      # longest-match, so with the integer form first a bare "1.5" would be
      # lexed as "1" followed by ".5".
      macro(:num, /([0-9]*\.[0-9]+|[0-9]+)/ )
      macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
      macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
      macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
      macro(:s, /([ \t\r\n\f]+)/ )
      macro(:w, /(#{m(:s)}?)/ )

      # Single letters as they may appear in keywords: the literal letter,
      # its hex escape (upper- or lower-case code point), or a backslash
      # followed by the letter where that is not a hex digit.
      macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
      macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
      macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
      macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
      macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
      macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
      macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
      macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
      macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
      macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
      # "O"/"o" are U+004F/U+006F (51/71 would be "Q"/"q").
      macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/ )
      macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
      macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
      macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
      macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
      macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
      macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )

      # COMMENT tokens are produced directly by #tokenize.
      #token :COMMENT do |patterns|
      #  patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #  patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #end

      token(:LBRACE, /#{m(:w)}\{/)
      token(:PLUS, /#{m(:w)}\+/)
      token(:GREATER, /#{m(:w)}>/)
      token(:COMMA, /#{m(:w)},/)

      token(:S, /#{m(:s)}/)

      # URI tokens are produced directly by #tokenize.
      #token :URI do |patterns|
      #  patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
      #  patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
      #end

      token(:FUNCTION, /#{m(:ident)}\(/)
      token(:IDENT, /#{m(:ident)}/)

      token(:CDO, /<!--/)
      token(:CDC, /-->/)
      token(:INCLUDES, /~=/)
      token(:DASHMATCH, /\|=/)
      # STRING tokens are produced directly by #tokenize.
      #token(:STRING, /#{m(:string)}/)
      token(:INVALID, /#{m(:invalid)}/)
      token(:HASH, /##{m(:name)}/)
      token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
      token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
      token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
      token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
      token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
      token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
      token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

      token :LENGTH do |patterns|
        patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
        patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
      end

      token :ANGLE do |patterns|
        patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
        patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
        patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
      end

      token :TIME do |patterns|
        patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
        patterns << /#{m(:num)}#{m(:S)}/
      end

      token :FREQ do |patterns|
        patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
        patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
      end

      token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
      token(:PERCENTAGE, /#{m(:num)}%/)
      token(:NUMBER, /#{m(:num)}/)


      yield self if block_given?
    end

    # Splits +input_data+ (a String of CSS) into an Array of tokens.
    #
    # The input is peeled in layers using a split/scan/zip pairing: comments
    # first, then url(...) references, then quoted strings. Those three are
    # emitted whole as :COMMENT, :URI and :STRING tokens with a nil position.
    # Whatever remains is consumed from the left by the first registered
    # lexeme whose pattern matches; if none matches, a single character is
    # emitted as a DelimiterToken. +pos+ is advanced only for tokens produced
    # in that innermost loop.
    def tokenize(input_data)
      tokens = []
      pos = 0

      comment_pattern = /\/\*.*?\*\//m
      comments = input_data.scan(comment_pattern)
      non_comments = input_data.split(comment_pattern)

      # Handle a small edge case, if our CSS is *only* comments,
      # the split, zip, scan trick won't work
      if non_comments.length == 0
        tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
      else
        non_comments.zip(comments).each do |non_comment, comment|
          non_comment.split(/url\([^\)]*\)/m).zip(
            non_comment.scan(/url\([^\)]*\)/m)
          ).each do |non_url, url|
            non_url.split(/"[^"]*"|'[^']*'/m).zip(
              non_url.scan(/"[^"]*"|'[^']*'/m)
            ).each do |non_string, quoted_string|
              if non_string.length > 0 && non_string =~ /\A\s*\Z/m
                # Whitespace-only run between two peeled-off pieces.
                tokens << Token.new(:S, non_string, nil)
              else
                # NOTE(review): the negative lookahead wraps a pattern that
                # can match the empty string, so this split/scan pair looks
                # like it never matches and the whole chunk is lexed as one
                # piece. Kept as-is; confirm the intent before changing it.
                non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
                  non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
                ).each do |string, whitespace|
                  until string.empty?
                    token = nil
                    # First registered lexeme that matches wins.
                    @lexemes.each do |lexeme|
                      match = lexeme.pattern.match(string)
                      if match
                        token = Token.new(lexeme.name, match.to_s, pos)
                        break
                      end
                    end

                    # Nothing matched: fall back to a one-character delimiter.
                    token ||= DelimiterToken.new(/^./.match(string).to_s, pos)

                    tokens << token
                    string = string.slice(Range.new(token.value.length, -1))
                    pos += token.value.length
                  end
                  tokens << Token.new(:S, whitespace, nil) if whitespace
                end
              end
              tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
            end
            tokens << Token.new(:URI, url, nil) if url
          end
          tokens << Token.new(:COMMENT, comment, nil) if comment
        end
      end

      tokens
    end

    private

    # Registers a lexeme named +name+. The pattern is given either directly
    # or by a block that is handed on to Lexeme.new.
    def token(name, pattern=nil, &block)
      @lexemes << Lexeme.new(name, pattern, &block)
    end

    # With +regex+: stores it under +name+ and returns it.
    # Without: returns the source string of the stored macro so it can be
    # interpolated into another regex literal.
    def macro(name, regex=nil)
      return @macros[name] = regex if regex
      @macros[name].source
    end

    alias :m :macro
  end
end
@@ -0,0 +1,14 @@
1
+ require "antisamy/csspool/rsac/sac/conditions"
2
+ require "antisamy/csspool/rsac/sac/selectors"
3
+ require "antisamy/csspool/rsac/sac/parser"
4
+ require "antisamy/csspool/rsac/stylesheet"
5
+
6
module RSAC
  # Convenience entry point: parses +text+ with a freshly created SAC parser
  # and returns that parser (not the value returned by its #parse call).
  #
  # NOTE(review): the parser is looked up as CSS::SAC::Parser although the
  # surrounding code lives under RSAC - confirm that constant resolves in
  # this port.
  def self.parse(text)
    CSS::SAC::Parser.new.tap { |sac_parser| sac_parser.parse(text) }
  end
end
@@ -0,0 +1,20 @@
1
+ require 'set'
2
module RSAC
  class StyleSheet
    # One selector together with the set of properties declared for it.
    #
    # Rules are Comparable: they order by the specificity of their selector
    # and, when that ties, by +index+.
    class Rule
      include Comparable

      attr_accessor :selector, :properties, :index

      # selector   - object responding to #specificity
      # index      - tie-breaker used by <=>
      # properties - initial properties; copied into a new Set
      def initialize(selector, index, properties = [])
        @selector, @index = selector, index
        @properties = Set.new(properties)
      end

      # Compares by selector specificity first, falling back to index only
      # when the specificities are equal.
      def <=>(other)
        by_specificity = selector.specificity <=> other.selector.specificity
        return by_specificity unless by_specificity == 0

        index <=> other.index
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
module RSAC
  # Document handler that collects the selectors and properties reported
  # through start_selector / property / end_selector into Rule objects, and
  # can serialise them back to CSS text.
  class StyleSheet < RSAC::DocumentHandler
    attr_reader :rules

    # sac - the object used by #create_rule to parse selector text
    #       (must respond to #parse_rule).
    def initialize(sac)
      @sac = sac
      @rules = []
      @current_rules = []
      @selector_index = 0
    end

    # Opens one pending rule per selector; they all share the current
    # selector index.
    def start_selector(selectors)
      selectors.each do |selector|
        @current_rules << Rule.new(selector, @selector_index)
      end
    end

    # Moves the pending rules into +rules+, bumps the selector index and
    # merges rules that share a selector.
    def end_selector(selectors)
      @rules = @rules + @current_rules
      @current_rules = []
      @selector_index += 1
      reduce!
    end

    # Returns the first stored rule whose selector equals that of +rule+,
    # or nil. A String argument is first turned into a Rule via #create_rule.
    def find_rule(rule)
      wanted = rule.is_a?(String) ? create_rule(rule) : rule
      rules.find { |candidate| candidate.selector == wanted.selector }
    end
    alias :[] :find_rule

    # Builds a Rule from the first selector parsed out of +rule+. The
    # selector index is incremented and the new value is used for the rule.
    def create_rule(rule)
      parsed_selector = @sac.parse_rule(rule).first
      @selector_index += 1
      Rule.new(parsed_selector, @selector_index)
    end

    # Records a [name, value, important] triple on every pending rule.
    def property(name, value, important)
      @current_rules.each do |pending|
        pending.properties << [name, value, important]
      end
    end

    # Get a hash of rules by property
    #
    # Keys are the rule's property triples as an array sorted by #hash, so
    # rules with the same set of properties end up under the same key.
    def rules_by_property
      @rules.each_with_object(Hash.new { |h, k| h[k] = [] }) do |rule, grouped|
        signature = rule.properties.to_a.sort_by { |prop| prop.hash } # HACK?
        grouped[signature] << rule
      end
    end

    # Serialises the stylesheet. Rules with identical property sets are
    # emitted as one block with a sorted, comma-separated selector list;
    # the blocks themselves are sorted as strings.
    def to_css
      blocks = rules_by_property.map do |properties, sharing|
        selector_list = sharing.map { |rule| rule.selector.to_css }.sort.join(', ')
        declarations = properties.map do |key, value, important|
          # font-family values are joined with commas, everything else
          # with spaces.
          separator = ('font-family' == key) ? ', ' : ' '
          rendered = [value].flatten.join(separator)
          flag = important ? ' !important' : ''
          "#{key}:#{rendered}#{flag};"
        end
        selector_list + " {\n" + declarations.join("\n") + "\n}"
      end
      blocks.sort.join("\n")
    end

    private

    # Remove duplicate rules
    #
    # The first rule seen for a selector is kept and the properties of any
    # later rule with the same selector are merged into it.
    def reduce!
      merged = {}
      @rules.each do |rule|
        kept = (merged[rule.selector] ||= rule)
        kept.properties = kept.properties + rule.properties
      end
      @rules = merged.values
      self
    end
  end
end
76
+
@@ -0,0 +1,3 @@
1
+ require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
+ require "antisamy/csspool/rsac/stylesheet/rule"
3
+
@@ -0,0 +1 @@
1
+ require 'antisamy/csspool/rsac/sac'
@@ -14,6 +14,7 @@ module AntiSamy
14
14
 
15
15
  # HTML entity encode some text
16
16
  def encode_text(text)
17
+ return "" if text.nil?
17
18
  @document.encode_special_chars(text)
18
19
  end
19
20
 
@@ -40,6 +41,9 @@ module AntiSamy
40
41
 
41
42
  # start an element
42
43
  def start_element(name,attributes)
44
+ if name.eql?("head") or name.eql?("body") or name.eql?("html")
45
+ return
46
+ end
43
47
  elem = Nokogiri::XML::Element.new(name, @document)
44
48
  attributes.each do |attrib_pair|
45
49
  elem[attrib_pair.first] = attrib_pair.last
@@ -39,16 +39,14 @@ module AntiSamy
39
39
  @stack = Stack.new
40
40
  @css_content = nil
41
41
  @css_attributes = nil
42
- @css_scanner = nil
42
+ @css_scanner = CssScanner.new(policy)
43
43
  @param_tag = param_tag
44
44
  end
45
45
 
46
46
  def error(text)
47
- #puts "SAX Error #{text}"
48
47
  end
49
48
 
50
49
  def warning(text)
51
- puts "SAX Warning #{text}"
52
50
  end
53
51
 
54
52
  # Always create a HTML document unless the DECL was set beforehand
@@ -122,13 +120,24 @@ module AntiSamy
122
120
  @handler.characters(tmp)
123
121
  @stack.push(:filter)
124
122
  elsif tag.nil?
125
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
126
- @stack.push(:filter)
123
+ # We ignore missing HTML and BODY tags since we are fragment parsing, but the
124
+ # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
125
+ unless name.eql?("html") or name.eql?("body")
126
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
127
+ end
128
+ # Nokogiri workaround for a style tag being auto-inserted into head
129
+ if name.eql?("head")
130
+ @stack.push(:remove)
131
+ else
132
+ @stack.push(:filter)
133
+ end
127
134
  elsif tag.action.eql?(Policy::ACTION_FILTER)
128
135
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
129
136
  @stack.push(:filter)
130
137
  elsif tag.action.eql?(Policy::ACTION_VALIDATE)
131
138
  # Handle validation
139
+ remove_tag = false
140
+ filter_tag = false
132
141
  is_style = name.include?("style")
133
142
  if is_style
134
143
  @stack.push(:css)
@@ -136,8 +145,6 @@ module AntiSamy
136
145
  @css_attributes = []
137
146
  else
138
147
  # Validate attributes
139
- remove_tag = false
140
- filter_tag = false
141
148
  attributes.each do |pair|
142
149
  a_name = pair.first
143
150
  a_value = pair.last
@@ -148,14 +155,16 @@ module AntiSamy
148
155
  # check if the attribute is a style
149
156
  if a_name.eql?("style")
150
157
  # Handle Style tags
151
- # begin
152
- # results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
153
- # valid_attributes << [a_name,results.clean_html]
154
- # @handler.errors << results.errors
155
- # @handler.errors.flatten!
156
- # rescue Exception => e
157
- # @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(value))
158
- # end
158
# Run the inline style="..." value through the CSS scanner. The cleaned
# CSS is kept as the attribute value only when it is non-empty; scanner
# messages are always appended to the handler's error list.
begin
  results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
  cleaned_css = results.clean_html
  unless cleaned_css.nil? or cleaned_css.empty?
    valid_attributes << [a_name,cleaned_css]
  end
  @handler.errors << results.messages
  @handler.errors.flatten!
rescue Exception => e
  # NOTE(review): rescuing Exception also swallows interrupts and exits;
  # left in place because any scanner failure is reported as malformed CSS.
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
end
159
168
  elsif !attrib.nil? # Attribute is not nil lets check it
160
169
  valid = false
161
170
  attrib.values.each do |av|
@@ -167,7 +176,8 @@ module AntiSamy
167
176
  end
168
177
  unless valid
169
178
  attrib.expressions.each do |ae|
170
- if a_value.downcase =~ ae
179
+ mc = ae.match(a_value)
180
+ if mc and mc.size == a_value.size
171
181
  valid_attributes << [a_name,a_value]
172
182
  valid = true
173
183
  break
@@ -198,7 +208,7 @@ module AntiSamy
198
208
  elsif filter_tag
199
209
  @stack.push(:filter)
200
210
  else
201
- if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW) =~ /true/i
211
+ if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
202
212
  valid_attributes << ["rel","nofollow"]
203
213
  end
204
214
  if masquerade
@@ -206,7 +216,7 @@ module AntiSamy
206
216
  valid_attributes << ["name",embed_name]
207
217
  valid_attributes << ["value",embed_value]
208
218
  end
209
- @stack.push(:keep)
219
+ @stack.push(:keep) unless @stack.peek?(:css)
210
220
  end
211
221
  # End validation action
212
222
  elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
@@ -234,7 +244,7 @@ module AntiSamy
234
244
  # Add character data to the current tag
235
245
  def characters(text)
236
246
  unless text =~ /\S/ # skip whitespace
237
- return unless @policy.directive(Policy::PRESERVE_SPACE) =~ /true/i
247
+ return unless @policy.directive(Policy::PRESERVE_SPACE)
238
248
  end
239
249
  if @stack.peek?(:css)
240
250
  @css_content << text
@@ -252,20 +262,26 @@ module AntiSamy
252
262
  elsif @stack.peek?(:css)
253
263
  @stack.pop
254
264
  # Do css stuff here
255
- # begin
256
- # results = @css_scanner.scan_tyle_sheet(@css_content,@policy.max_input)
257
- # @handler.errors << results.errors
258
- # @handler.errors.flatten!
259
- # unless results.clean_html.nil? or results.clean_html.empty?
260
- # @handler.start_element(element,css_attributes)
261
- # @handler.characters results.clean_html
262
- # @handler.end_element(element)
263
- # end
264
- # rescue Exception => e
265
- # @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
266
- # ensure
267
- # @css_content = nil
268
- # @css_attributes = nil
265
# Run the collected <style> body through the CSS scanner and re-emit the
# element with the cleaned CSS. When nothing survives cleaning, the element
# is still emitted with a placeholder comment as its content.
begin
  results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
  @handler.errors << results.messages
  @handler.errors.flatten!
  cleaned_css = results.clean_html
  cleaned_css = "/* */" if cleaned_css.nil? or cleaned_css.empty?
  @handler.start_element(name,@css_attributes)
  @handler.characters cleaned_css
  @handler.end_element(name)
rescue Exception
  # NOTE(review): rescuing Exception also swallows interrupts and exits;
  # left in place because any scanner failure is reported as malformed CSS.
  # The failure is recorded as a scan message only; nothing is printed.
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
ensure
  # Always drop the buffered CSS so it cannot leak into the next element.
  @css_content = nil
  @css_attributes = nil
end
269
285
  else
270
286
  @stack.pop
271
287
  @handler.end_element(name)
@@ -1,46 +1,4 @@
1
1
  module AntiSamy
2
-
3
- class ScanError < StandardError; end
4
-
5
- # Scan message, it will contain a message key, tag and optionally content, value
6
- class ScanMessage
7
- # error.tag.notfound
8
- ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
9
- # error.tag.removed
10
- ERROR_TAG_DISALLOWED = "error.tag.removed"
11
- # error.tag.filtered
12
- ERROR_TAG_FILTERED = "error.tag.filtered"
13
- # error.tag.encoded
14
- ERROR_TAG_ENCODED = "error.tag.encoded"
15
- # error.css.tag.malformed
16
- ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
17
- # error.css.attribute.malformed
18
- ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
19
- # error.attribute.invalid.filtered
20
- ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
21
- # error.attribute.invalid.encoded
22
- ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
23
- # error.attribute.invalid.filtered
24
- ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
25
- # error.attribute.invalid.removed
26
- ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
27
- # error.attribute.notfound
28
- ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
29
- # error.attribute.invalid
30
- ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
31
-
32
- attr_reader :tag, :content, :value, :msgkey
33
- def initialize(msgkey, tag, content=nil,value=nil)
34
- @msgkey = msgkey
35
- @tag = tag
36
- @content = content
37
- @value = value
38
- end
39
- def to_s
40
- "#{self.msgkey} #{@tag} #{@content} #{@value}"
41
- end
42
- end
43
-
44
2
  class Scanner
45
3
  attr_accessor :policy, :errors, :nofollow, :pae
46
4
  DEFAULT_ENCODE = "UTF-8"
@@ -67,7 +25,7 @@ module AntiSamy
67
25
  # will raise an error if nil input or the maximum input size is exceeded
68
26
  def scan(input, input_encode, output_encoder)
69
27
  raise ArgumentError if input.nil?
70
- raise ScanError, "Max input Exceeded" if input.size > @policy.max_input
28
+ raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
71
29
  # check policy stuff
72
30
  handler = Handler.new(@policy,output_encoder)
73
31
  scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
@@ -29,13 +29,16 @@ module AntiSamy
29
29
  MAX_INPUT = "maxInputSize"
30
30
  USE_XHTML = "userXHTML"
31
31
  FORMAT_OUTPUT = "formatOutput"
32
+ # will we allow embedded style sheets
32
33
  EMBED_STYLESHEETS = "embedStyleSheets"
34
+ # Connection timeout in milliseconds
33
35
  CONN_TIMEOUT = "conenctionTimeout"
34
36
  ANCHROS_NOFOLLOW = "nofollowAnchors"
35
37
  VALIDATE_P_AS_E = "validateParamAsEmbed"
36
38
  PRESERVE_SPACE = "preserveSpace"
37
39
  PRESERVE_COMMENTS = "preserveComments"
38
40
  ON_UNKNOWN_TAG = "onUnknownTag"
41
+ MAX_SHEETS = "maxStyleSheetImports"
39
42
 
40
43
  # Class method to fetch the schema
41
44
  def self.schema
@@ -192,15 +195,17 @@ module AntiSamy
192
195
  section.element_children.each do |dir|
193
196
  name = dir["name"]
194
197
  value = dir["value"]
195
- @directives[name] = value
196
- if name.eql?("maxInputSize")
198
+ if name.eql?("maxInputSize")
197
199
  @max_input = value.to_i
198
200
  else
199
- if value =~ /true/
201
+ if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
202
+ value = value.to_i
203
+ elsif value =~ /true/i
200
204
  value = true
201
205
  else
202
206
  value = false
203
207
  end
208
+ @directives[name] = value
204
209
  end
205
210
  end
206
211
  end
@@ -1,4 +1,72 @@
1
1
  module AntiSamy
2
# Error type raised by the scanner (for example when the input exceeds the
# policy's maximum size).
class ScanError < StandardError; end

# A single scan finding: a message key, the tag it concerns and, optionally,
# the offending content and value.
class ScanMessage
  # Tag is not present in the policy
  ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
  # Tag was removed
  ERROR_TAG_DISALLOWED = "error.tag.removed"
  # Tag was filtered
  ERROR_TAG_FILTERED = "error.tag.filtered"
  # Tag was encoded
  ERROR_TAG_ENCODED = "error.tag.encoded"
  # Style tag content is malformed CSS
  ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
  # Style attribute content is malformed CSS
  ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
  # Invalid attribute caused the tag to be filtered
  ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
  # Invalid attribute caused the tag to be encoded
  ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
  # Invalid attribute was filtered (same key as ERROR_ATTRIBUTE_CAUSE_FILTER)
  ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
  # Invalid attribute was removed
  ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
  # Attribute is not present in the policy
  ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
  # Attribute is invalid
  ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
  # Comment was removed
  ERROR_COMMENT_REMOVED = "error.comment.removed"
  # CSS tag rule not found
  ERROR_CSS_TAG_RULE_NOTFOUND = "error.css.tag.notfound"
  # Style sheet rule not found
  ERROR_STYLESHEET_RULE_NOTFOUND = "error.stylesheet.notfound"
  # Embedded style sheets are disabled
  ERROR_CSS_IMPORT_DISABLED = "error.css.import.disabled"
  # Import URI is invalid
  ERROR_CSS_IMPORT_URL_INVALID = "error.css.import.uri.invalid"
  # Selector is disallowed in a CSS tag
  ERROR_CSS_TAG_SELECTOR_DISALLOWED = "error.css.tag.removed"
  # Selector is not allowed in a style sheet
  ERROR_STYLESHEET_SELECTOR_DISALLOWED = "error.style.tag.notallowed"
  # Invalid property in a CSS tag
  ERROR_CSS_TAG_PROPERTY_INVALID = "error.css.property.invalid"
  # Invalid property in a style sheet
  ERROR_STYLESHEET_PROPERTY_INVALID = "error.stylesheet.css.property.invalid"
  # Allotted number of imported style sheets exceeded
  ERROR_CSS_IMPORT_EXCEEDED = "error.import.exceeded.sheets"
  # Imported style sheet exceeds the size limit
  ERROR_CSS_IMPORT_INPUT_SIZE = "error.import.exceeded.size"
  # Import failed
  ERROR_CSS_IMPORT_FAILURE = "error.import.bad.uri"
  # Selector not found (style sheet)
  ERROR_STYLESHEET_SELECTOR_NOTFOUND = "error.css.stylesheet.selector.notfound"
  # Selector not found (CSS tag)
  ERROR_CSS_TAG_SELECTOR_NOTFOUND = "error.css.tag.selector.notfound"

  attr_reader :tag, :content, :value, :msgkey

  # msgkey  - one of the ERROR_* keys above
  # tag     - name of the tag the message is about
  # content - optional offending content
  # value   - optional offending value
  def initialize(msgkey, tag, content=nil,value=nil)
    @msgkey, @tag = msgkey, tag
    @content, @value = content, value
  end

  # Space-separated "msgkey tag content value"; missing parts render as
  # empty strings.
  def to_s
    format("%s %s %s %s", msgkey, @tag, @content, @value)
  end
end
69
+
2
70
  # Container of scan results, provides a list of ScanMessage indicating
3
71
  # why elements were removed from the resulting html
4
72
  class ScanResults
data/lib/antisamy.rb CHANGED
@@ -1,10 +1,14 @@
1
1
  require 'nokogiri'
2
+ require 'antisamy/csspool/rsac'
2
3
  require 'antisamy/model/attribute'
3
4
  require 'antisamy/model/tag'
4
5
  require 'antisamy/model/css_property'
5
6
  require 'antisamy/policy'
6
7
  require 'antisamy/scan_results'
7
8
  require 'antisamy/html/handler'
9
+ require 'antisamy/css/css_validator'
10
+ require 'antisamy/css/css_filter'
11
+ require 'antisamy/css/css_scanner'
8
12
  require 'antisamy/html/sax_filter'
9
13
  require 'antisamy/html/scanner'
10
14