antisamy 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/README.rdoc +6 -1
  2. data/lib/antisamy/css/css_filter.rb +187 -0
  3. data/lib/antisamy/css/css_scanner.rb +84 -0
  4. data/lib/antisamy/css/css_validator.rb +129 -0
  5. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
  6. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
  7. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
  8. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
  9. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
  10. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
  11. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
  12. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
  13. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
  14. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
  15. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
  16. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
  17. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
  18. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
  19. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
  20. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
  21. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
  22. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
  23. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
  24. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
  25. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
  26. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
  27. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
  28. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
  29. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
  30. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
  31. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
  32. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
  33. data/lib/antisamy/csspool/rsac/sac.rb +14 -0
  34. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
  35. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
  36. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
  37. data/lib/antisamy/csspool/rsac.rb +1 -0
  38. data/lib/antisamy/html/handler.rb +4 -0
  39. data/lib/antisamy/html/sax_filter.rb +49 -33
  40. data/lib/antisamy/html/scanner.rb +1 -43
  41. data/lib/antisamy/policy.rb +8 -3
  42. data/lib/antisamy/scan_results.rb +68 -0
  43. data/lib/antisamy.rb +4 -0
  44. data/spec/antisamy_spec.rb +111 -3
  45. metadata +39 -3
@@ -0,0 +1,185 @@
1
+ require "antisamy/csspool/rsac/sac/lexeme"
2
+ require "antisamy/csspool/rsac/sac/token"
3
+
4
module RSAC
  # Regular-expression driven lexer for CSS text.
  #
  # The constructor registers a set of named regex fragments ("macros") and
  # then a list of lexemes built from them.  #tokenize splits the input
  # into comments, url(...) groups, quoted strings and the remaining text, and
  # runs the registered lexemes over that remaining text.
  #
  # Lexeme, Token and DelimiterToken are not defined in this file; they are
  # expected to be provided by the required sac/lexeme and sac/token files.
  class Tokenizer
    # Builds the macro table and the ordered lexeme list.
    #
    # Lexemes are tried in registration order by #tokenize, so the order of
    # the token(...) calls below is significant.
    #
    # Yields the new tokenizer when a block is given.
    def initialize(&block)
      @lexemes = []
      @macros = {}

      # http://www.w3.org/TR/CSS21/syndata.html
      macro(:h, /([0-9a-f])/ )
      # Any non-ASCII character.  Written as a negated ASCII range because a
      # literal /[\200-\377]/ is rejected ("invalid multibyte escape") when
      # the source encoding is UTF-8.
      macro(:nonascii, /([^\x00-\x7F])/ )
      macro(:nl, /(\n|\r\n|\r|\f)/ )
      macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
      macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
      macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
      macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
      macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
      macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
      macro(:name, /(#{m(:nmchar)}+)/ )
      macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
      macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
      macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
      macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
      macro(:s, /([ \t\r\n\f]+)/ )
      macro(:w, /(#{m(:s)}?)/ )

      # Single letters, either literal or written as a CSS hex escape of the
      # upper-case / lower-case code point.
      macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
      macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
      macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
      macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
      macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
      macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
      macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
      macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
      macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
      macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
      # "O"/"o" are code points 4f/6f.  (51/71 would be "Q"/"q".)
      macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/ )
      macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
      macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
      macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
      macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
      macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
      macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )

      #token :COMMENT do |patterns|
      #  patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #  patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #end

      token(:LBRACE, /#{m(:w)}\{/)
      token(:PLUS, /#{m(:w)}\+/)
      token(:GREATER, /#{m(:w)}>/)
      token(:COMMA, /#{m(:w)},/)

      token(:S, /#{m(:s)}/)

      #token :URI do |patterns|
      #  patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
      #  patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
      #end

      token(:FUNCTION, /#{m(:ident)}\(/)
      token(:IDENT, /#{m(:ident)}/)

      token(:CDO, /<!--/)
      token(:CDC, /-->/)
      token(:INCLUDES, /~=/)
      token(:DASHMATCH, /\|=/)
      #token(:STRING, /#{m(:string)}/)
      token(:INVALID, /#{m(:invalid)}/)
      token(:HASH, /##{m(:name)}/)
      token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
      token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
      token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
      token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
      token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
      token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
      token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

      token :LENGTH do |patterns|
        patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
        patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
      end

      token :ANGLE do |patterns|
        patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
        patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
        patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
      end

      token :TIME do |patterns|
        patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
        patterns << /#{m(:num)}#{m(:S)}/
      end

      token :FREQ do |patterns|
        patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
        patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
      end

      token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
      token(:PERCENTAGE, /#{m(:num)}%/)
      token(:NUMBER, /#{m(:num)}/)


      yield self if block_given?
    end

    # Splits +input_data+ into an array of tokens.
    #
    # The input is peeled in layers: /* ... */ comments first, then
    # url(...) groups, then quoted strings.  Each of those becomes a single
    # :COMMENT, :URI or :STRING token (created with a nil position); text
    # that is only whitespace becomes an :S token.  Everything else is
    # consumed from the left by the first registered lexeme whose pattern
    # matches, falling back to a one-character DelimiterToken.
    #
    # Only lexeme and delimiter tokens receive a position, and +pos+ is
    # advanced only by the length of those tokens.
    def tokenize(input_data)
      tokens = []
      pos = 0

      comment_pattern = /\/\*.*?\*\//m
      comments = input_data.scan(comment_pattern)
      non_comments = input_data.split(comment_pattern)

      # Handle a small edge case, if our CSS is *only* comments,
      # the split, zip, scan trick won't work
      if non_comments.length == 0
        tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
      else
        non_comments.zip(comments).each do |non_comment, comment|
          non_comment.split(/url\([^\)]*\)/m).zip(
            non_comment.scan(/url\([^\)]*\)/m)
          ).each do |non_url, url|
            non_url.split(/"[^"]*"|'[^']*'/m).zip(
              non_url.scan(/"[^"]*"|'[^']*'/m)
            ).each do |non_string, quoted_string|
              if non_string.length > 0 && non_string =~ /\A\s*\Z/m
                tokens << Token.new(:S, non_string, nil)
              else
                # NOTE(review): the negative lookahead (?![{}+>]*) can never
                # succeed because [{}+>]* also matches the empty string, so
                # this split/scan pair looks like a no-op that always yields
                # the whole of non_string with a nil whitespace -- confirm
                # the intent before changing it.
                non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
                  non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
                ).each do |string, whitespace|
                  until string.empty?
                    token = nil
                    # First registered lexeme that matches wins.
                    @lexemes.each do |lexeme|
                      match = lexeme.pattern.match(string)
                      if match
                        token = Token.new(lexeme.name, match.to_s, pos)
                        break
                      end
                    end

                    # Nothing matched: emit a single-character delimiter.
                    token ||= DelimiterToken.new(/^./.match(string).to_s, pos)

                    tokens << token
                    # Drop the consumed prefix and advance the position.
                    string = string.slice(Range.new(token.value.length, -1))
                    pos += token.value.length
                  end
                  tokens << Token.new(:S, whitespace, nil) if whitespace
                end
              end
              tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
            end
            tokens << Token.new(:URI, url, nil) if url
          end
          tokens << Token.new(:COMMENT, comment, nil) if comment
        end
      end

      tokens
    end

    private

    # Registers a lexeme under +name+.  Either a single +pattern+ or a block
    # is forwarded unchanged to Lexeme.new.
    def token(name, pattern=nil, &block)
      @lexemes << Lexeme.new(name, pattern, &block)
    end

    # With a +regex+: stores it under +name+ and returns the regex.
    # Without one: returns the source string of the stored regex so it can be
    # interpolated into a larger pattern.
    def macro(name, regex=nil)
      if regex
        @macros[name] = regex
      else
        @macros[name].source
      end
    end

    alias :m :macro
  end
end
@@ -0,0 +1,14 @@
1
+ require "antisamy/csspool/rsac/sac/conditions"
2
+ require "antisamy/csspool/rsac/sac/selectors"
3
+ require "antisamy/csspool/rsac/sac/parser"
4
+ require "antisamy/csspool/rsac/stylesheet"
5
+
6
module RSAC
  class << self
    # Parses a CSS string with a fresh parser.
    #
    # text - the CSS source handed to the parser's #parse method.
    #
    # Returns the parser instance itself (not the return value of #parse).
    def parse(text)
      # The vendored SAC classes are namespaced under RSAC (for example
      # RSAC::Tokenizer and RSAC::DocumentHandler), so the parser is looked
      # up there rather than under CSS::SAC.
      parser = RSAC::Parser.new
      parser.parse(text)
      parser
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'set'
2
module RSAC
  class StyleSheet
    # One selector together with the set of properties declared for it.
    #
    # Rules are Comparable: they order by the specificity of their selector
    # and, when that is equal, by their index.
    class Rule
      include Comparable

      attr_accessor :selector, :properties, :index

      # selector   - object responding to #specificity.
      # index      - value used as the tie-breaker in #<=>.
      # properties - initial properties; stored as a Set, so duplicates
      #              collapse.
      def initialize(selector, index, properties = [])
        @index = index
        @selector = selector
        @properties = Set.new(properties)
      end

      # Compares by selector specificity first, then by index.
      def <=>(other)
        by_specificity = selector.specificity <=> other.selector.specificity
        return by_specificity unless by_specificity == 0

        index <=> other.index
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
module RSAC
  # Document handler that collects the selectors and properties reported
  # between start_selector and end_selector into a list of Rule objects and
  # can render them back to CSS text.
  class StyleSheet < RSAC::DocumentHandler
    attr_reader :rules

    # sac - parser object; only its #parse_rule method is used here
    #       (see #create_rule).
    def initialize(sac)
      @sac = sac
      @rules = []
      @current_rules = []
      @selector_index = 0
    end

    # Opens one pending Rule per selector, all sharing the current selector
    # index.
    def start_selector(selectors)
      selectors.each do |selector|
        @current_rules << Rule.new(selector, @selector_index)
      end
    end

    # Moves the pending rules into the rule list, bumps the selector index
    # and merges rules that share a selector.
    def end_selector(selectors)
      @rules = @rules + @current_rules
      @current_rules = []
      @selector_index += 1
      reduce!
    end

    # Returns the first stored rule whose selector equals the selector of
    # +rule+.  A String argument is first turned into a Rule via
    # #create_rule.
    def find_rule(rule)
      wanted = rule.is_a?(String) ? create_rule(rule) : rule
      rules.find { |candidate| candidate.selector == wanted.selector }
    end
    alias :[] :find_rule

    # Builds a Rule from the first selector the parser returns for +rule+.
    # The selector index is incremented as a side effect.
    def create_rule(rule)
      selector = @sac.parse_rule(rule).first
      @selector_index += 1
      Rule.new(selector, @selector_index)
    end

    # Records a [name, value, important] triple on every pending rule.
    def property(name, value, important)
      @current_rules.each do |pending|
        pending.properties << [name, value, important]
      end
    end

    # Get a hash of rules by property
    #
    # Keys are the rule's property triples as an array ordered by each
    # triple's #hash; values are the rules that carry exactly those
    # properties.
    def rules_by_property
      grouped = Hash.new { |hash, key| hash[key] = [] }
      @rules.each do |rule|
        key = rule.properties.to_a.sort_by { |prop| prop.hash } # HACK?
        grouped[key] << rule
      end
      grouped
    end

    # Renders the rules as CSS text.  Rules with identical properties are
    # emitted as one block with a sorted, comma-separated selector list, and
    # the blocks themselves are sorted as strings.
    def to_css
      blocks = rules_by_property.map do |properties, group|
        selector_list = group.map { |rule| rule.selector.to_css }.sort.join(', ')
        declarations = properties.map do |key, value, important|
          # Super annoying.  If the property is font-family, its supposed to
          # be commas
          separator = ('font-family' == key) ? ', ' : ' '
          rendered = [value].flatten.join(separator)
          "#{key}:#{rendered}#{important ? ' !important' : ''};"
        end
        selector_list + " {\n" + declarations.join("\n") + "\n}"
      end
      blocks.sort.join("\n")
    end

    private

    # Remove duplicate rules
    #
    # The first rule seen for a selector is kept and absorbs the properties
    # of every later rule with the same selector.  Returns self.
    def reduce!
      merged = {}
      @rules.each do |rule|
        kept = (merged[rule.selector] ||= rule)
        kept.properties += rule.properties
      end
      @rules = merged.values
      self
    end
  end
end
76
+
@@ -0,0 +1,3 @@
1
+ require "antisamy/csspool/rsac/stylesheet/stylesheet"
2
+ require "antisamy/csspool/rsac/stylesheet/rule"
3
+
@@ -0,0 +1 @@
1
+ require 'antisamy/csspool/rsac/sac'
@@ -14,6 +14,7 @@ module AntiSamy
14
14
 
15
15
  # HTML entity encode some text
16
16
  def encode_text(text)
17
+ return "" if text.nil?
17
18
  @document.encode_special_chars(text)
18
19
  end
19
20
 
@@ -40,6 +41,9 @@ module AntiSamy
40
41
 
41
42
  # start an element
42
43
  def start_element(name,attributes)
44
+ if name.eql?("head") or name.eql?("body") or name.eql?("html")
45
+ return
46
+ end
43
47
  elem = Nokogiri::XML::Element.new(name, @document)
44
48
  attributes.each do |attrib_pair|
45
49
  elem[attrib_pair.first] = attrib_pair.last
@@ -39,16 +39,14 @@ module AntiSamy
39
39
  @stack = Stack.new
40
40
  @css_content = nil
41
41
  @css_attributes = nil
42
- @css_scanner = nil
42
+ @css_scanner = CssScanner.new(policy)
43
43
  @param_tag = param_tag
44
44
  end
45
45
 
46
46
  def error(text)
47
- #puts "SAX Error #{text}"
48
47
  end
49
48
 
50
49
  def warning(text)
51
- puts "SAX Warning #{text}"
52
50
  end
53
51
 
54
52
  # Always create a HTML document unless the DECL was set beforehand
@@ -122,13 +120,24 @@ module AntiSamy
122
120
  @handler.characters(tmp)
123
121
  @stack.push(:filter)
124
122
  elsif tag.nil?
125
- @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
126
- @stack.push(:filter)
123
+ # We ignore missing HTML and BODY tags since we are fragment parsing, but the
124
+ # Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
125
+ unless name.eql?("html") or name.eql?("body")
126
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
127
+ end
128
+ # Nokogiri work around for a style tag being auto inserted into head
129
+ if name.eql?("head")
130
+ @stack.push(:remove)
131
+ else
132
+ @stack.push(:filter)
133
+ end
127
134
  elsif tag.action.eql?(Policy::ACTION_FILTER)
128
135
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
129
136
  @stack.push(:filter)
130
137
  elsif tag.action.eql?(Policy::ACTION_VALIDATE)
131
138
  # Handle validation
139
+ remove_tag = false
140
+ filter_tag = false
132
141
  is_style = name.include?("style")
133
142
  if is_style
134
143
  @stack.push(:css)
@@ -136,8 +145,6 @@ module AntiSamy
136
145
  @css_attributes = []
137
146
  else
138
147
  # Validate attributes
139
- remove_tag = false
140
- filter_tag = false
141
148
  attributes.each do |pair|
142
149
  a_name = pair.first
143
150
  a_value = pair.last
@@ -148,14 +155,16 @@ module AntiSamy
148
155
  # check if the attribute is a style
149
156
  if a_name.eql?("style")
150
157
  # Handle Style tags
151
- # begin
152
- # results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
153
- # valid_attributes << [a_name,results.clean_html]
154
- # @handler.errors << results.errors
155
- # @handler.errors.flatten!
156
- # rescue Exception => e
157
- # @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(value))
158
- # end
158
+ begin
159
+ results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
160
+ unless result.clean_html.empty?
161
+ valid_attributes << [a_name,results.clean_html]
162
+ end
163
+ @handler.errors << results.messages
164
+ @handler.errors.flatten!
165
+ rescue Exception => e
166
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
167
+ end
159
168
  elsif !attrib.nil? # Attribute is not nil lets check it
160
169
  valid = false
161
170
  attrib.values.each do |av|
@@ -167,7 +176,8 @@ module AntiSamy
167
176
  end
168
177
  unless valid
169
178
  attrib.expressions.each do |ae|
170
- if a_value.downcase =~ ae
179
+ mc = ae.match(a_value)
180
+ if mc and mc.size == a_value.size
171
181
  valid_attributes << [a_name,a_value]
172
182
  valid = true
173
183
  break
@@ -198,7 +208,7 @@ module AntiSamy
198
208
  elsif filter_tag
199
209
  @stack.push(:filter)
200
210
  else
201
- if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW) =~ /true/i
211
+ if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
202
212
  valid_attributes << ["rel","nofollow"]
203
213
  end
204
214
  if masquerade
@@ -206,7 +216,7 @@ module AntiSamy
206
216
  valid_attributes << ["name",embed_name]
207
217
  valid_attributes << ["value",embed_value]
208
218
  end
209
- @stack.push(:keep)
219
+ @stack.push(:keep) unless @stack.peek?(:css)
210
220
  end
211
221
  # End validation action
212
222
  elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
@@ -234,7 +244,7 @@ module AntiSamy
234
244
  # Add character data to the current tag
235
245
  def characters(text)
236
246
  unless text =~ /\S/ # skip whitespace
237
- return unless @policy.directive(Policy::PRESERVE_SPACE) =~ /true/i
247
+ return unless @policy.directive(Policy::PRESERVE_SPACE)
238
248
  end
239
249
  if @stack.peek?(:css)
240
250
  @css_content << text
@@ -252,20 +262,26 @@ module AntiSamy
252
262
  elsif @stack.peek?(:css)
253
263
  @stack.pop
254
264
  # Do css stuff here
255
- # begin
256
- # results = @css_scanner.scan_tyle_sheet(@css_content,@policy.max_input)
257
- # @handler.errors << results.errors
258
- # @handler.errors.flatten!
259
- # unless results.clean_html.nil? or results.clean_html.empty?
260
- # @handler.start_element(element,css_attributes)
261
- # @handler.characters results.clean_html
262
- # @handler.end_element(element)
263
- # end
264
- # rescue Exception => e
265
- # @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
266
- # ensure
267
- # @css_content = nil
268
- # @css_attributes = nil
265
+ begin
266
+ results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
267
+ @handler.errors << results.messages
268
+ @handler.errors.flatten!
269
+ unless results.clean_html.nil? or results.clean_html.empty?
270
+ @handler.start_element(name,@css_attributes)
271
+ @handler.characters results.clean_html
272
+ @handler.end_element(name)
273
+ else
274
+ @handler.start_element(name,@css_attributes)
275
+ @handler.characters "/* */"
276
+ @handler.end_element(name)
277
+ end
278
+ rescue Exception => e
279
+ puts e
280
+ @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
281
+ ensure
282
+ @css_content = nil
283
+ @css_attributes = nil
284
+ end
269
285
  else
270
286
  @stack.pop
271
287
  @handler.end_element(name)
@@ -1,46 +1,4 @@
1
1
  module AntiSamy
2
-
3
- class ScanError < StandardError; end
4
-
5
- # Scan message, it will contain a message key, tag and optionally content, value
6
- class ScanMessage
7
- # error.tag.notfound
8
- ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
9
- # error.tag.removed
10
- ERROR_TAG_DISALLOWED = "error.tag.removed"
11
- # error.tag.filtered
12
- ERROR_TAG_FILTERED = "error.tag.filtered"
13
- # error.tag.encoded
14
- ERROR_TAG_ENCODED = "error.tag.encoded"
15
- # error.css.tag.malformed
16
- ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
17
- # error.css.attribute.malformed
18
- ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
19
- # error.attribute.invalid.filtered
20
- ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
21
- # error.attribute.invalid.encoded
22
- ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
23
- # error.attribute.invalid.filtered
24
- ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
25
- # error.attribute.invalid.removed
26
- ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
27
- # error.attribute.notfound
28
- ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
29
- # error.attribute.invalid
30
- ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
31
-
32
- attr_reader :tag, :content, :value, :msgkey
33
- def initialize(msgkey, tag, content=nil,value=nil)
34
- @msgkey = msgkey
35
- @tag = tag
36
- @content = content
37
- @value = value
38
- end
39
- def to_s
40
- "#{self.msgkey} #{@tag} #{@content} #{@value}"
41
- end
42
- end
43
-
44
2
  class Scanner
45
3
  attr_accessor :policy, :errors, :nofollow, :pae
46
4
  DEFAULT_ENCODE = "UTF-8"
@@ -67,7 +25,7 @@ module AntiSamy
67
25
  # will raise an error if nil input or the maximum input size is exceeded
68
26
  def scan(input, input_encode, output_encoder)
69
27
  raise ArgumentError if input.nil?
70
- raise ScanError, "Max input Exceeded" if input.size > @policy.max_input
28
+ raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
71
29
  # check poilcy stuff
72
30
  handler = Handler.new(@policy,output_encoder)
73
31
  scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
@@ -29,13 +29,16 @@ module AntiSamy
29
29
  MAX_INPUT = "maxInputSize"
30
30
  USE_XHTML = "userXHTML"
31
31
  FORMAT_OUTPUT = "formatOutput"
32
+ # will we allow embedded style sheets
32
33
  EMBED_STYLESHEETS = "embedStyleSheets"
34
+ # Connection timeout in milliseconds
33
35
  CONN_TIMEOUT = "conenctionTimeout"
34
36
  ANCHROS_NOFOLLOW = "nofollowAnchors"
35
37
  VALIDATE_P_AS_E = "validateParamAsEmbed"
36
38
  PRESERVE_SPACE = "preserveSpace"
37
39
  PRESERVE_COMMENTS = "preserveComments"
38
40
  ON_UNKNOWN_TAG = "onUnknownTag"
41
+ MAX_SHEETS = "maxStyleSheetImports"
39
42
 
40
43
  # Class method to fetch the schema
41
44
  def self.schema
@@ -192,15 +195,17 @@ module AntiSamy
192
195
  section.element_children.each do |dir|
193
196
  name = dir["name"]
194
197
  value = dir["value"]
195
- @directives[name] = value
196
- if name.eql?("maxInputSize")
198
+ if name.eql?("maxInputSize")
197
199
  @max_input = value.to_i
198
200
  else
199
- if value =~ /true/
201
+ if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
202
+ value = value.to_i
203
+ elsif value =~ /true/i
200
204
  value = true
201
205
  else
202
206
  value = false
203
207
  end
208
+ @directives[name] = value
204
209
  end
205
210
  end
206
211
  end
@@ -1,4 +1,72 @@
1
1
  module AntiSamy
2
# Error type for scan failures.
class ScanError < StandardError; end

# Scan message: carries a message key and a tag, plus optional content and
# value.  The constants below are the available message keys.
class ScanMessage
  # tag not found in the policy
  ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
  # tag removed
  ERROR_TAG_DISALLOWED = "error.tag.removed"
  # tag filtered
  ERROR_TAG_FILTERED = "error.tag.filtered"
  # tag encoded
  ERROR_TAG_ENCODED = "error.tag.encoded"
  # malformed CSS in a tag
  ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
  # malformed CSS in an attribute
  ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
  # invalid attribute, filtered (same key as ERROR_ATTRIBUTE_INVALID_FILTERED)
  ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
  # invalid attribute, encoded
  ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
  # invalid attribute, filtered
  ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
  # invalid attribute, removed
  ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
  # attribute not found in the policy
  ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
  # invalid attribute
  ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
  # comment removed
  ERROR_COMMENT_REMOVED = "error.comment.removed"
  # tag rule not found
  ERROR_CSS_TAG_RULE_NOTFOUND = "error.css.tag.notfound"
  # style sheet rule not found
  ERROR_STYLESHEET_RULE_NOTFOUND = "error.stylesheet.notfound"
  # embedded stylesheets disabled
  ERROR_CSS_IMPORT_DISABLED = "error.css.import.disabled"
  # bad uri
  ERROR_CSS_IMPORT_URL_INVALID = "error.css.import.uri.invalid"
  # disallowed selector
  ERROR_CSS_TAG_SELECTOR_DISALLOWED = "error.css.tag.removed"
  # selector not allowed in a style sheet
  ERROR_STYLESHEET_SELECTOR_DISALLOWED = "error.style.tag.notallowed"
  # invalid css tag property
  ERROR_CSS_TAG_PROPERTY_INVALID = "error.css.property.invalid"
  # invalid style sheet property
  ERROR_STYLESHEET_PROPERTY_INVALID = "error.stylesheet.css.property.invalid"
  # exceeded the allotted number of imports
  ERROR_CSS_IMPORT_EXCEEDED = "error.import.exceeded.sheets"
  # exceeded the allowed size
  ERROR_CSS_IMPORT_INPUT_SIZE = "error.import.exceeded.size"
  # failed to import
  ERROR_CSS_IMPORT_FAILURE = "error.import.bad.uri"
  # style sheet selector not found
  ERROR_STYLESHEET_SELECTOR_NOTFOUND = "error.css.stylesheet.selector.notfound"
  # css tag selector not found
  ERROR_CSS_TAG_SELECTOR_NOTFOUND = "error.css.tag.selector.notfound"

  attr_reader :tag, :content, :value, :msgkey

  # msgkey  - message key (one of the constants above).
  # tag     - tag the message refers to.
  # content - optional content.
  # value   - optional value.
  def initialize(msgkey, tag, content=nil,value=nil)
    @msgkey, @tag, @content, @value = msgkey, tag, content, value
  end

  # Space-separated "msgkey tag content value"; nil parts render as empty
  # strings.
  def to_s
    format('%s %s %s %s', msgkey, @tag, @content, @value)
  end
end
69
+
2
70
  # Container of scan results, provides a list of ScanMessage indicating
3
71
  # why elements were removed from the resulting html
4
72
  class ScanResults
data/lib/antisamy.rb CHANGED
@@ -1,10 +1,14 @@
1
1
  require 'nokogiri'
2
+ require 'antisamy/csspool/rsac'
2
3
  require 'antisamy/model/attribute'
3
4
  require 'antisamy/model/tag'
4
5
  require 'antisamy/model/css_property'
5
6
  require 'antisamy/policy'
6
7
  require 'antisamy/scan_results'
7
8
  require 'antisamy/html/handler'
9
+ require 'antisamy/css/css_validator'
10
+ require 'antisamy/css/css_filter'
11
+ require 'antisamy/css/css_scanner'
8
12
  require 'antisamy/html/sax_filter'
9
13
  require 'antisamy/html/scanner'
10
14