antisamy 0.0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +6 -1
- data/lib/antisamy/css/css_filter.rb +187 -0
- data/lib/antisamy/css/css_scanner.rb +84 -0
- data/lib/antisamy/css/css_validator.rb +129 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
- data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
- data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
- data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
- data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
- data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
- data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
- data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
- data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
- data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
- data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
- data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
- data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
- data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
- data/lib/antisamy/csspool/rsac/sac.rb +14 -0
- data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
- data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
- data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
- data/lib/antisamy/csspool/rsac.rb +1 -0
- data/lib/antisamy/html/handler.rb +4 -0
- data/lib/antisamy/html/sax_filter.rb +49 -33
- data/lib/antisamy/html/scanner.rb +1 -43
- data/lib/antisamy/policy.rb +8 -3
- data/lib/antisamy/scan_results.rb +68 -0
- data/lib/antisamy.rb +4 -0
- data/spec/antisamy_spec.rb +111 -3
- metadata +39 -3
@@ -0,0 +1,185 @@
|
|
1
|
+
require "antisamy/csspool/rsac/sac/lexeme"
|
2
|
+
require "antisamy/csspool/rsac/sac/token"
|
3
|
+
|
4
|
+
module RSAC
  # Splits CSS source text into a flat list of tokens.
  #
  # The lexical grammar follows http://www.w3.org/TR/CSS21/syndata.html and is
  # built from named regular-expression fragments ("macros") that are
  # interpolated into the token patterns registered in #initialize.
  #
  # COMMENT, URI and STRING tokens are not lexemes: #tokenize carves them out of
  # the input with plain regular expressions before the lexemes are applied.
  class Tokenizer
    # Patterns used by #tokenize to carve the input up before lexing.
    COMMENT_PATTERN = /\/\*.*?\*\//m
    URL_PATTERN     = /url\([^\)]*\)/m
    STRING_PATTERN  = /"[^"]*"|'[^']*'/m
    # NOTE(review): the negative lookahead body can match the empty string, so
    # this pattern appears never to match and the text between strings is
    # always lexed as one run. Kept unchanged to preserve behaviour.
    WHITESPACE_PATTERN = /[ \t\r\n\f]*(?![{}+>]*)/m

    # Registers the macros and lexemes of the grammar. Lexemes are tried in the
    # order they are registered, so more specific tokens must come first.
    #
    # Yields the tokenizer itself when a block is given.
    def initialize(&block)
      @lexemes = []
      @macros = {}

      # http://www.w3.org/TR/CSS21/syndata.html
      macro(:h, /([0-9a-f])/ )
      # Any non-ASCII character. Written as a negated ASCII range because the
      # byte range [\200-\377] is not valid in a character-aware regexp.
      macro(:nonascii, /([^\x00-\x7F])/ )
      macro(:nl, /(\n|\r\n|\r|\f)/ )
      macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
      macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
      macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
      macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
      macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
      macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
      macro(:name, /(#{m(:nmchar)}+)/ )
      macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
      macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
      macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
      macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
      macro(:s, /([ \t\r\n\f]+)/ )
      macro(:w, /(#{m(:s)}?)/ )

      # Letter macros: the literal letter, its escaped upper/lower-case code
      # point (hexadecimal), or a plain backslash escape where one is defined.
      macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
      macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
      macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
      macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
      macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
      macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
      macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
      macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
      macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
      macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
      # "O" is U+004F / "o" is U+006F (51/71 would be Q/q).
      macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/ )
      macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
      macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
      macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
      macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
      macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
      macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )

      token(:LBRACE, /#{m(:w)}\{/)
      token(:PLUS, /#{m(:w)}\+/)
      token(:GREATER, /#{m(:w)}>/)
      token(:COMMA, /#{m(:w)},/)

      token(:S, /#{m(:s)}/)

      token(:FUNCTION, /#{m(:ident)}\(/)
      token(:IDENT, /#{m(:ident)}/)

      token(:CDO, /<!--/)
      token(:CDC, /-->/)
      token(:INCLUDES, /~=/)
      token(:DASHMATCH, /\|=/)
      token(:INVALID, /#{m(:invalid)}/)
      token(:HASH, /##{m(:name)}/)
      token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
      token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
      token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
      token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
      token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
      token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
      token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

      token :LENGTH do |patterns|
        patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
        patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
      end

      token :ANGLE do |patterns|
        patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
        patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
        patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
      end

      token :TIME do |patterns|
        patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
        patterns << /#{m(:num)}#{m(:S)}/
      end

      token :FREQ do |patterns|
        patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
        patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
      end

      token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
      token(:PERCENTAGE, /#{m(:num)}%/)
      token(:NUMBER, /#{m(:num)}/)

      yield self if block_given?
    end

    # Converts +input_data+ (a String of CSS) into an Array of tokens.
    #
    # The input is peeled in layers: comments first, then url(...) references,
    # then quoted strings. Whatever text remains is matched against the
    # registered lexemes in order; a character no lexeme accepts becomes a
    # DelimiterToken. COMMENT, URI, STRING and whitespace-only S tokens are
    # created with a nil position; lexeme and delimiter tokens carry a running
    # offset that only counts the characters consumed by such tokens.
    def tokenize(input_data)
      tokens = []
      pos = 0

      segments(input_data, COMMENT_PATTERN).each do |non_comment, comment|
        segments(non_comment, URL_PATTERN).each do |non_url, url|
          segments(non_url, STRING_PATTERN).each do |non_string, quoted_string|
            if non_string.length > 0 && non_string =~ /\A\s*\Z/m
              tokens << Token.new(:S, non_string, nil)
            else
              segments(non_string, WHITESPACE_PATTERN).each do |string, whitespace|
                until string.empty?
                  token = nil
                  @lexemes.each do |lexeme|
                    match = lexeme.pattern.match(string)
                    next unless match
                    token = Token.new(lexeme.name, match.to_s, pos)
                    break
                  end

                  # Nothing matched: emit the next character as a delimiter.
                  token ||= DelimiterToken.new(string[0, 1], pos)

                  tokens << token
                  string = string[token.value.length..-1]
                  pos += token.value.length
                end
                tokens << Token.new(:S, whitespace, nil) if whitespace
              end
            end
            tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
          end
          tokens << Token.new(:URI, url, nil) if url
        end
        tokens << Token.new(:COMMENT, comment, nil) if comment
      end

      tokens
    end

    private

    # Pairs every stretch of +text+ between matches of +pattern+ with the match
    # that follows it (nil after the last stretch). The negative split limit
    # keeps trailing empty stretches, so a match at the very end of +text+ is
    # not lost; an empty +text+ yields no pairs.
    def segments(text, pattern)
      text.split(pattern, -1).zip(text.scan(pattern))
    end

    # Registers a lexeme called +name+, built either from +pattern+ or from the
    # patterns the block appends to the array it is given.
    def token(name, pattern=nil, &block)
      @lexemes << Lexeme.new(name, pattern, &block)
    end

    # With +regex+, stores it as macro +name+; without, returns the regexp
    # source of macro +name+ so it can be interpolated into another pattern.
    def macro(name, regex=nil)
      regex ? @macros[name] = regex : @macros[name].source
    end

    alias :m :macro
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "antisamy/csspool/rsac/sac/conditions"
|
2
|
+
require "antisamy/csspool/rsac/sac/selectors"
|
3
|
+
require "antisamy/csspool/rsac/sac/parser"
|
4
|
+
require "antisamy/csspool/rsac/stylesheet"
|
5
|
+
|
6
|
+
module RSAC
  class << self
    # Parses +text+ as CSS with a freshly created parser and returns that
    # parser (not the result of the parse call), so the caller can inspect
    # whatever state the parser accumulated.
    #
    # The parser is looked up inside this module: the vendored classes visible
    # in this library are all namespaced under RSAC, not CSS::SAC.
    # NOTE(review): assumes sac/parser defines RSAC::Parser with a no-argument
    # constructor - confirm against that file.
    def parse(text)
      parser = RSAC::Parser.new
      parser.parse(text)
      parser
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'set'
|
2
|
+
module RSAC
  class StyleSheet
    # One selector together with the set of properties declared for it and the
    # index that was assigned to it when it was created.
    #
    # Rules are ordered by selector specificity first and by index second.
    class Rule
      include Comparable

      attr_accessor :selector, :properties, :index

      # selector   - object responding to #specificity
      # index      - value used to break ties between equally specific rules
      # properties - enumerable of initial properties; stored as a Set, so
      #              duplicates collapse
      def initialize(selector, index, properties = [])
        @selector = selector
        @properties = Set.new(properties)
        @index = index
      end

      # Compares by specificity, then by index.
      #
      # Returns nil when +other+ does not look like a rule. Comparable#== is
      # built on this method, so returning nil (rather than raising
      # NoMethodError) lets `rule == nil` or `rule == "text"` evaluate to false.
      def <=>(other)
        return nil unless other.respond_to?(:selector) && other.respond_to?(:index)

        comp = selector.specificity <=> other.selector.specificity
        comp == 0 ? index <=> other.index : comp
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module RSAC
  # Document handler that turns selector/property callbacks into a list of
  # Rule objects and can print them back out as CSS text.
  class StyleSheet < RSAC::DocumentHandler
    attr_reader :rules

    # sac - the parser this sheet belongs to; only used by #create_rule.
    def initialize(sac)
      @sac = sac
      @rules = []
      @current_rules = []
      @selector_index = 0
    end

    # Opens one pending rule per selector; every selector of the same group
    # receives the same index.
    def start_selector(selectors)
      selectors.each do |sel|
        @current_rules << Rule.new(sel, @selector_index)
      end
    end

    # Moves the pending rules into the sheet, advances the index and merges
    # rules that share a selector.
    def end_selector(selectors)
      @rules = @rules + @current_rules
      @current_rules = []
      @selector_index += 1
      reduce!
    end

    # Returns the first stored rule whose selector equals that of +rule+, or
    # nil. A String argument is first turned into a rule via #create_rule.
    def find_rule(rule)
      if rule.is_a?(String)
        rule = create_rule(rule)
      end
      rules.find { |candidate| candidate.selector == rule.selector }
    end
    alias :[] :find_rule

    # Builds a rule from the first selector the parser extracts from +rule+.
    # Note that this advances the sheet's selector index.
    def create_rule(rule)
      parsed_selector = @sac.parse_rule(rule).first
      Rule.new(parsed_selector, @selector_index += 1)
    end

    # Records a declaration on every rule that is currently open.
    def property(name, value, important)
      @current_rules.each do |pending|
        pending.properties << [name, value, important]
      end
    end

    # Groups the rules by their property list. Keys are the properties as an
    # array ordered by each entry's hash value; values are arrays of rules.
    def rules_by_property
      grouped = Hash.new { |hash, key| hash[key] = [] }
      @rules.each do |entry|
        key = entry.properties.to_a.sort_by(&:hash)
        grouped[key] << entry
      end
      grouped
    end

    # Renders the sheet as CSS: one block per distinct property list, with the
    # selectors sharing it comma-joined, and the blocks sorted as strings.
    def to_css
      blocks = rules_by_property.map do |properties, rules|
        selector_list = rules.map { |rule| rule.selector.to_css }.sort.join(', ')
        declarations = properties.map do |key, value, important|
          # font-family values are a comma separated list, everything else is
          # space separated
          separator = ('font-family' == key) ? ', ' : ' '
          rendered = [value].flatten.join(separator)
          "#{key}:#{rendered}#{important ? ' !important' : ''};"
        end
        selector_list + " {\n" + declarations.join("\n") + "\n}"
      end
      blocks.sort.join("\n")
    end

    private

    # Collapses rules with the same selector into the first such rule, giving
    # it the union of all their properties. Returns self.
    def reduce!
      merged = {}
      @rules.each do |rule|
        survivor = (merged[rule.selector] ||= rule)
        survivor.properties = survivor.properties + rule.properties
      end
      @rules = merged.values
      self
    end
  end
end
|
76
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'antisamy/csspool/rsac/sac'
|
@@ -14,6 +14,7 @@ module AntiSamy
|
|
14
14
|
|
15
15
|
# HTML entity encode some text
|
16
16
|
def encode_text(text)
|
17
|
+
return "" if text.nil?
|
17
18
|
@document.encode_special_chars(text)
|
18
19
|
end
|
19
20
|
|
@@ -40,6 +41,9 @@ module AntiSamy
|
|
40
41
|
|
41
42
|
# start an element
|
42
43
|
def start_element(name,attributes)
|
44
|
+
if name.eql?("head") or name.eql?("body") or name.eql?("html")
|
45
|
+
return
|
46
|
+
end
|
43
47
|
elem = Nokogiri::XML::Element.new(name, @document)
|
44
48
|
attributes.each do |attrib_pair|
|
45
49
|
elem[attrib_pair.first] = attrib_pair.last
|
@@ -39,16 +39,14 @@ module AntiSamy
|
|
39
39
|
@stack = Stack.new
|
40
40
|
@css_content = nil
|
41
41
|
@css_attributes = nil
|
42
|
-
@css_scanner =
|
42
|
+
@css_scanner = CssScanner.new(policy)
|
43
43
|
@param_tag = param_tag
|
44
44
|
end
|
45
45
|
|
46
46
|
def error(text)
|
47
|
-
#puts "SAX Error #{text}"
|
48
47
|
end
|
49
48
|
|
50
49
|
def warning(text)
|
51
|
-
puts "SAX Warning #{text}"
|
52
50
|
end
|
53
51
|
|
54
52
|
# Always create a HTML document unless the DECL was set beforehand
|
@@ -122,13 +120,24 @@ module AntiSamy
|
|
122
120
|
@handler.characters(tmp)
|
123
121
|
@stack.push(:filter)
|
124
122
|
elsif tag.nil?
|
125
|
-
|
126
|
-
|
123
|
+
# We ignore missing HTML and BODY tags since we are fragment parsing, but the
|
124
|
+
# Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
|
125
|
+
unless name.eql?("html") or name.eql?("body")
|
126
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
|
127
|
+
end
|
128
|
+
# Nokogiri workaround for a style tag being auto-inserted into head
|
129
|
+
if name.eql?("head")
|
130
|
+
@stack.push(:remove)
|
131
|
+
else
|
132
|
+
@stack.push(:filter)
|
133
|
+
end
|
127
134
|
elsif tag.action.eql?(Policy::ACTION_FILTER)
|
128
135
|
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
|
129
136
|
@stack.push(:filter)
|
130
137
|
elsif tag.action.eql?(Policy::ACTION_VALIDATE)
|
131
138
|
# Handle validation
|
139
|
+
remove_tag = false
|
140
|
+
filter_tag = false
|
132
141
|
is_style = name.include?("style")
|
133
142
|
if is_style
|
134
143
|
@stack.push(:css)
|
@@ -136,8 +145,6 @@ module AntiSamy
|
|
136
145
|
@css_attributes = []
|
137
146
|
else
|
138
147
|
# Validate attributes
|
139
|
-
remove_tag = false
|
140
|
-
filter_tag = false
|
141
148
|
attributes.each do |pair|
|
142
149
|
a_name = pair.first
|
143
150
|
a_value = pair.last
|
@@ -148,14 +155,16 @@ module AntiSamy
|
|
148
155
|
# check if the attribute is a style
|
149
156
|
if a_name.eql?("style")
|
150
157
|
# Handle Style tags
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
158
|
+
begin
  # Run the inline style value through the CSS scanner and keep the
  # attribute only when some CSS survives the cleaning.
  results = @css_scanner.scan_inline(a_value, name, @policy.max_input)
  # `results` is the local assigned above; referring to an undefined `result`
  # here raised a NameError that the rescue below turned into a "malformed"
  # error for every style attribute.
  unless results.clean_html.empty?
    valid_attributes << [a_name, results.clean_html]
  end
  # messages is a list; flatten so errors stays a flat list of messages
  @handler.errors << results.messages
  @handler.errors.flatten!
rescue Exception => e
  # Any failure while scanning is reported as a malformed style attribute
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED, name, @handler.encode_text(a_value))
end
|
159
168
|
elsif !attrib.nil? # Attribute is not nil lets check it
|
160
169
|
valid = false
|
161
170
|
attrib.values.each do |av|
|
@@ -167,7 +176,8 @@ module AntiSamy
|
|
167
176
|
end
|
168
177
|
unless valid
|
169
178
|
attrib.expressions.each do |ae|
|
170
|
-
|
179
|
+
mc = ae.match(a_value)
|
180
|
+
if mc and mc.size == a_value.size
|
171
181
|
valid_attributes << [a_name,a_value]
|
172
182
|
valid = true
|
173
183
|
break
|
@@ -198,7 +208,7 @@ module AntiSamy
|
|
198
208
|
elsif filter_tag
|
199
209
|
@stack.push(:filter)
|
200
210
|
else
|
201
|
-
if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
|
211
|
+
if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
|
202
212
|
valid_attributes << ["rel","nofollow"]
|
203
213
|
end
|
204
214
|
if masquerade
|
@@ -206,7 +216,7 @@ module AntiSamy
|
|
206
216
|
valid_attributes << ["name",embed_name]
|
207
217
|
valid_attributes << ["value",embed_value]
|
208
218
|
end
|
209
|
-
@stack.push(:keep)
|
219
|
+
@stack.push(:keep) unless @stack.peek?(:css)
|
210
220
|
end
|
211
221
|
# End validation action
|
212
222
|
elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
|
@@ -234,7 +244,7 @@ module AntiSamy
|
|
234
244
|
# Add character data to the current tag
|
235
245
|
def characters(text)
|
236
246
|
unless text =~ /\S/ # skip whitespace
|
237
|
-
return unless @policy.directive(Policy::PRESERVE_SPACE)
|
247
|
+
return unless @policy.directive(Policy::PRESERVE_SPACE)
|
238
248
|
end
|
239
249
|
if @stack.peek?(:css)
|
240
250
|
@css_content << text
|
@@ -252,20 +262,26 @@ module AntiSamy
|
|
252
262
|
elsif @stack.peek?(:css)
|
253
263
|
@stack.pop
|
254
264
|
# Do css stuff here
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
265
|
+
begin
  # Clean the collected style sheet text and re-emit the style element.
  results = @css_scanner.scan_sheet(@css_content, @policy.max_input)
  @handler.errors << results.messages
  @handler.errors.flatten!
  cleaned_css = results.clean_html
  # When nothing survives, emit an empty CSS comment as the element's content
  cleaned_css = "/* */" if cleaned_css.nil? or cleaned_css.empty?
  @handler.start_element(name, @css_attributes)
  @handler.characters cleaned_css
  @handler.end_element(name)
rescue Exception
  # Report the failure through the handler's error list only; nothing is
  # printed to stdout.
  @handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED, name, @handler.encode_text(@css_content))
ensure
  # Always reset the per-tag CSS state, whether or not scanning succeeded
  @css_content = nil
  @css_attributes = nil
end
|
269
285
|
else
|
270
286
|
@stack.pop
|
271
287
|
@handler.end_element(name)
|
@@ -1,46 +1,4 @@
|
|
1
1
|
module AntiSamy
|
2
|
-
|
3
|
-
class ScanError < StandardError; end
|
4
|
-
|
5
|
-
# Scan message, it will contain a message key, tag and optionally content, value
|
6
|
-
class ScanMessage
|
7
|
-
# error.tag.notfound
|
8
|
-
ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
|
9
|
-
# error.tag.removed
|
10
|
-
ERROR_TAG_DISALLOWED = "error.tag.removed"
|
11
|
-
# error.tag.filtered
|
12
|
-
ERROR_TAG_FILTERED = "error.tag.filtered"
|
13
|
-
# error.tag.encoded
|
14
|
-
ERROR_TAG_ENCODED = "error.tag.encoded"
|
15
|
-
# error.css.tag.malformed
|
16
|
-
ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
|
17
|
-
# error.css.attribute.malformed
|
18
|
-
ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
|
19
|
-
# error.attribute.invalid.filtered
|
20
|
-
ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
|
21
|
-
# error.attribute.invalid.encoded
|
22
|
-
ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
|
23
|
-
# error.attribute.invalid.filtered
|
24
|
-
ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
|
25
|
-
# error.attribute.invalid.removed
|
26
|
-
ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
|
27
|
-
# error.attribute.notfound
|
28
|
-
ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
|
29
|
-
# error.attribute.invalid
|
30
|
-
ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
|
31
|
-
|
32
|
-
attr_reader :tag, :content, :value, :msgkey
|
33
|
-
def initialize(msgkey, tag, content=nil,value=nil)
|
34
|
-
@msgkey = msgkey
|
35
|
-
@tag = tag
|
36
|
-
@content = content
|
37
|
-
@value = value
|
38
|
-
end
|
39
|
-
def to_s
|
40
|
-
"#{self.msgkey} #{@tag} #{@content} #{@value}"
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
2
|
class Scanner
|
45
3
|
attr_accessor :policy, :errors, :nofollow, :pae
|
46
4
|
DEFAULT_ENCODE = "UTF-8"
|
@@ -67,7 +25,7 @@ module AntiSamy
|
|
67
25
|
# will raise an error if nil input or the maximum input size is exceeded
|
68
26
|
def scan(input, input_encode, output_encoder)
|
69
27
|
raise ArgumentError if input.nil?
|
70
|
-
raise ScanError, "Max input Exceeded" if input.size > @policy.max_input
|
28
|
+
raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
|
71
29
|
# check policy stuff
|
72
30
|
handler = Handler.new(@policy,output_encoder)
|
73
31
|
scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
|
data/lib/antisamy/policy.rb
CHANGED
@@ -29,13 +29,16 @@ module AntiSamy
|
|
29
29
|
MAX_INPUT = "maxInputSize"
|
30
30
|
# Directive key as spelled in policy files (useXHTML); directives are stored under their policy-file name.
USE_XHTML = "useXHTML"
|
31
31
|
FORMAT_OUTPUT = "formatOutput"
|
32
|
+
# will we allow embedded style sheets
|
32
33
|
EMBED_STYLESHEETS = "embedStyleSheets"
|
34
|
+
# Connection timeout in milliseconds
|
33
35
|
# Must match the "connectionTimeout" name the directive loader checks for and stores under.
CONN_TIMEOUT = "connectionTimeout"
|
34
36
|
ANCHROS_NOFOLLOW = "nofollowAnchors"
|
35
37
|
VALIDATE_P_AS_E = "validateParamAsEmbed"
|
36
38
|
PRESERVE_SPACE = "preserveSpace"
|
37
39
|
PRESERVE_COMMENTS = "preserveComments"
|
38
40
|
ON_UNKNOWN_TAG = "onUnknownTag"
|
41
|
+
MAX_SHEETS = "maxStyleSheetImports"
|
39
42
|
|
40
43
|
# Class method to fetch the schema
|
41
44
|
def self.schema
|
@@ -192,15 +195,17 @@ module AntiSamy
|
|
192
195
|
section.element_children.each do |dir|
|
193
196
|
name = dir["name"]
|
194
197
|
value = dir["value"]
|
195
|
-
|
196
|
-
if name.eql?("maxInputSize")
|
198
|
+
if name.eql?("maxInputSize")
|
197
199
|
@max_input = value.to_i
|
198
200
|
else
|
199
|
-
if
|
201
|
+
if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
|
202
|
+
value = value.to_i
|
203
|
+
elsif value =~ /true/i
|
200
204
|
value = true
|
201
205
|
else
|
202
206
|
value = false
|
203
207
|
end
|
208
|
+
@directives[name] = value
|
204
209
|
end
|
205
210
|
end
|
206
211
|
end
|
@@ -1,4 +1,72 @@
|
|
1
1
|
module AntiSamy
|
2
|
+
# Raised when input cannot be scanned (for example when it exceeds the maximum input size).
class ScanError < StandardError; end

# A single scan message: a message key, the tag it concerns and, optionally,
# the offending content and value.
class ScanMessage
  # Message keys. The strings are frozen because every message shares them.

  # tag is not in the policy
  ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound".freeze
  # tag was removed
  ERROR_TAG_DISALLOWED = "error.tag.removed".freeze
  # tag was filtered
  ERROR_TAG_FILTERED = "error.tag.filtered".freeze
  # tag was encoded
  ERROR_TAG_ENCODED = "error.tag.encoded".freeze
  # style tag content is malformed
  ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed".freeze
  # style attribute content is malformed
  ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed".freeze
  # invalid attribute caused the tag to be filtered
  ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered".freeze
  # invalid attribute caused the tag to be encoded
  ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded".freeze
  # invalid attribute, tag filtered (same key as ERROR_ATTRIBUTE_CAUSE_FILTER)
  ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered".freeze
  # invalid attribute, tag removed
  ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed".freeze
  # attribute is not in the policy
  ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound".freeze
  # attribute is invalid
  ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid".freeze
  # comment removed
  ERROR_COMMENT_REMOVED = "error.comment.removed".freeze
  # tag rule not found
  ERROR_CSS_TAG_RULE_NOTFOUND = "error.css.tag.notfound".freeze
  # style sheet rule not found
  ERROR_STYLESHEET_RULE_NOTFOUND = "error.stylesheet.notfound".freeze
  # embedded stylesheets disabled
  ERROR_CSS_IMPORT_DISABLED = "error.css.import.disabled".freeze
  # bad uri
  ERROR_CSS_IMPORT_URL_INVALID = "error.css.import.uri.invalid".freeze
  # disallowed selector
  ERROR_CSS_TAG_SELECTOR_DISALLOWED = "error.css.tag.removed".freeze
  # selector not allowed in a style sheet
  ERROR_STYLESHEET_SELECTOR_DISALLOWED = "error.style.tag.notallowed".freeze
  # invalid css tag property
  ERROR_CSS_TAG_PROPERTY_INVALID = "error.css.property.invalid".freeze
  # invalid style sheet property
  ERROR_STYLESHEET_PROPERTY_INVALID = "error.stylesheet.css.property.invalid".freeze
  # exceeded the allotted number of imports
  ERROR_CSS_IMPORT_EXCEEDED = "error.import.exceeded.sheets".freeze
  # exceeded the input size
  ERROR_CSS_IMPORT_INPUT_SIZE = "error.import.exceeded.size".freeze
  # failed to import
  ERROR_CSS_IMPORT_FAILURE = "error.import.bad.uri".freeze
  # selector not found in the style sheet
  ERROR_STYLESHEET_SELECTOR_NOTFOUND = "error.css.stylesheet.selector.notfound".freeze
  # selector not found in the css tag
  ERROR_CSS_TAG_SELECTOR_NOTFOUND = "error.css.tag.selector.notfound".freeze

  attr_reader :tag, :content, :value, :msgkey

  # msgkey  - one of the ERROR_* keys above
  # tag     - name of the tag the message is about
  # content - optional offending content
  # value   - optional offending value
  def initialize(msgkey, tag, content=nil, value=nil)
    @msgkey = msgkey
    @tag = tag
    @content = content
    @value = value
  end

  # Key, tag, content and value separated by single spaces; missing parts
  # render as empty strings.
  def to_s
    "#{msgkey} #{@tag} #{@content} #{@value}"
  end
end
|
69
|
+
|
2
70
|
# Container of scan results, provides a list of ScanMessage indicating
|
3
71
|
# why elements were removed from the resulting html
|
4
72
|
class ScanResults
|
data/lib/antisamy.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'antisamy/csspool/rsac'
|
2
3
|
require 'antisamy/model/attribute'
|
3
4
|
require 'antisamy/model/tag'
|
4
5
|
require 'antisamy/model/css_property'
|
5
6
|
require 'antisamy/policy'
|
6
7
|
require 'antisamy/scan_results'
|
7
8
|
require 'antisamy/html/handler'
|
9
|
+
require 'antisamy/css/css_validator'
|
10
|
+
require 'antisamy/css/css_filter'
|
11
|
+
require 'antisamy/css/css_scanner'
|
8
12
|
require 'antisamy/html/sax_filter'
|
9
13
|
require 'antisamy/html/scanner'
|
10
14
|
|