antisamy 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +6 -1
- data/lib/antisamy/css/css_filter.rb +187 -0
- data/lib/antisamy/css/css_scanner.rb +84 -0
- data/lib/antisamy/css/css_validator.rb +129 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
- data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
- data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
- data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
- data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
- data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
- data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
- data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
- data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
- data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
- data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
- data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
- data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
- data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
- data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
- data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
- data/lib/antisamy/csspool/rsac/sac.rb +14 -0
- data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
- data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
- data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
- data/lib/antisamy/csspool/rsac.rb +1 -0
- data/lib/antisamy/html/handler.rb +4 -0
- data/lib/antisamy/html/sax_filter.rb +49 -33
- data/lib/antisamy/html/scanner.rb +1 -43
- data/lib/antisamy/policy.rb +8 -3
- data/lib/antisamy/scan_results.rb +68 -0
- data/lib/antisamy.rb +4 -0
- data/spec/antisamy_spec.rb +111 -3
- metadata +39 -3
@@ -0,0 +1,185 @@
|
|
1
|
+
require "antisamy/csspool/rsac/sac/lexeme"
|
2
|
+
require "antisamy/csspool/rsac/sac/token"
|
3
|
+
|
4
|
+
module RSAC
  # Regex-driven tokenizer for CSS text.
  #
  # The constructor registers a table of named regex "macros" (modelled on the
  # CSS 2.1 lexical grammar) and then an ordered list of lexemes built from
  # those macros. #tokenize walks the input and emits Token objects.
  #
  # COMMENT, URI and STRING tokens are not produced from the lexeme table;
  # #tokenize carves them out of the input directly (see the commented-out
  # token definitions below).
  class Tokenizer
    # Builds the macro and lexeme tables.
    #
    # If a block is given, the new tokenizer is yielded to it after the
    # default tables have been registered.
    def initialize(&block)
      @lexemes = []
      @macros = {}

      # http://www.w3.org/TR/CSS21/syndata.html
      # NOTE(review): octal byte escapes such as \200-\377 in a regexp literal
      # may be rejected by Ruby 1.9+ unless the literal is binary (/n) --
      # confirm against the Ruby versions this gem targets.
      macro(:h, /([0-9a-f])/ )
      macro(:nonascii, /([\200-\377])/ )
      macro(:nl, /(\n|\r\n|\r|\f)/ )
      macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/ )
      macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/ )
      macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/ )
      macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/ )
      macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/ )
      macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/ )
      macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/ )
      macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/ )
      macro(:name, /(#{m(:nmchar)}+)/ )
      macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/ )
      macro(:string, /(#{m(:string1)}|#{m(:string2)})/ )
      macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/ )
      macro(:url, /(([!#\$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*)/ )
      macro(:s, /([ \t\r\n\f]+)/ )
      macro(:w, /(#{m(:s)}?)/ )

      # Single-letter macros: the literal letter, or its hex escape for the
      # upper-case / lower-case code point, or (for most letters) a
      # backslash-escaped letter.
      macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/ )
      macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/ )
      macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/ )
      macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/ )
      macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/ )
      macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/ )
      macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/ )
      macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/ )
      macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/ )
      macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/ )
      # 'O' is U+004F and 'o' is U+006F (0x51 / 0x71 are 'Q' / 'q').
      macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/ )
      macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/ )
      macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/ )
      macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/ )
      macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/ )
      macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/ )
      macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/ )

      #token :COMMENT do |patterns|
      #  patterns << /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #  patterns << /#{m(:s)}+\/\*[^*]*\*+([^\/*][^*]*\*+)*\//
      #end

      # Registration order matters: #tokenize takes the first lexeme whose
      # pattern matches.
      token(:LBRACE, /#{m(:w)}\{/)
      token(:PLUS, /#{m(:w)}\+/)
      token(:GREATER, /#{m(:w)}>/)
      token(:COMMA, /#{m(:w)},/)

      token(:S, /#{m(:s)}/)

      #token :URI do |patterns|
      #  patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
      #  patterns << /url\(#{m(:w)}#{m(:url)}#{m(:w)}\)/
      #end

      token(:FUNCTION, /#{m(:ident)}\(/)
      token(:IDENT, /#{m(:ident)}/)

      token(:CDO, /<!--/)
      token(:CDC, /-->/)
      token(:INCLUDES, /~=/)
      token(:DASHMATCH, /\|=/)
      #token(:STRING, /#{m(:string)}/)
      token(:INVALID, /#{m(:invalid)}/)
      token(:HASH, /##{m(:name)}/)
      token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
      token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
      token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
      token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
      token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
      token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
      token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

      token :LENGTH do |patterns|
        patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
        patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
        patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
        patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
      end

      token :ANGLE do |patterns|
        patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
        patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
        patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
      end

      token :TIME do |patterns|
        patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
        patterns << /#{m(:num)}#{m(:S)}/
      end

      token :FREQ do |patterns|
        patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
        patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
      end

      token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
      token(:PERCENTAGE, /#{m(:num)}%/)
      token(:NUMBER, /#{m(:num)}/)


      yield self if block_given?
    end

    # Splits +input_data+ (a String of CSS) into an Array of tokens.
    #
    # The input is peeled apart in layers using a split/scan/zip trick:
    # comments first, then url(...) groups, then quoted strings. What is left
    # is matched against the registered lexemes in order; when no lexeme
    # matches, the first character becomes a DelimiterToken.
    #
    # Only lexeme and delimiter tokens carry a position; COMMENT, URI, STRING
    # and the whitespace-only :S tokens created here are given a nil position.
    def tokenize(input_data)
      tokens = []
      pos = 0

      comment_pattern = /\/\*.*?\*\//m
      comments = input_data.scan(comment_pattern)
      non_comments = input_data.split(comment_pattern)

      # Handle a small edge case, if our CSS is *only* comments,
      # the split, zip, scan trick won't work
      if non_comments.length == 0
        tokens = comments.map { |x| Token.new(:COMMENT, x, nil) }
      else
        non_comments.zip(comments).each do |non_comment, comment|
          non_comment.split(/url\([^\)]*\)/m).zip(
            non_comment.scan(/url\([^\)]*\)/m)
          ).each do |non_url, url|
            non_url.split(/"[^"]*"|'[^']*'/m).zip(
              non_url.scan(/"[^"]*"|'[^']*'/m)
            ).each do |non_string, quoted_string|
              if non_string.length > 0 && non_string =~ /\A\s*\Z/m
                tokens << Token.new(:S, non_string, nil)
              else
                # NOTE(review): the negative lookahead wraps `[{}+>]*`, which
                # can match the empty string, so this pattern looks like it can
                # never match and the whole chunk goes to the lexeme loop in
                # one piece -- verify that this is intended.
                non_string.split(/[ \t\r\n\f]*(?![{}+>]*)/m).zip(
                  non_string.scan(/[ \t\r\n\f]*(?![{}+>]*)/m)
                ).each do |string, whitespace|
                  until string.empty?
                    token = nil
                    @lexemes.each do |lexeme|
                      match = lexeme.pattern.match(string)
                      if match
                        token = Token.new(lexeme.name, match.to_s, pos)
                        break
                      end
                    end

                    token ||= DelimiterToken.new(/^./.match(string).to_s, pos)

                    tokens << token
                    # Drop the consumed text; this assumes the match sits at
                    # the start of the string (presumably Lexeme anchors its
                    # patterns -- TODO confirm).
                    string = string.slice(Range.new(token.value.length, -1))
                    pos += token.value.length
                  end
                  tokens << Token.new(:S, whitespace, nil) if whitespace
                end
              end
              tokens << Token.new(:STRING, quoted_string, nil) if quoted_string
            end
            tokens << Token.new(:URI, url, nil) if url
          end
          tokens << Token.new(:COMMENT, comment, nil) if comment
        end
      end

      tokens
    end

    private

    # Registers a lexeme under +name+, either from a single +pattern+ or from
    # a block that is handed to Lexeme.new.
    def token(name, pattern=nil, &block)
      @lexemes << Lexeme.new(name, pattern, &block)
    end

    # With a +regex+: stores it under +name+.
    # Without one: returns the source string of the stored macro so it can be
    # interpolated into another regex literal.
    def macro(name, regex=nil)
      regex ? @macros[name] = regex : @macros[name].source
    end

    alias :m :macro
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "antisamy/csspool/rsac/sac/conditions"
|
2
|
+
require "antisamy/csspool/rsac/sac/selectors"
|
3
|
+
require "antisamy/csspool/rsac/sac/parser"
|
4
|
+
require "antisamy/csspool/rsac/stylesheet"
|
5
|
+
|
6
|
+
module RSAC
  class << self
    # Parses +text+ with a freshly created parser and returns that parser
    # (not the value returned by its #parse call).
    #
    # NOTE(review): the parser is looked up as CSS::SAC::Parser although this
    # module is named RSAC -- confirm the constant actually resolves.
    def parse(text)
      CSS::SAC::Parser.new.tap { |sac_parser| sac_parser.parse(text) }
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'set'
|
2
|
+
module RSAC
  class StyleSheet
    # A single selector together with the set of properties declared for it.
    #
    # Rules are ordered by the specificity of their selector; rules with equal
    # specificity are ordered by +index+.
    class Rule
      include Comparable

      attr_accessor :selector, :properties, :index

      # selector   - the selector object for this rule (must respond to
      #              #specificity for comparisons to work)
      # index      - tie-breaker used when specificities are equal
      # properties - enumerable of properties; stored as a Set
      def initialize(selector, index, properties = [])
        @selector = selector
        @properties = Set.new(properties)
        @index = index
      end

      # Compares by selector specificity, then by index.
      #
      # Returns nil when +other+ does not look like a Rule, which is the
      # <=> convention for incomparable objects. This lets Comparable#==
      # answer false instead of raising NoMethodError.
      def <=>(other)
        return nil unless other.respond_to?(:selector) && other.respond_to?(:index)

        comp = selector.specificity <=> other.selector.specificity
        comp == 0 ? index <=> other.index : comp
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module RSAC
  # Document handler that accumulates the rules of a stylesheet.
  #
  # start_selector / property / end_selector look like the callbacks a SAC
  # parser drives while reading a rule set (presumably via
  # RSAC::DocumentHandler -- confirm against the parser).
  class StyleSheet < RSAC::DocumentHandler
    attr_reader :rules

    # sac - object used by #create_rule; it must respond to #parse_rule.
    def initialize(sac)
      @sac = sac
      @rules = []
      @current_rules = []
      @selector_index = 0
    end

    # Opens one pending Rule per selector, all sharing the current index.
    def start_selector(selectors)
      selectors.each do |selector|
        @current_rules << Rule.new(selector, @selector_index)
      end
    end

    # Moves the pending rules into #rules, advances the selector index and
    # merges rules that share a selector.
    def end_selector(selectors)
      @rules = @rules + @current_rules
      @current_rules = []
      @selector_index += 1
      reduce!
    end

    # Finds the stored rule whose selector equals that of +rule+.
    # A String argument is first turned into a Rule via #create_rule.
    def find_rule(rule)
      rule = create_rule(rule) if rule.is_a?(String)
      rules.find { |candidate| candidate.selector == rule.selector }
    end
    alias_method :[], :find_rule

    # Builds a Rule from the first selector @sac parses out of +rule+.
    # Advances the selector index as a side effect.
    def create_rule(rule)
      first_selector = @sac.parse_rule(rule).first
      Rule.new(first_selector, @selector_index += 1)
    end

    # Records a [name, value, important] property on every pending rule.
    def property(name, value, important)
      @current_rules.each do |pending|
        pending.properties << [name, value, important]
      end
    end

    # Get a hash of rules by property
    #
    # Keys are the rule's properties as an Array ordered by each property's
    # #hash (so equal property sets produce equal keys); values are the rules
    # that carry exactly those properties.
    def rules_by_property
      grouped = Hash.new { |h,k| h[k] = [] }
      @rules.each_with_object(grouped) do |rule, by_props|
        by_props[rule.properties.to_a.sort_by(&:hash)] << rule
      end
    end

    # Serializes the stylesheet, emitting one block per distinct property
    # set with its selectors joined by commas.
    def to_css
      blocks = rules_by_property.map do |properties, grouped_rules|
        selector_list = grouped_rules.map { |rule| rule.selector.to_css }.sort.join(', ')
        declarations = properties.map do |key, value, important|
          # font-family values are comma separated; everything else uses
          # spaces.
          join_val = ('font-family' == key) ? ', ' : ' '
          values = [value].flatten.join(join_val)
          "#{key}:#{values}#{important ? ' !important' : ''};"
        end
        selector_list + " {\n" + declarations.join("\n") + "\n}"
      end
      blocks.sort.join("\n")
    end

    private

    # Remove duplicate rules
    #
    # Keeps the first rule seen for each selector and folds the properties of
    # later rules with the same selector into it.
    def reduce!
      merged = {}
      @rules.each do |rule|
        kept = (merged[rule.selector] ||= rule)
        kept.properties = kept.properties + rule.properties
      end
      @rules = merged.values
      self
    end
  end
end
|
76
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'antisamy/csspool/rsac/sac'
|
@@ -14,6 +14,7 @@ module AntiSamy
|
|
14
14
|
|
15
15
|
# HTML entity encode some text
|
16
16
|
def encode_text(text)
|
17
|
+
return "" if text.nil?
|
17
18
|
@document.encode_special_chars(text)
|
18
19
|
end
|
19
20
|
|
@@ -40,6 +41,9 @@ module AntiSamy
|
|
40
41
|
|
41
42
|
# start an element
|
42
43
|
def start_element(name,attributes)
|
44
|
+
if name.eql?("head") or name.eql?("body") or name.eql?("html")
|
45
|
+
return
|
46
|
+
end
|
43
47
|
elem = Nokogiri::XML::Element.new(name, @document)
|
44
48
|
attributes.each do |attrib_pair|
|
45
49
|
elem[attrib_pair.first] = attrib_pair.last
|
@@ -39,16 +39,14 @@ module AntiSamy
|
|
39
39
|
@stack = Stack.new
|
40
40
|
@css_content = nil
|
41
41
|
@css_attributes = nil
|
42
|
-
@css_scanner =
|
42
|
+
@css_scanner = CssScanner.new(policy)
|
43
43
|
@param_tag = param_tag
|
44
44
|
end
|
45
45
|
|
46
46
|
def error(text)
|
47
|
-
#puts "SAX Error #{text}"
|
48
47
|
end
|
49
48
|
|
50
49
|
def warning(text)
|
51
|
-
puts "SAX Warning #{text}"
|
52
50
|
end
|
53
51
|
|
54
52
|
# Always create a HTML document unless the DECL was set beforehand
|
@@ -122,13 +120,24 @@ module AntiSamy
|
|
122
120
|
@handler.characters(tmp)
|
123
121
|
@stack.push(:filter)
|
124
122
|
elsif tag.nil?
|
125
|
-
|
126
|
-
|
123
|
+
# We ignore missing HTML and BODY tags since we are fragment parsing, but the
|
124
|
+
# Nokogiri HTML::SAX parser injects HTML/BODY if they are missing
|
125
|
+
unless name.eql?("html") or name.eql?("body")
|
126
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_NOT_IN_POLICY,name)
|
127
|
+
end
|
128
|
+
# Nokogiri workaround for a style tag being auto-inserted into head
|
129
|
+
if name.eql?("head")
|
130
|
+
@stack.push(:remove)
|
131
|
+
else
|
132
|
+
@stack.push(:filter)
|
133
|
+
end
|
127
134
|
elsif tag.action.eql?(Policy::ACTION_FILTER)
|
128
135
|
@handler.errors << ScanMessage.new(ScanMessage::ERROR_TAG_FILTERED,name)
|
129
136
|
@stack.push(:filter)
|
130
137
|
elsif tag.action.eql?(Policy::ACTION_VALIDATE)
|
131
138
|
# Handle validation
|
139
|
+
remove_tag = false
|
140
|
+
filter_tag = false
|
132
141
|
is_style = name.include?("style")
|
133
142
|
if is_style
|
134
143
|
@stack.push(:css)
|
@@ -136,8 +145,6 @@ module AntiSamy
|
|
136
145
|
@css_attributes = []
|
137
146
|
else
|
138
147
|
# Validate attributes
|
139
|
-
remove_tag = false
|
140
|
-
filter_tag = false
|
141
148
|
attributes.each do |pair|
|
142
149
|
a_name = pair.first
|
143
150
|
a_value = pair.last
|
@@ -148,14 +155,16 @@ module AntiSamy
|
|
148
155
|
# check if the attribute is a style
|
149
156
|
if a_name.eql?("style")
|
150
157
|
# Handle Style tags
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
158
|
+
begin
|
159
|
+
results = @css_scanner.scan_inline(a_value,name,@policy.max_input)
|
160
|
+
unless result.clean_html.empty?
|
161
|
+
valid_attributes << [a_name,results.clean_html]
|
162
|
+
end
|
163
|
+
@handler.errors << results.messages
|
164
|
+
@handler.errors.flatten!
|
165
|
+
rescue Exception => e
|
166
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_ATTRIBUTE_MALFORMED,name,@handler.encode_text(a_value))
|
167
|
+
end
|
159
168
|
elsif !attrib.nil? # Attribute is not nil lets check it
|
160
169
|
valid = false
|
161
170
|
attrib.values.each do |av|
|
@@ -167,7 +176,8 @@ module AntiSamy
|
|
167
176
|
end
|
168
177
|
unless valid
|
169
178
|
attrib.expressions.each do |ae|
|
170
|
-
|
179
|
+
mc = ae.match(a_value)
|
180
|
+
if mc and mc.size == a_value.size
|
171
181
|
valid_attributes << [a_name,a_value]
|
172
182
|
valid = true
|
173
183
|
break
|
@@ -198,7 +208,7 @@ module AntiSamy
|
|
198
208
|
elsif filter_tag
|
199
209
|
@stack.push(:filter)
|
200
210
|
else
|
201
|
-
if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
|
211
|
+
if name.eql?("a") and @policy.directive(Policy::ANCHROS_NOFOLLOW)
|
202
212
|
valid_attributes << ["rel","nofollow"]
|
203
213
|
end
|
204
214
|
if masquerade
|
@@ -206,7 +216,7 @@ module AntiSamy
|
|
206
216
|
valid_attributes << ["name",embed_name]
|
207
217
|
valid_attributes << ["value",embed_value]
|
208
218
|
end
|
209
|
-
@stack.push(:keep)
|
219
|
+
@stack.push(:keep) unless @stack.peek?(:css)
|
210
220
|
end
|
211
221
|
# End validation action
|
212
222
|
elsif tag.action.eql?(Policy::ACTION_TRUNCATE)
|
@@ -234,7 +244,7 @@ module AntiSamy
|
|
234
244
|
# Add character data to the current tag
|
235
245
|
def characters(text)
|
236
246
|
unless text =~ /\S/ # skip whitespace
|
237
|
-
return unless @policy.directive(Policy::PRESERVE_SPACE)
|
247
|
+
return unless @policy.directive(Policy::PRESERVE_SPACE)
|
238
248
|
end
|
239
249
|
if @stack.peek?(:css)
|
240
250
|
@css_content << text
|
@@ -252,20 +262,26 @@ module AntiSamy
|
|
252
262
|
elsif @stack.peek?(:css)
|
253
263
|
@stack.pop
|
254
264
|
# Do css stuff here
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
265
|
+
begin
|
266
|
+
results = @css_scanner.scan_sheet(@css_content,@policy.max_input)
|
267
|
+
@handler.errors << results.messages
|
268
|
+
@handler.errors.flatten!
|
269
|
+
unless results.clean_html.nil? or results.clean_html.empty?
|
270
|
+
@handler.start_element(name,@css_attributes)
|
271
|
+
@handler.characters results.clean_html
|
272
|
+
@handler.end_element(name)
|
273
|
+
else
|
274
|
+
@handler.start_element(name,@css_attributes)
|
275
|
+
@handler.characters "/* */"
|
276
|
+
@handler.end_element(name)
|
277
|
+
end
|
278
|
+
rescue Exception => e
|
279
|
+
puts e
|
280
|
+
@handler.errors << ScanMessage.new(ScanMessage::ERROR_CSS_TAG_MALFORMED,name,@handler.encode_text(@css_content))
|
281
|
+
ensure
|
282
|
+
@css_content = nil
|
283
|
+
@css_attributes = nil
|
284
|
+
end
|
269
285
|
else
|
270
286
|
@stack.pop
|
271
287
|
@handler.end_element(name)
|
@@ -1,46 +1,4 @@
|
|
1
1
|
module AntiSamy
|
2
|
-
|
3
|
-
class ScanError < StandardError; end
|
4
|
-
|
5
|
-
# Scan message, it will contain a message key, tag and optionally content, value
|
6
|
-
class ScanMessage
|
7
|
-
# error.tag.notfound
|
8
|
-
ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
|
9
|
-
# error.tag.removed
|
10
|
-
ERROR_TAG_DISALLOWED = "error.tag.removed"
|
11
|
-
# error.tag.filtered
|
12
|
-
ERROR_TAG_FILTERED = "error.tag.filtered"
|
13
|
-
# error.tag.encoded
|
14
|
-
ERROR_TAG_ENCODED = "error.tag.encoded"
|
15
|
-
# error.css.tag.malformed
|
16
|
-
ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
|
17
|
-
# error.css.attribute.malformed
|
18
|
-
ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"
|
19
|
-
# error.attribute.invalid.filtered
|
20
|
-
ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
|
21
|
-
# error.attribute.invalid.encoded
|
22
|
-
ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
|
23
|
-
# error.attribute.invalid.filtered
|
24
|
-
ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
|
25
|
-
# error.attribute.invalid.removed
|
26
|
-
ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
|
27
|
-
# error.attribute.notfound
|
28
|
-
ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
|
29
|
-
# error.attribute.invalid
|
30
|
-
ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"
|
31
|
-
|
32
|
-
attr_reader :tag, :content, :value, :msgkey
|
33
|
-
def initialize(msgkey, tag, content=nil,value=nil)
|
34
|
-
@msgkey = msgkey
|
35
|
-
@tag = tag
|
36
|
-
@content = content
|
37
|
-
@value = value
|
38
|
-
end
|
39
|
-
def to_s
|
40
|
-
"#{self.msgkey} #{@tag} #{@content} #{@value}"
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
2
|
class Scanner
|
45
3
|
attr_accessor :policy, :errors, :nofollow, :pae
|
46
4
|
DEFAULT_ENCODE = "UTF-8"
|
@@ -67,7 +25,7 @@ module AntiSamy
|
|
67
25
|
# will raise an error if nil input or the maximum input size is exceeded
|
68
26
|
def scan(input, input_encode, output_encoder)
|
69
27
|
raise ArgumentError if input.nil?
|
70
|
-
raise ScanError, "Max input Exceeded" if input.size > @policy.max_input
|
28
|
+
raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
|
71
29
|
# check poilcy stuff
|
72
30
|
handler = Handler.new(@policy,output_encoder)
|
73
31
|
scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
|
data/lib/antisamy/policy.rb
CHANGED
@@ -29,13 +29,16 @@ module AntiSamy
|
|
29
29
|
MAX_INPUT = "maxInputSize"
|
30
30
|
USE_XHTML = "userXHTML"
|
31
31
|
FORMAT_OUTPUT = "formatOutput"
|
32
|
+
# will we allow embedded style sheets
|
32
33
|
EMBED_STYLESHEETS = "embedStyleSheets"
|
34
|
+
# Connection timeout in milliseconds
|
33
35
|
CONN_TIMEOUT = "conenctionTimeout"
|
34
36
|
ANCHROS_NOFOLLOW = "nofollowAnchors"
|
35
37
|
VALIDATE_P_AS_E = "validateParamAsEmbed"
|
36
38
|
PRESERVE_SPACE = "preserveSpace"
|
37
39
|
PRESERVE_COMMENTS = "preserveComments"
|
38
40
|
ON_UNKNOWN_TAG = "onUnknownTag"
|
41
|
+
MAX_SHEETS = "maxStyleSheetImports"
|
39
42
|
|
40
43
|
# Class method to fetch the schema
|
41
44
|
def self.schema
|
@@ -192,15 +195,17 @@ module AntiSamy
|
|
192
195
|
section.element_children.each do |dir|
|
193
196
|
name = dir["name"]
|
194
197
|
value = dir["value"]
|
195
|
-
|
196
|
-
if name.eql?("maxInputSize")
|
198
|
+
if name.eql?("maxInputSize")
|
197
199
|
@max_input = value.to_i
|
198
200
|
else
|
199
|
-
if
|
201
|
+
if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
|
202
|
+
value = value.to_i
|
203
|
+
elsif value =~ /true/i
|
200
204
|
value = true
|
201
205
|
else
|
202
206
|
value = false
|
203
207
|
end
|
208
|
+
@directives[name] = value
|
204
209
|
end
|
205
210
|
end
|
206
211
|
end
|
@@ -1,4 +1,72 @@
|
|
1
1
|
module AntiSamy
|
2
|
+
class ScanError < StandardError; end
|
3
|
+
# One message produced during a scan: a message key, the tag it concerns and,
# optionally, content and a value.
class ScanMessage
  # --- tag level ---
  # tag is not in the policy
  ERROR_TAG_NOT_IN_POLICY = "error.tag.notfound"
  # tag was removed
  ERROR_TAG_DISALLOWED = "error.tag.removed"
  # tag was filtered
  ERROR_TAG_FILTERED = "error.tag.filtered"
  # tag was encoded
  ERROR_TAG_ENCODED = "error.tag.encoded"

  # --- malformed CSS ---
  # malformed CSS in a style tag
  ERROR_CSS_TAG_MALFORMED = "error.css.tag.malformed"
  # malformed CSS in a style attribute
  ERROR_CSS_ATTRIBUTE_MALFORMED = "error.css.attribute.malformed"

  # --- attribute level ---
  # invalid attribute, tag filtered
  ERROR_ATTRIBUTE_CAUSE_FILTER = "error.attribute.invalid.filtered"
  # invalid attribute, tag encoded
  ERROR_ATTRIBUTE_CAUSE_ENCODE = "error.attribute.invalid.encoded"
  # invalid attribute, filtered (same key as ERROR_ATTRIBUTE_CAUSE_FILTER)
  ERROR_ATTRIBUTE_INVALID_FILTERED = "error.attribute.invalid.filtered"
  # invalid attribute, removed
  ERROR_ATTRIBUTE_INVALID_REMOVED = "error.attribute.invalid.removed"
  # attribute is not in the policy
  ERROR_ATTRIBUTE_NOT_IN_POLICY = "error.attribute.notfound"
  # attribute is invalid
  ERROR_ATTRIBUTE_INVALID = "error.attribute.invalid"

  # --- comments ---
  # comment was removed
  ERROR_COMMENT_REMOVED = "error.comment.removed"

  # --- CSS rules, imports, selectors and properties ---
  # tag rule not found
  ERROR_CSS_TAG_RULE_NOTFOUND = "error.css.tag.notfound"
  # style sheet rule not found
  ERROR_STYLESHEET_RULE_NOTFOUND = "error.stylesheet.notfound"
  # embedded style sheets are disabled
  ERROR_CSS_IMPORT_DISABLED = "error.css.import.disabled"
  # import URI is invalid
  ERROR_CSS_IMPORT_URL_INVALID = "error.css.import.uri.invalid"
  # selector is disallowed
  ERROR_CSS_TAG_SELECTOR_DISALLOWED = "error.css.tag.removed"
  # selector is not allowed in a style sheet
  ERROR_STYLESHEET_SELECTOR_DISALLOWED = "error.style.tag.notallowed"
  # invalid CSS property in a tag
  ERROR_CSS_TAG_PROPERTY_INVALID = "error.css.property.invalid"
  # invalid CSS property in a style sheet
  ERROR_STYLESHEET_PROPERTY_INVALID = "error.stylesheet.css.property.invalid"
  # allotted number of imports exceeded
  ERROR_CSS_IMPORT_EXCEEDED = "error.import.exceeded.sheets"
  # imported input exceeded the size limit
  ERROR_CSS_IMPORT_INPUT_SIZE = "error.import.exceeded.size"
  # import failed
  ERROR_CSS_IMPORT_FAILURE = "error.import.bad.uri"
  # style sheet selector not found
  ERROR_STYLESHEET_SELECTOR_NOTFOUND = "error.css.stylesheet.selector.notfound"
  # CSS tag selector not found
  ERROR_CSS_TAG_SELECTOR_NOTFOUND = "error.css.tag.selector.notfound"

  attr_reader :tag, :content
  attr_reader :value, :msgkey

  # msgkey  - one of the ERROR_* keys above (any object is accepted)
  # tag     - the tag the message is about
  # content - optional content
  # value   - optional value
  def initialize(msgkey, tag, content=nil,value=nil)
    @msgkey, @tag, @content, @value = msgkey, tag, content, value
  end

  # The four fields separated by single spaces; nil fields render as empty
  # strings.
  def to_s
    "#{msgkey} #{tag} #{content} #{value}"
  end
end
|
69
|
+
|
2
70
|
# Container of scan results, provides a list of ScanMessage indicating
|
3
71
|
# why elements were removed from the resulting html
|
4
72
|
class ScanResults
|
data/lib/antisamy.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'antisamy/csspool/rsac'
|
2
3
|
require 'antisamy/model/attribute'
|
3
4
|
require 'antisamy/model/tag'
|
4
5
|
require 'antisamy/model/css_property'
|
5
6
|
require 'antisamy/policy'
|
6
7
|
require 'antisamy/scan_results'
|
7
8
|
require 'antisamy/html/handler'
|
9
|
+
require 'antisamy/css/css_validator'
|
10
|
+
require 'antisamy/css/css_filter'
|
11
|
+
require 'antisamy/css/css_scanner'
|
8
12
|
require 'antisamy/html/sax_filter'
|
9
13
|
require 'antisamy/html/scanner'
|
10
14
|
|