antisamy 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. data/README.rdoc +6 -1
  2. data/lib/antisamy/css/css_filter.rb +187 -0
  3. data/lib/antisamy/css/css_scanner.rb +84 -0
  4. data/lib/antisamy/css/css_validator.rb +129 -0
  5. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
  6. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
  7. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
  8. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
  9. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
  10. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
  11. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
  12. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
  13. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
  14. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
  15. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
  16. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
  17. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
  18. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
  19. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
  20. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
  21. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
  22. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
  23. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
  24. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
  25. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
  26. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
  27. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
  28. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
  29. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
  30. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
  31. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
  32. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
  33. data/lib/antisamy/csspool/rsac/sac.rb +14 -0
  34. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
  35. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
  36. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
  37. data/lib/antisamy/csspool/rsac.rb +1 -0
  38. data/lib/antisamy/html/handler.rb +4 -0
  39. data/lib/antisamy/html/sax_filter.rb +49 -33
  40. data/lib/antisamy/html/scanner.rb +1 -43
  41. data/lib/antisamy/policy.rb +8 -3
  42. data/lib/antisamy/scan_results.rb +68 -0
  43. data/lib/antisamy.rb +4 -0
  44. data/spec/antisamy_spec.rb +111 -3
  45. metadata +39 -3
@@ -0,0 +1,27 @@
1
module RSAC
  # A named lexical pattern.
  #
  # A Lexeme pairs a +name+ with one or more regular expressions. Each supplied
  # expression is rebuilt from its source so that it is anchored at the start
  # of the input (+\A+) and compiled case-insensitive, multi-line and without a
  # fixed encoding. The rebuilt expressions are then merged with Regexp.union
  # and exposed through #pattern.
  class Lexeme
    # Options applied to every rebuilt pattern. Regexp::NOENCODING is the
    # option-bit form of the legacy 'n' encoding argument; the three-argument
    # form of Regexp.new is rejected by current Ruby versions.
    PATTERN_OPTIONS = Regexp::IGNORECASE | Regexp::MULTILINE | Regexp::NOENCODING

    attr_reader :name, :pattern

    # name    - identifier for this lexeme; must be truthy.
    # pattern - optional Regexp used as the first alternative.
    # block   - optional; receives the (mutable) list of patterns so further
    #           alternatives can be appended with <<.
    #
    # Raises ArgumentError when +name+ is missing or when neither +pattern+
    # nor the block supplied at least one pattern.
    def initialize(name, pattern = nil, &block)
      raise ArgumentError, "name required" unless name

      @name = name
      patterns = []
      patterns << pattern if pattern
      yield(patterns) if block_given?

      raise ArgumentError, "at least one pattern required" if patterns.empty?

      # Rebuild from #source so the caller's own flags are replaced by ours
      # and every alternative only matches at the current scan position.
      patterns.map! do |candidate|
        Regexp.new("\\A#{candidate.source}", PATTERN_OPTIONS)
      end

      @pattern = Regexp.union(*patterns)
    end
  end
end
@@ -0,0 +1,201 @@
1
module RSAC
  # Base class for the value objects defined in this file. It is a plain data
  # holder: each subclass constructor fills in the attributes it needs.
  class LexicalUnit
    attr_accessor :dimension_unit_text,
                  :lexical_unit_type,
                  :float_value,
                  :integer_value,
                  :string_value,
                  :parameters,
                  :function_name

    # Equal when +other+ is an instance of the receiver's class (or of a
    # subclass) and carries the same lexical_unit_type.
    def ==(other)
      self.class === other && self.lexical_unit_type == other.lexical_unit_type
    end

    def eql?(other)
      self == other
    end

    # Keeps #hash consistent with #== / #eql? for plain LexicalUnit instances;
    # every subclass below overrides it with a finer-grained value.
    def hash
      lexical_unit_type.hash
    end

    alias :to_s :string_value
  end

  # A function call such as "rect(1px, 2px)".
  class Function < LexicalUnit
    # Function names that get a dedicated unit type; any other name is
    # classified as :SAC_FUNCTION.
    FUNCTIONS = {
      'counter'  => :SAC_COUNTER_FUNCTION,
      'counters' => :SAC_COUNTERS_FUNCTION,
      'rect'     => :SAC_RECT_FUNCTION,
    }

    # name   - function name *including* its opening parenthesis ("rect(");
    #          the closing parenthesis is appended here.
    # params - list of arguments; joined with ", " for the string form.
    def initialize(name, params)
      self.string_value = "#{name}#{params.join(', ')})"
      # Everything before the last "(" on the line is the bare function name;
      # nil when +name+ has no parenthesis.
      name =~ /^(.*)\(/
      self.function_name = Regexp.last_match(1)
      self.parameters = params
      self.lexical_unit_type = FUNCTIONS[self.function_name] || :SAC_FUNCTION
    end

    def ==(other)
      super &&
        function_name == other.function_name &&
        parameters == other.parameters
    end

    def hash
      ([self.function_name] + parameters).hash
    end
  end

  # A colour value, given either as "#RGB" / "#RRGGBB" or as any other text.
  class Color < LexicalUnit
    # For a hex colour, #parameters holds three integer Numbers (red, green,
    # blue; one-digit components are doubled, so "F" becomes 0xFF). Any other
    # value is stored as a single LexicalIdent.
    #
    # NOTE(review): the pattern only accepts upper-case hex digits, so a
    # lower-case value such as "#fff" takes the identifier branch - confirm
    # that callers normalise case first.
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_RGBCOLOR
      if value =~ /^#([A-F\d]{1,2})([A-F\d]{1,2})([A-F\d]{1,2})$/
        components = [Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)]
        self.parameters = components.map { |digits|
          digits.length == 1 ? (digits * 2).hex : digits.hex
        }.map { |channel|
          Number.new(channel, '', :SAC_INTEGER)
        }
      else
        self.parameters = [LexicalIdent.new(value)]
      end
    end

    def ==(other)
      super && self.parameters == other.parameters
    end

    def hash
      self.parameters.hash
    end

    # Returns the three-digit shorthand ("#FFF") when every channel consists
    # of one repeated hex digit; otherwise the original string_value.
    def to_s
      if self.parameters.length < 3
        super
      else
        hex = self.parameters.map { |x|
          sprintf("%02X", x.integer_value).split('').uniq
        }.flatten
        hex.length != 3 ? super : "##{hex.join()}"
      end
    end
  end

  # A string value; equality and hashing are based on string_value.
  class LexicalString < LexicalUnit
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_STRING_VALUE
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end
  end

  # An identifier; equality and hashing are based on string_value.
  class LexicalIdent < LexicalUnit
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_IDENT
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end
  end

  # A "url(...)" value. string_value holds only what is between the
  # parentheses; #to_s wraps it again.
  class LexicalURI < LexicalUnit
    # value - the complete token text, e.g. "url(image.png)". The "url("
    #         prefix is matched case-insensitively so "URL(x)" is unwrapped
    #         the same way as "url(x)".
    def initialize(value)
      self.string_value = value.sub(/\Aurl\(/i, '').sub(/\)\Z/, '')
      self.lexical_unit_type = :SAC_URI
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end

    def to_s
      "url(#{string_value})"
    end
  end

  # A numeric value with an optional unit ("10px", "1.5", "50%").
  class Number < LexicalUnit
    NON_NEGATIVE_UNITS = [
      :SAC_DEGREE,
      :SAC_GRADIAN,
      :SAC_RADIAN,
      :SAC_MILLISECOND,
      :SAC_SECOND,
      :SAC_HERTZ,
      :SAC_KILOHERTZ,
    ]

    # Lower-cased unit suffix => unit type.
    UNITS = {
      'deg'   => :SAC_DEGREE,
      'rad'   => :SAC_RADIAN,
      'grad'  => :SAC_GRADIAN,
      'ms'    => :SAC_MILLISECOND,
      's'     => :SAC_SECOND,
      'hz'    => :SAC_HERTZ,
      'khz'   => :SAC_KILOHERTZ,
      'px'    => :SAC_PIXEL,
      'cm'    => :SAC_CENTIMETER,
      'mm'    => :SAC_MILLIMETER,
      'in'    => :SAC_INCH,
      'pt'    => :SAC_POINT,
      'pc'    => :SAC_PICA,
      '%'     => :SAC_PERCENTAGE,
      'em'    => :SAC_EM,
      'ex'    => :SAC_EX,
    }

    # value - a String such as "10px" (split into number and unit here) or an
    #         already-numeric value.
    # unit  - unit suffix; when nil it is taken from a String +value+, and
    #         otherwise treated as "no unit".
    # type  - accepted for interface compatibility only. The unit type is
    #         always derived from the unit (via UNITS) or, for unit-less
    #         values, from whether the value contains a decimal point.
    def initialize(value, unit = nil, type = nil)
      if value.is_a?(String)
        value =~ /^(-?[0-9.]*)(.*)$/
        value = $1
        unit ||= $2
      end

      # unit may legitimately be nil for non-String values.
      unit_text = unit.to_s

      self.string_value = "#{value}#{unit_text}"
      self.float_value = value.to_f
      self.integer_value = value.to_i
      self.dimension_unit_text = unit_text.downcase
      # value.to_s keeps the decimal-point test working for Integer/Float
      # values, which do not respond to =~ on current Ruby versions.
      self.lexical_unit_type = UNITS[self.dimension_unit_text] ||
        (value.to_s.include?('.') ? :SAC_NUMBER : :SAC_INTEGER)
    end

    # Zero equals zero regardless of unit ("0px" == "0em"); otherwise value
    # and unit must both match. Anything that is not a Number is unequal.
    def ==(other)
      return false unless self.class === other
      return true if self.float_value == 0 && other.float_value == 0
      return false unless super

      float_value == other.float_value &&
        integer_value == other.integer_value &&
        dimension_unit_text == other.dimension_unit_text
    end

    # Mirrors #==: all zero values share one hash.
    def hash
      if self.float_value == 0
        self.float_value.hash
      else
        [float_value, integer_value, dimension_unit_text].hash
      end
    end

    # Zero is rendered without a unit.
    def to_s
      if self.float_value == 0
        "0"
      else
        super
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
module RSAC
  # Error type of the RSAC namespace. It is a plain RuntimeError subclass and
  # adds no behaviour of its own.
  class ParseException < RuntimeError; end
end
@@ -0,0 +1,109 @@
1
+ require "antisamy/csspool/rsac/sac/document_handler"
2
+ require "antisamy/csspool/rsac/sac/error_handler"
3
+ require "antisamy/csspool/rsac/sac/generated_parser"
4
+ require "antisamy/csspool/rsac/sac/lexical_unit"
5
+ require "antisamy/csspool/rsac/sac/parse_exception"
6
+ require "antisamy/csspool/rsac/sac/tokenizer"
7
+ require "antisamy/csspool/rsac/sac/property_parser"
8
+
9
module RSAC
  # Style sheet parser built on the racc-generated RSAC::GeneratedParser.
  #
  # The input string is tokenized up front; the generated parser then pulls
  # tokens through #next_token and reports what it recognises to the
  # configured document handler.
  class Parser < RSAC::GeneratedParser
    # The version of CSSPool you're using
    VERSION = '0.2.7'

    TOKENIZER = Tokenizer.new

    # Opening delimiter => closing delimiter, used while recovering from a
    # parse error (see #eliminate_pair_matches).
    PAIR_MATCHES = {
      '"' => '"',
      "'" => "'",
      '{' => '}',
      '[' => ']',
      '(' => ')',
    }.freeze

    attr_accessor :document_handler, :error_handler, :logger
    attr_reader :property_parser
    attr_reader :tokenizer

    # document_handler - receives the parse events; defaults to a new
    #                    StyleSheet bound to this parser.
    def initialize(document_handler = StyleSheet.new(self))
      @error_handler = ErrorHandler.new
      @document_handler = document_handler
      @property_parser = PropertyParser.new()
      @tokenizer = TOKENIZER
      @logger = nil
    end

    # Parses +string+ as a complete style sheet, bracketing the parse with
    # start_document / end_document on the document handler.
    #
    # Returns the document handler.
    def parse_style_sheet(string)
      # NOTE(review): the racc debug flag is switched on unconditionally -
      # confirm this is intended.
      @yydebug = true
      @tokens = TOKENIZER.tokenize(string)
      @position = 0

      self.document_handler.start_document(string)
      do_parse
      self.document_handler.end_document(string)
      self.document_handler
    end

    alias :parse :parse_style_sheet

    # Parses a bare selector by wrapping it in an empty rule ("<rule> { }")
    # and capturing what the parser hands to start_selector.
    #
    # The regular document handler is swapped out for the duration of the
    # parse and is restored even when parsing raises.
    #
    # Returns whatever was passed to start_selector (nil if it never fired).
    def parse_rule(rule)
      returner = Class.new(DocumentHandler) {
        attr_accessor :selector
        alias :start_selector :selector=
      }.new
      old_document_handler = self.document_handler
      self.document_handler = returner
      begin
        self.parse("#{rule} { }")
      ensure
        self.document_handler = old_document_handler
      end
      returner.selector
    end

    # Returns the parser version. We return CSS2, but it's actually
    # CSS2.1. No font-face tags. Sorry.
    def parser_version
      "http://www.w3.org/TR/REC-CSS2"
    end

    private # Bro.

    # We have to eliminate matching pairs.
    # http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
    # See the malformed declarations section
    #
    # When +error_value+ (ignoring surrounding whitespace) is an opening
    # delimiter, tokens are consumed - recursing into nested openers - until
    # the matching closer is read; that closer is then overwritten with a
    # whitespace token and handed back to the parser. If the input ends
    # first, consumption simply stops. Token strings are never modified.
    def eliminate_pair_matches(error_value)
      opener = error_value.to_s.strip
      closer = PAIR_MATCHES[opener]
      return unless closer

      logger.warn("Eliminating pair for: #{opener}") if logger
      loop do
        token = next_token
        # next_token answers [false, false] once the input is exhausted;
        # without this check an unterminated pair would spin forever.
        break if token[0] == false

        eliminate_pair_matches(token[1])
        value = token[1].to_s.strip
        logger.warn("Eliminated token: #{token[0]} #{value}") if logger
        next unless value == closer

        # Step back onto the closer and replace it, so the parser sees a
        # harmless whitespace token next.
        @position -= 1
        @tokens[@position] = Token.new(:S, ' ', nil) # super hack
        break
      end
    end

    # Error callback: logs the offending token (when a logger is set) and
    # skips over any delimiter pair it opened.
    def on_error(error_token_id, error_value, value_stack)
      if logger
        logger.error(token_to_str(error_token_id))
        logger.error("error value: #{error_value}")
      end
      eliminate_pair_matches(error_value)
    end

    # Supplies the next token as a [type, value] pair, or [false, false] at
    # end of input. Comment tokens are forwarded to the document handler and
    # skipped.
    def next_token
      return [false, false] if @position >= @tokens.length

      n_token = @tokens[@position]
      @position += 1
      if n_token.name == :COMMENT
        self.document_handler.comment(n_token.value)
        return next_token
      end
      n_token.to_racc_token
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require "antisamy/csspool/rsac/sac/generated_property_parser"
2
+
3
module RSAC
  # Runs the generated property grammar over a list of tokens.
  class PropertyParser < RSAC::GeneratedPropertyParser
    def initialize
      @tokens = []
      @token_table = Racc_arg[10]
    end

    # Converts +tokens+ into [type, value] pairs and parses them.
    #
    # Whitespace (:S) tokens are dropped. A token whose value appears in the
    # token table is passed as [value, value]. Any other :delim token is
    # discarded, except that a "-" is remembered and prefixed to the value of
    # the next token that is kept. Everything else is converted with
    # to_racc_token.
    #
    # Returns the result of the parse, or nil when it raises ParseError.
    def parse_tokens(tokens)
      pending_minus = false # Nasty hack for unary minus
      converted = []

      tokens.each do |token|
        next if token.name == :S

        literal = token.value
        pair =
          if @token_table.has_key?(literal)
            [literal, literal]
          elsif token.name == :delim
            pending_minus = true if literal == '-'
            nil
          else
            token.to_racc_token
          end
        next unless pair

        if pending_minus
          pair[1] = "-#{pair[1]}"
          pending_minus = false
        end
        converted << pair
      end

      @tokens = converted

      begin
        do_parse
      rescue ParseError
        nil
      end
    end

    private

    # Hands the queued pairs to the parser one at a time; [false, false]
    # signals that none are left.
    def next_token
      @tokens.empty? ? [false, false] : @tokens.shift
    end
  end
end
@@ -0,0 +1,36 @@
1
module RSAC
  module Selectors
    # Child combinator: "parent > selector".
    class ChildSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector
      alias :parent :ancestor_selector
      alias :selector :simple_selector

      # parent   - selector on the left-hand side of ">".
      # selector - selector on the right-hand side.
      def initialize(parent, selector)
        super(:SAC_CHILD_SELECTOR)

        @simple_selector = selector
        @ancestor_selector = parent
      end

      # CSS text of both sides joined by " > ".
      def to_css
        lhs = parent.to_css
        rhs = selector.to_css
        "#{lhs} > #{rhs}"
      end

      # XPath of the parent followed by a single "/" step to the child; only
      # the parent side may carry the leading prefix.
      def to_xpath(prefix=true)
        head = parent.to_xpath(prefix)
        tail = selector.to_xpath(false)
        "#{head}/#{tail}"
      end

      # Element-wise sum of both sides' specificity.
      def specificity
        parent.specificity.zip(selector.specificity).map do |left, right|
          left + right
        end
      end

      def ==(other)
        return false unless super

        parent == other.parent && selector == other.selector
      end

      def hash
        [parent, selector].hash
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module RSAC
  module Selectors
    # A selector qualified by a condition. Either part may be nil; a missing
    # part is treated as absent by every method below.
    class ConditionalSelector < SimpleSelector
      attr_accessor :condition, :simple_selector
      alias :selector :simple_selector

      # selector  - the selector being qualified (may be nil).
      # condition - the qualifying condition (may be nil).
      def initialize(selector, condition)
        super(:SAC_CONDITIONAL_SELECTOR)

        @condition = condition
        @simple_selector = selector
      end

      # CSS text of the selector immediately followed by the condition; a nil
      # part contributes an empty string.
      def to_css
        [selector, condition].map { |x|
          x ? x.to_css : ''
        }.join('')
      end

      # XPath for the selector ("*" when there is none) followed by the
      # condition's XPath. A nil condition contributes nothing, in line with
      # how #to_css and #specificity treat it.
      def to_xpath(prefix=true)
        atoms = []
        atoms << "//" if prefix
        atoms << (selector ? selector.to_xpath(false) : "*")
        atoms << condition.to_xpath if condition

        atoms.join("")
      end

      # Element-wise sum of selector and condition specificity; a nil part
      # counts as [0, 0, 0, 0].
      def specificity
        (selector ? selector.specificity : ([0] * 4)).zip(
          (condition ? condition.specificity : ([0] * 4))).map { |x,y|
          x + y
        }
      end

      def ==(other)
        super && condition == other.condition && selector == other.selector
      end

      def hash
        [condition, selector].hash
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module RSAC
  module Selectors
    # Descendant combinator: "ancestor selector".
    class DescendantSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector
      alias :ancestor :ancestor_selector
      alias :selector :simple_selector

      # ancestor - selector on the left-hand side.
      # selector - selector on the right-hand side.
      def initialize(ancestor, selector)
        super(:SAC_DESCENDANT_SELECTOR)

        @simple_selector = selector
        @ancestor_selector = ancestor
      end

      # CSS text of both sides separated by a single space.
      def to_css
        outer = ancestor.to_css
        inner = selector.to_css
        "#{outer} #{inner}"
      end

      # XPath of the ancestor followed by a "//" step to the descendant; only
      # the ancestor side may carry the leading prefix.
      def to_xpath(prefix=true)
        outer = ancestor.to_xpath(prefix)
        inner = selector.to_xpath(false)
        "#{outer}//#{inner}"
      end

      # Element-wise sum of both sides' specificity.
      def specificity
        ancestor.specificity.zip(selector.specificity).map do |left, right|
          left + right
        end
      end

      def ==(other)
        return false unless super

        selector == other.selector && ancestor == other.ancestor
      end

      def hash
        [selector, ancestor].hash
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module RSAC
  module Selectors
    # Selects elements by name.
    class ElementSelector < SimpleSelector
      attr_reader :local_name
      alias :name :local_name

      # name - the element name this selector matches.
      def initialize(name)
        super(:SAC_ELEMENT_NODE_SELECTOR)
        @local_name = name
      end

      # The element name itself.
      def to_css
        local_name
      end

      # The element name, preceded by "//" when +prefix+ is truthy.
      def to_xpath(prefix=true)
        lead = prefix ? "//" : ""
        "#{lead}#{local_name}"
      end

      def specificity
        [0, 0, 0, 1]
      end

      def ==(other)
        return false unless super

        name == other.name
      end

      def hash
        name.hash
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module RSAC
  module Selectors
    # Root of the selector hierarchy: stores the selector type and bases
    # equality and hashing on it.
    class Selector
      attr_reader :selector_type

      # selector_type - value identifying the kind of selector.
      def initialize(selector_type)
        @selector_type = selector_type
      end

      # True when +other+ is an instance of the receiver's class (or of a
      # subclass) with the same selector_type.
      def ==(other)
        return false unless self.class === other

        selector_type == other.selector_type
      end

      # Delegates to #== so that subclass overrides are honoured.
      def eql?(other)
        self == other
      end

      def hash
        selector_type.hash
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module RSAC
  module Selectors
    # Adjacent-sibling combinator: "selector + sibling".
    class SiblingSelector < SimpleSelector
      attr_accessor :selector, :sibling_selector
      alias :sibling :sibling_selector

      # selector - selector on the left-hand side of "+".
      # sibling  - selector on the right-hand side.
      def initialize(selector, sibling)
        super(:SAC_DIRECT_ADJACENT_SELECTOR)

        @sibling_selector = sibling
        @selector = selector
      end

      # CSS text of both sides joined by " + ".
      def to_css
        first = selector.to_css
        second = sibling.to_css
        "#{first} + #{second}"
      end

      # XPath of the left-hand side followed by a following-sibling step to
      # the right-hand side; only the left side may carry the leading prefix.
      def to_xpath(prefix=true)
        first = selector.to_xpath(prefix)
        second = sibling.to_xpath(false)
        "#{first}/following-sibling::#{second}"
      end

      # Element-wise sum of both sides' specificity.
      def specificity
        selector.specificity.zip(sibling.specificity).map do |left, right|
          left + right
        end
      end

      def ==(other)
        return false unless super

        selector == other.selector && sibling == other.sibling
      end

      def hash
        [selector, sibling].hash
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module RSAC
  module Selectors
    # The universal selector ("*"), and the base class of the more specific
    # selector types, which override the methods below.
    class SimpleSelector < Selector
      # selector_type - defaults to :SAC_ANY_NODE_SELECTOR.
      def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
        super(selector_type)
      end

      def to_css
        '*'
      end

      # prefix - when truthy (the default) the expression is rooted with "//",
      #          so a call without arguments still returns "//*". The
      #          parameter mirrors the to_xpath signature of the subclasses,
      #          which call to_xpath(prefix) / to_xpath(false) on their
      #          operands; without it a universal selector used as such an
      #          operand raised ArgumentError.
      def to_xpath(prefix=true)
        prefix ? "//*" : "*"
      end

      def specificity
        [0, 0, 0, 0]
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "antisamy/csspool/rsac/sac/selectors/selector"
2
+
3
+ %w(simple child conditional descendant element sibling).each do |type|
4
+ require "antisamy/csspool/rsac/sac/selectors/#{type}_selector"
5
+ end
@@ -0,0 +1,25 @@
1
module RSAC
  # A token: a name, the associated value and a position.
  class Token
    attr_reader :name, :value, :position

    def initialize(name, value, position)
      @name, @value, @position = name, value, position
    end

    # The [name, value] pair form of this token.
    def to_racc_token
      [name, value]
    end
  end

  # A token that is always named :delim and whose pair form uses the value in
  # both slots.
  class DelimiterToken < Token
    def initialize(value, position)
      super(:delim, value, position)
    end

    # [value, value] - the value doubles as the token type.
    def to_racc_token
      delimiter = value
      [delimiter, delimiter]
    end
  end
end