antisamy 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/README.rdoc +6 -1
  2. data/lib/antisamy/css/css_filter.rb +187 -0
  3. data/lib/antisamy/css/css_scanner.rb +84 -0
  4. data/lib/antisamy/css/css_validator.rb +129 -0
  5. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -0
  6. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -0
  7. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -0
  8. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -0
  9. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -0
  10. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -0
  11. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -0
  12. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -0
  13. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -0
  14. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -0
  15. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -0
  16. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -0
  17. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -0
  18. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -0
  19. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -0
  20. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -0
  21. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -0
  22. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -0
  23. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -0
  24. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -0
  25. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -0
  26. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -0
  27. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -0
  28. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -0
  29. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -0
  30. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -0
  31. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -0
  32. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -0
  33. data/lib/antisamy/csspool/rsac/sac.rb +14 -0
  34. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -0
  35. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -0
  36. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -0
  37. data/lib/antisamy/csspool/rsac.rb +1 -0
  38. data/lib/antisamy/html/handler.rb +4 -0
  39. data/lib/antisamy/html/sax_filter.rb +49 -33
  40. data/lib/antisamy/html/scanner.rb +1 -43
  41. data/lib/antisamy/policy.rb +8 -3
  42. data/lib/antisamy/scan_results.rb +68 -0
  43. data/lib/antisamy.rb +4 -0
  44. data/spec/antisamy_spec.rb +111 -3
  45. metadata +39 -3
@@ -0,0 +1,27 @@
1
module RSAC
  # Pairs a name with a single Regexp assembled from one or more source
  # patterns. Every source pattern is re-compiled so that it is anchored at
  # the start of the input (\A), case-insensitive and multiline.
  class Lexeme
    attr_reader :name, :pattern

    # Flags applied to every compiled pattern. Regexp::NOENCODING is the
    # flag form of the legacy third argument 'n' to Regexp.new, which
    # current Ruby versions no longer accept.
    PATTERN_OPTIONS = Regexp::IGNORECASE | Regexp::MULTILINE | Regexp::NOENCODING

    # name    - identifier of the lexeme (required).
    # pattern - optional Regexp to start the pattern list with.
    # block   - optional; receives the Array of patterns collected so far so
    #           the caller can append further Regexps to it.
    #
    # Raises ArgumentError when +name+ is missing or when neither +pattern+
    # nor the block supplied at least one pattern.
    def initialize(name, pattern=nil, &block)
      raise ArgumentError, "name required" unless name

      @name = name
      patterns = []

      patterns << pattern if pattern
      yield(patterns) if block_given?

      if patterns.empty?
        raise ArgumentError, "at least one pattern required"
      end

      patterns.collect! do |spattern|
        Regexp.new("\\A#{spattern.source}", PATTERN_OPTIONS)
      end

      @pattern = Regexp.union(*patterns)
    end
  end
end
@@ -0,0 +1,201 @@
1
module RSAC
  # Base class for the value objects defined in this file. It only carries
  # data; each subclass fills in the accessors that apply to it.
  class LexicalUnit
    attr_accessor :dimension_unit_text,
                  :lexical_unit_type,
                  :float_value,
                  :integer_value,
                  :string_value,
                  :parameters,
                  :function_name

    # Equal when +other+ is an instance of the receiver's class (or of a
    # subclass of it) and both report the same lexical_unit_type.
    def ==(other)
      self.class === other && self.lexical_unit_type == other.lexical_unit_type
    end

    def eql?(other)
      self == other
    end

    # The textual form of a unit is its string_value unless a subclass
    # overrides to_s.
    alias :to_s :string_value
  end

  # A function term such as "rect(1px, 2px)".
  class Function < LexicalUnit
    # Function names that map to a dedicated unit type; every other name is
    # classified as :SAC_FUNCTION.
    FUNCTIONS = {
      'counter'   => :SAC_COUNTER_FUNCTION,
      'counters'  => :SAC_COUNTERS_FUNCTION,
      'rect'      => :SAC_RECT_FUNCTION,
    }

    # name   - the function name including its opening parenthesis, e.g.
    #          "rect(". The closing parenthesis is appended here.
    # params - Array of parameter units; joined with ", " for string_value.
    def initialize(name, params)
      self.string_value = "#{name}#{params.join(', ')})"
      # Everything before the last "(" is the bare function name; nil when
      # +name+ contains no parenthesis.
      self.function_name = name[/^(.*)\(/, 1]
      self.parameters = params
      self.lexical_unit_type = FUNCTIONS[self.function_name] || :SAC_FUNCTION
    end

    def ==(other)
      super &&
        function_name == other.function_name &&
        parameters == other.parameters
    end

    def hash
      ([self.function_name] + parameters).hash
    end
  end

  # A colour value. Hex colours are decomposed into three integer Number
  # parameters; anything else is kept as a single LexicalIdent parameter.
  class Color < LexicalUnit
    # Exactly three or exactly six hex digits, in either case. The previous
    # pattern ({1,2} per channel, upper case only) also matched 4- and
    # 5-digit strings with an arbitrary channel split and did not
    # recognise lower-case digits.
    HEX_COLOR = /\A#(?:(\h)(\h)(\h)|(\h{2})(\h{2})(\h{2}))\z/

    # value - the colour text, e.g. "#FFF", "#a0b1c2" or "red".
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_RGBCOLOR
      match = HEX_COLOR.match(value.to_s)
      if match
        # Only one of the two alternatives captured; drop the nil groups.
        self.parameters = match.captures.compact.map { |digits|
          # A single digit is shorthand for the digit repeated ("F" => "FF").
          channel = digits.length == 1 ? (digits * 2).hex : digits.hex
          Number.new(channel, '', :SAC_INTEGER)
        }
      else
        self.parameters = [LexicalIdent.new(value)]
      end
    end

    def ==(other)
      super && self.parameters == other.parameters
    end

    def hash
      self.parameters.hash
    end

    # Returns the three-digit upper-case shorthand ("#FFF") when every
    # channel consists of a repeated digit; otherwise the original text.
    def to_s
      return super if self.parameters.length < 3

      hex = self.parameters.map { |x|
        # "FF" collapses to ["F"], "0A" stays ["0", "A"].
        sprintf("%02X", x.integer_value).split('').uniq
      }.flatten
      hex.length != 3 ? super : "##{hex.join()}"
    end
  end

  # A quoted string value.
  class LexicalString < LexicalUnit
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_STRING_VALUE
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end
  end

  # An identifier value.
  class LexicalIdent < LexicalUnit
    def initialize(value)
      self.string_value = value
      self.lexical_unit_type = :SAC_IDENT
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end
  end

  # A url(...) value. string_value holds the text between the parentheses;
  # to_s wraps it in "url(" ... ")" again.
  class LexicalURI < LexicalUnit
    # value - the full token text, e.g. "url(http://example.com/a.png)".
    def initialize(value)
      # The prefix is matched case-insensitively so that "URL(" is stripped
      # too; previously it was left inside string_value and to_s produced
      # "url(URL(...)".
      self.string_value = value.sub(/\Aurl\(/i, '').sub(/\)\z/, '')
      self.lexical_unit_type = :SAC_URI
    end

    def ==(other)
      super && self.string_value == other.string_value
    end

    def hash
      self.string_value.hash
    end

    def to_s
      "url(#{string_value})"
    end
  end

  # A numeric value with an optional unit suffix.
  class Number < LexicalUnit
    NON_NEGATIVE_UNITS = [
      :SAC_DEGREE,
      :SAC_GRADIAN,
      :SAC_RADIAN,
      :SAC_MILLISECOND,
      :SAC_SECOND,
      :SAC_HERTZ,
      :SAC_KILOHERTZ,
    ]

    # Lower-cased unit suffix => unit type.
    UNITS = {
      'deg'   => :SAC_DEGREE,
      'rad'   => :SAC_RADIAN,
      'grad'  => :SAC_GRADIAN,
      'ms'    => :SAC_MILLISECOND,
      's'     => :SAC_SECOND,
      'hz'    => :SAC_HERTZ,
      'khz'   => :SAC_KILOHERTZ,
      'px'    => :SAC_PIXEL,
      'cm'    => :SAC_CENTIMETER,
      'mm'    => :SAC_MILLIMETER,
      'in'    => :SAC_INCH,
      'pt'    => :SAC_POINT,
      'pc'    => :SAC_PICA,
      '%'     => :SAC_PERCENTAGE,
      'em'    => :SAC_EM,
      'ex'    => :SAC_EX,
    }

    # value - a String such as "10px" (split into number and unit here) or
    #         a numeric object.
    # unit  - optional unit text; when omitted for a String value it is
    #         taken from the text following the number.
    # type  - accepted for interface compatibility. The unit type has
    #         always been derived from the unit text and the presence of a
    #         decimal point, and this argument is not consulted.
    #         NOTE(review): confirm whether an explicit type should win.
    def initialize(value, unit = nil, type = nil)
      if value.is_a?(String)
        value =~ /^(-?[0-9.]*)(.*)$/
        value = $1
        unit ||= $2
      end
      # A numeric value may arrive without a unit; treat that as "no unit"
      # instead of calling downcase on nil.
      unit = unit.to_s
      self.string_value = "#{value}#{unit}"
      self.float_value = value.to_f
      self.integer_value = value.to_i
      self.dimension_unit_text = unit.downcase
      # value may be an Integer/Float here, which does not respond to =~ on
      # current Ruby versions, so inspect its textual form instead.
      self.lexical_unit_type = UNITS[self.dimension_unit_text] ||
        (value.to_s.include?('.') ? :SAC_NUMBER : :SAC_INTEGER)
    end

    # Zero equals zero regardless of unit; otherwise value and unit must
    # match. Objects that do not expose float_value are never equal.
    def ==(other)
      return false unless other.respond_to?(:float_value)
      return true if self.float_value == 0 && other.float_value == 0
      return false unless super

      float_value == other.float_value &&
        integer_value == other.integer_value &&
        dimension_unit_text == other.dimension_unit_text
    end

    # Kept consistent with ==: every zero hashes alike.
    def hash
      if self.float_value == 0
        self.float_value.hash
      else
        [float_value, integer_value, dimension_unit_text].hash
      end
    end

    # Zero is rendered without a unit.
    def to_s
      if self.float_value == 0
        "0"
      else
        super
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
module RSAC
  # Error class of the RSAC namespace; a plain RuntimeError subclass that
  # adds no behaviour of its own.
  class ParseException < RuntimeError; end
end
@@ -0,0 +1,109 @@
1
+ require "antisamy/csspool/rsac/sac/document_handler"
2
+ require "antisamy/csspool/rsac/sac/error_handler"
3
+ require "antisamy/csspool/rsac/sac/generated_parser"
4
+ require "antisamy/csspool/rsac/sac/lexical_unit"
5
+ require "antisamy/csspool/rsac/sac/parse_exception"
6
+ require "antisamy/csspool/rsac/sac/tokenizer"
7
+ require "antisamy/csspool/rsac/sac/property_parser"
8
+
9
module RSAC
  # Style sheet parser built on the generated grammar in GeneratedParser.
  # It tokenizes the input, feeds the tokens to the grammar through
  # next_token and reports parse events to the configured document handler.
  class Parser < RSAC::GeneratedParser
    # The version of CSSPool you're using
    VERSION = '0.2.7'

    # Tokenizer instance shared by every parser.
    TOKENIZER = Tokenizer.new

    attr_accessor :document_handler, :error_handler, :logger
    attr_reader :property_parser
    attr_reader :tokenizer

    # document_handler - object that receives the parse events; defaults to
    #                    a new StyleSheet bound to this parser.
    def initialize(document_handler = StyleSheet.new(self))
      @error_handler = ErrorHandler.new
      @document_handler = document_handler
      @property_parser = PropertyParser.new()
      @tokenizer = TOKENIZER
      @logger = nil
    end

    # Parses +string+ as a complete style sheet, wrapping the parse in
    # start_document / end_document calls on the document handler.
    #
    # Returns the document handler.
    def parse_style_sheet(string)
      # NOTE(review): debugging is switched on unconditionally here;
      # confirm whether that is intended outside development.
      @yydebug = true
      @tokens = TOKENIZER.tokenize(string)
      @position = 0

      self.document_handler.start_document(string)
      do_parse
      self.document_handler.end_document(string)
      self.document_handler
    end

    alias :parse :parse_style_sheet

    # Parses a bare selector by wrapping it in an empty rule ("<rule> { }")
    # and capturing what the grammar passes to start_selector.
    #
    # Returns whatever was captured (nil if start_selector never fired).
    def parse_rule(rule)
      returner = Class.new(DocumentHandler) {
        attr_accessor :selector
        alias :start_selector :selector=
      }.new
      old_document_handler = self.document_handler
      self.document_handler = returner
      begin
        self.parse("#{rule} { }")
      ensure
        # Restore the caller's handler even when parsing raises, so the
        # parser is not left wired to the throw-away capture handler.
        self.document_handler = old_document_handler
      end
      returner.selector
    end

    # Returns the parser version.  We return CSS2, but its actually
    # CSS2.1.  No font-face tags.  Sorry.
    def parser_version
      "http://www.w3.org/TR/REC-CSS2"
    end

    private

    # We have to eliminate matching pairs.
    # http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
    # See the malformed declarations section
    #
    # When +error_value+ opens a pair (quote, brace, bracket, parenthesis),
    # tokens are consumed up to and including the matching closer, which is
    # then replaced by a whitespace token for the grammar to resume on.
    def eliminate_pair_matches(error_value)
      pairs = {
        '"' => '"',
        "'" => "'",
        '{' => '}',
        '[' => ']',
        '(' => ')',
      }

      error_value.to_s.strip!
      if pairs[error_value]
        logger.warn("Eliminating pair for: #{error_value}") if logger
        loop {
          token = next_token
          # next_token keeps answering [false, false] once the input is
          # exhausted; without this guard an unterminated pair would spin
          # here forever.
          break unless token[0]
          eliminate_pair_matches(token[1])
          logger.warn("Eliminated token: #{token.join(' ')}") if logger
          if token[1] == pairs[error_value]
            @position -= 1
            @tokens[@position] = Token.new(:S, ' ', nil) # super hack
            break
          end
        }
      end
    end

    # Error hook: logs the offending token (when a logger is set) and then
    # skips over any pair the error token opened.
    def on_error(error_token_id, error_value, value_stack)
      if logger
        logger.error(token_to_str(error_token_id))
        logger.error("error value: #{error_value}")
      end
      eliminate_pair_matches(error_value)
    end

    # Supplies the next token to the grammar. Comment tokens are reported
    # to the document handler and skipped. Returns [false, false] at end of
    # input.
    def next_token
      return [false, false] if @position >= @tokens.length

      n_token = @tokens[@position]
      @position += 1
      if n_token.name == :COMMENT
        self.document_handler.comment(n_token.value)
        return next_token
      end
      n_token.to_racc_token
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require "antisamy/csspool/rsac/sac/generated_property_parser"
2
+
3
module RSAC
  # Parser for property values on top of GeneratedPropertyParser. Tokens
  # are handed over in one batch via parse_tokens.
  class PropertyParser < RSAC::GeneratedPropertyParser
    def initialize
      @tokens = []
      @token_table = Racc_arg[10]
    end

    # Converts +tokens+ into token pairs and runs the grammar over them.
    #
    # * :S tokens are dropped.
    # * A token whose value is a key of the token table is passed as
    #   [value, value].
    # * Any other :delim token is dropped; a dropped "-" is remembered and
    #   prefixed to the value of the next token that is kept (unary minus).
    # * Everything else is passed as token.to_racc_token.
    #
    # Returns the parse result, or nil when the grammar raises ParseError.
    def parse_tokens(tokens)
      pending_minus = false # Nasty hack for unary minus
      prepared = []

      tokens.each do |token|
        next if token.name == :S

        racc_token =
          if @token_table.has_key?(token.value)
            [token.value, token.value]
          elsif token.name == :delim
            pending_minus = true if token.value == '-'
            nil
          else
            token.to_racc_token
          end
        next unless racc_token

        if pending_minus
          racc_token[1] = "-#{racc_token[1]}"
          pending_minus = false
        end
        prepared << racc_token
      end
      @tokens = prepared

      begin
        do_parse
      rescue ParseError
        nil
      end
    end

    private

    # Hands the grammar the next queued token, or [false, false] once the
    # queue is empty.
    def next_token
      @tokens.empty? ? [false, false] : @tokens.shift
    end
  end
end
@@ -0,0 +1,36 @@
1
module RSAC
  module Selectors
    # Selector for the child combinator: "parent > selector".
    class ChildSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector
      alias_method :parent, :ancestor_selector
      alias_method :selector, :simple_selector

      # parent   - selector on the left-hand side of ">".
      # selector - selector on the right-hand side of ">".
      def initialize(parent, selector)
        super(:SAC_CHILD_SELECTOR)

        @ancestor_selector, @simple_selector = parent, selector
      end

      def to_css
        left = parent.to_css
        right = selector.to_css
        "#{left} > #{right}"
      end

      # Only the left-hand side may carry the leading "//" prefix; the
      # right-hand side is always rendered without it.
      def to_xpath(prefix=true)
        left = parent.to_xpath(prefix)
        right = selector.to_xpath(false)
        "#{left}/#{right}"
      end

      # Element-wise sum of both sides' specificity arrays.
      def specificity
        parent.specificity.zip(selector.specificity).map { |pair| pair.inject(:+) }
      end

      def ==(other)
        return false unless super
        parent == other.parent && selector == other.selector
      end

      def hash
        [parent, selector].hash
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module RSAC
  module Selectors
    # A selector combined with a condition. Either part may be nil.
    class ConditionalSelector < SimpleSelector
      attr_accessor :condition, :simple_selector
      alias :selector :simple_selector

      # selector  - the selector the condition applies to (may be nil).
      # condition - the condition object (may be nil).
      def initialize(selector, condition)
        super(:SAC_CONDITIONAL_SELECTOR)

        @condition        = condition
        @simple_selector  = selector
      end

      # Concatenates the CSS of both parts; a nil part contributes nothing.
      def to_css
        [selector, condition].map { |x|
          x ? x.to_css : ''
        }.join('')
      end

      # prefix - when true the expression starts with "//".
      #
      # A missing selector is rendered as "*". A missing condition now
      # contributes nothing, matching how to_css and specificity already
      # treat nil (previously this raised NoMethodError on nil).
      def to_xpath(prefix=true)
        atoms = []
        atoms << "//" if prefix
        atoms << (selector ? selector.to_xpath(false) : "*")
        atoms << condition.to_xpath if condition

        atoms.join("")
      end

      # Element-wise sum of the specificity of both parts; a nil part counts
      # as [0, 0, 0, 0].
      def specificity
        (selector ? selector.specificity : ([0] * 4)).zip(
          (condition ? condition.specificity : ([0] * 4))).map { |x,y|
          x + y
        }
      end

      def ==(other)
        super && condition == other.condition && selector == other.selector
      end

      def hash
        [condition, selector].hash
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module RSAC
  module Selectors
    # Selector for the descendant combinator: "ancestor selector".
    class DescendantSelector < SimpleSelector
      attr_accessor :ancestor_selector, :simple_selector
      alias_method :ancestor, :ancestor_selector
      alias_method :selector, :simple_selector

      # ancestor - selector on the left-hand side.
      # selector - selector on the right-hand side.
      def initialize(ancestor, selector)
        super(:SAC_DESCENDANT_SELECTOR)

        @ancestor_selector, @simple_selector = ancestor, selector
      end

      def to_css
        left = ancestor.to_css
        right = selector.to_css
        "#{left} #{right}"
      end

      # Only the left-hand side may carry the leading "//" prefix; the two
      # sides are joined by "//".
      def to_xpath(prefix=true)
        left = ancestor.to_xpath(prefix)
        right = selector.to_xpath(false)
        "#{left}//#{right}"
      end

      # Element-wise sum of both sides' specificity arrays.
      def specificity
        ancestor.specificity.zip(selector.specificity).map { |pair| pair.inject(:+) }
      end

      def ==(other)
        return false unless super
        selector == other.selector && ancestor == other.ancestor
      end

      def hash
        [selector, ancestor].hash
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module RSAC
  module Selectors
    # Selector that matches elements by name.
    class ElementSelector < SimpleSelector
      attr_reader :local_name
      alias_method :name, :local_name

      # name - the element name to match.
      def initialize(name)
        super(:SAC_ELEMENT_NODE_SELECTOR)
        @local_name = name
      end

      def to_css
        local_name
      end

      # prefix - when true the element name is preceded by "//".
      def to_xpath(prefix=true)
        prefix ? "//#{local_name}" : "#{local_name}"
      end

      def specificity
        [0, 0, 0, 1]
      end

      def ==(other)
        return false unless super
        name == other.name
      end

      def hash
        name.hash
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module RSAC
  module Selectors
    # Root of the selector hierarchy: stores the selector type and derives
    # equality and hashing from it.
    class Selector
      attr_reader :selector_type

      # selector_type - value identifying the kind of selector.
      def initialize(selector_type)
        @selector_type = selector_type
      end

      # Equal when +other+ is an instance of the receiver's class (or of a
      # subclass) with the same selector_type.
      def ==(other)
        return false unless self.class === other
        selector_type == other.selector_type
      end

      def eql?(other)
        self == other
      end

      def hash
        selector_type.hash
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module RSAC
  module Selectors
    # Selector for the adjacent-sibling combinator: "selector + sibling".
    class SiblingSelector < SimpleSelector
      attr_accessor :selector, :sibling_selector
      alias_method :sibling, :sibling_selector

      # selector - selector on the left-hand side of "+".
      # sibling  - selector on the right-hand side of "+".
      def initialize(selector, sibling)
        super(:SAC_DIRECT_ADJACENT_SELECTOR)

        @selector, @sibling_selector = selector, sibling
      end

      def to_css
        left = selector.to_css
        right = sibling.to_css
        "#{left} + #{right}"
      end

      # Only the left-hand side may carry the leading "//" prefix; the
      # right-hand side follows the following-sibling axis.
      def to_xpath(prefix=true)
        left = selector.to_xpath(prefix)
        right = sibling.to_xpath(false)
        "#{left}/following-sibling::#{right}"
      end

      # Element-wise sum of both sides' specificity arrays.
      def specificity
        selector.specificity.zip(sibling.specificity).map { |pair| pair.inject(:+) }
      end

      def ==(other)
        return false unless super
        selector == other.selector && sibling == other.sibling
      end

      def hash
        [selector, sibling].hash
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module RSAC
  module Selectors
    # The universal selector ("*") and the common superclass of the other
    # selector classes in this namespace.
    class SimpleSelector < Selector
      # selector_type - defaults to :SAC_ANY_NODE_SELECTOR; subclasses pass
      #                 their own type.
      def initialize(selector_type=:SAC_ANY_NODE_SELECTOR)
        super(selector_type)
      end

      def to_css
        '*'
      end

      # prefix - when true (the default) the expression starts with "//".
      #
      # The combinator selectors call to_xpath(false) on their right-hand
      # side; this method used to accept no argument, so a universal
      # selector in that position raised ArgumentError. Calling it without
      # arguments still returns "//*".
      def to_xpath(prefix=true)
        prefix ? "//*" : "*"
      end

      def specificity
        [0, 0, 0, 0]
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require "antisamy/csspool/rsac/sac/selectors/selector"
2
+
3
+ %w(simple child conditional descendant element sibling).each do |type|
4
+ require "antisamy/csspool/rsac/sac/selectors/#{type}_selector"
5
+ end
@@ -0,0 +1,25 @@
1
module RSAC
  # A lexed token: a name, the matched text and where it was found.
  class Token
    attr_reader :name, :value, :position

    # name     - token name.
    # value    - token text.
    # position - location of the token; stored as given.
    def initialize(name, value, position)
      @name, @value, @position = name, value, position
    end

    # Returns the [name, value] pair for this token.
    def to_racc_token
      [name, value]
    end
  end

  # A token whose name is always :delim and whose pair form uses its own
  # text in both slots.
  class DelimiterToken < Token
    def initialize(value, position)
      super(:delim, value, position)
    end

    # Returns [value, value].
    def to_racc_token
      text = value
      [text, text]
    end
  end
end