antisamy 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,43 +1,47 @@
1
- module AntiSamy
2
- class Scanner
3
- attr_accessor :policy, :errors, :nofollow, :pae
4
- DEFAULT_ENCODE = "UTF-8"
5
- ALLOW_EMPTY = %w[br hr a img link iframe script object applet frame base param meta input textarea embed basefont col]
6
- # Setup a basic param tag rule
7
- begin
8
- name_attr = Attribute.new("name")
9
- value_attr = Attribute.new("value")
10
- name_attr.expressions << /.*/
11
- value_attr.expressions << /.*/
12
- @@basic_param_tag_rule = Tag.new("param")
13
- @@basic_param_tag_rule << name_attr
14
- @@basic_param_tag_rule << value_attr
15
- @@basic_param_tag_rule.action = Policy::ACTION_VALIDATE
16
- end
17
-
18
- # Create a scanner with a given policy
19
- def initialize(policy)
20
- @policy = policy
21
- @errors = []
22
- end
23
-
24
- # Scan the input using the provided input and output encoding
25
- # will raise an error if nil input or the maximum input size is exceeded
26
- def scan(input, input_encode, output_encoder)
27
- raise ArgumentError if input.nil?
28
- raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
29
- # check poilcy stuff
30
- handler = Handler.new(@policy,output_encoder)
31
- scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
32
- parser = Nokogiri::HTML::SAX::Parser.new(scanner,input_encode)
33
- #parser.parse(input)
34
- parser.parse(input) do |ctx|
35
- ctx.replace_entities = true
36
- end
37
- results = ScanResults.new(Time.now)
38
- results.clean_html = handler.document
39
- results.messages = handler.errors
40
- results
41
- end
42
- end
43
- end
1
+ module AntiSamy
2
+ class Scanner
3
+ attr_accessor :policy, :errors, :nofollow, :pae
4
+ DEFAULT_ENCODE = "UTF-8"
5
+ ALLOW_EMPTY = %w[br hr a img link iframe script object applet frame base param meta input textarea embed basefont col]
6
+ # Setup a basic param tag rule
7
+ begin
8
+ name_attr = Attribute.new("name")
9
+ value_attr = Attribute.new("value")
10
+ name_attr.expressions << /.*/
11
+ value_attr.expressions << /.*/
12
+ @@basic_param_tag_rule = Tag.new("param")
13
+ @@basic_param_tag_rule << name_attr
14
+ @@basic_param_tag_rule << value_attr
15
+ @@basic_param_tag_rule.action = Policy::ACTION_VALIDATE
16
+ end
17
+
18
+ # Create a scanner with a given policy
19
+ def initialize(policy)
20
+ @policy = policy
21
+ @errors = []
22
+ end
23
+
24
+ # Scan the input using the provided input and output encoding
25
+ # will raise an error if nil input or the maximum input size is exceeded
26
+ def scan(input, input_encode, output_encoder)
27
+ raise ArgumentError if input.nil?
28
+ raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
29
+ fragment = true
30
+ if input =~ /\<\s?html\s?.*?\>|DOCTYPE/im
31
+ fragment = false
32
+ end
33
+ # check poilcy stuff
34
+ handler = Handler.new(@policy,output_encoder,fragment)
35
+ scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule,fragment)
36
+ parser = Nokogiri::HTML::SAX::Parser.new(scanner,input_encode)
37
+ #parser.parse(input)
38
+ parser.parse(input) do |ctx|
39
+ ctx.replace_entities = true
40
+ end
41
+ results = ScanResults.new(Time.now)
42
+ results.clean_html = handler.document
43
+ results.messages = handler.errors
44
+ results
45
+ end
46
+ end
47
+ end
@@ -1,19 +1,19 @@
1
- module AntiSamy
2
- # A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in
3
- # order to be considered valid. This is a simple container class
4
- class Attribute
5
- attr_accessor :name, :description, :action, :values, :expressions
6
- ACTION_REMOVE_TAG = "removeTag"
7
- ACTION_FILTER_TAG = "filterTag"
8
- ACTION_ENCODE_TAG = "encodeTag"
9
- ACTION_REMOVE_ATTRIB = "removeAttribute"
10
- # Create a new attribute
11
- def initialize(name)
12
- @name = name
13
- @description = nil
14
- @action = nil
15
- @values = []
16
- @expressions = []
17
- end
18
- end
19
- end
1
+ module AntiSamy
2
+ # A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in
3
+ # order to be considered valid. This is a simple container class
4
+ class Attribute
5
+ attr_accessor :name, :description, :action, :values, :expressions
6
+ ACTION_REMOVE_TAG = "removeTag"
7
+ ACTION_FILTER_TAG = "filterTag"
8
+ ACTION_ENCODE_TAG = "encodeTag"
9
+ ACTION_REMOVE_ATTRIB = "removeAttribute"
10
+ # Create a new attribute
11
+ def initialize(name)
12
+ @name = name
13
+ @description = nil
14
+ @action = nil
15
+ @values = []
16
+ @expressions = []
17
+ end
18
+ end
19
+ end
@@ -1,39 +1,39 @@
1
- module AntiSamy
2
- # A model for CSS properties and the "rules" they must follow (either literals
3
- # or regular expressions) in order to be considered valid.
4
- class CssProperty
5
- attr_accessor :name, :description, :action, :values, :expressions, :refs, :catagories
6
-
7
- # Create a new property
8
- def initialize(name)
9
- @name = name
10
- @description = nil
11
- @values = []
12
- @expressions = []
13
- @refs = []
14
- @categories = []
15
- @action = nil
16
- end
17
-
18
- # Add a literal value to this property
19
- def add_value(value)
20
- @values << value
21
- end
22
-
23
- # Add a regular expression to this property
24
- def add_expression(exp)
25
- @expressions << exp
26
- end
27
-
28
- # Add a shorthand reference to this property
29
- def add_ref(ref)
30
- @refs << ref
31
- end
32
-
33
- # Add a category to this property
34
- def add_category(cat)
35
- @categories << cat
36
- end
37
-
38
- end
39
- end
1
+ module AntiSamy
2
+ # A model for CSS properties and the "rules" they must follow (either literals
3
+ # or regular expressions) in order to be considered valid.
4
+ class CssProperty
5
+ attr_accessor :name, :description, :action, :values, :expressions, :refs, :catagories
6
+
7
+ # Create a new property
8
+ def initialize(name)
9
+ @name = name
10
+ @description = nil
11
+ @values = []
12
+ @expressions = []
13
+ @refs = []
14
+ @categories = []
15
+ @action = nil
16
+ end
17
+
18
+ # Add a literal value to this property
19
+ def add_value(value)
20
+ @values << value
21
+ end
22
+
23
+ # Add a regular expression to this property
24
+ def add_expression(exp)
25
+ @expressions << exp
26
+ end
27
+
28
+ # Add a shorthand reference to this property
29
+ def add_ref(ref)
30
+ @refs << ref
31
+ end
32
+
33
+ # Add a category to this property
34
+ def add_category(cat)
35
+ @categories << cat
36
+ end
37
+
38
+ end
39
+ end
@@ -1,31 +1,31 @@
1
- module AntiSamy
2
- # A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information
3
- # about their allowed attributes.
4
- class Tag
5
- # Name and Action fields. Actions determine what we do when we see this tag
6
- attr_accessor :name, :action
7
-
8
- # Create a new Tag object
9
- def initialize(name)
10
- @name = name
11
- @action = action
12
- @allowed_attributes = {}
13
- end
14
-
15
- # Add an attribute to this property
16
- def <<(attribute)
17
- @allowed_attributes[attribute.name.downcase] = attribute
18
- end
19
-
20
- # fetch the map of attributes
21
- def attributes
22
- @allowed_attributes
23
- end
24
-
25
- # Fetch a property by name form this tag
26
- def attribute(name)
27
- @allowed_attributes[name]
28
- end
29
-
30
- end
31
- end
1
+ module AntiSamy
2
+ # A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information
3
+ # about their allowed attributes.
4
+ class Tag
5
+ # Name and Action fields. Actions determine what we do when we see this tag
6
+ attr_accessor :name, :action
7
+
8
+ # Create a new Tag object
9
+ def initialize(name)
10
+ @name = name
11
+ @action = action
12
+ @allowed_attributes = {}
13
+ end
14
+
15
+ # Add an attribute to this property
16
+ def <<(attribute)
17
+ @allowed_attributes[attribute.name.downcase] = attribute
18
+ end
19
+
20
+ # fetch the map of attributes
21
+ def attributes
22
+ @allowed_attributes
23
+ end
24
+
25
+ # Fetch a property by name form this tag
26
+ def attribute(name)
27
+ @allowed_attributes[name]
28
+ end
29
+
30
+ end
31
+ end
@@ -1,545 +1,577 @@
1
- require 'stringio'
2
-
3
- module AntiSamy
4
-
5
- # Schema validation Error
6
- class SchemaError < StandardError; end
7
- # Policy validation error
8
- class PolicyError < StandardError; end
9
-
10
- # Model for our policy engine.
11
- # the XSD for AntiSammy is stored in this file after the END section
12
- class Policy
13
- attr_accessor :max_input
14
- # We allow these tags to be empty
15
- ALLOWED_EMPTY = ["br", "hr", "a", "img", "link", "iframe", "script", "object", "applet", "frame", "base", "param", "meta", "input", "textarea", "embed", "basefont", "col"]
16
- # *Actions*
17
- ACTION_FILTER = "filter"
18
- ACTION_TRUNCATE = "truncate"
19
- ACTION_VALIDATE = "validate"
20
- ACTION_REMOVE = "remove"
21
- ACTION_ENCODE = "encode"
22
- # Anything regular express
23
- ANYTHING_REGEX = /.*/
24
- # AntiSammy XSD constants
25
- DEFAULT_ONINVALID = "removeAttribute"
26
- # Directive Name Constants
27
- OMIT_XML_DECL = "omitXmlDeclaration"
28
- OMIT_DOC_TYPE = "omitDoctypeDeclaration"
29
- MAX_INPUT = "maxInputSize"
30
- USE_XHTML = "userXHTML"
31
- FORMAT_OUTPUT = "formatOutput"
32
- # will we allow embedded style sheets
33
- EMBED_STYLESHEETS = "embedStyleSheets"
34
- # Connection timeout in miliseconds
35
- CONN_TIMEOUT = "conenctionTimeout"
36
- ANCHROS_NOFOLLOW = "nofollowAnchors"
37
- VALIDATE_P_AS_E = "validateParamAsEmbed"
38
- PRESERVE_SPACE = "preserveSpace"
39
- PRESERVE_COMMENTS = "preserveComments"
40
- ON_UNKNOWN_TAG = "onUnknownTag"
41
- MAX_SHEETS = "maxStyleSheetImports"
42
-
43
- # Class method to fetch the schema
44
- def self.schema
45
- data = StringIO.new
46
- File.open(__FILE__) do |f|
47
- begin
48
- line = f.gets
49
- end until line.match(/^__END__$/)
50
- while line = f.gets
51
- data << line
52
- end
53
- end
54
- data.rewind
55
- data.read
56
- end
57
-
58
- # Create a policy object.
59
- # You can pass in either:
60
- # * File path
61
- # * IO object
62
- # * String containing the policy XML
63
- # All policies will be validated against the builtin schema file and will raise
64
- # an Error if the policy doesnt conform to the schema
65
- def initialize(string_or_io)
66
- schema = Nokogiri::XML.Schema(Policy.schema)
67
- if string_or_io.respond_to?(:read)
68
- uri = string_or_io.read
69
- else
70
- if File.exists?(string_or_io)
71
- uri = IO.read(string_or_io)
72
- else
73
- uri = string_or_io
74
- end
75
- end
76
- doc = Nokogiri::XML.parse(uri)
77
- # We now have the Poolicy XML data lets parse it
78
- errors = schema.validate(doc)
79
- raise SchemaError, errors.join(",") if errors.size > 0
80
- @common_regex = {}
81
- @common_attrib = {}
82
- @tag_rules = {}
83
- @css_rules = {}
84
- @directives = Hash.new(false)
85
- @global_attrib = {}
86
- @encode_tags = []
87
- parse(doc)
88
- end
89
-
90
- # Get a particular directive
91
- def directive(name)
92
- @directives[name]
93
- end
94
-
95
- # Set a directive for the policy
96
- def []=(name,value)
97
- @directives[name] = value
98
- end
99
-
100
- # Get a global attribute
101
- def global(name)
102
- @global_attrib[name.downcase]
103
- end
104
-
105
- # Is the tag in the encode list
106
- def encode?(tag)
107
- @encode_tags.include?(tag)
108
- end
109
-
110
- # Return the tag rules
111
- def tags
112
- @tag_rules
113
- end
114
-
115
- # get a specific tag
116
- def tag(name)
117
- @tag_rules[name.downcase]
118
- end
119
-
120
- # return the css rules
121
- def properties
122
- @css_rules
123
- end
124
-
125
- # get a specific css rule
126
- def property(prop)
127
- @css_rules[prop.downcase]
128
- end
129
-
130
- # Get the list of attributes
131
- def attributes
132
- @common_attrib
133
- end
134
-
135
- # Get a specific attribute
136
- def attribute(name)
137
- @common_attrib[name.downcase]
138
- end
139
-
140
- # Get the list of expressions
141
- def expressions
142
- @common_regex
143
- end
144
-
145
- # Get a specific expression
146
- def expression(name)
147
- @common_regex[name]
148
- end
149
-
150
- private
151
- def make_re(p,context) #:nodoc:
152
- output = StringIO.open('','w')
153
- $stderr = output
154
- begin
155
- r = /#{p}/
156
- warning = output.string
157
- raise PolicyError, "context=#{context}, error=#{$1}, re=#{p}",caller(2) if warning =~ /warning: (.*)$/
158
- return r
159
- rescue RegexpError => e
160
- raise PolicyError, "context=#{context}, error=#{e.message} re=#{p}", caller(2)
161
- ensure
162
- $stderr = STDERR
163
- end
164
- end
165
-
166
- # Parse the Policy file
167
- def parse(node) # :nodoc:
168
- if node.children.nil? or node.children.last.nil?
169
- return
170
- end
171
- node.children.last.children.each do |section|
172
- if section.name.eql?("directives")
173
- process_directves(section)
174
- elsif section.name.eql?("common-regexps")
175
- process_common_regexps(section)
176
- elsif section.name.eql?("common-attributes")
177
- process_common_attributes(section)
178
- elsif section.name.eql?("global-tag-attributes")
179
- process_global_attributes(section)
180
- elsif section.name.eql?("tags-to-encode")
181
- process_tag_to_encode(section)
182
- elsif section.name.eql?("tag-rules")
183
- process_tag_rules(section)
184
- elsif section.name.eql?("css-rules")
185
- process_css_rules(section)
186
- end
187
- end
188
- end
189
-
190
- # process the directives section
191
- def process_directves(section) # :nodoc:
192
- # skip if we had no section
193
- return if section.element_children.nil?
194
- # process the rules
195
- section.element_children.each do |dir|
196
- name = dir["name"]
197
- value = dir["value"]
198
- if name.eql?("maxInputSize")
199
- @max_input = value.to_i
200
- else
201
- if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
202
- value = value.to_i
203
- elsif value =~ /true/i
204
- value = true
205
- else
206
- value = false
207
- end
208
- @directives[name] = value
209
- end
210
- end
211
- end
212
-
213
- # process the <common-regexp> section
214
- def process_common_regexps(section) # :nodoc:
215
- # skip if we had no section
216
- return if section.element_children.nil?
217
- section.element_children.each do |re|
218
- @common_regex[re["name"]] = make_re(re["value"],"common-regex(#{re['name']})")
219
- end
220
- end
221
-
222
- # Helper method to process a literal and regex section
223
- def process_attr_lists(att,node,exception) # :nodoc:
224
- node.element_children.each do |el|
225
- if el.name.eql?("regexp-list")
226
- if el.element_children
227
- el.element_children.each do |re|
228
- v = re["value"]
229
- n = re["name"]
230
- if n and !n.empty?
231
- if @common_regex[n].nil?
232
- raise PolicyError, "regex #{n} in #{exception} but wasnt found in <common-regex>"
233
- else
234
- att.expressions << expression(n)
235
- end
236
- else
237
- att.expressions << make_re(v,exception)
238
- end
239
- end
240
- end
241
- elsif el.name.eql?("literal-list")
242
- if el.element_children
243
- el.element_children.each do |re|
244
- v = re["value"]
245
- if v and !v.empty?
246
- att.values << v
247
- else
248
- if re.child and re.child.text?
249
- att.values << re.child.content
250
- end
251
- end
252
- end
253
- end
254
- end
255
- end
256
- end
257
-
258
- # Process the <common-attributes> section
259
- def process_common_attributes(section) # :nodoc:
260
- # skip if we had no section
261
- return if section.element_children.nil?
262
- section.element_children.each do |val|
263
- invalid = val["onInvalid"]
264
- name = val["name"]
265
- desc = val["description"]
266
- att = Attribute.new(name)
267
- att.description = desc
268
- att.action = (invalid.nil? or invalid.empty?) ? DEFAULT_ONINVALID : invalid
269
- return if val.element_children.nil?
270
- process_attr_lists(att,val,"common-attribute(#{name})")
271
- @common_attrib[name.downcase] = att
272
- end
273
- end
274
-
275
- # Process the <global-attributes> section
276
- def process_global_attributes(section) # :nodoc:
277
- # skip if we had no section
278
- return if section.element_children.nil?
279
- section.element_children.each do |ga|
280
- name = ga["name"]
281
- att = @common_attrib[name]
282
- raise PolicyError, "global attribute #{name} was not defined in <common-attributes>" if att.nil?
283
- @global_attrib[name.downcase] = att
284
- end
285
- end
286
-
287
- # process the <tag-to-encode> section
288
- def process_tag_to_encode(section) # :nodoc:
289
- # skip if we had no section
290
- return if section.element_children.nil?
291
- section.element_children.each do |tag|
292
- if tag.child and tag.child.text?
293
- @encode_tags << tag.child.content.downcase
294
- end
295
- end
296
- end
297
-
298
- # Process the <tag-ruls> section
299
- def process_tag_rules(section) # :nodoc:
300
- return if section.element_children.nil?
301
- section.element_children.each do |tx|
302
- name = tx["name"]
303
- action = tx["action"]
304
- t = Tag.new(name)
305
- t.action = action
306
- # Add attributes
307
- if tx.element_children
308
- tx.element_children.each do |tc|
309
- catt = @common_attrib[tc["name"]]
310
- if catt # common attrib with value override
311
- act = tc["onInvalid"]
312
- dec = tc["description"]
313
- ncatt = catt.dup
314
- ncatt.action = act unless act.nil? or act.empty?
315
- ncatt.description = dec unless dec.nil? or dec.empty?
316
- t<< ncatt
317
- else
318
- att = Attribute.new(tc["name"])
319
- att.action = tc["onInvalid"]
320
- att.description = tc["description"]
321
- process_attr_lists(att,tc," tag-rules(#{name})")
322
- t<< att
323
- end
324
- end
325
- end
326
- # End add attributes
327
- @tag_rules[name.downcase] = t
328
- end
329
- end
330
-
331
- # Process the <css-rules> section
332
- def process_css_rules(section) # :nodoc:
333
- return if section.element_children.nil?
334
- section.element_children.each do |css|
335
- name = css["name"]
336
- desc = css["description"]
337
- action = css["onInvalid"]
338
- if action.nil? or action.empty?
339
- action = DEFAULT_ONINVALID
340
- end
341
- prop = CssProperty.new(name)
342
- prop.action = action
343
- prop.description = desc
344
- # Process regex, listerals and shorthands
345
- if css.element_children
346
- css.element_children.each do |child|
347
- empty = child.element_children.nil?
348
- # Regex
349
- if child.name.eql?("regexp-list")
350
- unless empty
351
- child.element_children.each do |re|
352
- re_name = re["name"]
353
- re_value = re["value"]
354
- gre = expression(re_name)
355
- if gre
356
- prop.add_expression(gre)
357
- elsif re_value and !re_value.empty?
358
- prop.add_expression(make_re(re_value,"css-rule(#{name})"))
359
- else
360
- raise PolicyError, "#{re_name} was referenced in CSS rule #{name} but wasnt found in <common-regexp>"
361
- end
362
- end
363
- end
364
- elsif child.name.eql?("literal-list") # literals
365
- unless empty
366
- child.element_children.each do |li|
367
- prop.add_value(li["value"]) if li["value"]
368
- end
369
- end
370
- elsif child.name.eql?("category-list") # literals
371
- unless empty
372
- child.element_children.each do |li|
373
- prop.add_category(li["value"]) if li["value"]
374
- end
375
- end
376
-
377
- elsif child.name.eql?("shorthand-list") # refs
378
- unless empty
379
- child.element_children.each do |sl|
380
- prop.add_ref(sl["name"]) if sl["name"]
381
- end
382
- end
383
- end
384
- end
385
- end
386
- @css_rules[name.downcase] = prop
387
- end
388
- end
389
- end
390
- end
391
-
392
-
393
- __END__
394
- <?xml version="1.0" encoding="UTF-8"?>
395
- <xsd:schema
396
- xmlns:xsd="http://www.w3.org/2001/XMLSchema">
397
- <xsd:element name="anti-samy-rules">
398
- <xsd:complexType>
399
- <xsd:sequence>
400
- <xsd:element name="directives" type="Directives" maxOccurs="1" minOccurs="1"/>
401
- <xsd:element name="common-regexps" type="CommonRegexps" maxOccurs="1" minOccurs="1"/>
402
- <xsd:element name="common-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
403
- <xsd:element name="global-tag-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
404
- <xsd:element name="tags-to-encode" type="TagsToEncodeList" minOccurs="0" maxOccurs="1"/>
405
- <xsd:element name="tag-rules" type="TagRules" minOccurs="1" maxOccurs="1"/>
406
- <xsd:element name="css-rules" type="CSSRules" minOccurs="1" maxOccurs="1"/>
407
- </xsd:sequence>
408
- </xsd:complexType>
409
- </xsd:element>
410
- <xsd:complexType name="Directives">
411
- <xsd:sequence maxOccurs="unbounded">
412
- <xsd:element name="directive" type="Directive" minOccurs="0"/>
413
- </xsd:sequence>
414
- </xsd:complexType>
415
- <xsd:complexType name="Directive">
416
- <xsd:attribute name="name" use="required">
417
- <xsd:simpleType>
418
- <xsd:restriction base="xsd:string">
419
- <xsd:enumeration value="omitXmlDeclaration"/>
420
- <xsd:enumeration value="omitDoctypeDeclaration"/>
421
- <xsd:enumeration value="maxInputSize"/>
422
- <xsd:enumeration value="useXHTML"/>
423
- <xsd:enumeration value="embedStyleSheets"/>
424
- <xsd:enumeration value="maxStyleSheetImports"/>
425
- <xsd:enumeration value="connectionTimeout"/>
426
- <xsd:enumeration value="nofollowAnchors"/>
427
- <xsd:enumeration value="validateParamAsEmbed"/>
428
- <xsd:enumeration value="preserveComments"/>
429
- <xsd:enumeration value="preserveSpace"/>
430
- <xsd:enumeration value="onUnknownTag"/>
431
- <xsd:enumeration value="formatOutput"/>
432
- </xsd:restriction>
433
- </xsd:simpleType>
434
- </xsd:attribute>
435
- <xsd:attribute name="value" use="required"/>
436
- </xsd:complexType>
437
- <xsd:complexType name="CommonRegexps">
438
- <xsd:sequence maxOccurs="unbounded">
439
- <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
440
- </xsd:sequence>
441
- </xsd:complexType>
442
- <xsd:complexType name="AttributeList">
443
- <xsd:sequence maxOccurs="unbounded">
444
- <xsd:element name="attribute" type="Attribute" minOccurs="0"/>
445
- </xsd:sequence>
446
- </xsd:complexType>
447
- <xsd:complexType name="TagsToEncodeList">
448
- <xsd:sequence maxOccurs="unbounded">
449
- <xsd:element name="tag" minOccurs="0"/>
450
- </xsd:sequence>
451
- </xsd:complexType>
452
- <xsd:complexType name="TagRules">
453
- <xsd:sequence maxOccurs="unbounded">
454
- <xsd:element name="tag" type="Tag" minOccurs="0"/>
455
- </xsd:sequence>
456
- </xsd:complexType>
457
- <xsd:complexType name="Tag">
458
- <xsd:sequence maxOccurs="unbounded">
459
- <xsd:element name="attribute" type="Attribute" minOccurs="0" />
460
- </xsd:sequence>
461
- <xsd:attribute name="name" use="required"/>
462
- <xsd:attribute name="action" use="required">
463
- <xsd:simpleType>
464
- <xsd:restriction base="xsd:string">
465
- <xsd:enumeration value="validate"/>
466
- <xsd:enumeration value="truncate"/>
467
- <xsd:enumeration value="remove"/>
468
- <xsd:enumeration value="filter"/>
469
- <xsd:enumeration value="encode"/>
470
- </xsd:restriction>
471
- </xsd:simpleType>
472
- </xsd:attribute>
473
- </xsd:complexType>
474
- <xsd:complexType name="Attribute">
475
- <xsd:sequence>
476
- <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
477
- <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
478
- </xsd:sequence>
479
- <xsd:attribute name="name" use="required"/>
480
- <xsd:attribute name="description"/>
481
- <xsd:attribute name="onInvalid">
482
- <xsd:simpleType>
483
- <xsd:restriction base="xsd:string">
484
- <xsd:enumeration value="removeTag"/>
485
- <xsd:enumeration value="filterTag"/>
486
- <xsd:enumeration value="encodeTag"/>
487
- <xsd:enumeration value="removeAttribute"/>
488
- </xsd:restriction>
489
- </xsd:simpleType>
490
- </xsd:attribute>
491
- </xsd:complexType>
492
- <xsd:complexType name="RegexpList">
493
- <xsd:sequence maxOccurs="unbounded">
494
- <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
495
- </xsd:sequence>
496
- </xsd:complexType>
497
- <xsd:complexType name="RegExp">
498
- <xsd:attribute name="name" type="xsd:string"/>
499
- <xsd:attribute name="value" type="xsd:string"/>
500
- </xsd:complexType>
501
- <xsd:complexType name="LiteralList">
502
- <xsd:sequence maxOccurs="unbounded">
503
- <xsd:element name="literal" type="Literal" minOccurs="0"/>
504
- </xsd:sequence>
505
- </xsd:complexType>
506
- <xsd:complexType name="Literal">
507
- <xsd:attribute name="value" type="xsd:string"/>
508
- </xsd:complexType>
509
- <xsd:complexType name="CSSRules">
510
- <xsd:sequence maxOccurs="unbounded">
511
- <xsd:element name="property" type="Property" minOccurs="0"/>
512
- </xsd:sequence>
513
- </xsd:complexType>
514
- <xsd:complexType name="Property">
515
- <xsd:sequence>
516
- <xsd:element name="category-list" type="CategoryList" minOccurs="0"/>
517
- <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
518
- <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
519
- <xsd:element name="shorthand-list" type="ShorthandList" minOccurs="0"/>
520
- </xsd:sequence>
521
- <xsd:attribute name="name" type="xsd:string" use="required"/>
522
- <xsd:attribute name="default" type="xsd:string"/>
523
- <xsd:attribute name="description" type="xsd:string"/>
524
- </xsd:complexType>
525
- <xsd:complexType name="ShorthandList">
526
- <xsd:sequence maxOccurs="unbounded">
527
- <xsd:element name="shorthand" type="Shorthand" minOccurs="0"/>
528
- </xsd:sequence>
529
- </xsd:complexType>
530
- <xsd:complexType name="Shorthand">
531
- <xsd:attribute name="name" type="xsd:string" use="required"/>
532
- </xsd:complexType>
533
- <xsd:complexType name="CategoryList">
534
- <xsd:sequence maxOccurs="unbounded">
535
- <xsd:element name="category" type="Category" minOccurs="0"/>
536
- </xsd:sequence>
537
- </xsd:complexType>
538
- <xsd:complexType name="Category">
539
- <xsd:attribute name="value" type="xsd:string" use="required"/>
540
- </xsd:complexType>
541
- <xsd:complexType name="Entity">
542
- <xsd:attribute name="name" type="xsd:string" use="required"/>
543
- <xsd:attribute name="cdata" type="xsd:string" use="required"/>
544
- </xsd:complexType>
545
- </xsd:schema>
1
+ require 'stringio'
2
+
3
+ module AntiSamy
4
+
5
+ # Schema validation Error
6
+ class SchemaError < StandardError; end
7
+ # Policy validation error
8
+ class PolicyError < StandardError; end
9
+
10
+ # Model for our policy engine.
11
+ # the XSD for AntiSammy is stored in this file after the END section
12
+ class Policy
13
+ attr_accessor :max_input
14
+ # We allow these tags to be empty
15
+ ALLOWED_EMPTY = ["br", "hr", "a", "img", "link", "iframe", "script", "object", "applet", "frame", "base", "param", "meta", "input", "textarea", "embed", "basefont", "col"]
16
+ # *Actions*
17
+ ACTION_FILTER = "filter"
18
+ ACTION_TRUNCATE = "truncate"
19
+ ACTION_VALIDATE = "validate"
20
+ ACTION_REMOVE = "remove"
21
+ ACTION_ENCODE = "encode"
22
+ # Anything regular express
23
+ ANYTHING_REGEX = /.*/
24
+ # AntiSammy XSD constants
25
+ DEFAULT_ONINVALID = "removeAttribute"
26
+ # Directive Name Constants
27
+ OMIT_XML_DECL = "omitXmlDeclaration"
28
+ OMIT_DOC_TYPE = "omitDoctypeDeclaration"
29
+ MAX_INPUT = "maxInputSize"
30
+ USE_XHTML = "userXHTML"
31
+ FORMAT_OUTPUT = "formatOutput"
32
+ # will we allow embedded style sheets
33
+ EMBED_STYLESHEETS = "embedStyleSheets"
34
+ # Connection timeout in miliseconds
35
+ CONN_TIMEOUT = "conenctionTimeout"
36
+ ANCHORS_NOFOLLOW = "nofollowAnchors"
37
+ VALIDATE_P_AS_E = "validateParamAsEmbed"
38
+ PRESERVE_SPACE = "preserveSpace"
39
+ PRESERVE_COMMENTS = "preserveComments"
40
+ ON_UNKNOWN_TAG = "onUnknownTag"
41
+ MAX_SHEETS = "maxStyleSheetImports"
42
+
43
+ # Class method to fetch the schema
44
+ def self.schema
45
+ data = StringIO.new
46
+ File.open(__FILE__) do |f|
47
+ begin
48
+ line = f.gets
49
+ end until line.match(/^__END__$/)
50
+ while line = f.gets
51
+ data << line
52
+ end
53
+ end
54
+ data.rewind
55
+ data.read
56
+ end
57
+
58
# Create a policy object.
# You can pass in either:
# * File path
# * IO object
# * String containing the policy XML
# Every policy is validated against the builtin schema; a SchemaError is
# raised if the policy does not conform to it.
def initialize(string_or_io)
  schema = Nokogiri::XML.Schema(Policy.schema)
  policy_xml =
    if string_or_io.respond_to?(:read)
      string_or_io.read
    elsif File.exist?(string_or_io)
      # File.exists? is gone from current Rubies; File.exist? is the supported
      # spelling. File.read avoids the special handling IO.read gives to
      # arguments that start with a pipe character.
      File.read(string_or_io)
    else
      string_or_io
    end
  doc = Nokogiri::XML.parse(policy_xml)
  # We now have the policy XML data, so validate it before parsing
  errors = schema.validate(doc)
  raise SchemaError, errors.join(",") if errors.size > 0
  @common_regex = {}
  @common_attrib = {}
  @tag_rules = {}
  @css_rules = {}
  # Directives that were never set read back as false
  @directives = Hash.new(false)
  @global_attrib = {}
  @encode_tags = []
  # Start from a private copy of the defaults so the constant is never mutated
  @allowed_empty = ALLOWED_EMPTY.dup
  parse(doc)
end
92
+
93
# Look up the value stored for the directive called +name+.
def directive(name)
  @directives[name]
end
97
+
98
# Store +value+ as the setting of the directive called +name+.
def []=(name, value)
  @directives.store(name, value)
end
102
+
103
# Fetch the global attribute registered under the lower-cased +name+.
def global(name)
  key = name.downcase
  @global_attrib[key]
end
107
+
108
# Is the tag in the encode list?
# Entries are stored lower-cased when the policy is parsed, so the lookup
# lower-cases +tag+ as well, matching how #tag and #allow_empty? behave.
def encode?(tag)
  @encode_tags.include?(tag.to_s.downcase)
end
112
+
113
# The hash holding every tag rule of this policy.
def tags
  @tag_rules
end
117
+
118
# Fetch the rule stored for the tag +name+; the lookup key is lower-cased.
def tag(name)
  key = name.downcase
  @tag_rules[key]
end
122
+
123
# The hash holding every CSS rule of this policy.
def properties
  @css_rules
end
127
+
128
# Fetch the CSS rule stored for +prop+; the lookup key is lower-cased.
def property(prop)
  key = prop.downcase
  @css_rules[key]
end
132
+
133
# The hash holding every common attribute of this policy.
def attributes
  @common_attrib
end
137
+
138
# Fetch the common attribute stored for +name+; the lookup key is lower-cased.
def attribute(name)
  key = name.downcase
  @common_attrib[key]
end
142
+
143
# The hash holding every common regular expression of this policy.
def expressions
  @common_regex
end
147
+
148
# Fetch the common regular expression stored under exactly +name+
# (the key is used as given, without case folding).
def expression(name)
  @common_regex[name]
end
152
+
153
# Is the lower-cased tag +name+ in the list of tags allowed to be empty?
def allow_empty?(name)
  key = name.downcase
  @allowed_empty.include?(key)
end
156
+
157
+ private
158
# Compile the pattern source +p+ into a Regexp.
# +context+ names the policy element the pattern came from and is only used
# in error messages.
# Raises PolicyError if the pattern is invalid, or if a "warning: ..." line
# was written to $stderr while it was being compiled.
def make_re(p, context) #:nodoc:
  # Remember whatever stream was installed so it can be put back afterwards;
  # resetting to STDERR would discard a redirection set up by the caller.
  previous_stderr = $stderr
  output = StringIO.new
  $stderr = output
  begin
    r = /#{p}/
    warning = output.string
    raise PolicyError, "context=#{context}, error=#{$1}, re=#{p}", caller(2) if warning =~ /warning: (.*)$/
    return r
  rescue RegexpError => e
    raise PolicyError, "context=#{context}, error=#{e.message} re=#{p}", caller(2)
  ensure
    $stderr = previous_stderr
  end
end
172
+
173
# Walk the children of the last child of +node+ and hand each recognised
# policy section to its handler. Sections with any other name are ignored.
def parse(node) # :nodoc:
  root = node.children && node.children.last
  return if root.nil?
  root.children.each do |section|
    case section.name
    when "directives"            then process_directves(section)
    when "common-regexps"        then process_common_regexps(section)
    when "common-attributes"     then process_common_attributes(section)
    when "global-tag-attributes" then process_global_attributes(section)
    when "tags-to-encode"        then process_tag_to_encode(section)
    when "tag-rules"             then process_tag_rules(section)
    when "css-rules"             then process_css_rules(section)
    when "allowed-empty-tags"    then process_empty_tags(section)
    end
  end
end
198
+
199
# Process the <allowed-empty-tags> section: every non-empty "value" of the
# children of each <literal-list> is lower-cased and appended to the list of
# tags allowed to be empty.
def process_empty_tags(section) # :nodoc:
  lists = section.element_children
  # skip if we had no section
  return if lists.nil?
  lists.each do |list|
    next unless list.name.eql?("literal-list")
    literals = list.element_children
    next unless literals
    literals.each do |literal|
      tag = literal["value"]
      next if tag.nil? || tag.empty?
      @allowed_empty << tag.downcase
    end
  end
end
215
# Process the <directives> section.
# maxInputSize is kept as an integer in @max_input; connectionTimeout and
# maxStyleSheetImports are stored as integers in @directives; every other
# directive is stored as true when its value contains "true" (ignoring case)
# and as false otherwise.
def process_directves(section) # :nodoc:
  entries = section.element_children
  # skip if we had no section
  return if entries.nil?
  entries.each do |entry|
    name = entry["name"]
    raw = entry["value"]
    case name
    when "maxInputSize"
      @max_input = raw.to_i
    when "connectionTimeout", "maxStyleSheetImports"
      @directives[name] = raw.to_i
    else
      @directives[name] = (raw =~ /true/i) ? true : false
    end
  end
end
237
+
238
# Process the <common-regexps> section: compile each entry's "value" and
# store the result under the entry's "name".
def process_common_regexps(section) # :nodoc:
  definitions = section.element_children
  # skip if we had no section
  return if definitions.nil?
  definitions.each do |definition|
    label = definition["name"]
    @common_regex[label] = make_re(definition["value"], "common-regex(#{label})")
  end
end
246
+
247
# Helper that fills +att+ from the <regexp-list> and <literal-list> children
# of +node+. +exception+ is the context text used in error messages.
# Raises PolicyError when a regexp entry names a common regex that is unknown.
def process_attr_lists(att, node, exception) # :nodoc:
  node.element_children.each do |list|
    case list.name
    when "regexp-list"
      entries = list.element_children
      next unless entries
      entries.each do |entry|
        ref = entry["name"]
        if ref.nil? || ref.empty?
          # No name given: compile the inline value
          att.expressions << make_re(entry["value"], exception)
        elsif @common_regex[ref].nil?
          raise PolicyError, "regex #{ref} in #{exception} but wasnt found in <common-regex>"
        else
          att.expressions << expression(ref)
        end
      end
    when "literal-list"
      entries = list.element_children
      next unless entries
      entries.each do |entry|
        literal = entry["value"]
        if literal && !literal.empty?
          att.values << literal
        elsif entry.child && entry.child.text?
          # Fall back to the text content when there is no usable "value"
          att.values << entry.child.content
        end
      end
    end
  end
end
282
+
283
# Process the <common-attributes> section.
# Each entry becomes an Attribute whose action defaults to DEFAULT_ONINVALID
# when "onInvalid" is missing or empty; it is stored under its lower-cased
# name.
def process_common_attributes(section) # :nodoc:
  # skip if we had no section
  return if section.element_children.nil?
  section.element_children.each do |val|
    # Skip only this entry: a bare return here would abandon every attribute
    # that follows it in the section.
    next if val.element_children.nil?
    name = val["name"]
    invalid = val["onInvalid"]
    att = Attribute.new(name)
    att.description = val["description"]
    att.action = (invalid.nil? || invalid.empty?) ? DEFAULT_ONINVALID : invalid
    process_attr_lists(att, val, "common-attribute(#{name})")
    @common_attrib[name.downcase] = att
  end
end
299
+
300
# Process the <global-tag-attributes> section.
# Each entry must refer to an attribute already present in the common
# attributes; the referenced attribute is registered as global under its
# lower-cased name. Raises PolicyError for an unknown reference.
def process_global_attributes(section) # :nodoc:
  # skip if we had no section
  return if section.element_children.nil?
  section.element_children.each do |ga|
    name = ga["name"]
    # Common attributes are keyed by lower-cased name, so look them up the
    # same way; a raw-name lookup misses any mixed-case attribute.
    key = name.to_s.downcase
    att = @common_attrib[key]
    raise PolicyError, "global attribute #{name} was not defined in <common-attributes>" if att.nil?
    @global_attrib[key] = att
  end
end
311
+
312
# Process the <tags-to-encode> section: the lower-cased text content of each
# entry whose first child is a text node is appended to the encode list.
def process_tag_to_encode(section) # :nodoc:
  entries = section.element_children
  # skip if we had no section
  return if entries.nil?
  entries.each do |entry|
    next unless entry.child && entry.child.text?
    @encode_tags << entry.child.content.downcase
  end
end
322
+
323
# Process the <tag-rules> section.
# Each entry becomes a Tag stored under its lower-cased name. A child
# attribute that names a common attribute is added as a copy of it, with
# "onInvalid" / "description" overriding the copy when they are non-empty.
# Any other child attribute is built from its own regexp and literal lists.
def process_tag_rules(section) # :nodoc:
  return if section.element_children.nil?
  section.element_children.each do |tx|
    name = tx["name"]
    t = Tag.new(name)
    t.action = tx["action"]
    # Add attributes
    (tx.element_children || []).each do |tc|
      attr_name = tc["name"]
      act = tc["onInvalid"]
      dec = tc["description"]
      # Common attributes are keyed by lower-cased name; a raw-name lookup
      # would silently miss a mixed-case reference.
      catt = @common_attrib[attr_name.to_s.downcase]
      if catt # common attrib with value override
        ncatt = catt.dup
        ncatt.action = act unless act.nil? || act.empty?
        ncatt.description = dec unless dec.nil? || dec.empty?
        t << ncatt
      else
        att = Attribute.new(attr_name)
        # Same default as common attributes and CSS rules use when
        # "onInvalid" is missing or empty
        att.action = (act.nil? || act.empty?) ? DEFAULT_ONINVALID : act
        att.description = dec
        process_attr_lists(att, tc, " tag-rules(#{name})")
        t << att
      end
    end
    # End add attributes
    @tag_rules[name.downcase] = t
  end
end
355
+
356
# Process the <css-rules> section.
# Each entry becomes a CssProperty stored under its lower-cased name, with
# its action defaulting to DEFAULT_ONINVALID when "onInvalid" is missing or
# empty. Raises PolicyError when a regexp entry neither names a known common
# regex nor carries an inline value.
def process_css_rules(section) # :nodoc:
  rules = section.element_children
  return if rules.nil?
  rules.each do |rule|
    name = rule["name"]
    on_invalid = rule["onInvalid"]
    on_invalid = DEFAULT_ONINVALID if on_invalid.nil? || on_invalid.empty?
    prop = CssProperty.new(name)
    prop.action = on_invalid
    prop.description = rule["description"]
    # Process regexes, literals, categories and shorthands
    (rule.element_children || []).each do |list|
      items = list.element_children
      next if items.nil?
      case list.name
      when "regexp-list"
        items.each do |re|
          ref = re["name"]
          inline = re["value"]
          shared = expression(ref)
          if shared
            prop.add_expression(shared)
          elsif inline && !inline.empty?
            prop.add_expression(make_re(inline, "css-rule(#{name})"))
          else
            raise PolicyError, "#{ref} was referenced in CSS rule #{name} but wasnt found in <common-regexp>"
          end
        end
      when "literal-list"
        items.each { |li| prop.add_value(li["value"]) if li["value"] }
      when "category-list"
        items.each { |li| prop.add_category(li["value"]) if li["value"] }
      when "shorthand-list"
        items.each { |sl| prop.add_ref(sl["name"]) if sl["name"] }
      end
    end
    @css_rules[name.downcase] = prop
  end
end
414
+ end
415
+ end
416
+
417
+
418
+ __END__
419
+ <?xml version="1.0" encoding="UTF-8"?>
420
+ <xsd:schema
421
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema">
422
+ <xsd:element name="anti-samy-rules">
423
+ <xsd:complexType>
424
+ <xsd:sequence>
425
+ <xsd:element name="directives" type="Directives" maxOccurs="1" minOccurs="1"/>
426
+ <xsd:element name="common-regexps" type="CommonRegexps" maxOccurs="1" minOccurs="1"/>
427
+ <xsd:element name="common-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
428
+ <xsd:element name="global-tag-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
429
+ <xsd:element name="tags-to-encode" type="TagsToEncodeList" minOccurs="0" maxOccurs="1"/>
430
+ <xsd:element name="tag-rules" type="TagRules" minOccurs="1" maxOccurs="1"/>
431
+ <xsd:element name="css-rules" type="CSSRules" minOccurs="1" maxOccurs="1"/>
432
+ <xsd:element name="allowed-empty-tags" type="AllowedEmptyTags" minOccurs="0" maxOccurs="1"/>
433
+ </xsd:sequence>
434
+ </xsd:complexType>
435
+ </xsd:element>
436
+ <xsd:complexType name="Directives">
437
+ <xsd:sequence maxOccurs="unbounded">
438
+ <xsd:element name="directive" type="Directive" minOccurs="0"/>
439
+ </xsd:sequence>
440
+ </xsd:complexType>
441
+ <xsd:complexType name="Directive">
442
+ <xsd:attribute name="name" use="required">
443
+ <xsd:simpleType>
444
+ <xsd:restriction base="xsd:string">
445
+ <xsd:enumeration value="omitXmlDeclaration"/>
446
+ <xsd:enumeration value="omitDoctypeDeclaration"/>
447
+ <xsd:enumeration value="maxInputSize"/>
448
+ <xsd:enumeration value="useXHTML"/>
449
+ <xsd:enumeration value="embedStyleSheets"/>
450
+ <xsd:enumeration value="maxStyleSheetImports"/>
451
+ <xsd:enumeration value="connectionTimeout"/>
452
+ <xsd:enumeration value="nofollowAnchors"/>
453
+ <xsd:enumeration value="validateParamAsEmbed"/>
454
+ <xsd:enumeration value="preserveComments"/>
455
+ <xsd:enumeration value="preserveSpace"/>
456
+ <xsd:enumeration value="onUnknownTag"/>
457
+ <xsd:enumeration value="formatOutput"/>
458
+ </xsd:restriction>
459
+ </xsd:simpleType>
460
+ </xsd:attribute>
461
+ <xsd:attribute name="value" use="required"/>
462
+ </xsd:complexType>
463
+ <xsd:complexType name="CommonRegexps">
464
+ <xsd:sequence maxOccurs="unbounded">
465
+ <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
466
+ </xsd:sequence>
467
+ </xsd:complexType>
468
+ <xsd:complexType name="AttributeList">
469
+ <xsd:sequence maxOccurs="unbounded">
470
+ <xsd:element name="attribute" type="Attribute" minOccurs="0"/>
471
+ </xsd:sequence>
472
+ </xsd:complexType>
473
+ <xsd:complexType name="TagsToEncodeList">
474
+ <xsd:sequence maxOccurs="unbounded">
475
+ <xsd:element name="tag" minOccurs="0"/>
476
+ </xsd:sequence>
477
+ </xsd:complexType>
478
+ <xsd:complexType name="TagRules">
479
+ <xsd:sequence maxOccurs="unbounded">
480
+ <xsd:element name="tag" type="Tag" minOccurs="0"/>
481
+ </xsd:sequence>
482
+ </xsd:complexType>
483
+ <xsd:complexType name="Tag">
484
+ <xsd:sequence maxOccurs="unbounded">
485
+ <xsd:element name="attribute" type="Attribute" minOccurs="0" />
486
+ </xsd:sequence>
487
+ <xsd:attribute name="name" use="required"/>
488
+ <xsd:attribute name="action" use="required">
489
+ <xsd:simpleType>
490
+ <xsd:restriction base="xsd:string">
491
+ <xsd:enumeration value="validate"/>
492
+ <xsd:enumeration value="truncate"/>
493
+ <xsd:enumeration value="remove"/>
494
+ <xsd:enumeration value="filter"/>
495
+ <xsd:enumeration value="encode"/>
496
+ </xsd:restriction>
497
+ </xsd:simpleType>
498
+ </xsd:attribute>
499
+ </xsd:complexType>
500
+ <xsd:complexType name="Attribute">
501
+ <xsd:sequence>
502
+ <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
503
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
504
+ </xsd:sequence>
505
+ <xsd:attribute name="name" use="required"/>
506
+ <xsd:attribute name="description"/>
507
+ <xsd:attribute name="onInvalid">
508
+ <xsd:simpleType>
509
+ <xsd:restriction base="xsd:string">
510
+ <xsd:enumeration value="removeTag"/>
511
+ <xsd:enumeration value="filterTag"/>
512
+ <xsd:enumeration value="encodeTag"/>
513
+ <xsd:enumeration value="removeAttribute"/>
514
+ </xsd:restriction>
515
+ </xsd:simpleType>
516
+ </xsd:attribute>
517
+ </xsd:complexType>
518
+ <xsd:complexType name="RegexpList">
519
+ <xsd:sequence maxOccurs="unbounded">
520
+ <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
521
+ </xsd:sequence>
522
+ </xsd:complexType>
523
+ <xsd:complexType name="RegExp">
524
+ <xsd:attribute name="name" type="xsd:string"/>
525
+ <xsd:attribute name="value" type="xsd:string"/>
526
+ </xsd:complexType>
527
+ <xsd:complexType name="LiteralList">
528
+ <xsd:sequence maxOccurs="unbounded">
529
+ <xsd:element name="literal" type="Literal" minOccurs="0"/>
530
+ </xsd:sequence>
531
+ </xsd:complexType>
532
+ <xsd:complexType name="Literal">
533
+ <xsd:attribute name="value" type="xsd:string"/>
534
+ </xsd:complexType>
535
+ <xsd:complexType name="CSSRules">
536
+ <xsd:sequence maxOccurs="unbounded">
537
+ <xsd:element name="property" type="Property" minOccurs="0"/>
538
+ </xsd:sequence>
539
+ </xsd:complexType>
540
+ <xsd:complexType name="Property">
541
+ <xsd:sequence>
542
+ <xsd:element name="category-list" type="CategoryList" minOccurs="0"/>
543
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
544
+ <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
545
+ <xsd:element name="shorthand-list" type="ShorthandList" minOccurs="0"/>
546
+ </xsd:sequence>
547
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
548
+ <xsd:attribute name="default" type="xsd:string"/>
549
+ <xsd:attribute name="description" type="xsd:string"/>
550
+ </xsd:complexType>
551
+ <xsd:complexType name="ShorthandList">
552
+ <xsd:sequence maxOccurs="unbounded">
553
+ <xsd:element name="shorthand" type="Shorthand" minOccurs="0"/>
554
+ </xsd:sequence>
555
+ </xsd:complexType>
556
+ <xsd:complexType name="Shorthand">
557
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
558
+ </xsd:complexType>
559
+ <xsd:complexType name="CategoryList">
560
+ <xsd:sequence maxOccurs="unbounded">
561
+ <xsd:element name="category" type="Category" minOccurs="0"/>
562
+ </xsd:sequence>
563
+ </xsd:complexType>
564
+ <xsd:complexType name="Category">
565
+ <xsd:attribute name="value" type="xsd:string" use="required"/>
566
+ </xsd:complexType>
567
+ <xsd:complexType name="Entity">
568
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
569
+ <xsd:attribute name="cdata" type="xsd:string" use="required"/>
570
+ </xsd:complexType>
571
+ <xsd:complexType name="AllowedEmptyTags">
572
+ <xsd:sequence>
573
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="1"/>
574
+ </xsd:sequence>
575
+ </xsd:complexType>
576
+
577
+ </xsd:schema>