antisamy 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/CHANGELOG.rdoc +13 -0
  2. data/LICENSE.txt +20 -20
  3. data/README.rdoc +41 -41
  4. data/lib/antisamy.rb +46 -46
  5. data/lib/antisamy/css/css_filter.rb +187 -187
  6. data/lib/antisamy/css/css_scanner.rb +84 -84
  7. data/lib/antisamy/css/css_validator.rb +128 -128
  8. data/lib/antisamy/csspool/rsac.rb +1 -1
  9. data/lib/antisamy/csspool/rsac/sac.rb +14 -14
  10. data/lib/antisamy/csspool/rsac/sac/conditions.rb +5 -5
  11. data/lib/antisamy/csspool/rsac/sac/conditions/attribute_condition.rb +50 -50
  12. data/lib/antisamy/csspool/rsac/sac/conditions/begin_hyphen_condition.rb +18 -18
  13. data/lib/antisamy/csspool/rsac/sac/conditions/class_condition.rb +18 -18
  14. data/lib/antisamy/csspool/rsac/sac/conditions/combinator_condition.rb +36 -36
  15. data/lib/antisamy/csspool/rsac/sac/conditions/condition.rb +29 -29
  16. data/lib/antisamy/csspool/rsac/sac/conditions/id_condition.rb +23 -23
  17. data/lib/antisamy/csspool/rsac/sac/conditions/one_of_condition.rb +18 -18
  18. data/lib/antisamy/csspool/rsac/sac/conditions/pseudo_class_condition.rb +20 -20
  19. data/lib/antisamy/csspool/rsac/sac/document_handler.rb +66 -66
  20. data/lib/antisamy/csspool/rsac/sac/error_handler.rb +13 -13
  21. data/lib/antisamy/csspool/rsac/sac/generated_parser.rb +1012 -1012
  22. data/lib/antisamy/csspool/rsac/sac/generated_property_parser.rb +9284 -9284
  23. data/lib/antisamy/csspool/rsac/sac/lexeme.rb +27 -27
  24. data/lib/antisamy/csspool/rsac/sac/lexical_unit.rb +201 -201
  25. data/lib/antisamy/csspool/rsac/sac/parse_exception.rb +4 -4
  26. data/lib/antisamy/csspool/rsac/sac/parser.rb +109 -109
  27. data/lib/antisamy/csspool/rsac/sac/property_parser.rb +44 -44
  28. data/lib/antisamy/csspool/rsac/sac/selectors.rb +5 -5
  29. data/lib/antisamy/csspool/rsac/sac/selectors/child_selector.rb +36 -36
  30. data/lib/antisamy/csspool/rsac/sac/selectors/conditional_selector.rb +45 -45
  31. data/lib/antisamy/csspool/rsac/sac/selectors/descendant_selector.rb +36 -36
  32. data/lib/antisamy/csspool/rsac/sac/selectors/element_selector.rb +35 -35
  33. data/lib/antisamy/csspool/rsac/sac/selectors/selector.rb +25 -25
  34. data/lib/antisamy/csspool/rsac/sac/selectors/sibling_selector.rb +35 -35
  35. data/lib/antisamy/csspool/rsac/sac/selectors/simple_selector.rb +21 -21
  36. data/lib/antisamy/csspool/rsac/sac/token.rb +25 -25
  37. data/lib/antisamy/csspool/rsac/sac/tokenizer.rb +185 -185
  38. data/lib/antisamy/csspool/rsac/stylesheet.rb +3 -3
  39. data/lib/antisamy/csspool/rsac/stylesheet/rule.rb +20 -20
  40. data/lib/antisamy/csspool/rsac/stylesheet/stylesheet.rb +76 -76
  41. data/lib/antisamy/html/handler.rb +112 -99
  42. data/lib/antisamy/html/sax_filter.rb +305 -302
  43. data/lib/antisamy/html/scanner.rb +47 -43
  44. data/lib/antisamy/model/attribute.rb +19 -19
  45. data/lib/antisamy/model/css_property.rb +39 -39
  46. data/lib/antisamy/model/tag.rb +31 -31
  47. data/lib/antisamy/policy.rb +577 -545
  48. data/lib/antisamy/scan_results.rb +89 -89
  49. data/spec/antisamy_spec.rb +208 -142
  50. data/spec/spec_helper.rb +12 -12
  51. metadata +79 -81
@@ -1,43 +1,47 @@
1
- module AntiSamy
2
- class Scanner
3
- attr_accessor :policy, :errors, :nofollow, :pae
4
- DEFAULT_ENCODE = "UTF-8"
5
- ALLOW_EMPTY = %w[br hr a img link iframe script object applet frame base param meta input textarea embed basefont col]
6
- # Setup a basic param tag rule
7
- begin
8
- name_attr = Attribute.new("name")
9
- value_attr = Attribute.new("value")
10
- name_attr.expressions << /.*/
11
- value_attr.expressions << /.*/
12
- @@basic_param_tag_rule = Tag.new("param")
13
- @@basic_param_tag_rule << name_attr
14
- @@basic_param_tag_rule << value_attr
15
- @@basic_param_tag_rule.action = Policy::ACTION_VALIDATE
16
- end
17
-
18
- # Create a scanner with a given policy
19
- def initialize(policy)
20
- @policy = policy
21
- @errors = []
22
- end
23
-
24
- # Scan the input using the provided input and output encoding
25
- # will raise an error if nil input or the maximum input size is exceeded
26
- def scan(input, input_encode, output_encoder)
27
- raise ArgumentError if input.nil?
28
- raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
29
- # check poilcy stuff
30
- handler = Handler.new(@policy,output_encoder)
31
- scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule)
32
- parser = Nokogiri::HTML::SAX::Parser.new(scanner,input_encode)
33
- #parser.parse(input)
34
- parser.parse(input) do |ctx|
35
- ctx.replace_entities = true
36
- end
37
- results = ScanResults.new(Time.now)
38
- results.clean_html = handler.document
39
- results.messages = handler.errors
40
- results
41
- end
42
- end
43
- end
1
+ module AntiSamy
2
+ class Scanner
3
+ attr_accessor :policy, :errors, :nofollow, :pae
4
+ DEFAULT_ENCODE = "UTF-8"
5
+ ALLOW_EMPTY = %w[br hr a img link iframe script object applet frame base param meta input textarea embed basefont col]
6
+ # Setup a basic param tag rule
7
+ begin
8
+ name_attr = Attribute.new("name")
9
+ value_attr = Attribute.new("value")
10
+ name_attr.expressions << /.*/
11
+ value_attr.expressions << /.*/
12
+ @@basic_param_tag_rule = Tag.new("param")
13
+ @@basic_param_tag_rule << name_attr
14
+ @@basic_param_tag_rule << value_attr
15
+ @@basic_param_tag_rule.action = Policy::ACTION_VALIDATE
16
+ end
17
+
18
+ # Create a scanner with a given policy
19
+ def initialize(policy)
20
+ @policy = policy
21
+ @errors = []
22
+ end
23
+
24
+ # Scan the input using the provided input and output encoding
25
+ # will raise an error if nil input or the maximum input size is exceeded
26
+ def scan(input, input_encode, output_encoder)
27
+ raise ArgumentError if input.nil?
28
+ raise ScanError, "Max input Exceeded #{input.size} > #{@policy.max_input}" if input.size > @policy.max_input
29
+ fragment = true
30
+ if input =~ /\<\s?html\s?.*?\>|DOCTYPE/im
31
+ fragment = false
32
+ end
33
+ # check policy stuff
34
+ handler = Handler.new(@policy,output_encoder,fragment)
35
+ scanner = SaxFilter.new(@policy,handler,@@basic_param_tag_rule,fragment)
36
+ parser = Nokogiri::HTML::SAX::Parser.new(scanner,input_encode)
37
+ #parser.parse(input)
38
+ parser.parse(input) do |ctx|
39
+ ctx.replace_entities = true
40
+ end
41
+ results = ScanResults.new(Time.now)
42
+ results.clean_html = handler.document
43
+ results.messages = handler.errors
44
+ results
45
+ end
46
+ end
47
+ end
@@ -1,19 +1,19 @@
1
- module AntiSamy
2
- # A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in
3
- # order to be considered valid. This is a simple container class
4
- class Attribute
5
- attr_accessor :name, :description, :action, :values, :expressions
6
- ACTION_REMOVE_TAG = "removeTag"
7
- ACTION_FILTER_TAG = "filterTag"
8
- ACTION_ENCODE_TAG = "encodeTag"
9
- ACTION_REMOVE_ATTRIB = "removeAttribute"
10
- # Create a new attribute
11
- def initialize(name)
12
- @name = name
13
- @description = nil
14
- @action = nil
15
- @values = []
16
- @expressions = []
17
- end
18
- end
19
- end
1
+ module AntiSamy
2
+ # A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in
3
+ # order to be considered valid. This is a simple container class
4
+ class Attribute
5
+ attr_accessor :name, :description, :action, :values, :expressions
6
+ ACTION_REMOVE_TAG = "removeTag"
7
+ ACTION_FILTER_TAG = "filterTag"
8
+ ACTION_ENCODE_TAG = "encodeTag"
9
+ ACTION_REMOVE_ATTRIB = "removeAttribute"
10
+ # Create a new attribute
11
+ def initialize(name)
12
+ @name = name
13
+ @description = nil
14
+ @action = nil
15
+ @values = []
16
+ @expressions = []
17
+ end
18
+ end
19
+ end
@@ -1,39 +1,39 @@
1
- module AntiSamy
2
- # A model for CSS properties and the "rules" they must follow (either literals
3
- # or regular expressions) in order to be considered valid.
4
- class CssProperty
5
- attr_accessor :name, :description, :action, :values, :expressions, :refs, :catagories
6
-
7
- # Create a new property
8
- def initialize(name)
9
- @name = name
10
- @description = nil
11
- @values = []
12
- @expressions = []
13
- @refs = []
14
- @categories = []
15
- @action = nil
16
- end
17
-
18
- # Add a literal value to this property
19
- def add_value(value)
20
- @values << value
21
- end
22
-
23
- # Add a regular expression to this property
24
- def add_expression(exp)
25
- @expressions << exp
26
- end
27
-
28
- # Add a shorthand reference to this property
29
- def add_ref(ref)
30
- @refs << ref
31
- end
32
-
33
- # Add a category to this property
34
- def add_category(cat)
35
- @categories << cat
36
- end
37
-
38
- end
39
- end
1
+ module AntiSamy
2
+ # A model for CSS properties and the "rules" they must follow (either literals
3
+ # or regular expressions) in order to be considered valid.
4
+ class CssProperty
5
+ attr_accessor :name, :description, :action, :values, :expressions, :refs, :catagories
6
+
7
+ # Create a new property
8
+ def initialize(name)
9
+ @name = name
10
+ @description = nil
11
+ @values = []
12
+ @expressions = []
13
+ @refs = []
14
+ @categories = []
15
+ @action = nil
16
+ end
17
+
18
+ # Add a literal value to this property
19
+ def add_value(value)
20
+ @values << value
21
+ end
22
+
23
+ # Add a regular expression to this property
24
+ def add_expression(exp)
25
+ @expressions << exp
26
+ end
27
+
28
+ # Add a shorthand reference to this property
29
+ def add_ref(ref)
30
+ @refs << ref
31
+ end
32
+
33
+ # Add a category to this property
34
+ def add_category(cat)
35
+ @categories << cat
36
+ end
37
+
38
+ end
39
+ end
@@ -1,31 +1,31 @@
1
- module AntiSamy
2
- # A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information
3
- # about their allowed attributes.
4
- class Tag
5
- # Name and Action fields. Actions determine what we do when we see this tag
6
- attr_accessor :name, :action
7
-
8
- # Create a new Tag object
9
- def initialize(name)
10
- @name = name
11
- @action = action
12
- @allowed_attributes = {}
13
- end
14
-
15
- # Add an attribute to this property
16
- def <<(attribute)
17
- @allowed_attributes[attribute.name.downcase] = attribute
18
- end
19
-
20
- # fetch the map of attributes
21
- def attributes
22
- @allowed_attributes
23
- end
24
-
25
- # Fetch a property by name form this tag
26
- def attribute(name)
27
- @allowed_attributes[name]
28
- end
29
-
30
- end
31
- end
1
+ module AntiSamy
2
+ # A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information
3
+ # about their allowed attributes.
4
+ class Tag
5
+ # Name and Action fields. Actions determine what we do when we see this tag
6
+ attr_accessor :name, :action
7
+
8
+ # Create a new Tag object
9
+ def initialize(name)
10
+ @name = name
11
+ @action = action
12
+ @allowed_attributes = {}
13
+ end
14
+
15
+ # Add an allowed attribute to this tag
16
+ def <<(attribute)
17
+ @allowed_attributes[attribute.name.downcase] = attribute
18
+ end
19
+
20
+ # fetch the map of attributes
21
+ def attributes
22
+ @allowed_attributes
23
+ end
24
+
25
+ # Fetch an attribute by name from this tag
26
+ def attribute(name)
27
+ @allowed_attributes[name]
28
+ end
29
+
30
+ end
31
+ end
@@ -1,545 +1,577 @@
1
- require 'stringio'
2
-
3
- module AntiSamy
4
-
5
- # Schema validation Error
6
- class SchemaError < StandardError; end
7
- # Policy validation error
8
- class PolicyError < StandardError; end
9
-
10
- # Model for our policy engine.
11
- # the XSD for AntiSammy is stored in this file after the END section
12
- class Policy
13
- attr_accessor :max_input
14
- # We allow these tags to be empty
15
- ALLOWED_EMPTY = ["br", "hr", "a", "img", "link", "iframe", "script", "object", "applet", "frame", "base", "param", "meta", "input", "textarea", "embed", "basefont", "col"]
16
- # *Actions*
17
- ACTION_FILTER = "filter"
18
- ACTION_TRUNCATE = "truncate"
19
- ACTION_VALIDATE = "validate"
20
- ACTION_REMOVE = "remove"
21
- ACTION_ENCODE = "encode"
22
- # Anything regular express
23
- ANYTHING_REGEX = /.*/
24
- # AntiSammy XSD constants
25
- DEFAULT_ONINVALID = "removeAttribute"
26
- # Directive Name Constants
27
- OMIT_XML_DECL = "omitXmlDeclaration"
28
- OMIT_DOC_TYPE = "omitDoctypeDeclaration"
29
- MAX_INPUT = "maxInputSize"
30
- USE_XHTML = "userXHTML"
31
- FORMAT_OUTPUT = "formatOutput"
32
- # will we allow embedded style sheets
33
- EMBED_STYLESHEETS = "embedStyleSheets"
34
- # Connection timeout in miliseconds
35
- CONN_TIMEOUT = "conenctionTimeout"
36
- ANCHROS_NOFOLLOW = "nofollowAnchors"
37
- VALIDATE_P_AS_E = "validateParamAsEmbed"
38
- PRESERVE_SPACE = "preserveSpace"
39
- PRESERVE_COMMENTS = "preserveComments"
40
- ON_UNKNOWN_TAG = "onUnknownTag"
41
- MAX_SHEETS = "maxStyleSheetImports"
42
-
43
- # Class method to fetch the schema
44
- def self.schema
45
- data = StringIO.new
46
- File.open(__FILE__) do |f|
47
- begin
48
- line = f.gets
49
- end until line.match(/^__END__$/)
50
- while line = f.gets
51
- data << line
52
- end
53
- end
54
- data.rewind
55
- data.read
56
- end
57
-
58
- # Create a policy object.
59
- # You can pass in either:
60
- # * File path
61
- # * IO object
62
- # * String containing the policy XML
63
- # All policies will be validated against the builtin schema file and will raise
64
- # an Error if the policy doesnt conform to the schema
65
- def initialize(string_or_io)
66
- schema = Nokogiri::XML.Schema(Policy.schema)
67
- if string_or_io.respond_to?(:read)
68
- uri = string_or_io.read
69
- else
70
- if File.exists?(string_or_io)
71
- uri = IO.read(string_or_io)
72
- else
73
- uri = string_or_io
74
- end
75
- end
76
- doc = Nokogiri::XML.parse(uri)
77
- # We now have the Poolicy XML data lets parse it
78
- errors = schema.validate(doc)
79
- raise SchemaError, errors.join(",") if errors.size > 0
80
- @common_regex = {}
81
- @common_attrib = {}
82
- @tag_rules = {}
83
- @css_rules = {}
84
- @directives = Hash.new(false)
85
- @global_attrib = {}
86
- @encode_tags = []
87
- parse(doc)
88
- end
89
-
90
- # Get a particular directive
91
- def directive(name)
92
- @directives[name]
93
- end
94
-
95
- # Set a directive for the policy
96
- def []=(name,value)
97
- @directives[name] = value
98
- end
99
-
100
- # Get a global attribute
101
- def global(name)
102
- @global_attrib[name.downcase]
103
- end
104
-
105
- # Is the tag in the encode list
106
- def encode?(tag)
107
- @encode_tags.include?(tag)
108
- end
109
-
110
- # Return the tag rules
111
- def tags
112
- @tag_rules
113
- end
114
-
115
- # get a specific tag
116
- def tag(name)
117
- @tag_rules[name.downcase]
118
- end
119
-
120
- # return the css rules
121
- def properties
122
- @css_rules
123
- end
124
-
125
- # get a specific css rule
126
- def property(prop)
127
- @css_rules[prop.downcase]
128
- end
129
-
130
- # Get the list of attributes
131
- def attributes
132
- @common_attrib
133
- end
134
-
135
- # Get a specific attribute
136
- def attribute(name)
137
- @common_attrib[name.downcase]
138
- end
139
-
140
- # Get the list of expressions
141
- def expressions
142
- @common_regex
143
- end
144
-
145
- # Get a specific expression
146
- def expression(name)
147
- @common_regex[name]
148
- end
149
-
150
- private
151
- def make_re(p,context) #:nodoc:
152
- output = StringIO.open('','w')
153
- $stderr = output
154
- begin
155
- r = /#{p}/
156
- warning = output.string
157
- raise PolicyError, "context=#{context}, error=#{$1}, re=#{p}",caller(2) if warning =~ /warning: (.*)$/
158
- return r
159
- rescue RegexpError => e
160
- raise PolicyError, "context=#{context}, error=#{e.message} re=#{p}", caller(2)
161
- ensure
162
- $stderr = STDERR
163
- end
164
- end
165
-
166
- # Parse the Policy file
167
- def parse(node) # :nodoc:
168
- if node.children.nil? or node.children.last.nil?
169
- return
170
- end
171
- node.children.last.children.each do |section|
172
- if section.name.eql?("directives")
173
- process_directves(section)
174
- elsif section.name.eql?("common-regexps")
175
- process_common_regexps(section)
176
- elsif section.name.eql?("common-attributes")
177
- process_common_attributes(section)
178
- elsif section.name.eql?("global-tag-attributes")
179
- process_global_attributes(section)
180
- elsif section.name.eql?("tags-to-encode")
181
- process_tag_to_encode(section)
182
- elsif section.name.eql?("tag-rules")
183
- process_tag_rules(section)
184
- elsif section.name.eql?("css-rules")
185
- process_css_rules(section)
186
- end
187
- end
188
- end
189
-
190
- # process the directives section
191
- def process_directves(section) # :nodoc:
192
- # skip if we had no section
193
- return if section.element_children.nil?
194
- # process the rules
195
- section.element_children.each do |dir|
196
- name = dir["name"]
197
- value = dir["value"]
198
- if name.eql?("maxInputSize")
199
- @max_input = value.to_i
200
- else
201
- if name.eql?("connectionTimeout") or name.eql?("maxStyleSheetImports")
202
- value = value.to_i
203
- elsif value =~ /true/i
204
- value = true
205
- else
206
- value = false
207
- end
208
- @directives[name] = value
209
- end
210
- end
211
- end
212
-
213
- # process the <common-regexp> section
214
- def process_common_regexps(section) # :nodoc:
215
- # skip if we had no section
216
- return if section.element_children.nil?
217
- section.element_children.each do |re|
218
- @common_regex[re["name"]] = make_re(re["value"],"common-regex(#{re['name']})")
219
- end
220
- end
221
-
222
- # Helper method to process a literal and regex section
223
- def process_attr_lists(att,node,exception) # :nodoc:
224
- node.element_children.each do |el|
225
- if el.name.eql?("regexp-list")
226
- if el.element_children
227
- el.element_children.each do |re|
228
- v = re["value"]
229
- n = re["name"]
230
- if n and !n.empty?
231
- if @common_regex[n].nil?
232
- raise PolicyError, "regex #{n} in #{exception} but wasnt found in <common-regex>"
233
- else
234
- att.expressions << expression(n)
235
- end
236
- else
237
- att.expressions << make_re(v,exception)
238
- end
239
- end
240
- end
241
- elsif el.name.eql?("literal-list")
242
- if el.element_children
243
- el.element_children.each do |re|
244
- v = re["value"]
245
- if v and !v.empty?
246
- att.values << v
247
- else
248
- if re.child and re.child.text?
249
- att.values << re.child.content
250
- end
251
- end
252
- end
253
- end
254
- end
255
- end
256
- end
257
-
258
- # Process the <common-attributes> section
259
- def process_common_attributes(section) # :nodoc:
260
- # skip if we had no section
261
- return if section.element_children.nil?
262
- section.element_children.each do |val|
263
- invalid = val["onInvalid"]
264
- name = val["name"]
265
- desc = val["description"]
266
- att = Attribute.new(name)
267
- att.description = desc
268
- att.action = (invalid.nil? or invalid.empty?) ? DEFAULT_ONINVALID : invalid
269
- return if val.element_children.nil?
270
- process_attr_lists(att,val,"common-attribute(#{name})")
271
- @common_attrib[name.downcase] = att
272
- end
273
- end
274
-
275
- # Process the <global-attributes> section
276
- def process_global_attributes(section) # :nodoc:
277
- # skip if we had no section
278
- return if section.element_children.nil?
279
- section.element_children.each do |ga|
280
- name = ga["name"]
281
- att = @common_attrib[name]
282
- raise PolicyError, "global attribute #{name} was not defined in <common-attributes>" if att.nil?
283
- @global_attrib[name.downcase] = att
284
- end
285
- end
286
-
287
- # process the <tag-to-encode> section
288
- def process_tag_to_encode(section) # :nodoc:
289
- # skip if we had no section
290
- return if section.element_children.nil?
291
- section.element_children.each do |tag|
292
- if tag.child and tag.child.text?
293
- @encode_tags << tag.child.content.downcase
294
- end
295
- end
296
- end
297
-
298
- # Process the <tag-ruls> section
299
- def process_tag_rules(section) # :nodoc:
300
- return if section.element_children.nil?
301
- section.element_children.each do |tx|
302
- name = tx["name"]
303
- action = tx["action"]
304
- t = Tag.new(name)
305
- t.action = action
306
- # Add attributes
307
- if tx.element_children
308
- tx.element_children.each do |tc|
309
- catt = @common_attrib[tc["name"]]
310
- if catt # common attrib with value override
311
- act = tc["onInvalid"]
312
- dec = tc["description"]
313
- ncatt = catt.dup
314
- ncatt.action = act unless act.nil? or act.empty?
315
- ncatt.description = dec unless dec.nil? or dec.empty?
316
- t<< ncatt
317
- else
318
- att = Attribute.new(tc["name"])
319
- att.action = tc["onInvalid"]
320
- att.description = tc["description"]
321
- process_attr_lists(att,tc," tag-rules(#{name})")
322
- t<< att
323
- end
324
- end
325
- end
326
- # End add attributes
327
- @tag_rules[name.downcase] = t
328
- end
329
- end
330
-
331
- # Process the <css-rules> section
332
- def process_css_rules(section) # :nodoc:
333
- return if section.element_children.nil?
334
- section.element_children.each do |css|
335
- name = css["name"]
336
- desc = css["description"]
337
- action = css["onInvalid"]
338
- if action.nil? or action.empty?
339
- action = DEFAULT_ONINVALID
340
- end
341
- prop = CssProperty.new(name)
342
- prop.action = action
343
- prop.description = desc
344
- # Process regex, listerals and shorthands
345
- if css.element_children
346
- css.element_children.each do |child|
347
- empty = child.element_children.nil?
348
- # Regex
349
- if child.name.eql?("regexp-list")
350
- unless empty
351
- child.element_children.each do |re|
352
- re_name = re["name"]
353
- re_value = re["value"]
354
- gre = expression(re_name)
355
- if gre
356
- prop.add_expression(gre)
357
- elsif re_value and !re_value.empty?
358
- prop.add_expression(make_re(re_value,"css-rule(#{name})"))
359
- else
360
- raise PolicyError, "#{re_name} was referenced in CSS rule #{name} but wasnt found in <common-regexp>"
361
- end
362
- end
363
- end
364
- elsif child.name.eql?("literal-list") # literals
365
- unless empty
366
- child.element_children.each do |li|
367
- prop.add_value(li["value"]) if li["value"]
368
- end
369
- end
370
- elsif child.name.eql?("category-list") # literals
371
- unless empty
372
- child.element_children.each do |li|
373
- prop.add_category(li["value"]) if li["value"]
374
- end
375
- end
376
-
377
- elsif child.name.eql?("shorthand-list") # refs
378
- unless empty
379
- child.element_children.each do |sl|
380
- prop.add_ref(sl["name"]) if sl["name"]
381
- end
382
- end
383
- end
384
- end
385
- end
386
- @css_rules[name.downcase] = prop
387
- end
388
- end
389
- end
390
- end
391
-
392
-
393
- __END__
394
- <?xml version="1.0" encoding="UTF-8"?>
395
- <xsd:schema
396
- xmlns:xsd="http://www.w3.org/2001/XMLSchema">
397
- <xsd:element name="anti-samy-rules">
398
- <xsd:complexType>
399
- <xsd:sequence>
400
- <xsd:element name="directives" type="Directives" maxOccurs="1" minOccurs="1"/>
401
- <xsd:element name="common-regexps" type="CommonRegexps" maxOccurs="1" minOccurs="1"/>
402
- <xsd:element name="common-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
403
- <xsd:element name="global-tag-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
404
- <xsd:element name="tags-to-encode" type="TagsToEncodeList" minOccurs="0" maxOccurs="1"/>
405
- <xsd:element name="tag-rules" type="TagRules" minOccurs="1" maxOccurs="1"/>
406
- <xsd:element name="css-rules" type="CSSRules" minOccurs="1" maxOccurs="1"/>
407
- </xsd:sequence>
408
- </xsd:complexType>
409
- </xsd:element>
410
- <xsd:complexType name="Directives">
411
- <xsd:sequence maxOccurs="unbounded">
412
- <xsd:element name="directive" type="Directive" minOccurs="0"/>
413
- </xsd:sequence>
414
- </xsd:complexType>
415
- <xsd:complexType name="Directive">
416
- <xsd:attribute name="name" use="required">
417
- <xsd:simpleType>
418
- <xsd:restriction base="xsd:string">
419
- <xsd:enumeration value="omitXmlDeclaration"/>
420
- <xsd:enumeration value="omitDoctypeDeclaration"/>
421
- <xsd:enumeration value="maxInputSize"/>
422
- <xsd:enumeration value="useXHTML"/>
423
- <xsd:enumeration value="embedStyleSheets"/>
424
- <xsd:enumeration value="maxStyleSheetImports"/>
425
- <xsd:enumeration value="connectionTimeout"/>
426
- <xsd:enumeration value="nofollowAnchors"/>
427
- <xsd:enumeration value="validateParamAsEmbed"/>
428
- <xsd:enumeration value="preserveComments"/>
429
- <xsd:enumeration value="preserveSpace"/>
430
- <xsd:enumeration value="onUnknownTag"/>
431
- <xsd:enumeration value="formatOutput"/>
432
- </xsd:restriction>
433
- </xsd:simpleType>
434
- </xsd:attribute>
435
- <xsd:attribute name="value" use="required"/>
436
- </xsd:complexType>
437
- <xsd:complexType name="CommonRegexps">
438
- <xsd:sequence maxOccurs="unbounded">
439
- <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
440
- </xsd:sequence>
441
- </xsd:complexType>
442
- <xsd:complexType name="AttributeList">
443
- <xsd:sequence maxOccurs="unbounded">
444
- <xsd:element name="attribute" type="Attribute" minOccurs="0"/>
445
- </xsd:sequence>
446
- </xsd:complexType>
447
- <xsd:complexType name="TagsToEncodeList">
448
- <xsd:sequence maxOccurs="unbounded">
449
- <xsd:element name="tag" minOccurs="0"/>
450
- </xsd:sequence>
451
- </xsd:complexType>
452
- <xsd:complexType name="TagRules">
453
- <xsd:sequence maxOccurs="unbounded">
454
- <xsd:element name="tag" type="Tag" minOccurs="0"/>
455
- </xsd:sequence>
456
- </xsd:complexType>
457
- <xsd:complexType name="Tag">
458
- <xsd:sequence maxOccurs="unbounded">
459
- <xsd:element name="attribute" type="Attribute" minOccurs="0" />
460
- </xsd:sequence>
461
- <xsd:attribute name="name" use="required"/>
462
- <xsd:attribute name="action" use="required">
463
- <xsd:simpleType>
464
- <xsd:restriction base="xsd:string">
465
- <xsd:enumeration value="validate"/>
466
- <xsd:enumeration value="truncate"/>
467
- <xsd:enumeration value="remove"/>
468
- <xsd:enumeration value="filter"/>
469
- <xsd:enumeration value="encode"/>
470
- </xsd:restriction>
471
- </xsd:simpleType>
472
- </xsd:attribute>
473
- </xsd:complexType>
474
- <xsd:complexType name="Attribute">
475
- <xsd:sequence>
476
- <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
477
- <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
478
- </xsd:sequence>
479
- <xsd:attribute name="name" use="required"/>
480
- <xsd:attribute name="description"/>
481
- <xsd:attribute name="onInvalid">
482
- <xsd:simpleType>
483
- <xsd:restriction base="xsd:string">
484
- <xsd:enumeration value="removeTag"/>
485
- <xsd:enumeration value="filterTag"/>
486
- <xsd:enumeration value="encodeTag"/>
487
- <xsd:enumeration value="removeAttribute"/>
488
- </xsd:restriction>
489
- </xsd:simpleType>
490
- </xsd:attribute>
491
- </xsd:complexType>
492
- <xsd:complexType name="RegexpList">
493
- <xsd:sequence maxOccurs="unbounded">
494
- <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
495
- </xsd:sequence>
496
- </xsd:complexType>
497
- <xsd:complexType name="RegExp">
498
- <xsd:attribute name="name" type="xsd:string"/>
499
- <xsd:attribute name="value" type="xsd:string"/>
500
- </xsd:complexType>
501
- <xsd:complexType name="LiteralList">
502
- <xsd:sequence maxOccurs="unbounded">
503
- <xsd:element name="literal" type="Literal" minOccurs="0"/>
504
- </xsd:sequence>
505
- </xsd:complexType>
506
- <xsd:complexType name="Literal">
507
- <xsd:attribute name="value" type="xsd:string"/>
508
- </xsd:complexType>
509
- <xsd:complexType name="CSSRules">
510
- <xsd:sequence maxOccurs="unbounded">
511
- <xsd:element name="property" type="Property" minOccurs="0"/>
512
- </xsd:sequence>
513
- </xsd:complexType>
514
- <xsd:complexType name="Property">
515
- <xsd:sequence>
516
- <xsd:element name="category-list" type="CategoryList" minOccurs="0"/>
517
- <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
518
- <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
519
- <xsd:element name="shorthand-list" type="ShorthandList" minOccurs="0"/>
520
- </xsd:sequence>
521
- <xsd:attribute name="name" type="xsd:string" use="required"/>
522
- <xsd:attribute name="default" type="xsd:string"/>
523
- <xsd:attribute name="description" type="xsd:string"/>
524
- </xsd:complexType>
525
- <xsd:complexType name="ShorthandList">
526
- <xsd:sequence maxOccurs="unbounded">
527
- <xsd:element name="shorthand" type="Shorthand" minOccurs="0"/>
528
- </xsd:sequence>
529
- </xsd:complexType>
530
- <xsd:complexType name="Shorthand">
531
- <xsd:attribute name="name" type="xsd:string" use="required"/>
532
- </xsd:complexType>
533
- <xsd:complexType name="CategoryList">
534
- <xsd:sequence maxOccurs="unbounded">
535
- <xsd:element name="category" type="Category" minOccurs="0"/>
536
- </xsd:sequence>
537
- </xsd:complexType>
538
- <xsd:complexType name="Category">
539
- <xsd:attribute name="value" type="xsd:string" use="required"/>
540
- </xsd:complexType>
541
- <xsd:complexType name="Entity">
542
- <xsd:attribute name="name" type="xsd:string" use="required"/>
543
- <xsd:attribute name="cdata" type="xsd:string" use="required"/>
544
- </xsd:complexType>
545
- </xsd:schema>
1
+ require 'stringio'
2
+
3
+ module AntiSamy
4
+
5
+ # Schema validation Error
6
+ class SchemaError < StandardError; end
7
+ # Policy validation error
8
+ class PolicyError < StandardError; end
9
+
10
+ # Model for our policy engine.
11
+ # The XSD for AntiSamy is stored in this file after the __END__ marker
12
+ class Policy
13
+ attr_accessor :max_input
14
+ # We allow these tags to be empty
15
+ ALLOWED_EMPTY = ["br", "hr", "a", "img", "link", "iframe", "script", "object", "applet", "frame", "base", "param", "meta", "input", "textarea", "embed", "basefont", "col"]
16
+ # *Actions*
17
+ ACTION_FILTER = "filter"
18
+ ACTION_TRUNCATE = "truncate"
19
+ ACTION_VALIDATE = "validate"
20
+ ACTION_REMOVE = "remove"
21
+ ACTION_ENCODE = "encode"
22
+ # Regular expression that matches anything
23
+ ANYTHING_REGEX = /.*/
24
+ # AntiSamy XSD constants
25
+ DEFAULT_ONINVALID = "removeAttribute"
26
+ # Directive Name Constants
27
+ OMIT_XML_DECL = "omitXmlDeclaration"
28
+ OMIT_DOC_TYPE = "omitDoctypeDeclaration"
29
+ MAX_INPUT = "maxInputSize"
30
+ USE_XHTML = "userXHTML"
31
+ FORMAT_OUTPUT = "formatOutput"
32
+ # will we allow embedded style sheets
33
+ EMBED_STYLESHEETS = "embedStyleSheets"
34
+ # Connection timeout in milliseconds
35
+ CONN_TIMEOUT = "conenctionTimeout"
36
+ ANCHORS_NOFOLLOW = "nofollowAnchors"
37
+ VALIDATE_P_AS_E = "validateParamAsEmbed"
38
+ PRESERVE_SPACE = "preserveSpace"
39
+ PRESERVE_COMMENTS = "preserveComments"
40
+ ON_UNKNOWN_TAG = "onUnknownTag"
41
+ MAX_SHEETS = "maxStyleSheetImports"
42
+
43
+ # Class method to fetch the schema
44
+ def self.schema
45
+ data = StringIO.new
46
+ File.open(__FILE__) do |f|
47
+ begin
48
+ line = f.gets
49
+ end until line.match(/^__END__$/)
50
+ while line = f.gets
51
+ data << line
52
+ end
53
+ end
54
+ data.rewind
55
+ data.read
56
+ end
57
+
58
+ # Create a policy object.
59
+ # You can pass in either:
60
+ # * File path
61
+ # * IO object
62
+ # * String containing the policy XML
63
+ # All policies will be validated against the builtin schema file and will raise
64
+ # an Error if the policy doesn't conform to the schema
65
+ def initialize(string_or_io)
66
+ schema = Nokogiri::XML.Schema(Policy.schema)
67
+ if string_or_io.respond_to?(:read)
68
+ uri = string_or_io.read
69
+ else
70
+ if File.exists?(string_or_io)
71
+ uri = IO.read(string_or_io)
72
+ else
73
+ uri = string_or_io
74
+ end
75
+ end
76
+ doc = Nokogiri::XML.parse(uri)
77
+ # We now have the policy XML data; validate it against the schema, then parse it
78
+ errors = schema.validate(doc)
79
+ raise SchemaError, errors.join(",") if errors.size > 0
80
+ @common_regex = {}
81
+ @common_attrib = {}
82
+ @tag_rules = {}
83
+ @css_rules = {}
84
+ @directives = Hash.new(false)
85
+ @global_attrib = {}
86
+ @encode_tags = []
87
+ @allowed_empty = []
88
+ @allowed_empty << ALLOWED_EMPTY
89
+ @allowed_empty.flatten!
90
+ parse(doc)
91
+ end
92
+
93
# Look up the value of the directive called +name+ in the directive table.
def directive(name)
  setting = @directives[name]
  setting
end
97
+
98
# Store +value+ as the setting for the directive called +name+.
def []=(name, value)
  @directives.store(name, value)
end
102
+
103
# Fetch a global attribute; the lookup key is the lower-cased +name+.
def global(name)
  key = name.downcase
  @global_attrib[key]
end
107
+
108
# Is the tag in the encode list?
# Entries in the encode list are stored lower-cased, so the lookup is done on
# the lower-cased tag name, matching the behaviour of #tag and #allow_empty?.
# Arguments that cannot be lower-cased (e.g. nil) are looked up unchanged.
def encode?(tag)
  key = tag.respond_to?(:downcase) ? tag.downcase : tag
  @encode_tags.include?(key)
end
112
+
113
# Accessor for the complete table of tag rules.
def tags
  return @tag_rules
end
117
+
118
# Fetch the rule for one tag; the lookup key is the lower-cased +name+.
def tag(name)
  key = name.downcase
  @tag_rules[key]
end
122
+
123
# Accessor for the complete table of CSS rules.
def properties
  return @css_rules
end
127
+
128
# Fetch one CSS rule; the lookup key is the lower-cased +prop+.
def property(prop)
  key = prop.downcase
  @css_rules[key]
end
132
+
133
# Accessor for the complete table of common attributes.
def attributes
  return @common_attrib
end
137
+
138
# Fetch one common attribute; the lookup key is the lower-cased +name+.
def attribute(name)
  key = name.downcase
  @common_attrib[key]
end
142
+
143
# Accessor for the complete table of common regular expressions.
def expressions
  return @common_regex
end
147
+
148
# Fetch one common regular expression by its exact (case-sensitive) +name+.
def expression(name)
  found = @common_regex[name]
  found
end
152
+
153
# Is the tag called +name+ (compared lower-cased) in the allowed-empty list?
def allow_empty?(name)
  key = name.downcase
  @allowed_empty.include?(key)
end
156
+
157
+ private
158
# Compile the pattern source +p+ into a Regexp.
#
# $stderr is temporarily pointed at an in-memory buffer so that any
# "warning: ..." text written while compiling can be detected and reported
# as an error. The stream that was installed before the call is put back
# afterwards (previously it was reset to STDERR, discarding any redirection
# the host application had set up).
#
# p::       pattern source interpolated into a regexp literal
# context:: description of where the pattern came from, used in messages
#
# Returns the compiled Regexp.
# Raises PolicyError if the pattern is invalid or produced a warning.
def make_re(p, context) #:nodoc:
  previous_stderr = $stderr
  capture = StringIO.new
  $stderr = capture
  begin
    compiled = /#{p}/
    if capture.string =~ /warning: (.*)$/
      raise PolicyError, "context=#{context}, error=#{$1}, re=#{p}", caller(2)
    end
    compiled
  rescue RegexpError => e
    raise PolicyError, "context=#{context}, error=#{e.message} re=#{p}", caller(2)
  ensure
    $stderr = previous_stderr
  end
end
172
+
173
# Walk a parsed policy document and hand every recognised section to its
# processor. Sections are the children of the document's last child;
# sections with an unrecognised name are ignored.
def parse(node) # :nodoc:
  top_level = node.children
  return if top_level.nil? || top_level.last.nil?
  top_level.last.children.each do |section|
    case section.name
    when "directives"            then process_directves(section)
    when "common-regexps"        then process_common_regexps(section)
    when "common-attributes"     then process_common_attributes(section)
    when "global-tag-attributes" then process_global_attributes(section)
    when "tags-to-encode"        then process_tag_to_encode(section)
    when "tag-rules"             then process_tag_rules(section)
    when "css-rules"             then process_css_rules(section)
    when "allowed-empty-tags"    then process_empty_tags(section)
    end
  end
end
198
+
199
# Process the <allowed-empty-tags> section: every non-empty +value+ found on
# the children of a <literal-list> is appended, lower-cased, to the list of
# tags that may be empty.
def process_empty_tags(section) # :nodoc:
  lists = section.element_children
  # skip if we had no section
  return if lists.nil?
  lists.each do |list|
    next unless list.name.eql?("literal-list")
    literals = list.element_children
    next unless literals
    literals.each do |literal|
      value = literal["value"]
      next unless value && !value.empty?
      @allowed_empty << value.downcase
    end
  end
end
215
# Process the <directives> section.
# * maxInputSize is converted to an integer and kept in @max_input, not in
#   the directive table.
# * connectionTimeout and maxStyleSheetImports are stored as integers.
# * Every other directive is stored as true when its value contains "true"
#   (case-insensitive) and as false otherwise.
def process_directves(section) # :nodoc:
  entries = section.element_children
  # skip if we had no section
  return if entries.nil?
  entries.each do |entry|
    key = entry["name"]
    raw = entry["value"]
    if key.eql?("maxInputSize")
      @max_input = raw.to_i
      next
    end
    @directives[key] =
      if key.eql?("connectionTimeout") || key.eql?("maxStyleSheetImports")
        raw.to_i
      elsif raw =~ /true/i
        true
      else
        false
      end
  end
end
237
+
238
# Process the <common-regexps> section: each child's +value+ is compiled via
# make_re and registered under the child's +name+.
def process_common_regexps(section) # :nodoc:
  nodes = section.element_children
  # skip if we had no section
  return if nodes.nil?
  nodes.each do |node|
    label = node["name"]
    @common_regex[label] = make_re(node["value"], "common-regex(#{label})")
  end
end
246
+
247
# Helper method to process the <regexp-list> and <literal-list> children of
# an attribute definition.
#
# att::       object collecting the results in +expressions+ and +values+
# node::      the attribute element whose lists are read
# exception:: context string used in error messages
#
# Regexps given by +name+ must already exist in the common regexps; regexps
# given by +value+ are compiled with make_re. A regexp with neither a name
# nor a value is rejected: compiling a missing value would produce an empty
# pattern that matches every input. Literals come from the +value+
# attribute, or from the element's text child when no value is given.
#
# Raises PolicyError for unknown regexp names and for empty regexp entries.
def process_attr_lists(att, node, exception) # :nodoc:
  node.element_children.each do |list|
    entries = list.element_children
    next unless entries
    case list.name
    when "regexp-list"
      entries.each do |re|
        ref = re["name"]
        pattern = re["value"]
        if ref && !ref.empty?
          if @common_regex[ref].nil?
            raise PolicyError, "regex #{ref} in #{exception} but wasnt found in <common-regex>"
          end
          att.expressions << @common_regex[ref]
        elsif pattern.nil?
          raise PolicyError, "regexp in #{exception} has neither a name nor a value"
        else
          att.expressions << make_re(pattern, exception)
        end
      end
    when "literal-list"
      entries.each do |literal|
        value = literal["value"]
        if value && !value.empty?
          att.values << value
        elsif literal.child && literal.child.text?
          att.values << literal.child.content
        end
      end
    end
  end
end
282
+
283
# Process the <common-attributes> section.
# Each child becomes an Attribute whose action is the child's +onInvalid+
# value, or DEFAULT_ONINVALID when that is missing or empty. Its lists are
# read with process_attr_lists and the result is registered under the
# lower-cased attribute name.
#
# A child that reports no element children is skipped on its own; earlier
# code used +return+ here, which silently dropped every attribute after it.
def process_common_attributes(section) # :nodoc:
  definitions = section.element_children
  # skip if we had no section
  return if definitions.nil?
  definitions.each do |val|
    name = val["name"]
    invalid = val["onInvalid"]
    att = Attribute.new(name)
    att.description = val["description"]
    att.action = invalid.nil? || invalid.empty? ? DEFAULT_ONINVALID : invalid
    next if val.element_children.nil?
    process_attr_lists(att, val, "common-attribute(#{name})")
    @common_attrib[name.downcase] = att
  end
end
299
+
300
# Process the <global-tag-attributes> section.
# Every entry must name an attribute already registered in the common
# attributes. Those are keyed by lower-cased name, so the lookup is done on
# the lower-cased name too (a mixed-case name previously never matched).
#
# Raises PolicyError when the named attribute is not a common attribute.
def process_global_attributes(section) # :nodoc:
  entries = section.element_children
  # skip if we had no section
  return if entries.nil?
  entries.each do |ga|
    name = ga["name"]
    key = name.to_s.downcase
    att = @common_attrib[key]
    raise PolicyError, "global attribute #{name} was not defined in <common-attributes>" if att.nil?
    @global_attrib[key] = att
  end
end
311
+
312
# Process the <tags-to-encode> section: the text content of each child is
# appended, lower-cased, to the encode list. Children without a text child
# are ignored.
def process_tag_to_encode(section) # :nodoc:
  tags = section.element_children
  # skip if we had no section
  return if tags.nil?
  tags.each do |tag|
    text = tag.child
    next unless text && text.text?
    @encode_tags << text.content.downcase
  end
end
322
+
323
# Process the <tag-rules> section.
# Each child becomes a Tag with the child's +action+, registered under the
# lower-cased tag name. For every attribute of the tag:
# * if a common attribute of that name exists, a copy of it is added, with
#   +onInvalid+ / +description+ overriding the copy when given;
# * otherwise a new Attribute is built from the element and its lists.
# Common attributes are keyed by lower-cased name, so the lookup lower-cases
# the name as well (a mixed-case reference previously never matched).
def process_tag_rules(section) # :nodoc:
  rules = section.element_children
  return if rules.nil?
  rules.each do |tx|
    name = tx["name"]
    t = Tag.new(name)
    t.action = tx["action"]
    # Add attributes
    (tx.element_children || []).each do |tc|
      shared = @common_attrib[tc["name"].to_s.downcase]
      if shared # common attrib with value override
        act = tc["onInvalid"]
        dec = tc["description"]
        # work on a copy so the shared definition is never modified
        override = shared.dup
        override.action = act unless act.nil? || act.empty?
        override.description = dec unless dec.nil? || dec.empty?
        t << override
      else
        att = Attribute.new(tc["name"])
        att.action = tc["onInvalid"]
        att.description = tc["description"]
        process_attr_lists(att, tc, " tag-rules(#{name})")
        t << att
      end
    end
    # End add attributes
    @tag_rules[name.downcase] = t
  end
end
355
+
356
# Process the <css-rules> section.
# Each child becomes a CssProperty registered under its lower-cased name,
# with the child's +onInvalid+ (or DEFAULT_ONINVALID when missing or empty)
# as action. Its child lists are read as follows:
# * regexp-list::    a common regexp found by +name+, else +value+ compiled
#                    with make_re, else a PolicyError
# * literal-list::   every +value+ is added as an allowed value
# * category-list::  every +value+ is added as a category
# * shorthand-list:: every +name+ is added as a reference
def process_css_rules(section) # :nodoc:
  rules = section.element_children
  return if rules.nil?
  rules.each do |css|
    name = css["name"]
    on_invalid = css["onInvalid"]
    on_invalid = DEFAULT_ONINVALID if on_invalid.nil? || on_invalid.empty?
    prop = CssProperty.new(name)
    prop.action = on_invalid
    prop.description = css["description"]
    # Process regexes, literals, categories and shorthands
    (css.element_children || []).each do |list|
      items = list.element_children
      next if items.nil?
      case list.name
      when "regexp-list"
        items.each do |re|
          ref = re["name"]
          pattern = re["value"]
          shared = expression(ref)
          if shared
            prop.add_expression(shared)
          elsif pattern && !pattern.empty?
            prop.add_expression(make_re(pattern, "css-rule(#{name})"))
          else
            raise PolicyError, "#{ref} was referenced in CSS rule #{name} but wasnt found in <common-regexp>"
          end
        end
      when "literal-list"
        items.each { |li| prop.add_value(li["value"]) if li["value"] }
      when "category-list"
        items.each { |li| prop.add_category(li["value"]) if li["value"] }
      when "shorthand-list"
        items.each { |sl| prop.add_ref(sl["name"]) if sl["name"] }
      end
    end
    @css_rules[name.downcase] = prop
  end
end
414
+ end
415
+ end
416
+
417
+
418
+ __END__
419
+ <?xml version="1.0" encoding="UTF-8"?>
420
+ <xsd:schema
421
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema">
422
+ <xsd:element name="anti-samy-rules">
423
+ <xsd:complexType>
424
+ <xsd:sequence>
425
+ <xsd:element name="directives" type="Directives" maxOccurs="1" minOccurs="1"/>
426
+ <xsd:element name="common-regexps" type="CommonRegexps" maxOccurs="1" minOccurs="1"/>
427
+ <xsd:element name="common-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
428
+ <xsd:element name="global-tag-attributes" type="AttributeList" maxOccurs="1" minOccurs="1"/>
429
+ <xsd:element name="tags-to-encode" type="TagsToEncodeList" minOccurs="0" maxOccurs="1"/>
430
+ <xsd:element name="tag-rules" type="TagRules" minOccurs="1" maxOccurs="1"/>
431
+ <xsd:element name="css-rules" type="CSSRules" minOccurs="1" maxOccurs="1"/>
432
+ <xsd:element name="allowed-empty-tags" type="AllowedEmptyTags" minOccurs="0" maxOccurs="1"/>
433
+ </xsd:sequence>
434
+ </xsd:complexType>
435
+ </xsd:element>
436
+ <xsd:complexType name="Directives">
437
+ <xsd:sequence maxOccurs="unbounded">
438
+ <xsd:element name="directive" type="Directive" minOccurs="0"/>
439
+ </xsd:sequence>
440
+ </xsd:complexType>
441
+ <xsd:complexType name="Directive">
442
+ <xsd:attribute name="name" use="required">
443
+ <xsd:simpleType>
444
+ <xsd:restriction base="xsd:string">
445
+ <xsd:enumeration value="omitXmlDeclaration"/>
446
+ <xsd:enumeration value="omitDoctypeDeclaration"/>
447
+ <xsd:enumeration value="maxInputSize"/>
448
+ <xsd:enumeration value="useXHTML"/>
449
+ <xsd:enumeration value="embedStyleSheets"/>
450
+ <xsd:enumeration value="maxStyleSheetImports"/>
451
+ <xsd:enumeration value="connectionTimeout"/>
452
+ <xsd:enumeration value="nofollowAnchors"/>
453
+ <xsd:enumeration value="validateParamAsEmbed"/>
454
+ <xsd:enumeration value="preserveComments"/>
455
+ <xsd:enumeration value="preserveSpace"/>
456
+ <xsd:enumeration value="onUnknownTag"/>
457
+ <xsd:enumeration value="formatOutput"/>
458
+ </xsd:restriction>
459
+ </xsd:simpleType>
460
+ </xsd:attribute>
461
+ <xsd:attribute name="value" use="required"/>
462
+ </xsd:complexType>
463
+ <xsd:complexType name="CommonRegexps">
464
+ <xsd:sequence maxOccurs="unbounded">
465
+ <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
466
+ </xsd:sequence>
467
+ </xsd:complexType>
468
+ <xsd:complexType name="AttributeList">
469
+ <xsd:sequence maxOccurs="unbounded">
470
+ <xsd:element name="attribute" type="Attribute" minOccurs="0"/>
471
+ </xsd:sequence>
472
+ </xsd:complexType>
473
+ <xsd:complexType name="TagsToEncodeList">
474
+ <xsd:sequence maxOccurs="unbounded">
475
+ <xsd:element name="tag" minOccurs="0"/>
476
+ </xsd:sequence>
477
+ </xsd:complexType>
478
+ <xsd:complexType name="TagRules">
479
+ <xsd:sequence maxOccurs="unbounded">
480
+ <xsd:element name="tag" type="Tag" minOccurs="0"/>
481
+ </xsd:sequence>
482
+ </xsd:complexType>
483
+ <xsd:complexType name="Tag">
484
+ <xsd:sequence maxOccurs="unbounded">
485
+ <xsd:element name="attribute" type="Attribute" minOccurs="0" />
486
+ </xsd:sequence>
487
+ <xsd:attribute name="name" use="required"/>
488
+ <xsd:attribute name="action" use="required">
489
+ <xsd:simpleType>
490
+ <xsd:restriction base="xsd:string">
491
+ <xsd:enumeration value="validate"/>
492
+ <xsd:enumeration value="truncate"/>
493
+ <xsd:enumeration value="remove"/>
494
+ <xsd:enumeration value="filter"/>
495
+ <xsd:enumeration value="encode"/>
496
+ </xsd:restriction>
497
+ </xsd:simpleType>
498
+ </xsd:attribute>
499
+ </xsd:complexType>
500
+ <xsd:complexType name="Attribute">
501
+ <xsd:sequence>
502
+ <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
503
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
504
+ </xsd:sequence>
505
+ <xsd:attribute name="name" use="required"/>
506
+ <xsd:attribute name="description"/>
507
+ <xsd:attribute name="onInvalid">
508
+ <xsd:simpleType>
509
+ <xsd:restriction base="xsd:string">
510
+ <xsd:enumeration value="removeTag"/>
511
+ <xsd:enumeration value="filterTag"/>
512
+ <xsd:enumeration value="encodeTag"/>
513
+ <xsd:enumeration value="removeAttribute"/>
514
+ </xsd:restriction>
515
+ </xsd:simpleType>
516
+ </xsd:attribute>
517
+ </xsd:complexType>
518
+ <xsd:complexType name="RegexpList">
519
+ <xsd:sequence maxOccurs="unbounded">
520
+ <xsd:element name="regexp" type="RegExp" minOccurs="0"/>
521
+ </xsd:sequence>
522
+ </xsd:complexType>
523
+ <xsd:complexType name="RegExp">
524
+ <xsd:attribute name="name" type="xsd:string"/>
525
+ <xsd:attribute name="value" type="xsd:string"/>
526
+ </xsd:complexType>
527
+ <xsd:complexType name="LiteralList">
528
+ <xsd:sequence maxOccurs="unbounded">
529
+ <xsd:element name="literal" type="Literal" minOccurs="0"/>
530
+ </xsd:sequence>
531
+ </xsd:complexType>
532
+ <xsd:complexType name="Literal">
533
+ <xsd:attribute name="value" type="xsd:string"/>
534
+ </xsd:complexType>
535
+ <xsd:complexType name="CSSRules">
536
+ <xsd:sequence maxOccurs="unbounded">
537
+ <xsd:element name="property" type="Property" minOccurs="0"/>
538
+ </xsd:sequence>
539
+ </xsd:complexType>
540
+ <xsd:complexType name="Property">
541
+ <xsd:sequence>
542
+ <xsd:element name="category-list" type="CategoryList" minOccurs="0"/>
543
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="0"/>
544
+ <xsd:element name="regexp-list" type="RegexpList" minOccurs="0"/>
545
+ <xsd:element name="shorthand-list" type="ShorthandList" minOccurs="0"/>
546
+ </xsd:sequence>
547
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
548
+ <xsd:attribute name="default" type="xsd:string"/>
549
+ <xsd:attribute name="description" type="xsd:string"/>
550
+ </xsd:complexType>
551
+ <xsd:complexType name="ShorthandList">
552
+ <xsd:sequence maxOccurs="unbounded">
553
+ <xsd:element name="shorthand" type="Shorthand" minOccurs="0"/>
554
+ </xsd:sequence>
555
+ </xsd:complexType>
556
+ <xsd:complexType name="Shorthand">
557
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
558
+ </xsd:complexType>
559
+ <xsd:complexType name="CategoryList">
560
+ <xsd:sequence maxOccurs="unbounded">
561
+ <xsd:element name="category" type="Category" minOccurs="0"/>
562
+ </xsd:sequence>
563
+ </xsd:complexType>
564
+ <xsd:complexType name="Category">
565
+ <xsd:attribute name="value" type="xsd:string" use="required"/>
566
+ </xsd:complexType>
567
+ <xsd:complexType name="Entity">
568
+ <xsd:attribute name="name" type="xsd:string" use="required"/>
569
+ <xsd:attribute name="cdata" type="xsd:string" use="required"/>
570
+ </xsd:complexType>
571
+ <xsd:complexType name="AllowedEmptyTags">
572
+ <xsd:sequence>
573
+ <xsd:element name="literal-list" type="LiteralList" minOccurs="1"/>
574
+ </xsd:sequence>
575
+ </xsd:complexType>
576
+
577
+ </xsd:schema>