loofah 2.4.0 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,23 +1,21 @@
1
1
  # frozen_string_literal: true
2
- require 'cgi'
3
- require 'crass'
2
+ require "cgi"
3
+ require "crass"
4
4
 
5
5
  module Loofah
6
6
  module HTML5 # :nodoc:
7
7
  module Scrub
8
-
9
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
10
- CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|r?em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
11
- CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
10
+ CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
12
11
 
13
12
  class << self
14
-
15
- def allowed_element? element_name
13
+ def allowed_element?(element_name)
16
14
  ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
17
15
  end
18
16
 
19
17
  # alternative implementation of the html5lib attribute scrubbing algorithm
20
- def scrub_attributes node
18
+ def scrub_attributes(node)
21
19
  node.attribute_nodes.each do |attr_node|
22
20
  attr_name = if attr_node.namespace
23
21
  "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
@@ -36,14 +34,14 @@ module Loofah
36
34
 
37
35
  if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
38
36
  # this block lifted nearly verbatim from HTML5 sanitization
39
- val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
40
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
37
+ val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
38
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
41
39
  attr_node.remove
42
40
  next
43
- elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
41
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
44
42
  # permit only allowed data mediatypes
45
43
  mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
46
- mediatype, _ = mediatype.split(';')[0..1] if mediatype
44
+ mediatype, _ = mediatype.split(";")[0..1] if mediatype
47
45
  if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
48
46
  attr_node.remove
49
47
  next
@@ -51,9 +49,9 @@ module Loofah
51
49
  end
52
50
  end
53
51
  if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
54
- attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
52
+ attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
55
53
  end
56
- if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
54
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
57
55
  attr_node.remove
58
56
  next
59
57
  end
@@ -68,12 +66,12 @@ module Loofah
68
66
  force_correct_attribute_escaping! node
69
67
  end
70
68
 
71
- def scrub_css_attribute node
72
- style = node.attributes['style']
69
+ def scrub_css_attribute(node)
70
+ style = node.attributes["style"]
73
71
  style.value = scrub_css(style.value) if style
74
72
  end
75
73
 
76
- def scrub_css style
74
+ def scrub_css(style)
77
75
  style_tree = Crass.parse_properties style
78
76
  sanitized_tree = []
79
77
 
@@ -85,7 +83,7 @@ module Loofah
85
83
  name = node[:name].downcase
86
84
  if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
87
85
  sanitized_tree << node << CRASS_SEMICOLON
88
- elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
86
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
89
87
  value = node[:value].split.map do |keyword|
90
88
  if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
91
89
  keyword
@@ -107,7 +105,7 @@ module Loofah
107
105
  #
108
106
  # see comments about CVE-2018-8048 within the tests for more information
109
107
  #
110
- def force_correct_attribute_escaping! node
108
+ def force_correct_attribute_escaping!(node)
111
109
  return unless Nokogiri::VersionInfo.instance.libxml2?
112
110
 
113
111
  node.attribute_nodes.each do |attr_node|
@@ -123,11 +121,10 @@ module Loofah
123
121
  #
124
122
  encoding = attr_node.value.encoding
125
123
  attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
126
- '%' + m.unpack('H2' * m.bytesize).join('%').upcase
124
+ "%" + m.unpack("H2" * m.bytesize).join("%").upcase
127
125
  end.force_encoding(encoding)
128
126
  end
129
127
  end
130
-
131
128
  end
132
129
  end
133
130
  end
@@ -92,7 +92,7 @@ module Loofah
92
92
  # # decidedly not ok for browser:
93
93
  # frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
94
94
  #
95
- def text(options={})
95
+ def text(options = {})
96
96
  result = serialize_root.children.inner_text rescue ""
97
97
  if options[:encode_special_chars] == false
98
98
  result # possibly dangerous if rendered in a browser
@@ -100,8 +100,9 @@ module Loofah
100
100
  encode_special_chars result
101
101
  end
102
102
  end
103
+
103
104
  alias :inner_text :text
104
- alias :to_str :text
105
+ alias :to_str :text
105
106
 
106
107
  #
107
108
  # Returns a plain-text version of the markup contained by the
@@ -113,7 +114,7 @@ module Loofah
113
114
  # Loofah.document("<h1>Title</h1><div>Content</div>").to_text
114
115
  # # => "\nTitle\n\nContent\n"
115
116
  #
116
- def to_text(options={})
117
+ def to_text(options = {})
117
118
  Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
118
119
  end
119
120
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  module Loofah
3
3
  module MetaHelpers # :nodoc:
4
- def self.add_downcased_set_members_to_all_set_constants mojule
4
+ def self.add_downcased_set_members_to_all_set_constants(mojule)
5
5
  mojule.constants.each do |constant_sym|
6
6
  constant = mojule.const_get constant_sym
7
7
  next unless Set === constant
@@ -3,7 +3,7 @@ module Loofah
3
3
  #
4
4
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
5
5
  #
6
- class ScrubberNotFound < RuntimeError ; end
6
+ class ScrubberNotFound < RuntimeError; end
7
7
 
8
8
  #
9
9
  # A Scrubber wraps up a block (or method) that is run on an HTML node (element):
@@ -37,7 +37,7 @@ module Loofah
37
37
  CONTINUE = Object.new.freeze
38
38
 
39
39
  # Top-down Scrubbers may return STOP to indicate that the subtree should not be traversed.
40
- STOP = Object.new.freeze
40
+ STOP = Object.new.freeze
41
41
 
42
42
  # When a scrubber is initialized, the :direction may be specified
43
43
  # as :top_down (the default) or :bottom_up.
@@ -65,7 +65,7 @@ module Loofah
65
65
  def initialize(options = {}, &block)
66
66
  direction = options[:direction] || :top_down
67
67
  unless [:top_down, :bottom_up].include?(direction)
68
- raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
68
+ raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
69
69
  end
70
70
  @direction, @block = direction, block
71
71
  end
@@ -92,10 +92,10 @@ module Loofah
92
92
  # If the attribute is set, don't overwrite the existing value
93
93
  #
94
94
  def append_attribute(node, attribute, value)
95
- current_value = node.get_attribute(attribute) || ''
95
+ current_value = node.get_attribute(attribute) || ""
96
96
  current_values = current_value.split(/\s+/)
97
97
  updated_value = current_values | [value]
98
- node.set_attribute(attribute, updated_value.join(' '))
98
+ node.set_attribute(attribute, updated_value.join(" "))
99
99
  end
100
100
 
101
101
  private
@@ -119,11 +119,11 @@ module Loofah
119
119
  else
120
120
  return if scrub(node) == STOP
121
121
  end
122
- node.children.each {|j| traverse_conditionally_top_down(j)}
122
+ node.children.each { |j| traverse_conditionally_top_down(j) }
123
123
  end
124
124
 
125
125
  def traverse_conditionally_bottom_up(node)
126
- node.children.each {|j| traverse_conditionally_bottom_up(j)}
126
+ node.children.each { |j| traverse_conditionally_bottom_up(j) }
127
127
  if block
128
128
  block.call(node)
129
129
  else
@@ -206,8 +206,8 @@ module Loofah
206
206
  end
207
207
 
208
208
  def scrub(node)
209
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
210
- append_attribute(node, 'rel', 'nofollow')
209
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
210
+ append_attribute(node, "rel", "nofollow")
211
211
  return STOP
212
212
  end
213
213
  end
@@ -227,8 +227,8 @@ module Loofah
227
227
  end
228
228
 
229
229
  def scrub(node)
230
- return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == 'a')
231
- append_attribute(node, 'rel', 'noopener')
230
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
231
+ append_attribute(node, "rel", "noopener")
232
232
  return STOP
233
233
  end
234
234
  end
@@ -268,7 +268,7 @@ module Loofah
268
268
 
269
269
  def scrub(node)
270
270
  if node.type == Nokogiri::XML::Node::TEXT_NODE || node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE
271
- node.content = node.content.gsub(/\u2028|\u2029/, '')
271
+ node.content = node.content.gsub(/\u2028|\u2029/, "")
272
272
  end
273
273
  CONTINUE
274
274
  end
@@ -278,14 +278,14 @@ module Loofah
278
278
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
279
279
  #
280
280
  MAP = {
281
- :escape => Escape,
282
- :prune => Prune,
281
+ :escape => Escape,
282
+ :prune => Prune,
283
283
  :whitewash => Whitewash,
284
- :strip => Strip,
285
- :nofollow => NoFollow,
284
+ :strip => Strip,
285
+ :nofollow => NoFollow,
286
286
  :noopener => NoOpener,
287
287
  :newline_block_elements => NewlineBlockElements,
288
- :unprintable => Unprintable
288
+ :unprintable => Unprintable,
289
289
  }
290
290
 
291
291
  #
@@ -13,7 +13,7 @@ module Loofah
13
13
  # constructor. Applications should use Loofah.fragment to
14
14
  # parse a fragment.
15
15
  #
16
- def parse tags
16
+ def parse(tags)
17
17
  doc = Loofah::XML::Document.new
18
18
  doc.encoding = tags.encoding.name if tags.respond_to?(:encoding)
19
19
  self.new(doc, tags)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-11-25 00:00:00.000000000 Z
12
+ date: 2020-04-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -205,14 +205,14 @@ dependencies:
205
205
  requirements:
206
206
  - - "~>"
207
207
  - !ruby/object:Gem::Version
208
- version: '3.20'
208
+ version: '3.22'
209
209
  type: :development
210
210
  prerelease: false
211
211
  version_requirements: !ruby/object:Gem::Requirement
212
212
  requirements:
213
213
  - - "~>"
214
214
  - !ruby/object:Gem::Version
215
- version: '3.20'
215
+ version: '3.22'
216
216
  description: |-
217
217
  Loofah is a general library for manipulating and transforming HTML/XML documents and fragments, built on top of Nokogiri.
218
218
 
@@ -231,7 +231,6 @@ extra_rdoc_files:
231
231
  - README.md
232
232
  - SECURITY.md
233
233
  files:
234
- - ".gemtest"
235
234
  - CHANGELOG.md
236
235
  - Gemfile
237
236
  - MIT-LICENSE.txt
@@ -257,26 +256,15 @@ files:
257
256
  - lib/loofah/scrubbers.rb
258
257
  - lib/loofah/xml/document.rb
259
258
  - lib/loofah/xml/document_fragment.rb
260
- - test/assets/msword.html
261
- - test/assets/testdata_sanitizer_tests1.dat
262
- - test/helper.rb
263
- - test/html5/test_sanitizer.rb
264
- - test/html5/test_scrub.rb
265
- - test/integration/test_ad_hoc.rb
266
- - test/integration/test_helpers.rb
267
- - test/integration/test_html.rb
268
- - test/integration/test_scrubbers.rb
269
- - test/integration/test_xml.rb
270
- - test/unit/test_api.rb
271
- - test/unit/test_encoding.rb
272
- - test/unit/test_helpers.rb
273
- - test/unit/test_scrubber.rb
274
- - test/unit/test_scrubbers.rb
275
259
  homepage: https://github.com/flavorjones/loofah
276
260
  licenses:
277
261
  - MIT
278
262
  metadata:
279
263
  homepage_uri: https://github.com/flavorjones/loofah
264
+ bug_tracker_uri: https://github.com/flavorjones/loofah/issues
265
+ documentation_uri: https://www.rubydoc.info/gems/loofah/
266
+ changelog_uri: https://github.com/flavorjones/loofah/master/CHANGELOG.md
267
+ source_code_uri: https://github.com/flavorjones/loofah
280
268
  post_install_message:
281
269
  rdoc_options:
282
270
  - "--main"
@@ -294,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
294
282
  - !ruby/object:Gem::Version
295
283
  version: '0'
296
284
  requirements: []
297
- rubygems_version: 3.0.3
285
+ rubygems_version: 3.1.2
298
286
  signing_key:
299
287
  specification_version: 4
300
288
  summary: Loofah is a general library for manipulating and transforming HTML/XML documents
data/.gemtest DELETED
File without changes
@@ -1,63 +0,0 @@
1
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
2
- <w:WordDocument>
3
- <w:View>Normal</w:View>
4
- <w:Zoom>0</w:Zoom>
5
- <w:PunctuationKerning/>
6
- <w:ValidateAgainstSchemas/>
7
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
8
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
9
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
10
- <w:Compatibility>
11
- <w:BreakWrappedTables/>
12
- <w:SnapToGridInCell/>
13
- <w:WrapTextWithPunct/>
14
- <w:UseAsianBreakRules/>
15
- <w:DontGrowAutofit/>
16
- </w:Compatibility>
17
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
18
- </w:WordDocument>
19
- </xml><![endif]--><!--[if gte mso 9]><xml>
20
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
21
- </w:LatentStyles>
22
- </xml><![endif]--><style>
23
- <!--
24
- /* Style Definitions */
25
- p.MsoNormal, li.MsoNormal, div.MsoNormal
26
- {mso-style-parent:"";
27
- margin:0in;
28
- margin-bottom:.0001pt;
29
- mso-pagination:widow-orphan;
30
- font-size:12.0pt;
31
- font-family:"Times New Roman";
32
- mso-fareast-font-family:"Times New Roman";}
33
- @page Section1
34
- {size:8.5in 11.0in;
35
- margin:1.0in 1.25in 1.0in 1.25in;
36
- mso-header-margin:.5in;
37
- mso-footer-margin:.5in;
38
- mso-paper-source:0;}
39
- div.Section1
40
- {page:Section1;}
41
- -->
42
- </style><!--[if gte mso 10]>
43
- <style>
44
- /* Style Definitions */
45
- table.MsoNormalTable
46
- {mso-style-name:"Table Normal";
47
- mso-tstyle-rowband-size:0;
48
- mso-tstyle-colband-size:0;
49
- mso-style-noshow:yes;
50
- mso-style-parent:"";
51
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
52
- mso-para-margin:0in;
53
- mso-para-margin-bottom:.0001pt;
54
- mso-pagination:widow-orphan;
55
- font-size:10.0pt;
56
- font-family:"Times New Roman";
57
- mso-ansi-language:#0400;
58
- mso-fareast-language:#0400;
59
- mso-bidi-language:#0400;}
60
- </style>
61
- <![endif]-->
62
-
63
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
@@ -1,502 +0,0 @@
1
- [
2
- {
3
- "name": "IE_Comments",
4
- "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
5
- "output": "&lt;!--[if gte IE 4]&gt;&lt;script&gt;alert('XSS');&lt;/script&gt;&lt;![endif]--&gt;"
6
- },
7
-
8
- {
9
- "name": "IE_Comments_2",
10
- "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
11
- "output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
12
- "rexml": "Ill-formed XHTML!"
13
- },
14
-
15
- {
16
- "name": "allow_colons_in_path_component",
17
- "input": "<a href=\"./this:that\">foo</a>",
18
- "output": "<a href='./this:that'>foo</a>"
19
- },
20
-
21
- {
22
- "name": "background_attribute",
23
- "input": "<div background=\"javascript:alert('XSS')\"></div>",
24
- "output": "<div/>",
25
- "xhtml": "<div></div>",
26
- "rexml": "<div></div>"
27
- },
28
-
29
- {
30
- "name": "bgsound",
31
- "input": "<bgsound src=\"javascript:alert('XSS');\" />",
32
- "output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
33
- "rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
34
- },
35
-
36
- {
37
- "name": "div_background_image_unicode_encoded",
38
- "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
39
- "output": "<div>foo</div>"
40
- },
41
-
42
- {
43
- "name": "div_expression",
44
- "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
45
- "output": "<div>foo</div>"
46
- },
47
-
48
- {
49
- "name": "double_open_angle_brackets",
50
- "input": "<img src=http://ha.ckers.org/scriptlet.html <",
51
- "output": "<img src='http://ha.ckers.org/scriptlet.html'>",
52
- "rexml": "Ill-formed XHTML!"
53
- },
54
-
55
- {
56
- "name": "double_open_angle_brackets_2",
57
- "input": "<script src=http://ha.ckers.org/scriptlet.html <",
58
- "output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;/script&gt;",
59
- "rexml": "Ill-formed XHTML!"
60
- },
61
-
62
- {
63
- "name": "grave_accents",
64
- "input": "<img src=`javascript:alert('XSS')` />",
65
- "output": "<img>",
66
- "rexml": "Ill-formed XHTML!"
67
- },
68
-
69
- {
70
- "name": "img_dynsrc_lowsrc",
71
- "input": "<img dynsrc=\"javascript:alert('XSS')\" />",
72
- "output": "<img>",
73
- "rexml": "<img />"
74
- },
75
-
76
- {
77
- "name": "img_vbscript",
78
- "input": "<img src='vbscript:msgbox(\"XSS\")' />",
79
- "output": "<img>",
80
- "rexml": "<img />"
81
- },
82
-
83
- {
84
- "name": "input_image",
85
- "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
86
- "output": "<input type='image'>",
87
- "rexml": "<input type='image' />"
88
- },
89
-
90
- {
91
- "name": "link_stylesheets",
92
- "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
93
- "output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"&gt;",
94
- "rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
95
- },
96
-
97
- {
98
- "name": "link_stylesheets_2",
99
- "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
100
- "output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"&gt;",
101
- "rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
102
- },
103
-
104
- {
105
- "name": "list_style_image",
106
- "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
107
- "output": "<li>foo</li>"
108
- },
109
-
110
- {
111
- "name": "no_closing_script_tags",
112
- "input": "<script src=http://ha.ckers.org/xss.js?<b>",
113
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;",
114
- "rexml": "Ill-formed XHTML!"
115
- },
116
-
117
- {
118
- "name": "non_alpha_non_digit",
119
- "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
120
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
121
- "rexml": "Ill-formed XHTML!"
122
- },
123
-
124
- {
125
- "name": "non_alpha_non_digit_2",
126
- "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
127
- "output": "<a>foo</a>",
128
- "rexml": "Ill-formed XHTML!"
129
- },
130
-
131
- {
132
- "name": "non_alpha_non_digit_3",
133
- "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
134
- "output": "<img>",
135
- "rexml": "Ill-formed XHTML!"
136
- },
137
-
138
- {
139
- "name": "non_alpha_non_digit_II",
140
- "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
141
- "output": "<a>foo</a>",
142
- "rexml": "Ill-formed XHTML!"
143
- },
144
-
145
- {
146
- "name": "non_alpha_non_digit_III",
147
- "input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
148
- "output": "<a>foo</a>",
149
- "rexml": "Ill-formed XHTML!"
150
- },
151
-
152
- {
153
- "name": "platypus",
154
- "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
155
- "output": "<a href='http://www.ragingplatypus.com/' style='display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;'>never trust your upstream platypus</a>"
156
- },
157
-
158
- {
159
- "name": "protocol_resolution_in_script_tag",
160
- "input": "<script src=//ha.ckers.org/.j></script>",
161
- "output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
162
- "rexml": "Ill-formed XHTML!"
163
- },
164
-
165
- {
166
- "name": "should_allow_anchors",
167
- "input": "<a href='foo' onclick='bar'><script>baz</script></a>",
168
- "output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
169
- },
170
-
171
- {
172
- "name": "should_allow_image_alt_attribute",
173
- "input": "<img alt='foo' onclick='bar' />",
174
- "output": "<img alt='foo'>",
175
- "rexml": "<img alt='foo' />"
176
- },
177
-
178
- {
179
- "name": "should_allow_image_height_attribute",
180
- "input": "<img height='foo' onclick='bar' />",
181
- "output": "<img height='foo'>",
182
- "rexml": "<img height='foo' />"
183
- },
184
-
185
- {
186
- "name": "should_allow_image_src_attribute",
187
- "input": "<img src='foo' onclick='bar' />",
188
- "output": "<img src='foo'>",
189
- "rexml": "<img src='foo' />"
190
- },
191
-
192
- {
193
- "name": "should_allow_image_width_attribute",
194
- "input": "<img width='foo' onclick='bar' />",
195
- "output": "<img width='foo'>",
196
- "rexml": "<img width='foo' />"
197
- },
198
-
199
- {
200
- "name": "should_handle_blank_text",
201
- "input": "",
202
- "output": ""
203
- },
204
-
205
- {
206
- "name": "should_handle_malformed_image_tags",
207
- "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
208
- "output": "<img>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
209
- "rexml": "Ill-formed XHTML!"
210
- },
211
-
212
- {
213
- "name": "should_handle_non_html",
214
- "input": "abc",
215
- "output": "abc"
216
- },
217
-
218
- {
219
- "name": "should_not_fall_for_ridiculous_hack",
220
- "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
221
- "output": "<img>",
222
- "rexml": "<img />"
223
- },
224
-
225
- {
226
- "name": "should_not_fall_for_xss_image_hack_0",
227
- "input": "<img src=\"javascript:alert('XSS');\" />",
228
- "output": "<img>",
229
- "rexml": "<img />"
230
- },
231
-
232
- {
233
- "name": "should_not_fall_for_xss_image_hack_1",
234
- "input": "<img src=javascript:alert('XSS') />",
235
- "output": "<img>",
236
- "rexml": "Ill-formed XHTML!"
237
- },
238
-
239
- {
240
- "name": "should_not_fall_for_xss_image_hack_10",
241
- "input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
242
- "output": "<img>",
243
- "rexml": "<img />"
244
- },
245
-
246
- {
247
- "name": "should_not_fall_for_xss_image_hack_11",
248
- "input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
249
- "output": "<img>",
250
- "rexml": "<img />"
251
- },
252
-
253
- {
254
- "name": "should_not_fall_for_xss_image_hack_12",
255
- "input": "<img src=\" &#14; javascript:alert('XSS');\" />",
256
- "output": "<img>",
257
- "rexml": "<img />"
258
- },
259
-
260
- {
261
- "name": "should_not_fall_for_xss_image_hack_13",
262
- "input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
263
- "output": "<img>",
264
- "rexml": "<img />"
265
- },
266
-
267
- {
268
- "name": "should_not_fall_for_xss_image_hack_14",
269
- "input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
270
- "output": "<img>",
271
- "rexml": "<img />"
272
- },
273
-
274
- {
275
- "name": "should_not_fall_for_xss_image_hack_2",
276
- "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
277
- "output": "<img>",
278
- "rexml": "<img />"
279
- },
280
-
281
- {
282
- "name": "should_not_fall_for_xss_image_hack_3",
283
- "input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
284
- "output": "<img>",
285
- "rexml": "<img />"
286
- },
287
-
288
- {
289
- "name": "should_not_fall_for_xss_image_hack_4",
290
- "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
291
- "output": "<img>",
292
- "rexml": "<img />"
293
- },
294
-
295
- {
296
- "name": "should_not_fall_for_xss_image_hack_5",
297
- "input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
298
- "output": "<img>",
299
- "rexml": "<img />"
300
- },
301
-
302
- {
303
- "name": "should_not_fall_for_xss_image_hack_6",
304
- "input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
305
- "output": "<img>",
306
- "rexml": "<img />"
307
- },
308
-
309
- {
310
- "name": "should_not_fall_for_xss_image_hack_7",
311
- "input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
312
- "output": "<img>",
313
- "rexml": "<img />"
314
- },
315
-
316
- {
317
- "name": "should_not_fall_for_xss_image_hack_8",
318
- "input": "<img src=\"jav\tascript:alert('XSS');\" />",
319
- "output": "<img>",
320
- "rexml": "<img />"
321
- },
322
-
323
- {
324
- "name": "should_not_fall_for_xss_image_hack_9",
325
- "input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
326
- "output": "<img>",
327
- "rexml": "<img />"
328
- },
329
-
330
- {
331
- "name": "should_sanitize_half_open_scripts",
332
- "input": "<img src=\"javascript:alert('XSS')\"",
333
- "output": "<img>",
334
- "rexml": "Ill-formed XHTML!"
335
- },
336
-
337
- {
338
- "name": "should_sanitize_invalid_script_tag",
339
- "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
340
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
341
- "rexml": "Ill-formed XHTML!"
342
- },
343
-
344
- {
345
- "name": "should_sanitize_script_tag_with_multiple_open_brackets",
346
- "input": "<<script>alert(\"XSS\");//<</script>",
347
- "output": "alert(\"XSS\");//",
348
- "xhtml": "&lt;&lt;script&gt;alert('XSS');//&lt;&lt;/script&gt;",
349
- "rexml": "Ill-formed XHTML!"
350
- },
351
-
352
- {
353
- "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
354
- "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
355
- "output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;/iframe&gt;",
356
- "rexml": "Ill-formed XHTML!"
357
- },
358
-
359
- {
360
- "name": "should_sanitize_tag_broken_up_by_null",
361
- "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
362
- "output": "&lt;scr&gt;&lt;/scr&gt;",
363
- "rexml": "Ill-formed XHTML!"
364
- },
365
-
366
- {
367
- "name": "should_sanitize_unclosed_script",
368
- "input": "<script src=http://ha.ckers.org/xss.js?<b>",
369
- "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;",
370
- "rexml": "Ill-formed XHTML!"
371
- },
372
-
373
- {
374
- "name": "should_strip_href_attribute_in_a_with_bad_protocols",
375
- "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
376
- "output": "<a title='1'>boo</a>"
377
- },
378
-
379
- {
380
- "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
381
- "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
382
- "output": "<a title='1'>boo</a>"
383
- },
384
-
385
- {
386
- "name": "should_strip_src_attribute_in_img_with_bad_protocols",
387
- "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
388
- "output": "<img title='1'>boo",
389
- "rexml": "<img title='1' />"
390
- },
391
-
392
- {
393
- "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
394
- "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
395
- "output": "<img title='1'>boo",
396
- "rexml": "<img title='1' />"
397
- },
398
-
399
- {
400
- "name": "xml_base",
401
- "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
402
- "output": "<div>foo</div>"
403
- },
404
-
405
- {
406
- "name": "xul",
407
- "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
408
- "output": "<p>fubar</p>"
409
- },
410
-
411
- {
412
- "name": "quotes_in_attributes",
413
- "input": "<img src='foo' title='\"foo\" bar' />",
414
- "rexml": "<img src='foo' title='\"foo\" bar' />",
415
- "output": "<img src='foo' title='\"foo\" bar'>"
416
- },
417
-
418
- {
419
- "name": "uri_refs_in_svg_attributes",
420
- "input": "<rect fill='url(#foo)' />",
421
- "rexml": "<rect fill='url(#foo)'></rect>",
422
- "xhtml": "<rect fill='url(#foo)'></rect>",
423
- "output": "<rect fill='url(#foo)'/>"
424
- },
425
-
426
- {
427
- "name": "absolute_uri_refs_in_svg_attributes",
428
- "input": "<rect fill='url(http://bad.com/) #fff' />",
429
- "rexml": "<rect fill=' #fff'></rect>",
430
- "xhtml": "<rect fill=' #fff'></rect>",
431
- "output": "<rect fill=' #fff'/>"
432
- },
433
-
434
- {
435
- "name": "uri_ref_with_space_in svg_attribute",
436
- "input": "<rect fill='url(\n#foo)' />",
437
- "rexml": "<rect fill='url(\n#foo)'></rect>",
438
- "xhtml": "<rect fill='url(\n#foo)'></rect>",
439
- "output": "<rect fill='url(\n#foo)'/>"
440
- },
441
-
442
- {
443
- "name": "absolute_uri_ref_with_space_in svg_attribute",
444
- "input": "<rect fill=\"url(\nhttp://bad.com/)\" />",
445
- "rexml": "<rect></rect>",
446
- "xhtml": "<rect></rect>",
447
- "output": "<rect/>"
448
- },
449
-
450
- {
451
- "name": "allow_html5_image_tag",
452
- "input": "<image src='foo' />",
453
- "rexml": "&lt;image src=\"foo\"&gt;&lt;/image&gt;",
454
- "output": "&lt;image src=\"foo\"/&gt;"
455
- },
456
-
457
- {
458
- "name": "style_attr_end_with_nothing",
459
- "input": "<div style=\"color: blue\" />",
460
- "output": "<div style='color: blue;'/>",
461
- "xhtml": "<div style='color: blue;'></div>",
462
- "rexml": "<div style='color: blue;'></div>"
463
- },
464
-
465
- {
466
- "name": "style_attr_end_with_space",
467
- "input": "<div style=\"color: blue \" />",
468
- "output": "<div style='color: blue ;'/>",
469
- "xhtml": "<div style='color: blue ;'></div>",
470
- "rexml": "<div style='color: blue ;'></div>"
471
- },
472
-
473
- {
474
- "name": "style_attr_end_with_semicolon",
475
- "input": "<div style=\"color: blue;\" />",
476
- "output": "<div style='color: blue;'/>",
477
- "xhtml": "<div style='color: blue;'></div>",
478
- "rexml": "<div style='color: blue;'></div>"
479
- },
480
-
481
- {
482
- "name": "style_attr_end_with_semicolon_space",
483
- "input": "<div style=\"color: blue; \" />",
484
- "output": "<div style='color: blue;'/>",
485
- "xhtml": "<div style='color: blue;'></div>",
486
- "rexml": "<div style='color: blue;'></div>"
487
- },
488
-
489
- {
490
- "name": "attributes_with_embedded_quotes",
491
- "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
492
- "output": "<img src='doesntexist.jpg%22'onerror=%22alert(1)'>",
493
- "rexml": "Ill-formed XHTML!"
494
- },
495
-
496
- {
497
- "name": "attributes_with_embedded_quotes_II",
498
- "input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
499
- "output": "<img src='notthere.jpg%22%22onerror=%22alert(2)'>",
500
- "rexml": "Ill-formed XHTML!"
501
- }
502
- ]