sanitizer 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,8 @@
1
+ # encoding: utf-8
1
2
  class HTMLEntities
2
3
  class Encoder #:nodoc:
3
4
  def basic_entity_regexp
4
- @basic_entity_regexp ||= (
5
- case @flavor
6
- when /^html/
7
- /[<>"]|(\&(?!\w))/
8
- else
9
- /[<>'"]|(\&(?!\w))/
10
- end
11
- )
5
+ @basic_entity_regexp ||= /[<>'"]|(\&(?!(\w+\;)))/
12
6
  end
13
7
  end
14
8
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  module Sanitizer
2
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
3
4
  end
data/lib/sanitizer.rb CHANGED
@@ -4,6 +4,5 @@ require 'htmlentities'
4
4
 
5
5
  # Local Libs
6
6
  $:.unshift(File.dirname(__FILE__) + '/../../lib')
7
- require 'sanitizer/whitelist'
8
7
  require 'sanitizer/htmlentries'
9
8
  require 'sanitizer/sanitizer'
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  describe Sanitizer do
@@ -35,6 +36,12 @@ describe Sanitizer do
35
36
  output = Sanitizer.sanitize(html)
36
37
  output.should == "Eu &amp; voc&ecirc;"
37
38
  end
39
+
40
+ it "should clean '&' entries even when it is attached to a letter" do
41
+ html = "M&M"
42
+ output = Sanitizer.sanitize(html)
43
+ output.should == "M&amp;M"
44
+ end
38
45
  end
39
46
 
40
47
  describe "html_encode" do
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
4
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 6
10
- version: 0.1.6
5
+ version: 0.1.7
11
6
  platform: ruby
12
7
  authors:
13
8
  - Marcelo Eden
@@ -15,7 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2011-05-11 00:00:00 -03:00
13
+ date: 2011-05-12 00:00:00 -03:00
19
14
  default_executable:
20
15
  dependencies:
21
16
  - !ruby/object:Gem::Dependency
@@ -26,44 +21,20 @@ dependencies:
26
21
  requirements:
27
22
  - - ~>
28
23
  - !ruby/object:Gem::Version
29
- hash: 3
30
- segments:
31
- - 2
32
- - 3
33
- - 0
34
24
  version: 2.3.0
35
25
  type: :development
36
26
  version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: ruby-debug
39
- prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 3
46
- segments:
47
- - 0
48
- version: "0"
49
- type: :development
50
- version_requirements: *id002
51
27
  - !ruby/object:Gem::Dependency
52
28
  name: htmlentities
53
29
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
30
+ requirement: &id002 !ruby/object:Gem::Requirement
55
31
  none: false
56
32
  requirements:
57
33
  - - ~>
58
34
  - !ruby/object:Gem::Version
59
- hash: 51
60
- segments:
61
- - 4
62
- - 3
63
- - 0
64
35
  version: 4.3.0
65
36
  type: :runtime
66
- version_requirements: *id003
37
+ version_requirements: *id002
67
38
  description: Sanitizer.clean(text)
68
39
  email:
69
40
  - edendroid@gmail.com
@@ -77,7 +48,6 @@ files:
77
48
  - lib/sanitizer/htmlentries.rb
78
49
  - lib/sanitizer/sanitizer.rb
79
50
  - lib/sanitizer/version.rb
80
- - lib/sanitizer/whitelist.rb
81
51
  - lib/sanitizer.rb
82
52
  - spec/sanitizer_spec.rb
83
53
  has_rdoc: true
@@ -94,18 +64,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
64
  requirements:
95
65
  - - ">="
96
66
  - !ruby/object:Gem::Version
97
- hash: 3
98
- segments:
99
- - 0
100
67
  version: "0"
101
68
  required_rubygems_version: !ruby/object:Gem::Requirement
102
69
  none: false
103
70
  requirements:
104
71
  - - ">="
105
72
  - !ruby/object:Gem::Version
106
- hash: 3
107
- segments:
108
- - 0
109
73
  version: "0"
110
74
  requirements: []
111
75
 
@@ -1,180 +0,0 @@
1
- #
2
- # HTML whitelist lifted from HTML5 sanitizer code
3
- # http://code.google.com/p/html5lib/
4
- #
5
- module Sanitizer
6
- module WhiteList
7
- # <html5_license>
8
- #
9
- # Copyright (c) 2006-2008 The Authors
10
- #
11
- # Contributors:
12
- # James Graham - jg307@cam.ac.uk
13
- # Anne van Kesteren - annevankesteren@gmail.com
14
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
15
- # Matt McDonald - kanashii@kanashii.ca
16
- # Sam Ruby - rubys@intertwingly.net
17
- # Ian Hickson (Google) - ian@hixie.ch
18
- # Thomas Broyer - t.broyer@ltgt.net
19
- # Jacques Distler - distler@golem.ph.utexas.edu
20
- # Henri Sivonen - hsivonen@iki.fi
21
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
22
- #
23
- # Permission is hereby granted, free of charge, to any person
24
- # obtaining a copy of this software and associated documentation
25
- # files (the "Software"), to deal in the Software without
26
- # restriction, including without limitation the rights to use, copy,
27
- # modify, merge, publish, distribute, sublicense, and/or sell copies
28
- # of the Software, and to permit persons to whom the Software is
29
- # furnished to do so, subject to the following conditions:
30
- #
31
- # The above copyright notice and this permission notice shall be
32
- # included in all copies or substantial portions of the Software.
33
- #
34
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
38
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
39
- # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
40
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
41
- # DEALINGS IN THE SOFTWARE.
42
- #
43
- # </html5_license>
44
-
45
- # ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
46
- # button caption center cite code col colgroup dd del dfn dir div dl dt
47
- # em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
48
- # legend li map menu ol optgroup option p pre q s samp select small span
49
- # strike strong sub sup table tbody td textarea tfoot th thead tr tt u
50
- # ul var]
51
- #
52
- # MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
53
- # mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
54
- # msubsup msup mtable mtd mtext mtr munder munderover none]
55
- #
56
- # SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
57
- # circle defs desc ellipse font-face font-face-name font-face-src g
58
- # glyph hkern image linearGradient line marker metadata missing-glyph
59
- # mpath path polygon polyline radialGradient rect set stop svg switch
60
- # text title tspan use]
61
- #
62
- # ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
63
- # align alt axis border cellpadding cellspacing char charoff charset
64
- # checked cite class clear cols colspan color compact coords datetime
65
- # dir disabled enctype for frame headers height href hreflang hspace id
66
- # ismap label lang longdesc maxlength media method multiple name nohref
67
- # noshade nowrap prompt readonly rel rev rows rowspan rules scope
68
- # selected shape size span src start style summary tabindex target title
69
- # type usemap valign value vspace width xml:lang]
70
- #
71
- # MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
72
- # columnalign columnlines columnspacing columnspan depth display
73
- # displaystyle equalcolumns equalrows fence fontstyle fontweight frame
74
- # height linethickness lspace mathbackground mathcolor mathvariant
75
- # mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
76
- # rowspacing rowspan rspace scriptlevel selection separator stretchy
77
- # width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
78
- #
79
- # SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
80
- # arabic-form ascent attributeName attributeType baseProfile bbox begin
81
- # by calcMode cap-height class color color-rendering content cx cy d dx
82
- # dy descent display dur end fill fill-rule font-family font-size
83
- # font-stretch font-style font-variant font-weight from fx fy g1 g2
84
- # glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
85
- # ideographic k keyPoints keySplines keyTimes lang marker-end
86
- # marker-mid marker-start markerHeight markerUnits markerWidth
87
- # mathematical max min name offset opacity orient origin
88
- # overline-position overline-thickness panose-1 path pathLength points
89
- # preserveAspectRatio r refX refY repeatCount repeatDur
90
- # requiredExtensions requiredFeatures restart rotate rx ry slope stemh
91
- # stemv stop-color stop-opacity strikethrough-position
92
- # strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
93
- # stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
94
- # stroke-width systemLanguage target text-anchor to transform type u1
95
- # u2 underline-position underline-thickness unicode unicode-range
96
- # units-per-em values version viewBox visibility width widths x
97
- # x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
98
- # xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
99
- # xmlns:xlink y y1 y2 zoomAndPan]
100
-
101
- ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
102
-
103
- ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
104
- border-bottom-color border-collapse border-color border-left-color
105
- border-right-color border-top-color clear color cursor direction
106
- display elevation float font font-family font-size font-style
107
- font-variant font-weight height letter-spacing line-height overflow
108
- pause pause-after pause-before pitch pitch-range richness speak
109
- speak-header speak-numeral speak-punctuation speech-rate stress
110
- text-align text-decoration text-indent unicode-bidi vertical-align
111
- voice-family volume white-space width]
112
-
113
- ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
114
- brown center collapse dashed dotted fuchsia gray green !important
115
- italic left lime maroon medium none navy normal nowrap olive pointer
116
- purple red right solid silver teal top transparent underline white
117
- yellow]
118
-
119
- ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
120
- stroke-width stroke-linecap stroke-linejoin stroke-opacity]
121
-
122
- ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
123
- telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
124
-
125
- # subclasses may define their own versions of these constants
126
- #ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
127
- #ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
128
- ALLOWED_ELEMENTS = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
129
- 'big', 'blockquote', 'br', 'caption', 'center', 'cite',
130
- 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
131
- 'em', 'embed', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
132
- 'img', 'ins', 'kbd', 'li', 'map', 'menu', 'object', 'ol', 'p', 'param', 'pre', 'q',
133
- 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
134
- 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
135
- 'ul', 'var'
136
- ]
137
-
138
- ALLOWED_ATTRIBUTES = ['abbr', 'accept', 'accept-charset', 'accesskey',
139
- 'align', 'alt', 'axis', 'border', 'cellpadding',
140
- 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'clear',
141
- 'cols', 'colspan', 'color', 'compact', 'coords', 'data', 'datetime', 'dir',
142
- 'disabled', 'enctype', 'flashvars', 'for', 'frame', 'headers', 'href', 'hreflang',
143
- 'hspace', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media',
144
- 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt',
145
- 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected',
146
- 'shape', 'span', 'src', 'start', 'summary', 'tabindex', 'target',
147
- 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'xml:lang', 'width'
148
- ]
149
-
150
- ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
151
- ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
152
- ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
153
- ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
154
-
155
- VOID_ELEMENTS = %w[
156
- base
157
- link
158
- meta
159
- hr
160
- br
161
- img
162
- embed
163
- param
164
- area
165
- col
166
- input
167
- ]
168
- end
169
-
170
- module HashedWhiteList
171
- # turn each of the whitelist arrays into a hash for faster lookup
172
- WhiteList.constants.each do |constant|
173
- next unless WhiteList.module_eval("#{constant}").is_a?(Array)
174
- module_eval <<-CODE
175
- #{constant} = {}
176
- WhiteList::#{constant}.each { |c| #{constant}[c] = true ; #{constant}[c.downcase] = true }
177
- CODE
178
- end
179
- end
180
- end