sanitizer 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,8 @@
1
+ # encoding: utf-8
1
2
  class HTMLEntities
2
3
  class Encoder #:nodoc:
3
4
  def basic_entity_regexp
4
- @basic_entity_regexp ||= (
5
- case @flavor
6
- when /^html/
7
- /[<>"]|(\&(?!\w))/
8
- else
9
- /[<>'"]|(\&(?!\w))/
10
- end
11
- )
5
+ @basic_entity_regexp ||= /[<>'"]|(\&(?!(\w+\;)))/
12
6
  end
13
7
  end
14
8
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  module Sanitizer
2
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
3
4
  end
data/lib/sanitizer.rb CHANGED
@@ -4,6 +4,5 @@ require 'htmlentities'
4
4
 
5
5
  # Local Libs
6
6
  $:.unshift(File.dirname(__FILE__) + '/../../lib')
7
- require 'sanitizer/whitelist'
8
7
  require 'sanitizer/htmlentries'
9
8
  require 'sanitizer/sanitizer'
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  describe Sanitizer do
@@ -35,6 +36,12 @@ describe Sanitizer do
35
36
  output = Sanitizer.sanitize(html)
36
37
  output.should == "Eu &amp; voc&ecirc;"
37
38
  end
39
+
40
+ it "should clean '&' entries even when it is attached to a letter" do
41
+ html = "M&M"
42
+ output = Sanitizer.sanitize(html)
43
+ output.should == "M&amp;M"
44
+ end
38
45
  end
39
46
 
40
47
  describe "html_encode" do
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
4
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 6
10
- version: 0.1.6
5
+ version: 0.1.7
11
6
  platform: ruby
12
7
  authors:
13
8
  - Marcelo Eden
@@ -15,7 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2011-05-11 00:00:00 -03:00
13
+ date: 2011-05-12 00:00:00 -03:00
19
14
  default_executable:
20
15
  dependencies:
21
16
  - !ruby/object:Gem::Dependency
@@ -26,44 +21,20 @@ dependencies:
26
21
  requirements:
27
22
  - - ~>
28
23
  - !ruby/object:Gem::Version
29
- hash: 3
30
- segments:
31
- - 2
32
- - 3
33
- - 0
34
24
  version: 2.3.0
35
25
  type: :development
36
26
  version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: ruby-debug
39
- prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 3
46
- segments:
47
- - 0
48
- version: "0"
49
- type: :development
50
- version_requirements: *id002
51
27
  - !ruby/object:Gem::Dependency
52
28
  name: htmlentities
53
29
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
30
+ requirement: &id002 !ruby/object:Gem::Requirement
55
31
  none: false
56
32
  requirements:
57
33
  - - ~>
58
34
  - !ruby/object:Gem::Version
59
- hash: 51
60
- segments:
61
- - 4
62
- - 3
63
- - 0
64
35
  version: 4.3.0
65
36
  type: :runtime
66
- version_requirements: *id003
37
+ version_requirements: *id002
67
38
  description: Sanitizer.clean(text)
68
39
  email:
69
40
  - edendroid@gmail.com
@@ -77,7 +48,6 @@ files:
77
48
  - lib/sanitizer/htmlentries.rb
78
49
  - lib/sanitizer/sanitizer.rb
79
50
  - lib/sanitizer/version.rb
80
- - lib/sanitizer/whitelist.rb
81
51
  - lib/sanitizer.rb
82
52
  - spec/sanitizer_spec.rb
83
53
  has_rdoc: true
@@ -94,18 +64,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
94
64
  requirements:
95
65
  - - ">="
96
66
  - !ruby/object:Gem::Version
97
- hash: 3
98
- segments:
99
- - 0
100
67
  version: "0"
101
68
  required_rubygems_version: !ruby/object:Gem::Requirement
102
69
  none: false
103
70
  requirements:
104
71
  - - ">="
105
72
  - !ruby/object:Gem::Version
106
- hash: 3
107
- segments:
108
- - 0
109
73
  version: "0"
110
74
  requirements: []
111
75
 
@@ -1,180 +0,0 @@
1
- #
2
- # HTML whitelist lifted from HTML5 sanitizer code
3
- # http://code.google.com/p/html5lib/
4
- #
5
- module Sanitizer
6
- module WhiteList
7
- # <html5_license>
8
- #
9
- # Copyright (c) 2006-2008 The Authors
10
- #
11
- # Contributors:
12
- # James Graham - jg307@cam.ac.uk
13
- # Anne van Kesteren - annevankesteren@gmail.com
14
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
15
- # Matt McDonald - kanashii@kanashii.ca
16
- # Sam Ruby - rubys@intertwingly.net
17
- # Ian Hickson (Google) - ian@hixie.ch
18
- # Thomas Broyer - t.broyer@ltgt.net
19
- # Jacques Distler - distler@golem.ph.utexas.edu
20
- # Henri Sivonen - hsivonen@iki.fi
21
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
22
- #
23
- # Permission is hereby granted, free of charge, to any person
24
- # obtaining a copy of this software and associated documentation
25
- # files (the "Software"), to deal in the Software without
26
- # restriction, including without limitation the rights to use, copy,
27
- # modify, merge, publish, distribute, sublicense, and/or sell copies
28
- # of the Software, and to permit persons to whom the Software is
29
- # furnished to do so, subject to the following conditions:
30
- #
31
- # The above copyright notice and this permission notice shall be
32
- # included in all copies or substantial portions of the Software.
33
- #
34
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
38
- # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
39
- # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
40
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
41
- # DEALINGS IN THE SOFTWARE.
42
- #
43
- # </html5_license>
44
-
45
- # ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
46
- # button caption center cite code col colgroup dd del dfn dir div dl dt
47
- # em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
48
- # legend li map menu ol optgroup option p pre q s samp select small span
49
- # strike strong sub sup table tbody td textarea tfoot th thead tr tt u
50
- # ul var]
51
- #
52
- # MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
53
- # mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
54
- # msubsup msup mtable mtd mtext mtr munder munderover none]
55
- #
56
- # SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
57
- # circle defs desc ellipse font-face font-face-name font-face-src g
58
- # glyph hkern image linearGradient line marker metadata missing-glyph
59
- # mpath path polygon polyline radialGradient rect set stop svg switch
60
- # text title tspan use]
61
- #
62
- # ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
63
- # align alt axis border cellpadding cellspacing char charoff charset
64
- # checked cite class clear cols colspan color compact coords datetime
65
- # dir disabled enctype for frame headers height href hreflang hspace id
66
- # ismap label lang longdesc maxlength media method multiple name nohref
67
- # noshade nowrap prompt readonly rel rev rows rowspan rules scope
68
- # selected shape size span src start style summary tabindex target title
69
- # type usemap valign value vspace width xml:lang]
70
- #
71
- # MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
72
- # columnalign columnlines columnspacing columnspan depth display
73
- # displaystyle equalcolumns equalrows fence fontstyle fontweight frame
74
- # height linethickness lspace mathbackground mathcolor mathvariant
75
- # mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
76
- # rowspacing rowspan rspace scriptlevel selection separator stretchy
77
- # width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
78
- #
79
- # SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
80
- # arabic-form ascent attributeName attributeType baseProfile bbox begin
81
- # by calcMode cap-height class color color-rendering content cx cy d dx
82
- # dy descent display dur end fill fill-rule font-family font-size
83
- # font-stretch font-style font-variant font-weight from fx fy g1 g2
84
- # glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
85
- # ideographic k keyPoints keySplines keyTimes lang marker-end
86
- # marker-mid marker-start markerHeight markerUnits markerWidth
87
- # mathematical max min name offset opacity orient origin
88
- # overline-position overline-thickness panose-1 path pathLength points
89
- # preserveAspectRatio r refX refY repeatCount repeatDur
90
- # requiredExtensions requiredFeatures restart rotate rx ry slope stemh
91
- # stemv stop-color stop-opacity strikethrough-position
92
- # strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
93
- # stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
94
- # stroke-width systemLanguage target text-anchor to transform type u1
95
- # u2 underline-position underline-thickness unicode unicode-range
96
- # units-per-em values version viewBox visibility width widths x
97
- # x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
98
- # xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
99
- # xmlns:xlink y y1 y2 zoomAndPan]
100
-
101
- ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]
102
-
103
- ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
104
- border-bottom-color border-collapse border-color border-left-color
105
- border-right-color border-top-color clear color cursor direction
106
- display elevation float font font-family font-size font-style
107
- font-variant font-weight height letter-spacing line-height overflow
108
- pause pause-after pause-before pitch pitch-range richness speak
109
- speak-header speak-numeral speak-punctuation speech-rate stress
110
- text-align text-decoration text-indent unicode-bidi vertical-align
111
- voice-family volume white-space width]
112
-
113
- ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
114
- brown center collapse dashed dotted fuchsia gray green !important
115
- italic left lime maroon medium none navy normal nowrap olive pointer
116
- purple red right solid silver teal top transparent underline white
117
- yellow]
118
-
119
- ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
120
- stroke-width stroke-linecap stroke-linejoin stroke-opacity]
121
-
122
- ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
123
- telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
124
-
125
- # subclasses may define their own versions of these constants
126
- #ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
127
- #ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
128
- ALLOWED_ELEMENTS = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
129
- 'big', 'blockquote', 'br', 'caption', 'center', 'cite',
130
- 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
131
- 'em', 'embed', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
132
- 'img', 'ins', 'kbd', 'li', 'map', 'menu', 'object', 'ol', 'p', 'param', 'pre', 'q',
133
- 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
134
- 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
135
- 'ul', 'var'
136
- ]
137
-
138
- ALLOWED_ATTRIBUTES = ['abbr', 'accept', 'accept-charset', 'accesskey',
139
- 'align', 'alt', 'axis', 'border', 'cellpadding',
140
- 'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'clear',
141
- 'cols', 'colspan', 'color', 'compact', 'coords', 'data', 'datetime', 'dir',
142
- 'disabled', 'enctype', 'flashvars', 'for', 'frame', 'headers', 'href', 'hreflang',
143
- 'hspace', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media',
144
- 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt',
145
- 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected',
146
- 'shape', 'span', 'src', 'start', 'summary', 'tabindex', 'target',
147
- 'title', 'type', 'usemap', 'valign', 'value', 'vspace', 'xml:lang', 'width'
148
- ]
149
-
150
- ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
151
- ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
152
- ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
153
- ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS
154
-
155
- VOID_ELEMENTS = %w[
156
- base
157
- link
158
- meta
159
- hr
160
- br
161
- img
162
- embed
163
- param
164
- area
165
- col
166
- input
167
- ]
168
- end
169
-
170
- module HashedWhiteList
171
- # turn each of the whitelist arrays into a hash for faster lookup
172
- WhiteList.constants.each do |constant|
173
- next unless WhiteList.module_eval("#{constant}").is_a?(Array)
174
- module_eval <<-CODE
175
- #{constant} = {}
176
- WhiteList::#{constant}.each { |c| #{constant}[c] = true ; #{constant}[c.downcase] = true }
177
- CODE
178
- end
179
- end
180
- end