lotus-utils 0.3.5 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbc9ff8be5d774d9e30695445d43995a66cbf21d
4
- data.tar.gz: 4f5ccb96067b17d2c6f77b5b0d5eb5ccd2def30f
3
+ metadata.gz: 64d787540b5ba2d1447be4ca787761da010c880b
4
+ data.tar.gz: a8346f11b65e9500e7a260b53d077bf344e2c00b
5
5
  SHA512:
6
- metadata.gz: d254f1b09c52c91198986bc340f17b9cfe35146bcbeed9464c4f99d390e45eb8dbe8e11cb6672b42716e7136294a00300fe0cb207636a8e0e24cbc80f1d2eeef
7
- data.tar.gz: 18bcf46df8ab2013f334d962ed859919df1e6711562b89c2a8eec959141551f901b975c2dfdd084f00b894f53cd53f460dca6881add6d63c9cb176a13de3c35c
6
+ metadata.gz: b41336646647d1afd5280133b27fab60b88c267693aaef772ad43cc8fb460366be9ee80e04b16ff636a0b92382cb06287df8b2472815a87a3a6ed1d74731f1de
7
+ data.tar.gz: d7da2e60819e1c78de89b3b880ad7793475224b6647e5420d0446856aecff7dcdfa55fb621fc66ec1e1b0db16538d8052f8c9aef69336e98b66eca8482173e62
data/CHANGELOG.md CHANGED
@@ -1,6 +1,12 @@
1
1
  # Lotus::Utils
2
2
  Ruby core extensions and class utilities for Lotus
3
3
 
4
+ ## v0.4.0 - 2015-03-23
5
+ ### Added
6
+ - [Luca Guidi] Introduced `Lotus::Utils::Escape`. It implements OWASP/ESAPI suggestions for HTML, HTML attribute and URL escape utilities.
7
+ - [Luca Guidi] Introduced `Lotus::Utils::String#dasherize`
8
+ - [Luca Guidi] Introduced `Lotus::Utils::String#titleize`
9
+
4
10
  ## v0.3.5 - 2015-03-12
5
11
  ### Added
6
12
  - [Luca Guidi] Introduced `Lotus::Interactor`
@@ -59,7 +59,7 @@ module Lotus
59
59
  # attributes.get(23) # => "foo"
60
60
  # attributes.get('23') # => "foo"
61
61
  # attributes[23] # => "foo"
62
- # attributes[23] # => "foo"
62
+ # attributes['23'] # => "foo"
63
63
  #
64
64
  # attributes.get(:unknown) # => nil
65
65
  # attributes.get('unknown') # => nil
@@ -0,0 +1,574 @@
1
+ module Lotus
2
+ module Utils
3
+ # HTML escape utilities
4
+ #
5
+ # Based on OWASP research and OWASP ESAPI code
6
+ #
7
+ # @since 0.4.0
8
+ #
9
+ # @see https://www.owasp.org
10
+ # @see https://www.owasp.org/index.php/Cross-site_Scripting_%28XSS%29
11
+ # @see https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet
12
+ # @see https://www.owasp.org/index.php/ESAPI
13
+ # @see https://github.com/ESAPI/esapi-java-legacy
14
+ module Escape
15
# Radix handed to Integer#to_s when a character code is rendered as
# hexadecimal
#
# @since 0.4.0
# @api private
#
# @see http://www.ruby-doc.org/core/Fixnum.html#method-i-to_s
HEX_BASE = 16

# Upper bound for the codes resolved through the HEX_CODES lookup table.
# Codes that are not below this value are converted on the fly.
#
# @since 0.4.0
# @api private
LOW_HEX_CODE_LIMIT = 0xff

# Hex value emitted in place of non printable characters
#
# @since 0.4.0
# @api private
REPLACEMENT_HEX = "fffd".freeze

# Lookup table for the codes 0..255
#
# ASCII digits and letters map to nil (they need no escaping), every other
# code maps to its lowercase hexadecimal representation.
#
# @since 0.4.0
# @api private
HEX_CODES = (0..255).each_with_object({}) do |code, table|
  alphanumeric = (0x30..0x39).cover?(code) ||
                 (0x41..0x5A).cover?(code) ||
                 (0x61..0x7A).cover?(code)

  table[code] = alphanumeric ? nil : code.to_s(HEX_BASE)
end.freeze
46
+
47
# Non printable chars
#
# Following OWASP ESAPI, a code is considered non printable when it is a C0
# control character other than TAB (0x09), LF (0x0a) and CR (0x0d), or when
# it lies in the 0x7f..0x9f range.
#
# The table is generated from those ranges instead of being written by
# hand, so that no code can be left out by mistake (0x0b, 0x0c, 0x0e, 0x0f,
# 0x10 and 0x13 were previously missing).
#
# This is a Hash instead of a Set, to make lookup faster.
#
# @since 0.4.0
# @api private
#
# @see https://gist.github.com/jodosha/ac5dd54416de744b9600
NON_PRINTABLE_CHARS = (
  ((0x00..0x1f).to_a - [0x09, 0x0a, 0x0d]) + (0x7f..0x9f).to_a
).each_with_object({}) { |code, table| table[code] = true }.freeze
69
+
70
# Replacements applied to HTML tag contents, keyed by the character that
# has to be escaped
#
# NOTE(review): the OWASP XSS cheat sheet suggests `&#x27;` rather than
# `&apos;` for the single quote - confirm whether `&apos;` is intentional.
#
# @since 0.4.0
# @api private
#
# @see Lotus::Utils::Escape.html
HTML_CHARS = {
  "&" => "&amp;",
  "<" => "&lt;",
  ">" => "&gt;",
  "\"" => "&quot;",
  "'" => "&apos;",
  "/" => "&#x2F;"
}.freeze
84
+
85
# Punctuation that is emitted verbatim when escaping HTML attributes
#
# This is a Hash instead of a Set, to make lookup faster.
#
# @since 0.4.0
# @api private
#
# @see Lotus::Utils::Escape.html_attribute
# @see https://gist.github.com/jodosha/ac5dd54416de744b9600
HTML_ATTRIBUTE_SAFE_CHARS = %w(, . - _).each_with_object({}) do |chr, table|
  table[chr] = true
end.freeze
97
+
98
+ # Lookup table for HTML attribute escape
99
+ #
100
+ # @since 0.4.0
101
+ # @api private
102
+ #
103
+ # @see Lotus::Utils::Escape.html_attribute
104
+ HTML_ENTITIES = {
105
+ 34 => 'quot', # quotation mark
106
+ 38 => 'amp', # ampersand
107
+ 60 => 'lt', # less-than sign
108
+ 62 => 'gt', # greater-than sign
109
+ 160 => 'nbsp', # no-break space
110
+ 161 => 'iexcl', # inverted exclamation mark
111
+ 162 => 'cent', # cent sign
112
+ 163 => 'pound', # pound sign
113
+ 164 => 'curren', # currency sign
114
+ 165 => 'yen', # yen sign
115
+ 166 => 'brvbar', # broken bar
116
+ 167 => 'sect', # section sign
117
+ 168 => 'uml', # diaeresis
118
+ 169 => 'copy', # copyright sign
119
+ 170 => 'ordf', # feminine ordinal indicator
120
+ 171 => 'laquo', # left-pointing double angle quotation mark
121
+ 172 => 'not', # not sign
122
+ 173 => 'shy', # soft hyphen
123
+ 174 => 'reg', # registered sign
124
+ 175 => 'macr', # macron
125
+ 176 => 'deg', # degree sign
126
+ 177 => 'plusmn', # plus-minus sign
127
+ 178 => 'sup2', # superscript two
128
+ 179 => 'sup3', # superscript three
129
+ 180 => 'acute', # acute accent
130
+ 181 => 'micro', # micro sign
131
+ 182 => 'para', # pilcrow sign
132
+ 183 => 'middot', # middle dot
133
+ 184 => 'cedil', # cedilla
134
+ 185 => 'sup1', # superscript one
135
+ 186 => 'ordm', # masculine ordinal indicator
136
+ 187 => 'raquo', # right-pointing double angle quotation mark
137
+ 188 => 'frac14', # vulgar fraction one quarter
138
+ 189 => 'frac12', # vulgar fraction one half
139
+ 190 => 'frac34', # vulgar fraction three quarters
140
+ 191 => 'iquest', # inverted question mark
141
+ 192 => 'Agrave', # Latin capital letter a with grave
142
+ 193 => 'Aacute', # Latin capital letter a with acute
143
+ 194 => 'Acirc', # Latin capital letter a with circumflex
144
+ 195 => 'Atilde', # Latin capital letter a with tilde
145
+ 196 => 'Auml', # Latin capital letter a with diaeresis
146
+ 197 => 'Aring', # Latin capital letter a with ring above
147
+ 198 => 'AElig', # Latin capital letter ae
148
+ 199 => 'Ccedil', # Latin capital letter c with cedilla
149
+ 200 => 'Egrave', # Latin capital letter e with grave
150
+ 201 => 'Eacute', # Latin capital letter e with acute
151
+ 202 => 'Ecirc', # Latin capital letter e with circumflex
152
+ 203 => 'Euml', # Latin capital letter e with diaeresis
153
+ 204 => 'Igrave', # Latin capital letter i with grave
154
+ 205 => 'Iacute', # Latin capital letter i with acute
155
+ 206 => 'Icirc', # Latin capital letter i with circumflex
156
+ 207 => 'Iuml', # Latin capital letter i with diaeresis
157
+ 208 => 'ETH', # Latin capital letter eth
158
+ 209 => 'Ntilde', # Latin capital letter n with tilde
159
+ 210 => 'Ograve', # Latin capital letter o with grave
160
+ 211 => 'Oacute', # Latin capital letter o with acute
161
+ 212 => 'Ocirc', # Latin capital letter o with circumflex
162
+ 213 => 'Otilde', # Latin capital letter o with tilde
163
+ 214 => 'Ouml', # Latin capital letter o with diaeresis
164
+ 215 => 'times', # multiplication sign
165
+ 216 => 'Oslash', # Latin capital letter o with stroke
166
+ 217 => 'Ugrave', # Latin capital letter u with grave
167
+ 218 => 'Uacute', # Latin capital letter u with acute
168
+ 219 => 'Ucirc', # Latin capital letter u with circumflex
169
+ 220 => 'Uuml', # Latin capital letter u with diaeresis
170
+ 221 => 'Yacute', # Latin capital letter y with acute
171
+ 222 => 'THORN', # Latin capital letter thorn
172
+ 223 => 'szlig', # Latin small letter sharp s, German Eszett
173
+ 224 => 'agrave', # Latin small letter a with grave
174
+ 225 => 'aacute', # Latin small letter a with acute
175
+ 226 => 'acirc', # Latin small letter a with circumflex
176
+ 227 => 'atilde', # Latin small letter a with tilde
177
+ 228 => 'auml', # Latin small letter a with diaeresis
178
+ 229 => 'aring', # Latin small letter a with ring above
179
+ 230 => 'aelig', # Latin lowercase ligature ae
180
+ 231 => 'ccedil', # Latin small letter c with cedilla
181
+ 232 => 'egrave', # Latin small letter e with grave
182
+ 233 => 'eacute', # Latin small letter e with acute
183
+ 234 => 'ecirc', # Latin small letter e with circumflex
184
+ 235 => 'euml', # Latin small letter e with diaeresis
185
+ 236 => 'igrave', # Latin small letter i with grave
186
+ 237 => 'iacute', # Latin small letter i with acute
187
+ 238 => 'icirc', # Latin small letter i with circumflex
188
+ 239 => 'iuml', # Latin small letter i with diaeresis
189
+ 240 => 'eth', # Latin small letter eth
190
+ 241 => 'ntilde', # Latin small letter n with tilde
191
+ 242 => 'ograve', # Latin small letter o with grave
192
+ 243 => 'oacute', # Latin small letter o with acute
193
+ 244 => 'ocirc', # Latin small letter o with circumflex
194
+ 245 => 'otilde', # Latin small letter o with tilde
195
+ 246 => 'ouml', # Latin small letter o with diaeresis
196
+ 247 => 'divide', # division sign
197
+ 248 => 'oslash', # Latin small letter o with stroke
198
+ 249 => 'ugrave', # Latin small letter u with grave
199
+ 250 => 'uacute', # Latin small letter u with acute
200
+ 251 => 'ucirc', # Latin small letter u with circumflex
201
+ 252 => 'uuml', # Latin small letter u with diaeresis
202
+ 253 => 'yacute', # Latin small letter y with acute
203
+ 254 => 'thorn', # Latin small letter thorn
204
+ 255 => 'yuml', # Latin small letter y with diaeresis
205
+ 338 => 'OElig', # Latin capital ligature oe
206
+ 339 => 'oelig', # Latin small ligature oe
207
+ 352 => 'Scaron', # Latin capital letter s with caron
208
+ 353 => 'scaron', # Latin small letter s with caron
209
+ 376 => 'Yuml', # Latin capital letter y with diaeresis
210
+ 402 => 'fnof', # Latin small letter f with hook
211
+ 710 => 'circ', # modifier letter circumflex accent
212
+ 732 => 'tilde', # small tilde
213
+ 913 => 'Alpha', # Greek capital letter alpha
214
+ 914 => 'Beta', # Greek capital letter beta
215
+ 915 => 'Gamma', # Greek capital letter gamma
216
+ 916 => 'Delta', # Greek capital letter delta
217
+ 917 => 'Epsilon', # Greek capital letter epsilon
218
+ 918 => 'Zeta', # Greek capital letter zeta
219
+ 919 => 'Eta', # Greek capital letter eta
220
+ 920 => 'Theta', # Greek capital letter theta
221
+ 921 => 'Iota', # Greek capital letter iota
222
+ 922 => 'Kappa', # Greek capital letter kappa
223
+ 923 => 'Lambda', # Greek capital letter lambda
224
+ 924 => 'Mu', # Greek capital letter mu
225
+ 925 => 'Nu', # Greek capital letter nu
226
+ 926 => 'Xi', # Greek capital letter xi
227
+ 927 => 'Omicron', # Greek capital letter omicron
228
+ 928 => 'Pi', # Greek capital letter pi
229
+ 929 => 'Rho', # Greek capital letter rho
230
+ 931 => 'Sigma', # Greek capital letter sigma
231
+ 932 => 'Tau', # Greek capital letter tau
232
+ 933 => 'Upsilon', # Greek capital letter upsilon
233
+ 934 => 'Phi', # Greek capital letter phi
234
+ 935 => 'Chi', # Greek capital letter chi
235
+ 936 => 'Psi', # Greek capital letter psi
236
+ 937 => 'Omega', # Greek capital letter omega
237
+ 945 => 'alpha', # Greek small letter alpha
238
+ 946 => 'beta', # Greek small letter beta
239
+ 947 => 'gamma', # Greek small letter gamma
240
+ 948 => 'delta', # Greek small letter delta
241
+ 949 => 'epsilon', # Greek small letter epsilon
242
+ 950 => 'zeta', # Greek small letter zeta
243
+ 951 => 'eta', # Greek small letter eta
244
+ 952 => 'theta', # Greek small letter theta
245
+ 953 => 'iota', # Greek small letter iota
246
+ 954 => 'kappa', # Greek small letter kappa
247
+ 955 => 'lambda', # Greek small letter lambda
248
+ 956 => 'mu', # Greek small letter mu
249
+ 957 => 'nu', # Greek small letter nu
250
+ 958 => 'xi', # Greek small letter xi
251
+ 959 => 'omicron', # Greek small letter omicron
252
+ 960 => 'pi', # Greek small letter pi
253
+ 961 => 'rho', # Greek small letter rho
254
+ 962 => 'sigmaf', # Greek small letter final sigma
255
+ 963 => 'sigma', # Greek small letter sigma
256
+ 964 => 'tau', # Greek small letter tau
257
+ 965 => 'upsilon', # Greek small letter upsilon
258
+ 966 => 'phi', # Greek small letter phi
259
+ 967 => 'chi', # Greek small letter chi
260
+ 968 => 'psi', # Greek small letter psi
261
+ 969 => 'omega', # Greek small letter omega
262
+ 977 => 'thetasym', # Greek theta symbol
263
+ 978 => 'upsih', # Greek upsilon with hook symbol
264
+ 982 => 'piv', # Greek pi symbol
265
+ 8194 => 'ensp', # en space
266
+ 8195 => 'emsp', # em space
267
+ 8201 => 'thinsp', # thin space
268
+ 8204 => 'zwnj', # zero width non-joiner
269
+ 8205 => 'zwj', # zero width joiner
270
+ 8206 => 'lrm', # left-to-right mark
271
+ 8207 => 'rlm', # right-to-left mark
272
+ 8211 => 'ndash', # en dash
273
+ 8212 => 'mdash', # em dash
274
+ 8216 => 'lsquo', # left single quotation mark
275
+ 8217 => 'rsquo', # right single quotation mark
276
+ 8218 => 'sbquo', # single low-9 quotation mark
277
+ 8220 => 'ldquo', # left double quotation mark
278
+ 8221 => 'rdquo', # right double quotation mark
279
+ 8222 => 'bdquo', # double low-9 quotation mark
280
+ 8224 => 'dagger', # dagger
281
+ 8225 => 'Dagger', # double dagger
282
+ 8226 => 'bull', # bullet
283
+ 8230 => 'hellip', # horizontal ellipsis
284
+ 8240 => 'permil', # per mille sign
285
+ 8242 => 'prime', # prime
286
+ 8243 => 'Prime', # double prime
287
+ 8249 => 'lsaquo', # single left-pointing angle quotation mark
288
+ 8250 => 'rsaquo', # single right-pointing angle quotation mark
289
+ 8254 => 'oline', # overline
290
+ 8260 => 'frasl', # fraction slash
291
+ 8364 => 'euro', # euro sign
292
+ 8465 => 'image', # black-letter capital i
293
+ 8472 => 'weierp', # script capital p, Weierstrass p
294
+ 8476 => 'real', # black-letter capital r
295
+ 8482 => 'trade', # trademark sign
296
+ 8501 => 'alefsym', # alef symbol
297
+ 8592 => 'larr', # leftwards arrow
298
+ 8593 => 'uarr', # upwards arrow
299
+ 8594 => 'rarr', # rightwards arrow
300
+ 8595 => 'darr', # downwards arrow
301
+ 8596 => 'harr', # left right arrow
302
+ 8629 => 'crarr', # downwards arrow with corner leftwards
303
+ 8656 => 'lArr', # leftwards double arrow
304
+ 8657 => 'uArr', # upwards double arrow
305
+ 8658 => 'rArr', # rightwards double arrow
306
+ 8659 => 'dArr', # downwards double arrow
307
+ 8660 => 'hArr', # left right double arrow
308
+ 8704 => 'forall', # for all
309
+ 8706 => 'part', # partial differential
310
+ 8707 => 'exist', # there exists
311
+ 8709 => 'empty', # empty set
312
+ 8711 => 'nabla', # nabla
313
+ 8712 => 'isin', # element of
314
+ 8713 => 'notin', # not an element of
315
+ 8715 => 'ni', # contains as member
316
+ 8719 => 'prod', # n-ary product
317
+ 8721 => 'sum', # n-ary summation
318
+ 8722 => 'minus', # minus sign
319
+ 8727 => 'lowast', # asterisk operator
320
+ 8730 => 'radic', # square root
321
+ 8733 => 'prop', # proportional to
322
+ 8734 => 'infin', # infinity
323
+ 8736 => 'ang', # angle
324
+ 8743 => 'and', # logical and
325
+ 8744 => 'or', # logical or
326
+ 8745 => 'cap', # intersection
327
+ 8746 => 'cup', # union
328
+ 8747 => 'int', # integral
329
+ 8756 => 'there4', # therefore
330
+ 8764 => 'sim', # tilde operator
331
+ 8773 => 'cong', # congruent to
332
+ 8776 => 'asymp', # almost equal to
333
+ 8800 => 'ne', # not equal to
334
+ 8801 => 'equiv', # identical to, equivalent to
335
+ 8804 => 'le', # less-than or equal to
336
+ 8805 => 'ge', # greater-than or equal to
337
+ 8834 => 'sub', # subset of
338
+ 8835 => 'sup', # superset of
339
+ 8836 => 'nsub', # not a subset of
340
+ 8838 => 'sube', # subset of or equal to
341
+ 8839 => 'supe', # superset of or equal to
342
+ 8853 => 'oplus', # circled plus
343
+ 8855 => 'otimes', # circled times
344
+ 8869 => 'perp', # up tack
345
+ 8901 => 'sdot', # dot operator
346
+ 8968 => 'lceil', # left ceiling
347
+ 8969 => 'rceil', # right ceiling
348
+ 8970 => 'lfloor', # left floor
349
+ 8971 => 'rfloor', # right floor
350
+ 9001 => 'lang', # left-pointing angle bracket
351
+ 9002 => 'rang', # right-pointing angle bracket
352
+ 9674 => 'loz', # lozenge
353
+ 9824 => 'spades', # black spade suit
354
+ 9827 => 'clubs', # black club suit
355
+ 9829 => 'hearts', # black heart suit
356
+ 9830 => 'diams', # black diamond suit
357
+ }.freeze
358
+
359
# URL schemes accepted when no explicit whitelist is given
#
# @since 0.4.0
# @api private
#
# @see Lotus::Utils::Escape.url
DEFAULT_URL_SCHEMES = %w(http https mailto).freeze
366
+
367
# The output of an escape.
#
# It's marked with this special class for two reasons:
#
#   * Don't double escape the same string (this is for `Lotus::Helpers` compatibility)
#   * Leave open the possibility for developers to mark a string as safe with a higher level API (eg. `#raw` in `Lotus::View` or `Lotus::Helpers`)
#
# @since 0.4.0
# @api private
class SafeString < ::String
  # Return a copy that is still a SafeString.
  #
  # String#to_s converts instances of String subclasses into plain Strings,
  # which would drop the "already escaped" marker; duplicating keeps it.
  #
  # @return [SafeString] the duped string
  #
  # @since 0.4.0
  # @api private
  #
  # @see http://www.ruby-doc.org/core/String.html#method-i-to_s
  def to_s
    dup
  end

  # Encode the string with the given encoding
  #
  # Positional arguments and keyword options (eg. `invalid: :replace`) are
  # forwarded explicitly to String#encode, so that options are not turned
  # into a positional Hash on Ruby versions that separate keywords from
  # positional arguments.
  #
  # @param args [Array] the destination (and optionally source) encoding
  # @param options [Hash] the conversion options accepted by String#encode
  #
  # @return [SafeString] an encoded SafeString
  #
  # @since 0.4.0
  # @api private
  #
  # @see http://www.ruby-doc.org/core/String.html#method-i-encode
  def encode(*args, **options)
    self.class.new(super(*args, **options))
  end
end
399
+
400
# Escape HTML contents
#
# This MUST be used only for tag contents.
# Please use `html_attribute` for escaping HTML attributes.
#
# The input is converted to a UTF-8 string first. If that conversion yields
# a SafeString (i.e. the input was already escaped), it's returned as is.
#
# @param input [String] the input
#
# @return [String] the escaped string, as a SafeString
#
# @since 0.4.0
#
# @see https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet OWASP XSS Cheat Sheet Rule #1
#
# @example Good practice
#   <div><%= Lotus::Utils::Escape.html('<script>alert(1);</script>') %></div>
#   <div>&lt;script&gt;alert(1);&lt;&#x2F;script&gt;</div>
#
# @example Bad practice
#   # WRONG Use Escape.html_attribute
#   <a title="<%= Lotus::Utils::Escape.html('...') %>">link</a>
def self.html(input)
  input = encode(input)
  return input if input.is_a?(SafeString)

  result = SafeString.new

  # each_char instead of chars: passing a block to String#chars is deprecated
  input.each_char do |chr|
    result << HTML_CHARS.fetch(chr, chr)
  end

  result
end
432
+
433
# Escape HTML attributes
#
# This can be used both for HTML attributes and contents.
# Please note that this is more computationally expensive.
# If you need to escape only HTML contents, please use `.html`.
#
# The input is converted to a UTF-8 string first. If that conversion yields
# a SafeString (i.e. the input was already escaped), it's returned as is.
#
# @param input [String] the input
#
# @return [String] the escaped string, as a SafeString
#
# @since 0.4.0
#
# @see https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet OWASP XSS Cheat Sheet Rule #2
#
# @example Good practice
#   <a title="<%= Lotus::Utils::Escape.html_attribute('...') %>">link</a>
#
# @example Good but expensive practice
#   # Alternatively you can use Escape.html
#   <p><%= Lotus::Utils::Escape.html_attribute('...') %></p>
def self.html_attribute(input)
  input = encode(input)
  return input if input.is_a?(SafeString)

  result = SafeString.new

  # each_char instead of chars: passing a block to String#chars is deprecated
  input.each_char do |chr|
    result << encode_char(chr, HTML_ATTRIBUTE_SAFE_CHARS)
  end

  result
end
465
+
466
# Escape URL for HTML attributes (href, src, etc..).
#
# It extracts from the given input the first valid URL that matches the
# whitelisted schemes (default: http, https and mailto).
#
# It's possible to pass a second optional argument to specify different
# schemes.
#
# The input is converted to a UTF-8 string first. If that conversion yields
# a SafeString (i.e. the input was already escaped), it's returned as is.
# When no URL with an accepted scheme is found, an empty string is returned.
#
# @param input [String] the input
# @param schemes [Array<String>] an array of whitelisted schemes
#
# @return [String] the escaped string, as a SafeString
#
# @since 0.4.0
#
# @see Lotus::Utils::Escape::DEFAULT_URL_SCHEMES
# @see http://www.ruby-doc.org/stdlib/libdoc/uri/rdoc/URI.html#method-c-extract
#
# @example Basic usage
#   <%
#     good_input = "http://lotusrb.org"
#     evil_input = "javascript:alert('xss')"
#
#     escaped_good_input = Lotus::Utils::Escape.url(good_input) # => "http://lotusrb.org"
#     escaped_evil_input = Lotus::Utils::Escape.url(evil_input) # => ""
#   %>
#
#   <a href="<%= escaped_good_input %>">personal website</a>
#   <a href="<%= escaped_evil_input %>">personal website</a>
#
# @example Custom scheme
#   <%
#     schemes = ['ftp', 'ftps']
#
#     accepted = "ftps://ftp.example.org"
#     rejected = "http://www.example.org"
#
#     escaped_accepted = Lotus::Utils::Escape.url(accepted, schemes) # => "ftps://ftp.example.org"
#     escaped_rejected = Lotus::Utils::Escape.url(rejected, schemes) # => ""
#   %>
#
#   <a href="<%= escaped_accepted %>">FTP</a>
#   <a href="<%= escaped_rejected %>">FTP</a>
def self.url(input, schemes = DEFAULT_URL_SCHEMES)
  input = encode(input)
  return input if input.is_a?(SafeString)

  # URI.decode and URI.extract are obsolete wrappers around the default
  # parser (URI.decode is no longer available on recent Ruby versions), so
  # the parser is used directly.
  parser  = URI::DEFAULT_PARSER
  decoded = parser.unescape(input)

  # Array#first is nil when nothing matches; nil.to_s yields ""
  SafeString.new(parser.extract(decoded, schemes).first.to_s)
end
520
+
521
+ private
522
# Convert the given input to a string and transcode it to UTF-8
#
# NOTE(review): a bare `private` has no effect on methods defined with
# `def self.`, so this method is still publicly callable - confirm whether
# `private_class_method` was intended.
#
# @param input [#to_s] the input
#
# @return [String] an UTF-8 encoded string
#
# @since 0.4.0
# @api private
def self.encode(input)
  string = input.to_s
  string.encode(Encoding::UTF_8)
end
533
+
534
# Encode the given UTF-8 char.
#
# Chars listed in `safe_chars` and chars without a hex code (see
# `hex_for_non_alphanumeric_code`) are returned untouched. A char with a
# known named entity becomes `&name;`; everything else becomes a
# hexadecimal reference, where non printable chars use the replacement
# code.
#
# @param char [String] an UTF-8 char
# @param safe_chars [Hash] a table of safe chars
#
# @return [String] an HTML encoded string
#
# @since 0.4.0
# @api private
def self.encode_char(char, safe_chars = {})
  return char if safe_chars[char]

  code = char.ord
  hex  = hex_for_non_alphanumeric_code(code)
  return char unless hex

  entity = HTML_ENTITIES[code]
  return "&#{ entity };" if entity

  hex = REPLACEMENT_HEX if NON_PRINTABLE_CHARS[code]
  "&#x#{ hex };"
end
560
+
561
# Return the hexadecimal representation of the given char code
#
# Codes below LOW_HEX_CODE_LIMIT are resolved through the HEX_CODES lookup
# table, all the other codes are converted on the fly.
#
# @param input [Integer] the char code
#
# @return [String, nil] the hex code, or nil when the lookup table holds
#   nil for that code
#
# @since 0.4.0
# @api private
def self.hex_for_non_alphanumeric_code(input)
  return input.to_s(HEX_BASE) unless input < LOW_HEX_CODE_LIMIT

  HEX_CODES[input]
end
572
+ end
573
+ end
574
+ end
@@ -32,7 +32,7 @@ module Lotus
32
32
  #
33
33
  # @since 0.3.0
34
34
  # @api private
35
- UNDERSCORE_SEPARATOR = "/".freeze
35
+ UNDERSCORE_SEPARATOR = '/'.freeze
36
36
 
37
37
  # gsub second parameter used in #underscore
38
38
  #
@@ -40,6 +40,24 @@ module Lotus
40
40
  # @api private
41
41
  UNDERSCORE_DIVISION_TARGET = '\1_\2'.freeze
42
42
 
43
+ # Separator for #titleize
44
+ #
45
+ # @since 0.4.0
46
+ # @api private
47
+ TITLEIZE_SEPARATOR = ' '.freeze
48
+
49
+ # Separator for #dasherize
50
+ #
51
+ # @since 0.4.0
52
+ # @api private
53
+ DASHERIZE_SEPARATOR = '-'.freeze
54
+
55
+ # Regexp for #classify
56
+ #
57
+ # @since 0.3.4
58
+ # @api private
59
+ CLASSIFY_WORD_SEPARATOR = /#{CLASSIFY_SEPARATOR}|#{NAMESPACE_SEPARATOR}|#{UNDERSCORE_SEPARATOR}/
60
+
43
61
  # Initialize the string
44
62
  #
45
63
  # @param string [::String, Symbol] the value we want to initialize
@@ -51,6 +69,21 @@ module Lotus
51
69
  @string = string.to_s
52
70
  end
53
71
 
72
# Return a titleized version of the string
#
# The string is underscored first, then each word is capitalized and the
# words are joined with TITLEIZE_SEPARATOR.
#
# @return [Lotus::Utils::String] the transformed string
#
# @since 0.4.0
#
# @example
#   require 'lotus/utils/string'
#
#   string = Lotus::Utils::String.new 'lotus utils'
#   string.titleize # => "Lotus Utils"
def titleize
  words = underscore.split(CLASSIFY_SEPARATOR)
  self.class.new words.map(&:capitalize).join(TITLEIZE_SEPARATOR)
end
86
+
54
87
  # Return a CamelCase version of the string
55
88
  #
56
89
  # @return [String] the transformed string
@@ -63,7 +96,14 @@ module Lotus
63
96
  # string = Lotus::Utils::String.new 'lotus_utils'
64
97
  # string.classify # => 'LotusUtils'
65
98
  def classify
66
- self.class.new split(CLASSIFY_SEPARATOR).map(&:capitalize).join
99
+ words = split(CLASSIFY_WORD_SEPARATOR).map(&:capitalize)
100
+ delimiters = scan(CLASSIFY_WORD_SEPARATOR)
101
+
102
+ delimiters.map! do |delimiter|
103
+ delimiter == CLASSIFY_SEPARATOR ? nil : NAMESPACE_SEPARATOR
104
+ end
105
+
106
+ self.class.new words.zip(delimiters).compact.join
67
107
  end
68
108
 
69
109
  # Return a downcased and underscore separated version of the string
@@ -84,10 +124,32 @@ module Lotus
84
124
  new_string = gsub(NAMESPACE_SEPARATOR, UNDERSCORE_SEPARATOR)
85
125
  new_string.gsub!(/([A-Z\d]+)([A-Z][a-z])/, UNDERSCORE_DIVISION_TARGET)
86
126
  new_string.gsub!(/([a-z\d])([A-Z])/, UNDERSCORE_DIVISION_TARGET)
127
+ new_string.gsub!(/[[:space:]]|\-/, UNDERSCORE_DIVISION_TARGET)
87
128
  new_string.downcase!
88
129
  self.class.new new_string
89
130
  end
90
131
 
132
# Return a downcased and dash separated version of the string
#
# The string is underscored first, then its words are joined with
# DASHERIZE_SEPARATOR.
#
# @return [Lotus::Utils::String] the transformed string
#
# @since 0.4.0
#
# @example
#   require 'lotus/utils/string'
#
#   string = Lotus::Utils::String.new 'Lotus Utils'
#   string.dasherize # => 'lotus-utils'
#
#   string = Lotus::Utils::String.new 'lotus_utils'
#   string.dasherize # => 'lotus-utils'
#
#   string = Lotus::Utils::String.new 'LotusUtils'
#   string.dasherize # => "lotus-utils"
def dasherize
  words = underscore.split(CLASSIFY_SEPARATOR)
  self.class.new words.join(DASHERIZE_SEPARATOR)
end
152
+
91
153
  # Return the string without the Ruby namespace of the class
92
154
  #
93
155
  # @return [String] the transformed string
@@ -3,6 +3,6 @@ module Lotus
3
3
  # Defines the version
4
4
  #
5
5
  # @since 0.1.0
6
- VERSION = '0.3.5'.freeze
6
+ VERSION = '0.4.0'.freeze
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lotus-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luca Guidi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-03-12 00:00:00.000000000 Z
12
+ date: 2015-03-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -72,6 +72,7 @@ files:
72
72
  - lib/lotus/utils/class.rb
73
73
  - lib/lotus/utils/class_attribute.rb
74
74
  - lib/lotus/utils/deprecation.rb
75
+ - lib/lotus/utils/escape.rb
75
76
  - lib/lotus/utils/hash.rb
76
77
  - lib/lotus/utils/io.rb
77
78
  - lib/lotus/utils/kernel.rb