lotus-utils 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fbc9ff8be5d774d9e30695445d43995a66cbf21d
4
- data.tar.gz: 4f5ccb96067b17d2c6f77b5b0d5eb5ccd2def30f
3
+ metadata.gz: 64d787540b5ba2d1447be4ca787761da010c880b
4
+ data.tar.gz: a8346f11b65e9500e7a260b53d077bf344e2c00b
5
5
  SHA512:
6
- metadata.gz: d254f1b09c52c91198986bc340f17b9cfe35146bcbeed9464c4f99d390e45eb8dbe8e11cb6672b42716e7136294a00300fe0cb207636a8e0e24cbc80f1d2eeef
7
- data.tar.gz: 18bcf46df8ab2013f334d962ed859919df1e6711562b89c2a8eec959141551f901b975c2dfdd084f00b894f53cd53f460dca6881add6d63c9cb176a13de3c35c
6
+ metadata.gz: b41336646647d1afd5280133b27fab60b88c267693aaef772ad43cc8fb460366be9ee80e04b16ff636a0b92382cb06287df8b2472815a87a3a6ed1d74731f1de
7
+ data.tar.gz: d7da2e60819e1c78de89b3b880ad7793475224b6647e5420d0446856aecff7dcdfa55fb621fc66ec1e1b0db16538d8052f8c9aef69336e98b66eca8482173e62
data/CHANGELOG.md CHANGED
@@ -1,6 +1,12 @@
1
1
  # Lotus::Utils
2
2
  Ruby core extensions and class utilities for Lotus
3
3
 
4
+ ## v0.4.0 - 2015-03-23
5
+ ### Added
6
+ - [Luca Guidi] Introduced `Lotus::Utils::Escape`. It implements OWASP/ESAPI suggestions for HTML, HTML attribute and URL escape utilities.
7
+ - [Luca Guidi] Introduced `Lotus::Utils::String#dasherize`
8
+ - [Luca Guidi] Introduced `Lotus::Utils::String#titleize`
9
+
4
10
  ## v0.3.5 - 2015-03-12
5
11
  ### Added
6
12
  - [Luca Guidi] Introduced `Lotus::Interactor`
@@ -59,7 +59,7 @@ module Lotus
59
59
  # attributes.get(23) # => "foo"
60
60
  # attributes.get('23') # => "foo"
61
61
  # attributes[23] # => "foo"
62
- # attributes[23] # => "foo"
62
+ # attributes['23'] # => "foo"
63
63
  #
64
64
  # attributes.get(:unknown) # => nil
65
65
  # attributes.get('unknown') # => nil
@@ -0,0 +1,574 @@
1
+ module Lotus
2
+ module Utils
3
+ # HTML escape utilities
4
+ #
5
+ # Based on OWASP research and OWASP ESAPI code
6
+ #
7
+ # @since 0.4.0
8
+ #
9
+ # @see https://www.owasp.org
10
+ # @see https://www.owasp.org/index.php/Cross-site_Scripting_%28XSS%29
11
+ # @see https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet
12
+ # @see https://www.owasp.org/index.php/ESAPI
13
+ # @see https://github.com/ESAPI/esapi-java-legacy
14
+ module Escape
15
+ # Hex base for base 10 integer conversion
16
+ #
17
+ # @since 0.4.0
18
+ # @api private
19
+ #
20
+ # @see http://www.ruby-doc.org/core/Fixnum.html#method-i-to_s
21
+ HEX_BASE = 16
22
+
23
+ # Limit for non printable chars
24
+ #
25
+ # @since 0.4.0
26
+ # @api private
27
+ LOW_HEX_CODE_LIMIT = 0xff
28
+
29
+ # Replacement hex for non printable characters
30
+ #
31
+ # @since 0.4.0
32
+ # @api private
33
+ REPLACEMENT_HEX = "fffd".freeze
34
+
35
+ # Low hex codes lookup table
36
+ #
37
+ # @since 0.4.0
38
+ # @api private
39
+ HEX_CODES = (0..255).each_with_object({}) do |c, codes|
40
+ if (c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A)
41
+ codes[c] = nil
42
+ else
43
+ codes[c] = c.to_s(HEX_BASE)
44
+ end
45
+ end.freeze
46
+
47
+ # Non printable chars
48
+ #
49
+ # This is a Hash instead of a Set, to make lookup faster.
50
+ #
51
+ # @since 0.4.0
52
+ # @api private
53
+ #
54
+ # @see https://gist.github.com/jodosha/ac5dd54416de744b9600
55
+ NON_PRINTABLE_CHARS = {
56
+ 0x0 => true, 0x1 => true, 0x2 => true, 0x3 => true, 0x4 => true,
57
+ 0x5 => true, 0x6 => true, 0x7 => true, 0x8 => true, 0x11 => true,
58
+ 0x12 => true, 0x14 => true, 0x15 => true, 0x16 => true, 0x17 => true,
59
+ 0x18 => true, 0x19 => true, 0x1a => true, 0x1b => true, 0x1c => true,
60
+ 0x1d => true, 0x1e => true, 0x1f => true, 0x7f => true, 0x80 => true,
61
+ 0x81 => true, 0x82 => true, 0x83 => true, 0x84 => true, 0x85 => true,
62
+ 0x86 => true, 0x87 => true, 0x88 => true, 0x89 => true, 0x8a => true,
63
+ 0x8b => true, 0x8c => true, 0x8d => true, 0x8e => true, 0x8f => true,
64
+ 0x90 => true, 0x91 => true, 0x92 => true, 0x93 => true, 0x94 => true,
65
+ 0x95 => true, 0x96 => true, 0x97 => true, 0x98 => true, 0x99 => true,
66
+ 0x9a => true, 0x9b => true, 0x9c => true, 0x9d => true, 0x9e => true,
67
+ 0x9f => true
68
+ }.freeze
69
+
70
+ # Lookup table for HTML escape
71
+ #
72
+ # @since 0.4.0
73
+ # @api private
74
+ #
75
+ # @see Lotus::Utils::Escape.html
76
+ HTML_CHARS = {
77
+ '&' => '&amp;',
78
+ '<' => '&lt;',
79
+ '>' => '&gt;',
80
+ '"' => '&quot;',
81
+ "'" => '&apos;',
82
+ '/' => '&#x2F;'
83
+ }.freeze
84
+
85
+ # Lookup table for safe chars for HTML attributes.
86
+ #
87
+ # This is a Hash instead of a Set, to make lookup faster.
88
+ #
89
+ # @since 0.4.0
90
+ # @api private
91
+ #
92
+ # @see Lotus::Utils::Escape.html_attribute
93
+ # @see https://gist.github.com/jodosha/ac5dd54416de744b9600
94
+ HTML_ATTRIBUTE_SAFE_CHARS = {
95
+ ',' => true, '.' => true, '-' => true, '_' => true
96
+ }.freeze
97
+
98
+ # Lookup table for HTML attribute escape
99
+ #
100
+ # @since 0.4.0
101
+ # @api private
102
+ #
103
+ # @see Lotus::Utils::Escape.html_attribute
104
+ HTML_ENTITIES = {
105
+ 34 => 'quot', # quotation mark
106
+ 38 => 'amp', # ampersand
107
+ 60 => 'lt', # less-than sign
108
+ 62 => 'gt', # greater-than sign
109
+ 160 => 'nbsp', # no-break space
110
+ 161 => 'iexcl', # inverted exclamation mark
111
+ 162 => 'cent', # cent sign
112
+ 163 => 'pound', # pound sign
113
+ 164 => 'curren', # currency sign
114
+ 165 => 'yen', # yen sign
115
+ 166 => 'brvbar', # broken bar
116
+ 167 => 'sect', # section sign
117
+ 168 => 'uml', # diaeresis
118
+ 169 => 'copy', # copyright sign
119
+ 170 => 'ordf', # feminine ordinal indicator
120
+ 171 => 'laquo', # left-pointing double angle quotation mark
121
+ 172 => 'not', # not sign
122
+ 173 => 'shy', # soft hyphen
123
+ 174 => 'reg', # registered sign
124
+ 175 => 'macr', # macron
125
+ 176 => 'deg', # degree sign
126
+ 177 => 'plusmn', # plus-minus sign
127
+ 178 => 'sup2', # superscript two
128
+ 179 => 'sup3', # superscript three
129
+ 180 => 'acute', # acute accent
130
+ 181 => 'micro', # micro sign
131
+ 182 => 'para', # pilcrow sign
132
+ 183 => 'middot', # middle dot
133
+ 184 => 'cedil', # cedilla
134
+ 185 => 'sup1', # superscript one
135
+ 186 => 'ordm', # masculine ordinal indicator
136
+ 187 => 'raquo', # right-pointing double angle quotation mark
137
+ 188 => 'frac14', # vulgar fraction one quarter
138
+ 189 => 'frac12', # vulgar fraction one half
139
+ 190 => 'frac34', # vulgar fraction three quarters
140
+ 191 => 'iquest', # inverted question mark
141
+ 192 => 'Agrave', # Latin capital letter a with grave
142
+ 193 => 'Aacute', # Latin capital letter a with acute
143
+ 194 => 'Acirc', # Latin capital letter a with circumflex
144
+ 195 => 'Atilde', # Latin capital letter a with tilde
145
+ 196 => 'Auml', # Latin capital letter a with diaeresis
146
+ 197 => 'Aring', # Latin capital letter a with ring above
147
+ 198 => 'AElig', # Latin capital letter ae
148
+ 199 => 'Ccedil', # Latin capital letter c with cedilla
149
+ 200 => 'Egrave', # Latin capital letter e with grave
150
+ 201 => 'Eacute', # Latin capital letter e with acute
151
+ 202 => 'Ecirc', # Latin capital letter e with circumflex
152
+ 203 => 'Euml', # Latin capital letter e with diaeresis
153
+ 204 => 'Igrave', # Latin capital letter i with grave
154
+ 205 => 'Iacute', # Latin capital letter i with acute
155
+ 206 => 'Icirc', # Latin capital letter i with circumflex
156
+ 207 => 'Iuml', # Latin capital letter i with diaeresis
157
+ 208 => 'ETH', # Latin capital letter eth
158
+ 209 => 'Ntilde', # Latin capital letter n with tilde
159
+ 210 => 'Ograve', # Latin capital letter o with grave
160
+ 211 => 'Oacute', # Latin capital letter o with acute
161
+ 212 => 'Ocirc', # Latin capital letter o with circumflex
162
+ 213 => 'Otilde', # Latin capital letter o with tilde
163
+ 214 => 'Ouml', # Latin capital letter o with diaeresis
164
+ 215 => 'times', # multiplication sign
165
+ 216 => 'Oslash', # Latin capital letter o with stroke
166
+ 217 => 'Ugrave', # Latin capital letter u with grave
167
+ 218 => 'Uacute', # Latin capital letter u with acute
168
+ 219 => 'Ucirc', # Latin capital letter u with circumflex
169
+ 220 => 'Uuml', # Latin capital letter u with diaeresis
170
+ 221 => 'Yacute', # Latin capital letter y with acute
171
+ 222 => 'THORN', # Latin capital letter thorn
172
+ 223 => 'szlig', # Latin small letter sharp s, German Eszett
173
+ 224 => 'agrave', # Latin small letter a with grave
174
+ 225 => 'aacute', # Latin small letter a with acute
175
+ 226 => 'acirc', # Latin small letter a with circumflex
176
+ 227 => 'atilde', # Latin small letter a with tilde
177
+ 228 => 'auml', # Latin small letter a with diaeresis
178
+ 229 => 'aring', # Latin small letter a with ring above
179
+ 230 => 'aelig', # Latin lowercase ligature ae
180
+ 231 => 'ccedil', # Latin small letter c with cedilla
181
+ 232 => 'egrave', # Latin small letter e with grave
182
+ 233 => 'eacute', # Latin small letter e with acute
183
+ 234 => 'ecirc', # Latin small letter e with circumflex
184
+ 235 => 'euml', # Latin small letter e with diaeresis
185
+ 236 => 'igrave', # Latin small letter i with grave
186
+ 237 => 'iacute', # Latin small letter i with acute
187
+ 238 => 'icirc', # Latin small letter i with circumflex
188
+ 239 => 'iuml', # Latin small letter i with diaeresis
189
+ 240 => 'eth', # Latin small letter eth
190
+ 241 => 'ntilde', # Latin small letter n with tilde
191
+ 242 => 'ograve', # Latin small letter o with grave
192
+ 243 => 'oacute', # Latin small letter o with acute
193
+ 244 => 'ocirc', # Latin small letter o with circumflex
194
+ 245 => 'otilde', # Latin small letter o with tilde
195
+ 246 => 'ouml', # Latin small letter o with diaeresis
196
+ 247 => 'divide', # division sign
197
+ 248 => 'oslash', # Latin small letter o with stroke
198
+ 249 => 'ugrave', # Latin small letter u with grave
199
+ 250 => 'uacute', # Latin small letter u with acute
200
+ 251 => 'ucirc', # Latin small letter u with circumflex
201
+ 252 => 'uuml', # Latin small letter u with diaeresis
202
+ 253 => 'yacute', # Latin small letter y with acute
203
+ 254 => 'thorn', # Latin small letter thorn
204
+ 255 => 'yuml', # Latin small letter y with diaeresis
205
+ 338 => 'OElig', # Latin capital ligature oe
206
+ 339 => 'oelig', # Latin small ligature oe
207
+ 352 => 'Scaron', # Latin capital letter s with caron
208
+ 353 => 'scaron', # Latin small letter s with caron
209
+ 376 => 'Yuml', # Latin capital letter y with diaeresis
210
+ 402 => 'fnof', # Latin small letter f with hook
211
+ 710 => 'circ', # modifier letter circumflex accent
212
+ 732 => 'tilde', # small tilde
213
+ 913 => 'Alpha', # Greek capital letter alpha
214
+ 914 => 'Beta', # Greek capital letter beta
215
+ 915 => 'Gamma', # Greek capital letter gamma
216
+ 916 => 'Delta', # Greek capital letter delta
217
+ 917 => 'Epsilon', # Greek capital letter epsilon
218
+ 918 => 'Zeta', # Greek capital letter zeta
219
+ 919 => 'Eta', # Greek capital letter eta
220
+ 920 => 'Theta', # Greek capital letter theta
221
+ 921 => 'Iota', # Greek capital letter iota
222
+ 922 => 'Kappa', # Greek capital letter kappa
223
+ 923 => 'Lambda', # Greek capital letter lambda
224
+ 924 => 'Mu', # Greek capital letter mu
225
+ 925 => 'Nu', # Greek capital letter nu
226
+ 926 => 'Xi', # Greek capital letter xi
227
+ 927 => 'Omicron', # Greek capital letter omicron
228
+ 928 => 'Pi', # Greek capital letter pi
229
+ 929 => 'Rho', # Greek capital letter rho
230
+ 931 => 'Sigma', # Greek capital letter sigma
231
+ 932 => 'Tau', # Greek capital letter tau
232
+ 933 => 'Upsilon', # Greek capital letter upsilon
233
+ 934 => 'Phi', # Greek capital letter phi
234
+ 935 => 'Chi', # Greek capital letter chi
235
+ 936 => 'Psi', # Greek capital letter psi
236
+ 937 => 'Omega', # Greek capital letter omega
237
+ 945 => 'alpha', # Greek small letter alpha
238
+ 946 => 'beta', # Greek small letter beta
239
+ 947 => 'gamma', # Greek small letter gamma
240
+ 948 => 'delta', # Greek small letter delta
241
+ 949 => 'epsilon', # Greek small letter epsilon
242
+ 950 => 'zeta', # Greek small letter zeta
243
+ 951 => 'eta', # Greek small letter eta
244
+ 952 => 'theta', # Greek small letter theta
245
+ 953 => 'iota', # Greek small letter iota
246
+ 954 => 'kappa', # Greek small letter kappa
247
+ 955 => 'lambda', # Greek small letter lambda
248
+ 956 => 'mu', # Greek small letter mu
249
+ 957 => 'nu', # Greek small letter nu
250
+ 958 => 'xi', # Greek small letter xi
251
+ 959 => 'omicron', # Greek small letter omicron
252
+ 960 => 'pi', # Greek small letter pi
253
+ 961 => 'rho', # Greek small letter rho
254
+ 962 => 'sigmaf', # Greek small letter final sigma
255
+ 963 => 'sigma', # Greek small letter sigma
256
+ 964 => 'tau', # Greek small letter tau
257
+ 965 => 'upsilon', # Greek small letter upsilon
258
+ 966 => 'phi', # Greek small letter phi
259
+ 967 => 'chi', # Greek small letter chi
260
+ 968 => 'psi', # Greek small letter psi
261
+ 969 => 'omega', # Greek small letter omega
262
+ 977 => 'thetasym', # Greek theta symbol
263
+ 978 => 'upsih', # Greek upsilon with hook symbol
264
+ 982 => 'piv', # Greek pi symbol
265
+ 8194 => 'ensp', # en space
266
+ 8195 => 'emsp', # em space
267
+ 8201 => 'thinsp', # thin space
268
+ 8204 => 'zwnj', # zero width non-joiner
269
+ 8205 => 'zwj', # zero width joiner
270
+ 8206 => 'lrm', # left-to-right mark
271
+ 8207 => 'rlm', # right-to-left mark
272
+ 8211 => 'ndash', # en dash
273
+ 8212 => 'mdash', # em dash
274
+ 8216 => 'lsquo', # left single quotation mark
275
+ 8217 => 'rsquo', # right single quotation mark
276
+ 8218 => 'sbquo', # single low-9 quotation mark
277
+ 8220 => 'ldquo', # left double quotation mark
278
+ 8221 => 'rdquo', # right double quotation mark
279
+ 8222 => 'bdquo', # double low-9 quotation mark
280
+ 8224 => 'dagger', # dagger
281
+ 8225 => 'Dagger', # double dagger
282
+ 8226 => 'bull', # bullet
283
+ 8230 => 'hellip', # horizontal ellipsis
284
+ 8240 => 'permil', # per mille sign
285
+ 8242 => 'prime', # prime
286
+ 8243 => 'Prime', # double prime
287
+ 8249 => 'lsaquo', # single left-pointing angle quotation mark
288
+ 8250 => 'rsaquo', # single right-pointing angle quotation mark
289
+ 8254 => 'oline', # overline
290
+ 8260 => 'frasl', # fraction slash
291
+ 8364 => 'euro', # euro sign
292
+ 8465 => 'image', # black-letter capital i
293
+ 8472 => 'weierp', # script capital p, Weierstrass p
294
+ 8476 => 'real', # black-letter capital r
295
+ 8482 => 'trade', # trademark sign
296
+ 8501 => 'alefsym', # alef symbol
297
+ 8592 => 'larr', # leftwards arrow
298
+ 8593 => 'uarr', # upwards arrow
299
+ 8594 => 'rarr', # rightwards arrow
300
+ 8595 => 'darr', # downwards arrow
301
+ 8596 => 'harr', # left right arrow
302
+ 8629 => 'crarr', # downwards arrow with corner leftwards
303
+ 8656 => 'lArr', # leftwards double arrow
304
+ 8657 => 'uArr', # upwards double arrow
305
+ 8658 => 'rArr', # rightwards double arrow
306
+ 8659 => 'dArr', # downwards double arrow
307
+ 8660 => 'hArr', # left right double arrow
308
+ 8704 => 'forall', # for all
309
+ 8706 => 'part', # partial differential
310
+ 8707 => 'exist', # there exists
311
+ 8709 => 'empty', # empty set
312
+ 8711 => 'nabla', # nabla
313
+ 8712 => 'isin', # element of
314
+ 8713 => 'notin', # not an element of
315
+ 8715 => 'ni', # contains as member
316
+ 8719 => 'prod', # n-ary product
317
+ 8721 => 'sum', # n-ary summation
318
+ 8722 => 'minus', # minus sign
319
+ 8727 => 'lowast', # asterisk operator
320
+ 8730 => 'radic', # square root
321
+ 8733 => 'prop', # proportional to
322
+ 8734 => 'infin', # infinity
323
+ 8736 => 'ang', # angle
324
+ 8743 => 'and', # logical and
325
+ 8744 => 'or', # logical or
326
+ 8745 => 'cap', # intersection
327
+ 8746 => 'cup', # union
328
+ 8747 => 'int', # integral
329
+ 8756 => 'there4', # therefore
330
+ 8764 => 'sim', # tilde operator
331
+ 8773 => 'cong', # congruent to
332
+ 8776 => 'asymp', # almost equal to
333
+ 8800 => 'ne', # not equal to
334
+ 8801 => 'equiv', # identical to, equivalent to
335
+ 8804 => 'le', # less-than or equal to
336
+ 8805 => 'ge', # greater-than or equal to
337
+ 8834 => 'sub', # subset of
338
+ 8835 => 'sup', # superset of
339
+ 8836 => 'nsub', # not a subset of
340
+ 8838 => 'sube', # subset of or equal to
341
+ 8839 => 'supe', # superset of or equal to
342
+ 8853 => 'oplus', # circled plus
343
+ 8855 => 'otimes', # circled times
344
+ 8869 => 'perp', # up tack
345
+ 8901 => 'sdot', # dot operator
346
+ 8968 => 'lceil', # left ceiling
347
+ 8969 => 'rceil', # right ceiling
348
+ 8970 => 'lfloor', # left floor
349
+ 8971 => 'rfloor', # right floor
350
+ 9001 => 'lang', # left-pointing angle bracket
351
+ 9002 => 'rang', # right-pointing angle bracket
352
+ 9674 => 'loz', # lozenge
353
+ 9824 => 'spades', # black spade suit
354
+ 9827 => 'clubs', # black club suit
355
+ 9829 => 'hearts', # black heart suit
356
+ 9830 => 'diams', # black diamond suit
357
+ }.freeze
358
+
359
+ # Allowed URL schemes
360
+ #
361
+ # @since 0.4.0
362
+ # @api private
363
+ #
364
+ # @see Lotus::Utils::Escape.url
365
+ DEFAULT_URL_SCHEMES = ['http', 'https', 'mailto'].freeze
366
+
367
+ # The output of an escape.
368
+ #
369
+ # It's marked with this special class for two reasons:
370
+ #
371
+ # * Don't double escape the same string (this is for `Lotus::Helpers` compatibility)
372
+ # * Leave open the possibility for developers to mark a string as safe with a higher-level API (eg. `#raw` in `Lotus::View` or `Lotus::Helpers`)
373
+ #
374
+ # @since 0.4.0
375
+ # @api private
376
+ class SafeString < ::String
377
+ # @return [SafeString] the duped string
378
+ #
379
+ # @since 0.4.0
380
+ # @api private
381
+ #
382
+ # @see http://www.ruby-doc.org/core/String.html#method-i-to_s
383
+ def to_s
384
+ dup
385
+ end
386
+
387
+ # Encode the string with the given encoding
388
+ #
389
+ # @return [SafeString] an encoded SafeString
390
+ #
391
+ # @since 0.4.0
392
+ # @api private
393
+ #
394
+ # @see http://www.ruby-doc.org/core/String.html#method-i-encode
395
+ def encode(*args)
396
+ self.class.new super
397
+ end
398
+ end
399
+
400
+ # Escape HTML contents
401
+ #
402
+ # This MUST be used only for tag contents.
403
+ # Please use `html_attribute` for escaping HTML attributes.
404
+ #
405
+ # @param input [String] the input
406
+ #
407
+ # @return [String] the escaped string
408
+ #
409
+ # @since 0.4.0
410
+ #
411
+ # @see https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet OWASP XSS Cheat Sheet Rule #1
412
+ #
413
+ # @example Good practice
414
+ # <div><%= Lotus::Utils::Escape.html('<script>alert(1);</script>') %></div>
415
+ # <div>&lt;script&gt;alert(1);&lt;&#x2F;script&gt;</div>
416
+ #
417
+ # @example Bad practice
418
+ # # WRONG Use Escape.html_attribute
419
+ # <a title="<%= Lotus::Utils::Escape.html('...') %>">link</a>
420
+ def self.html(input)
421
+ input = encode(input)
422
+ return input if input.is_a?(SafeString)
423
+
424
+ result = SafeString.new
425
+
426
+ input.chars do |chr|
427
+ result << HTML_CHARS.fetch(chr, chr)
428
+ end
429
+
430
+ result
431
+ end
432
+
433
+ # Escape HTML attributes
434
+ #
435
+ # This can be used both for HTML attributes and contents.
436
+ # Please note that this is more computationally expensive.
437
+ # If you need to escape only HTML contents, please use `.html`.
438
+ #
439
+ # @param input [String] the input
440
+ #
441
+ # @return [String] the escaped string
442
+ #
443
+ # @since 0.4.0
444
+ #
445
+ # @see https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet OWASP XSS Cheat Sheet Rule #2
446
+ #
447
+ # @example Good practice
448
+ # <a title="<%= Lotus::Utils::Escape.html_attribute('...') %>">link</a>
449
+ #
450
+ # @example Good but expensive practice
451
+ # # Alternatively you can use Escape.html
452
+ # <p><%= Lotus::Utils::Escape.html_attribute('...') %></p>
453
+ def self.html_attribute(input)
454
+ input = encode(input)
455
+ return input if input.is_a?(SafeString)
456
+
457
+ result = SafeString.new
458
+
459
+ input.chars do |chr|
460
+ result << encode_char(chr, HTML_ATTRIBUTE_SAFE_CHARS)
461
+ end
462
+
463
+ result
464
+ end
465
+
466
+ # Escape URL for HTML attributes (href, src, etc..).
467
+ #
468
+ # It extracts from the given input the first valid URL that matches the
469
+ # whitelisted schemes (default: http, https and mailto).
470
+ #
471
+ # It's possible to pass a second optional argument to specify different
472
+ # schemes.
473
+ #
474
+ # @param input [String] the input
475
+ # @param schemes [Array<String>] an array of whitelisted schemes
476
+ #
477
+ # @return [String] the escaped string
478
+ #
479
+ # @since 0.4.0
480
+ #
481
+ # @see Lotus::Utils::Escape::DEFAULT_URL_SCHEMES
482
+ # @see http://www.ruby-doc.org/stdlib/libdoc/uri/rdoc/URI.html#method-c-extract
483
+ #
484
+ # @example Basic usage
485
+ # <%
486
+ # good_input = "http://lotusrb.org"
487
+ # evil_input = "javascript:alert('xss')"
488
+ #
489
+ # escaped_good_input = Lotus::Utils::Escape.url(good_input) # => "http://lotusrb.org"
490
+ # escaped_evil_input = Lotus::Utils::Escape.url(evil_input) # => ""
491
+ # %>
492
+ #
493
+ # <a href="<%= escaped_good_input %>">personal website</a>
494
+ # <a href="<%= escaped_evil_input %>">personal website</a>
495
+ #
496
+ # @example Custom scheme
497
+ # <%
498
+ # schemes = ['ftp', 'ftps']
499
+ #
500
+ # accepted = "ftps://ftp.example.org"
501
+ # rejected = "http://www.example.org"
502
+ #
503
+ # escaped_accepted = Lotus::Utils::Escape.url(accepted, schemes) # => "ftps://ftp.example.org"
504
+ # escaped_rejected = Lotus::Utils::Escape.url(rejected, schemes) # => ""
505
+ # %>
506
+ #
507
+ # <a href="<%= escaped_accepted %>">FTP</a>
508
+ # <a href="<%= escaped_rejected %>">FTP</a>
509
+ def self.url(input, schemes = DEFAULT_URL_SCHEMES)
510
+ input = encode(input)
511
+ return input if input.is_a?(SafeString)
512
+
513
+ SafeString.new(
514
+ URI.extract(
515
+ URI.decode(input),
516
+ schemes
517
+ ).first.to_s
518
+ )
519
+ end
520
+
521
+ private
522
+ # Encode the given string into UTF-8
523
+ #
524
+ # @param input [String] the input
525
+ #
526
+ # @return [String] a UTF-8 encoded string
527
+ #
528
+ # @since 0.4.0
529
+ # @api private
530
+ def self.encode(input)
531
+ input.to_s.encode(Encoding::UTF_8)
532
+ end
533
+
534
+ # Encode the given UTF-8 char.
535
+ #
536
+ # @param char [String] a UTF-8 char
537
+ # @param safe_chars [Hash] a table of safe chars
538
+ #
539
+ # @return [String] an HTML encoded string
540
+ #
541
+ # @since 0.4.0
542
+ # @api private
543
+ def self.encode_char(char, safe_chars = {})
544
+ return char if safe_chars[char]
545
+
546
+ code = char.ord
547
+ hex = hex_for_non_alphanumeric_code(code)
548
+ return char if hex.nil?
549
+
550
+ if NON_PRINTABLE_CHARS[code]
551
+ hex = REPLACEMENT_HEX
552
+ end
553
+
554
+ if entity = HTML_ENTITIES[code]
555
+ "&#{ entity };"
556
+ else
557
+ "&#x#{ hex };"
558
+ end
559
+ end
560
+
561
+ # Transforms the given char code
562
+ #
563
+ # @since 0.4.0
564
+ # @api private
565
+ def self.hex_for_non_alphanumeric_code(input)
566
+ if input < LOW_HEX_CODE_LIMIT
567
+ HEX_CODES[input]
568
+ else
569
+ input.to_s(HEX_BASE)
570
+ end
571
+ end
572
+ end
573
+ end
574
+ end
@@ -32,7 +32,7 @@ module Lotus
32
32
  #
33
33
  # @since 0.3.0
34
34
  # @api private
35
- UNDERSCORE_SEPARATOR = "/".freeze
35
+ UNDERSCORE_SEPARATOR = '/'.freeze
36
36
 
37
37
  # gsub second parameter used in #underscore
38
38
  #
@@ -40,6 +40,24 @@ module Lotus
40
40
  # @api private
41
41
  UNDERSCORE_DIVISION_TARGET = '\1_\2'.freeze
42
42
 
43
+ # Separator for #titleize
44
+ #
45
+ # @since 0.4.0
46
+ # @api private
47
+ TITLEIZE_SEPARATOR = ' '.freeze
48
+
49
+ # Separator for #dasherize
50
+ #
51
+ # @since 0.4.0
52
+ # @api private
53
+ DASHERIZE_SEPARATOR = '-'.freeze
54
+
55
+ # Regexp for #classify
56
+ #
57
+ # @since 0.3.4
58
+ # @api private
59
+ CLASSIFY_WORD_SEPARATOR = /#{CLASSIFY_SEPARATOR}|#{NAMESPACE_SEPARATOR}|#{UNDERSCORE_SEPARATOR}/
60
+
43
61
  # Initialize the string
44
62
  #
45
63
  # @param string [::String, Symbol] the value we want to initialize
@@ -51,6 +69,21 @@ module Lotus
51
69
  @string = string.to_s
52
70
  end
53
71
 
72
+ # Return a titleized version of the string
73
+ #
74
+ # @return [Lotus::Utils::String] the transformed string
75
+ #
76
+ # @since 0.4.0
77
+ #
78
+ # @example
79
+ # require 'lotus/utils/string'
80
+ #
81
+ # string = Lotus::Utils::String.new 'lotus utils'
82
+ # string.titleize # => "Lotus Utils"
83
+ def titleize
84
+ self.class.new underscore.split(CLASSIFY_SEPARATOR).map(&:capitalize).join(TITLEIZE_SEPARATOR)
85
+ end
86
+
54
87
  # Return a CamelCase version of the string
55
88
  #
56
89
  # @return [String] the transformed string
@@ -63,7 +96,14 @@ module Lotus
63
96
  # string = Lotus::Utils::String.new 'lotus_utils'
64
97
  # string.classify # => 'LotusUtils'
65
98
  def classify
66
- self.class.new split(CLASSIFY_SEPARATOR).map(&:capitalize).join
99
+ words = split(CLASSIFY_WORD_SEPARATOR).map(&:capitalize)
100
+ delimiters = scan(CLASSIFY_WORD_SEPARATOR)
101
+
102
+ delimiters.map! do |delimiter|
103
+ delimiter == CLASSIFY_SEPARATOR ? nil : NAMESPACE_SEPARATOR
104
+ end
105
+
106
+ self.class.new words.zip(delimiters).compact.join
67
107
  end
68
108
 
69
109
  # Return a downcased and underscore separated version of the string
@@ -84,10 +124,32 @@ module Lotus
84
124
  new_string = gsub(NAMESPACE_SEPARATOR, UNDERSCORE_SEPARATOR)
85
125
  new_string.gsub!(/([A-Z\d]+)([A-Z][a-z])/, UNDERSCORE_DIVISION_TARGET)
86
126
  new_string.gsub!(/([a-z\d])([A-Z])/, UNDERSCORE_DIVISION_TARGET)
127
+ new_string.gsub!(/[[:space:]]|\-/, UNDERSCORE_DIVISION_TARGET)
87
128
  new_string.downcase!
88
129
  self.class.new new_string
89
130
  end
90
131
 
132
+ # Return a downcased and dash separated version of the string
133
+ #
134
+ # @return [Lotus::Utils::String] the transformed string
135
+ #
136
+ # @since 0.4.0
137
+ #
138
+ # @example
139
+ # require 'lotus/utils/string'
140
+ #
141
+ # string = Lotus::Utils::String.new 'Lotus Utils'
142
+ # string.dasherize # => 'lotus-utils'
143
+ #
144
+ # string = Lotus::Utils::String.new 'lotus_utils'
145
+ # string.dasherize # => 'lotus-utils'
146
+ #
147
+ # string = Lotus::Utils::String.new 'LotusUtils'
148
+ # string.dasherize # => "lotus-utils"
149
+ def dasherize
150
+ self.class.new underscore.split(CLASSIFY_SEPARATOR).join(DASHERIZE_SEPARATOR)
151
+ end
152
+
91
153
  # Return the string without the Ruby namespace of the class
92
154
  #
93
155
  # @return [String] the transformed string
@@ -3,6 +3,6 @@ module Lotus
3
3
  # Defines the version
4
4
  #
5
5
  # @since 0.1.0
6
- VERSION = '0.3.5'.freeze
6
+ VERSION = '0.4.0'.freeze
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lotus-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luca Guidi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-03-12 00:00:00.000000000 Z
12
+ date: 2015-03-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -72,6 +72,7 @@ files:
72
72
  - lib/lotus/utils/class.rb
73
73
  - lib/lotus/utils/class_attribute.rb
74
74
  - lib/lotus/utils/deprecation.rb
75
+ - lib/lotus/utils/escape.rb
75
76
  - lib/lotus/utils/hash.rb
76
77
  - lib/lotus/utils/io.rb
77
78
  - lib/lotus/utils/kernel.rb