owasp-esapi-ruby 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/.document +5 -0
  2. data/AUTHORS +5 -0
  3. data/ChangeLog +69 -0
  4. data/ISSUES +0 -0
  5. data/LICENSE +24 -0
  6. data/README +51 -0
  7. data/Rakefile +63 -0
  8. data/VERSION +1 -0
  9. data/lib/codec/base_codec.rb +99 -0
  10. data/lib/codec/css_codec.rb +101 -0
  11. data/lib/codec/encoder.rb +330 -0
  12. data/lib/codec/html_codec.rb +424 -0
  13. data/lib/codec/javascript_codec.rb +119 -0
  14. data/lib/codec/mysql_codec.rb +131 -0
  15. data/lib/codec/oracle_codec.rb +46 -0
  16. data/lib/codec/os_codec.rb +78 -0
  17. data/lib/codec/percent_codec.rb +53 -0
  18. data/lib/codec/pushable_string.rb +114 -0
  19. data/lib/codec/vbscript_codec.rb +64 -0
  20. data/lib/codec/xml_codec.rb +173 -0
  21. data/lib/esapi.rb +68 -0
  22. data/lib/exceptions.rb +37 -0
  23. data/lib/executor.rb +20 -0
  24. data/lib/owasp-esapi-ruby.rb +13 -0
  25. data/lib/sanitizer/xss.rb +59 -0
  26. data/lib/validator/base_rule.rb +90 -0
  27. data/lib/validator/date_rule.rb +92 -0
  28. data/lib/validator/email.rb +29 -0
  29. data/lib/validator/float_rule.rb +76 -0
  30. data/lib/validator/generic_validator.rb +26 -0
  31. data/lib/validator/integer_rule.rb +61 -0
  32. data/lib/validator/string_rule.rb +146 -0
  33. data/lib/validator/validator_error_list.rb +48 -0
  34. data/lib/validator/zipcode.rb +27 -0
  35. data/spec/codec/css_codec_spec.rb +61 -0
  36. data/spec/codec/html_codec_spec.rb +87 -0
  37. data/spec/codec/javascript_codec_spec.rb +45 -0
  38. data/spec/codec/mysql_codec_spec.rb +44 -0
  39. data/spec/codec/oracle_codec_spec.rb +23 -0
  40. data/spec/codec/os_codec_spec.rb +51 -0
  41. data/spec/codec/percent_codec_spec.rb +34 -0
  42. data/spec/codec/vbcript_codec_spec.rb +23 -0
  43. data/spec/codec/xml_codec_spec.rb +83 -0
  44. data/spec/owasp_esapi_encoder_spec.rb +226 -0
  45. data/spec/owasp_esapi_executor_spec.rb +9 -0
  46. data/spec/owasp_esapi_ruby_email_validator_spec.rb +39 -0
  47. data/spec/owasp_esapi_ruby_xss_sanitizer_spec.rb +66 -0
  48. data/spec/owasp_esapi_ruby_zipcode_validator_spec.rb +42 -0
  49. data/spec/spec_helper.rb +10 -0
  50. data/spec/validator/base_rule_spec.rb +29 -0
  51. data/spec/validator/date_rule_spec.rb +40 -0
  52. data/spec/validator/float_rule_spec.rb +31 -0
  53. data/spec/validator/integer_rule_spec.rb +51 -0
  54. data/spec/validator/string_rule_spec.rb +103 -0
  55. data/spec/validator_skeleton.rb +150 -0
  56. metadata +235 -0
@@ -0,0 +1,424 @@
1
+ # Implementation of the Codec interface for HTML entity encoding.
2
+ module Owasp
3
+ module Esapi
4
+ module Codec
5
+ class HtmlCodec < BaseCodec
6
# Builds the lookup state used when decoding named entities.
#
# After construction:
# * @longest_key holds the length of the longest entity name in ENTITY_MAP
# * @lookup_map maps each lower-cased entity name to the name as spelled in
#   ENTITY_MAP, so names can be looked up case-insensitively. Names that
#   differ only by case share one slot; the one listed last wins.
def initialize
  @longest_key = 0
  @lookup_map = {}
  ENTITY_MAP.each_key do |name|
    # Record the actual maximum length. Incrementing a counter each time a
    # longer key shows up only gives the right answer when the map happens
    # to contain enough keys in a lucky order.
    @longest_key = name.size if name.size > @longest_key
    @lookup_map[name.downcase] = name
  end
end
16
+
17
# Encodes a Character for safe use in an HTML entity field.
#
# immune - collection of characters that are returned untouched (only
#          include? is called on it)
# input  - the character to encode, as a one-character String
#
# Returns input unchanged when it is immune or when hex(input) is nil.
# Otherwise returns "&name;" when ENTITY_MAP has a name for the code point,
# or the hexadecimal form "&#xHH;" when it does not.
def encode_char(immune, input)
  return input if immune.include?(input)

  # hex is defined outside this method; a nil result is treated here as
  # "this character needs no encoding".
  hex_value = hex(input)
  return input if hex_value.nil?

  code_point = input.ord
  # Tab, line feed and carriage return are the only low control characters
  # allowed through. These must be double-quoted: '\t' in single quotes is a
  # backslash followed by "t" and would never match.
  harmless_control = ["\t", "\n", "\r"].include?(input)
  if (code_point <= 0x1f && !harmless_control) || (0x7f..0x9f).cover?(code_point)
    hex_value = REPLACEMENT_HEX
    code_point = REPLACEMENT_CHAR.ord
  end

  # Prefer the entity name when one exists, otherwise encode as a hex value.
  entity_name = ENTITY_MAP.key(code_point)
  entity_name ? "&#{entity_name};" : "&#x#{hex_value};"
end
36
+
37
# Returns the decoded version of the character starting at the current
# position of input, or nil if no decoding is possible. Whenever nil is
# returned the input has been reset to the position marked on entry.
# Formats all are legal both with and without semi-colon, upper/lower case:
# * &#dddd;
# * &#xhhhh;
# * &name;
def decode_char(input)
  input.mark

  # An entity must open with '&'; the character after it picks the format.
  lead = input.next
  selector = lead == '&' ? input.next : nil

  decoded =
    if selector.nil?
      nil
    elsif selector == '#'
      numeric_entity(input)
    elsif selector =~ /[a-zA-Z]/
      # the letter is part of the name, so hand it back before parsing
      input.push(selector)
      named_entity(input)
    end

  input.reset if decoded.nil?
  decoded
end
75
+
76
# Decodes the body of a numeric entity. An "x" or "X" at the current
# position is consumed and the rest is parsed as hexadecimal; anything else
# is parsed as decimal. Returns nil when no character is available.
def numeric_entity(input) #:nodoc:
  lookahead = input.peek
  return nil if lookahead.nil?
  return parse_number(input) unless lookahead.downcase.eql?("x")

  input.next
  parse_hex(input)
end
86
+
87
# check to see if the input is a named entity
#
# Reads ahead to find the longest entity name at the current position. A
# name spelled exactly as in ENTITY_MAP wins, so "Aacute" and "aacute" stay
# distinct; otherwise the case-insensitive @lookup_map is consulted so that
# e.g. "LT" is still recognised.
#
# On a match the input is reset to its mark and advanced past the '&', the
# name and an optional ';', and the decoded character is returned as a UTF-8
# String. Returns nil when nothing matches; the input is then left wherever
# the read-ahead stopped.
# NOTE(review): the reset-then-skip step assumes the caller marked the input
# just before the '&' - confirm for any caller other than decode_char.
def named_entity(input) #:nodoc:
  window = min(input.remainder.size, @longest_key)
  input.next if input.peek?("&")

  candidate = ''
  matched_name = nil
  # we have to find the longest match so we dont stop at sub values
  # (for example "sup" inside "sup1")
  (window + 1).times do
    candidate << input.next if input.next?
    if ENTITY_MAP.key?(candidate)
      matched_name = candidate.dup
    elsif (folded = @lookup_map[candidate.downcase])
      matched_name = folded
    end
  end
  # no matches found return
  return nil if matched_name.nil?

  # reset the input and plow through the '&' plus the matched name
  input.reset
  (matched_name.size + 1).times { input.next }
  input.next if input.peek?(';')
  ENTITY_MAP[matched_name].chr(Encoding::UTF_8)
end
115
# parse a decimal number in the stream
#
# Consumes consecutive decimal digits from input, plus one terminating ';'
# when present, and converts them to a UTF-8 character.
#
# Returns nil when there are no digits at all, when the value lies outside
# START_CODE_POINT..END_CODE_POINT, or when the value is not a valid UTF-8
# code point (for example a surrogate).
def parse_number(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if c =~ /\d/
      digits << c
      input.next
    else
      # a semicolon closes the entity and is consumed; anything else is left
      input.next if c == ';'
      break
    end
  end
  # ''.to_i is 0, so without this guard "&#;" would be read as code point 0
  return nil if digits.empty?

  code_point = digits.to_i
  return nil unless code_point >= START_CODE_POINT && code_point <= END_CODE_POINT

  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects values that are not encodable in UTF-8
    nil
  end
end
138
# parse a hex value in the stream
#
# Consumes consecutive hexadecimal digits (either case) from input, plus one
# terminating ';' when present, and converts them to a UTF-8 character.
#
# Returns nil when there are no digits at all, when the value lies outside
# START_CODE_POINT..END_CODE_POINT, or when the value is not a valid UTF-8
# code point (for example a surrogate).
def parse_hex(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if c =~ /\A\h\z/
      digits << c
      input.next
    else
      # a semicolon closes the entity and is consumed; anything else is left
      input.next if c == ";"
      break
    end
  end
  # ''.hex is 0, so without this guard "&#x;" would be read as code point 0
  return nil if digits.empty?

  code_point = digits.hex
  return nil unless code_point >= START_CODE_POINT && code_point <= END_CODE_POINT

  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects values that are not encodable in UTF-8
    nil
  end
end
160
+
161
# Replacement const hex: the hex digits of U+FFFD, written into the
# "&#x...;" form by encode_char for characters it refuses to pass through.
REPLACEMENT_HEX = "fffd".freeze
# Replacement const char: U+FFFD itself. This must be double-quoted; in
# single quotes '\ufffd' is the six characters backslash, u, f, f, f, d.
REPLACEMENT_CHAR = "\ufffd".freeze
165
+
166
# Map of entities to numeric codes: entity name (case-sensitive, without the
# surrounding '&' and ';') => Unicode code point. Frozen so the shared table
# cannot be modified at runtime.
ENTITY_MAP = {
  'Aacute' => 193,
  'aacute' => 225,
  'Acirc' => 194,
  'acirc' => 226,
  'acute' => 180,
  'AElig' => 198,
  'aelig' => 230,
  'Agrave' => 192,
  'agrave' => 224,
  'alefsym' => 8501,
  'Alpha' => 913,
  'alpha' => 945,
  'amp' => 38,
  'and' => 8743,
  'ang' => 8736,
  'Aring' => 197,
  'aring' => 229,
  'asymp' => 8776,
  'Atilde' => 195,
  'atilde' => 227,
  'Auml' => 196,
  'auml' => 228,
  'bdquo' => 8222,
  'Beta' => 914,
  'beta' => 946,
  'brvbar' => 166,
  'bull' => 8226,
  'cap' => 8745,
  'Ccedil' => 199,
  'ccedil' => 231,
  'cedil' => 184,
  'cent' => 162,
  'Chi' => 935,
  'chi' => 967,
  'circ' => 710,
  'clubs' => 9827,
  'cong' => 8773,
  'copy' => 169,
  'crarr' => 8629,
  'cup' => 8746,
  'curren' => 164,
  'Dagger' => 8225,
  'dagger' => 8224,
  'dArr' => 8659,
  'darr' => 8595,
  'deg' => 176,
  'Delta' => 916,
  'delta' => 948,
  'diams' => 9830,
  'divide' => 247,
  'Eacute' => 201,
  'eacute' => 233,
  'Ecirc' => 202,
  'ecirc' => 234,
  'Egrave' => 200,
  'egrave' => 232,
  'empty' => 8709,
  'emsp' => 8195,
  'ensp' => 8194,
  'Epsilon' => 917,
  'epsilon' => 949,
  'equiv' => 8801,
  'Eta' => 919,
  'eta' => 951,
  'ETH' => 208,
  'eth' => 240,
  'Euml' => 203,
  'euml' => 235,
  'euro' => 8364,
  'exist' => 8707,
  'fnof' => 402,
  'forall' => 8704,
  'frac12' => 189,
  'frac14' => 188,
  'frac34' => 190,
  'frasl' => 8260,
  'Gamma' => 915,
  'gamma' => 947,
  'ge' => 8805,
  'gt' => 62,
  'hArr' => 8660,
  'harr' => 8596,
  'hearts' => 9829,
  'hellip' => 8230,
  'Iacute' => 205,
  'iacute' => 237,
  'Icirc' => 206,
  'icirc' => 238,
  'iexcl' => 161,
  'Igrave' => 204,
  'igrave' => 236,
  'image' => 8465,
  'infin' => 8734,
  'int' => 8747,
  'Iota' => 921,
  'iota' => 953,
  'iquest' => 191,
  'isin' => 8712,
  'Iuml' => 207,
  'iuml' => 239,
  'Kappa' => 922,
  'kappa' => 954,
  'Lambda' => 923,
  'lambda' => 955,
  'lang' => 9001,
  'laquo' => 171,
  'lArr' => 8656,
  'larr' => 8592,
  'lceil' => 8968,
  'ldquo' => 8220,
  'le' => 8804,
  'lfloor' => 8970,
  'lowast' => 8727,
  'loz' => 9674,
  'lrm' => 8206,
  'lsaquo' => 8249,
  'lsquo' => 8216,
  'lt' => 60,
  'macr' => 175,
  'mdash' => 8212,
  'micro' => 181,
  'middot' => 183,
  'minus' => 8722,
  'Mu' => 924,
  'mu' => 956,
  'nabla' => 8711,
  'nbsp' => 160,
  'ndash' => 8211,
  'ne' => 8800,
  'ni' => 8715,
  'not' => 172,
  'notin' => 8713,
  'nsub' => 8836,
  'Ntilde' => 209,
  'ntilde' => 241,
  'Nu' => 925,
  'nu' => 957,
  'Oacute' => 211,
  'oacute' => 243,
  'Ocirc' => 212,
  'ocirc' => 244,
  'OElig' => 338,
  'oelig' => 339,
  'Ograve' => 210,
  'ograve' => 242,
  'oline' => 8254,
  'Omega' => 937,
  'omega' => 969,
  'Omicron' => 927,
  'omicron' => 959,
  'oplus' => 8853,
  'or' => 8744,
  'ordf' => 170,
  'ordm' => 186,
  'Oslash' => 216,
  'oslash' => 248,
  'Otilde' => 213,
  'otilde' => 245,
  'otimes' => 8855,
  'Ouml' => 214,
  'ouml' => 246,
  'para' => 182,
  'part' => 8706,
  'permil' => 8240,
  'perp' => 8869,
  'Phi' => 934,
  'phi' => 966,
  'Pi' => 928,
  'pi' => 960,
  'piv' => 982,
  'plusmn' => 177,
  'pound' => 163,
  'Prime' => 8243,
  'prime' => 8242,
  'prod' => 8719,
  'prop' => 8733,
  'Psi' => 936,
  'psi' => 968,
  'quot' => 34,
  'radic' => 8730,
  'rang' => 9002,
  'raquo' => 187,
  'rArr' => 8658,
  'rarr' => 8594,
  'rceil' => 8969,
  'rdquo' => 8221,
  'real' => 8476,
  'reg' => 174,
  'rfloor' => 8971,
  'Rho' => 929,
  'rho' => 961,
  'rlm' => 8207,
  'rsaquo' => 8250,
  'rsquo' => 8217,
  'sbquo' => 8218,
  'Scaron' => 352,
  'scaron' => 353,
  'sdot' => 8901,
  'sect' => 167,
  'shy' => 173,
  'Sigma' => 931,
  'sigma' => 963,
  'sigmaf' => 962,
  'sim' => 8764,
  'spades' => 9824,
  'sub' => 8834,
  'sube' => 8838,
  'sum' => 8721,
  'sup' => 8835,
  'sup1' => 185,
  'sup2' => 178,
  'sup3' => 179,
  'supe' => 8839,
  'szlig' => 223,
  'Tau' => 932,
  'tau' => 964,
  'there4' => 8756,
  'Theta' => 920,
  'theta' => 952,
  'thetasym' => 977,
  'thinsp' => 8201,
  'THORN' => 222,
  'thorn' => 254,
  'tilde' => 732,
  'times' => 215,
  'trade' => 8482,
  'Uacute' => 218,
  'uacute' => 250,
  'uArr' => 8657,
  'uarr' => 8593,
  'Ucirc' => 219,
  'ucirc' => 251,
  'Ugrave' => 217,
  'ugrave' => 249,
  'uml' => 168,
  'upsih' => 978,
  'Upsilon' => 933,
  'upsilon' => 965,
  'Uuml' => 220,
  'uuml' => 252,
  'weierp' => 8472,
  'Xi' => 926,
  'xi' => 958,
  'Yacute' => 221,
  'yacute' => 253,
  'yen' => 165,
  'Yuml' => 376,
  'yuml' => 255,
  'Zeta' => 918,
  'zeta' => 950,
  'zwj' => 8205,
  'zwnj' => 8204
}.freeze
421
+ end
422
+ end
423
+ end
424
+ end
@@ -0,0 +1,119 @@
1
+ module Owasp
2
+ module Esapi
3
+ module Codec
4
+ class JavascriptCodec < BaseCodec
5
+
6
# Returns backslash encoded numeric format. Does not use backslash character escapes
# such as, \" or \' as these may cause parsing problems. For example, if a javascript
# attribute, such as onmouseover, contains a \" that will close the entire attribute and
# allow an attacker to inject another script attribute.
#
# immune - collection of characters that are returned untouched (only
#          include? is called on it)
# input  - the character to encode, as a one-character String
#
# Returns input unchanged when it is immune or when hex(input) is nil.
# Otherwise returns \xHH for values below 256, \uHHHH for values up to
# 0xFFFF, and a \uHHHH\uHHHH UTF-16 surrogate pair for anything larger,
# because a JavaScript \u escape takes exactly four hex digits.
def encode_char(immune, input)
  return input if immune.include?(input)

  # hex is defined outside this method; a nil result is treated here as
  # "this character needs no encoding".
  hex_digits = hex(input)
  return input if hex_digits.nil?

  code_point = hex_digits.hex
  if code_point < 256
    "\\x#{hex_digits.upcase.rjust(2, '0')}"
  elsif code_point <= 0xFFFF
    "\\u#{hex_digits.upcase.rjust(4, '0')}"
  else
    # Split the supplementary code point into high and low surrogates.
    offset = code_point - 0x10000
    format('\\u%04X\\u%04X', 0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF))
  end
end
19
+
20
# Returns the decoded version of the character starting at the current
# position of input, or nil if no decoding is possible (the input is then
# reset to the position marked on entry).
# See http://www.planetpdf.com/codecuts/pdfs/tutorial/jsspec.*pdf*
# Formats all are legal both upper/lower case:
# * \\a - special characters
# * \\xHH
# * \\uHHHH
# * \\OOO (1, 2, or 3 digits)
#
# A backslash followed by any other character decodes to that character.
# The same fallback applies when a numeric escape yields a value outside
# START_CODE_POINT..END_CODE_POINT. A value that cannot be encoded as UTF-8
# (for example a lone surrogate) gives nil.
def decode_char(input)
  input.mark

  # check to see if we are dealing with an encoded char
  first = input.next
  unless first == "\\"
    input.reset
    return nil
  end

  second = input.next
  if second.nil?
    input.reset
    return nil
  end

  # Single-character escapes
  case second
  when "b"  then return 0x08.chr
  when "t"  then return 0x09.chr
  when "n"  then return 0x0a.chr
  when "v"  then return 0x0b.chr
  when "f"  then return 0x0c.chr
  when "r"  then return 0x0d.chr
  when "\"" then return 0x22.chr
  when "'"  then return 0x27.chr
  when "\\" then return 0x5c.chr
  end

  # \x takes exactly two hex digits, \u exactly four
  hex_width = { "x" => 2, "u" => 4 }[second.downcase]
  if hex_width
    digits = ''
    hex_width.times do
      digit = input.next_hex
      if digit.nil?
        input.reset
        return nil
      end
      digits << digit
    end
    code_point = digits.hex
  elsif input.octal?(second)
    # Up to two more octal digits may follow; the first non-octal character
    # is pushed back. dup keeps the appends from modifying second.
    digits = second.dup
    2.times do
      digit = input.next
      unless input.octal?(digit)
        input.push(digit)
        break
      end
      digits << digit
    end
    code_point = digits.to_i(8)
  else
    return second
  end

  begin
    return code_point.chr(Encoding::UTF_8) if code_point >= START_CODE_POINT && code_point <= END_CODE_POINT
  rescue RangeError
    # Integer#chr rejects values that are not encodable in UTF-8
    input.reset
    return nil
  end
  second
end
116
+ end
117
+ end
118
+ end
119
+ end