owasp-esapi-ruby 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/.document +5 -0
  2. data/AUTHORS +5 -0
  3. data/ChangeLog +69 -0
  4. data/ISSUES +0 -0
  5. data/LICENSE +24 -0
  6. data/README +51 -0
  7. data/Rakefile +63 -0
  8. data/VERSION +1 -0
  9. data/lib/codec/base_codec.rb +99 -0
  10. data/lib/codec/css_codec.rb +101 -0
  11. data/lib/codec/encoder.rb +330 -0
  12. data/lib/codec/html_codec.rb +424 -0
  13. data/lib/codec/javascript_codec.rb +119 -0
  14. data/lib/codec/mysql_codec.rb +131 -0
  15. data/lib/codec/oracle_codec.rb +46 -0
  16. data/lib/codec/os_codec.rb +78 -0
  17. data/lib/codec/percent_codec.rb +53 -0
  18. data/lib/codec/pushable_string.rb +114 -0
  19. data/lib/codec/vbscript_codec.rb +64 -0
  20. data/lib/codec/xml_codec.rb +173 -0
  21. data/lib/esapi.rb +68 -0
  22. data/lib/exceptions.rb +37 -0
  23. data/lib/executor.rb +20 -0
  24. data/lib/owasp-esapi-ruby.rb +13 -0
  25. data/lib/sanitizer/xss.rb +59 -0
  26. data/lib/validator/base_rule.rb +90 -0
  27. data/lib/validator/date_rule.rb +92 -0
  28. data/lib/validator/email.rb +29 -0
  29. data/lib/validator/float_rule.rb +76 -0
  30. data/lib/validator/generic_validator.rb +26 -0
  31. data/lib/validator/integer_rule.rb +61 -0
  32. data/lib/validator/string_rule.rb +146 -0
  33. data/lib/validator/validator_error_list.rb +48 -0
  34. data/lib/validator/zipcode.rb +27 -0
  35. data/spec/codec/css_codec_spec.rb +61 -0
  36. data/spec/codec/html_codec_spec.rb +87 -0
  37. data/spec/codec/javascript_codec_spec.rb +45 -0
  38. data/spec/codec/mysql_codec_spec.rb +44 -0
  39. data/spec/codec/oracle_codec_spec.rb +23 -0
  40. data/spec/codec/os_codec_spec.rb +51 -0
  41. data/spec/codec/percent_codec_spec.rb +34 -0
  42. data/spec/codec/vbcript_codec_spec.rb +23 -0
  43. data/spec/codec/xml_codec_spec.rb +83 -0
  44. data/spec/owasp_esapi_encoder_spec.rb +226 -0
  45. data/spec/owasp_esapi_executor_spec.rb +9 -0
  46. data/spec/owasp_esapi_ruby_email_validator_spec.rb +39 -0
  47. data/spec/owasp_esapi_ruby_xss_sanitizer_spec.rb +66 -0
  48. data/spec/owasp_esapi_ruby_zipcode_validator_spec.rb +42 -0
  49. data/spec/spec_helper.rb +10 -0
  50. data/spec/validator/base_rule_spec.rb +29 -0
  51. data/spec/validator/date_rule_spec.rb +40 -0
  52. data/spec/validator/float_rule_spec.rb +31 -0
  53. data/spec/validator/integer_rule_spec.rb +51 -0
  54. data/spec/validator/string_rule_spec.rb +103 -0
  55. data/spec/validator_skeleton.rb +150 -0
  56. metadata +235 -0
@@ -0,0 +1,424 @@
1
+ # Implementation of the Codec interface for HTML entity encoding.
2
+ module Owasp
3
+ module Esapi
4
+ module Codec
5
+ class HtmlCodec < BaseCodec
6
# Builds the tables used when decoding named entities:
# * @longest_key - length of the longest entity name in ENTITY_MAP, used
#   to bound how far named_entity reads ahead
# * @lookup_map  - lower-cased entity name => entity name as spelled in
#   ENTITY_MAP. When two names differ only by case, the one that appears
#   later in ENTITY_MAP wins.
def initialize
  @longest_key = 0
  @lookup_map = {}
  ENTITY_MAP.each_key do |name|
    # record the actual length, not a running count of "longer" keys
    @longest_key = name.size if name.size > @longest_key
    @lookup_map[name.downcase] = name
  end
end
16
+
17
# Encodes a character for safe use in an HTML entity field.
#
# * immune - collection of characters that are returned untouched
# * input  - the single character to encode
#
# Returns +input+ unchanged when it is immune or when +hex+ reports nil
# for it (presumably alphanumerics; +hex+ is defined outside this file -
# TODO confirm). Control characters other than tab, line feed and carriage
# return, and the range 0x7f..0x9f, are replaced by the U+FFFD reference.
# Everything else becomes a named entity when ENTITY_MAP has one for the
# code point, otherwise a hexadecimal reference.
def encode_char(immune, input)
  return input if immune.include?(input)
  # check for alpha numeric
  hex_code = hex(input)
  return input if hex_code.nil?

  code = input.ord
  # Double-quoted literals are required here: '\t' would be a backslash
  # followed by "t" and would never equal a real tab character.
  allowed_whitespace = ["\t", "\n", "\r"].include?(input)
  illegal = (code <= 0x1f && !allowed_whitespace) || (code >= 0x7f && code <= 0x9f)
  return "&#x#{REPLACEMENT_HEX};" if illegal

  # find the entity name if it is possible
  name = ENTITY_MAP.key(code)
  return "&#{name};" if name

  # encode as a hex value
  "&#x#{hex_code};"
end
36
+
37
# Decodes the HTML entity that starts at the current position of +input+.
# Returns the decoded character, or nil (with +input+ rewound to where it
# was on entry) when no entity can be decoded there.
# Accepted forms, with or without the trailing semi-colon, in upper or
# lower case:
# * &#dddd;
# * &#xhhhh;
# * &name;
def decode_char(input)
  # remember where we started so a failed decode can rewind
  input.mark
  lead = input.next
  # only look at the second character when the first one opens an entity
  follow = lead == '&' ? input.next : nil

  decoded =
    if follow.nil?
      nil
    elsif follow == '#'
      # numeric reference, decimal or hexadecimal
      numeric_entity(input)
    elsif follow =~ /[a-zA-Z]/
      # hand the first letter back so named_entity sees the whole name
      input.push(follow)
      named_entity(input)
    end

  input.reset if decoded.nil?
  decoded
end
75
+
76
# Decodes the body of a numeric reference (the part after "&#").
# A leading "x" or "X" is consumed and the rest is handed to parse_hex;
# anything else is handed to parse_number. Returns nil at end of input.
def numeric_entity(input) #:nodoc:
  marker = input.peek
  return nil if marker.nil?
  return parse_number(input) unless marker.downcase.eql?("x")

  # skip the hex marker itself
  input.next
  parse_hex(input)
end
86
+
87
# Decodes a named entity. The caller is expected to have marked +input+ at
# the "&" and pushed the first letter of the name back before calling.
#
# Reads ahead and keeps the longest candidate that names an entity, so that
# "notin" is preferred over "not". A candidate spelled exactly like an
# ENTITY_MAP key wins over the case-insensitive lookup, so "&Aacute;" and
# "&aacute;" decode to different characters.
#
# Returns the decoded character and leaves +input+ just past the entity
# (and its optional ";"), or returns nil when no entity name matches.
def named_entity(input) #:nodoc:
  candidate = ''
  len = min(input.remainder.size, @longest_key)
  input.next if input.peek?("&")
  best = ''
  (len + 1).times do
    candidate << input.next if input.next?
    # we have to find the longest match so we do not stop at sub values
    if ENTITY_MAP.key?(candidate)
      # dup: candidate keeps growing on later iterations
      best = candidate.dup
    elsif @lookup_map[candidate.downcase]
      best = @lookup_map[candidate.downcase]
    end
  end
  # no matches found
  return nil if best.empty?

  # rewind to the mark, then skip the "&" plus the matched name
  input.reset
  (best.size + 1).times { input.next }
  input.next if input.peek?(';')
  ENTITY_MAP[best].chr(Encoding::UTF_8)
end
115
# Parses a decimal number from the stream and converts it to a character.
#
# Consumes consecutive decimal digits and one terminating ";" if present.
# Returns the UTF-8 character for the parsed code point, or nil when there
# are no digits at all, when the value lies outside
# START_CODE_POINT..END_CODE_POINT, or when the value is not a valid
# code point in UTF-8.
def parse_number(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if c =~ /\d/
      digits << c
      input.next
    else
      # swallow the optional terminator, stop on anything else
      input.next if c == ';'
      break
    end
  end
  # ''.to_i is 0, which would otherwise decode "&#;" to a NUL character
  return nil if digits.empty?

  code = digits.to_i
  return nil unless code >= START_CODE_POINT && code <= END_CODE_POINT

  begin
    code.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects values such as UTF-16 surrogates
    nil
  end
end
138
# Parses a hexadecimal number from the stream and converts it to a
# character.
#
# Consumes consecutive hex digits (either case) and one terminating ";" if
# present. Returns the UTF-8 character for the parsed code point, or nil
# when there are no digits at all, when the value lies outside
# START_CODE_POINT..END_CODE_POINT, or when the value is not a valid
# code point in UTF-8.
def parse_hex(input) #:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if "0123456789ABCDEFabcdef".include?(c)
      digits << c
      input.next
    else
      # swallow the optional terminator, stop on anything else
      input.next if c == ";"
      break
    end
  end
  # ''.hex is 0, which would otherwise decode "&#x;" to a NUL character
  return nil if digits.empty?

  code = digits.hex
  return nil unless code >= START_CODE_POINT && code <= END_CODE_POINT

  begin
    code.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects values such as UTF-16 surrogates
    nil
  end
end
160
+
161
# Hex digits of the Unicode replacement character (U+FFFD), as written in
# a "&#x...;" reference.
REPLACEMENT_HEX = "fffd".freeze
# The Unicode replacement character itself. The literal must be
# double-quoted: in single quotes "\u" is not an escape and the constant
# would be the six characters backslash-u-f-f-f-d.
REPLACEMENT_CHAR = "\ufffd".freeze
165
+
166
# Map of entity names to numeric code points.
# Names are case sensitive ('Aacute' and 'aacute' are different entries).
# The map is frozen so that the shared constant cannot be altered at
# runtime; entry order is kept as-is because code that walks the map
# depends on it when names differ only by case.
ENTITY_MAP = {
  'Aacute' => 193,
  'aacute' => 225,
  'Acirc' => 194,
  'acirc' => 226,
  'acute' => 180,
  'AElig' => 198,
  'aelig' => 230,
  'Agrave' => 192,
  'agrave' => 224,
  'alefsym' => 8501,
  'Alpha' => 913,
  'alpha' => 945,
  'amp' => 38,
  'and' => 8743,
  'ang' => 8736,
  'Aring' => 197,
  'aring' => 229,
  'asymp' => 8776,
  'Atilde' => 195,
  'atilde' => 227,
  'Auml' => 196,
  'auml' => 228,
  'bdquo' => 8222,
  'Beta' => 914,
  'beta' => 946,
  'brvbar' => 166,
  'bull' => 8226,
  'cap' => 8745,
  'Ccedil' => 199,
  'ccedil' => 231,
  'cedil' => 184,
  'cent' => 162,
  'Chi' => 935,
  'chi' => 967,
  'circ' => 710,
  'clubs' => 9827,
  'cong' => 8773,
  'copy' => 169,
  'crarr' => 8629,
  'cup' => 8746,
  'curren' => 164,
  'Dagger' => 8225,
  'dagger' => 8224,
  'dArr' => 8659,
  'darr' => 8595,
  'deg' => 176,
  'Delta' => 916,
  'delta' => 948,
  'diams' => 9830,
  'divide' => 247,
  'Eacute' => 201,
  'eacute' => 233,
  'Ecirc' => 202,
  'ecirc' => 234,
  'Egrave' => 200,
  'egrave' => 232,
  'empty' => 8709,
  'emsp' => 8195,
  'ensp' => 8194,
  'Epsilon' => 917,
  'epsilon' => 949,
  'equiv' => 8801,
  'Eta' => 919,
  'eta' => 951,
  'ETH' => 208,
  'eth' => 240,
  'Euml' => 203,
  'euml' => 235,
  'euro' => 8364,
  'exist' => 8707,
  'fnof' => 402,
  'forall' => 8704,
  'frac12' => 189,
  'frac14' => 188,
  'frac34' => 190,
  'frasl' => 8260,
  'Gamma' => 915,
  'gamma' => 947,
  'ge' => 8805,
  'gt' => 62,
  'hArr' => 8660,
  'harr' => 8596,
  'hearts' => 9829,
  'hellip' => 8230,
  'Iacute' => 205,
  'iacute' => 237,
  'Icirc' => 206,
  'icirc' => 238,
  'iexcl' => 161,
  'Igrave' => 204,
  'igrave' => 236,
  'image' => 8465,
  'infin' => 8734,
  'int' => 8747,
  'Iota' => 921,
  'iota' => 953,
  'iquest' => 191,
  'isin' => 8712,
  'Iuml' => 207,
  'iuml' => 239,
  'Kappa' => 922,
  'kappa' => 954,
  'Lambda' => 923,
  'lambda' => 955,
  'lang' => 9001,
  'laquo' => 171,
  'lArr' => 8656,
  'larr' => 8592,
  'lceil' => 8968,
  'ldquo' => 8220,
  'le' => 8804,
  'lfloor' => 8970,
  'lowast' => 8727,
  'loz' => 9674,
  'lrm' => 8206,
  'lsaquo' => 8249,
  'lsquo' => 8216,
  'lt' => 60,
  'macr' => 175,
  'mdash' => 8212,
  'micro' => 181,
  'middot' => 183,
  'minus' => 8722,
  'Mu' => 924,
  'mu' => 956,
  'nabla' => 8711,
  'nbsp' => 160,
  'ndash' => 8211,
  'ne' => 8800,
  'ni' => 8715,
  'not' => 172,
  'notin' => 8713,
  'nsub' => 8836,
  'Ntilde' => 209,
  'ntilde' => 241,
  'Nu' => 925,
  'nu' => 957,
  'Oacute' => 211,
  'oacute' => 243,
  'Ocirc' => 212,
  'ocirc' => 244,
  'OElig' => 338,
  'oelig' => 339,
  'Ograve' => 210,
  'ograve' => 242,
  'oline' => 8254,
  'Omega' => 937,
  'omega' => 969,
  'Omicron' => 927,
  'omicron' => 959,
  'oplus' => 8853,
  'or' => 8744,
  'ordf' => 170,
  'ordm' => 186,
  'Oslash' => 216,
  'oslash' => 248,
  'Otilde' => 213,
  'otilde' => 245,
  'otimes' => 8855,
  'Ouml' => 214,
  'ouml' => 246,
  'para' => 182,
  'part' => 8706,
  'permil' => 8240,
  'perp' => 8869,
  'Phi' => 934,
  'phi' => 966,
  'Pi' => 928,
  'pi' => 960,
  'piv' => 982,
  'plusmn' => 177,
  'pound' => 163,
  'Prime' => 8243,
  'prime' => 8242,
  'prod' => 8719,
  'prop' => 8733,
  'Psi' => 936,
  'psi' => 968,
  'quot' => 34,
  'radic' => 8730,
  'rang' => 9002,
  'raquo' => 187,
  'rArr' => 8658,
  'rarr' => 8594,
  'rceil' => 8969,
  'rdquo' => 8221,
  'real' => 8476,
  'reg' => 174,
  'rfloor' => 8971,
  'Rho' => 929,
  'rho' => 961,
  'rlm' => 8207,
  'rsaquo' => 8250,
  'rsquo' => 8217,
  'sbquo' => 8218,
  'Scaron' => 352,
  'scaron' => 353,
  'sdot' => 8901,
  'sect' => 167,
  'shy' => 173,
  'Sigma' => 931,
  'sigma' => 963,
  'sigmaf' => 962,
  'sim' => 8764,
  'spades' => 9824,
  'sub' => 8834,
  'sube' => 8838,
  'sum' => 8721,
  'sup' => 8835,
  'sup1' => 185,
  'sup2' => 178,
  'sup3' => 179,
  'supe' => 8839,
  'szlig' => 223,
  'Tau' => 932,
  'tau' => 964,
  'there4' => 8756,
  'Theta' => 920,
  'theta' => 952,
  'thetasym' => 977,
  'thinsp' => 8201,
  'THORN' => 222,
  'thorn' => 254,
  'tilde' => 732,
  'times' => 215,
  'trade' => 8482,
  'Uacute' => 218,
  'uacute' => 250,
  'uArr' => 8657,
  'uarr' => 8593,
  'Ucirc' => 219,
  'ucirc' => 251,
  'Ugrave' => 217,
  'ugrave' => 249,
  'uml' => 168,
  'upsih' => 978,
  'Upsilon' => 933,
  'upsilon' => 965,
  'Uuml' => 220,
  'uuml' => 252,
  'weierp' => 8472,
  'Xi' => 926,
  'xi' => 958,
  'Yacute' => 221,
  'yacute' => 253,
  'yen' => 165,
  'Yuml' => 376,
  'yuml' => 255,
  'Zeta' => 918,
  'zeta' => 950,
  'zwj' => 8205,
  'zwnj' => 8204
}.freeze
421
+ end
422
+ end
423
+ end
424
+ end
@@ -0,0 +1,119 @@
1
+ module Owasp
2
+ module Esapi
3
+ module Codec
4
+ class JavascriptCodec < BaseCodec
5
+
6
# Returns backslash encoded numeric format. Does not use backslash character
# escapes such as \" or \' as these may cause parsing problems. For example,
# if a javascript attribute, such as onmouseover, contains a \" that will
# close the entire attribute and allow an attacker to inject another script
# attribute.
#
# * immune - collection of characters that are returned untouched
# * input  - the single character to encode
#
# Returns +input+ unchanged when it is immune or when +hex+ reports nil for
# it (+hex+ is defined outside this file; presumably nil means "no encoding
# needed" - TODO confirm). Otherwise returns:
# * \xHH for code points below 256
# * \uHHHH for code points up to 0xFFFF
# * a \uHHHH\uHHHH UTF-16 surrogate pair for code points above 0xFFFF,
#   because a \u escape carries exactly four hex digits
def encode_char(immune, input)
  return input if immune.include?(input)
  hex_code = hex(input)
  return input if hex_code.nil?

  code = hex_code.hex
  return format('\\x%02X', code) if code < 256
  return format('\\u%04X', code) if code <= 0xFFFF

  # split the supplementary code point into high and low surrogates
  offset = code - 0x10000
  format('\\u%04X\\u%04X', 0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF))
end
19
+
20
# Returns the decoded version of the character starting at the current
# position of +input+, or nil if no decoding is possible. On nil the input
# is rewound to where it was on entry.
# See http://www.planetpdf.com/codecuts/pdfs/tutorial/jsspec.*pdf*
# Formats all are legal both upper/lower case:
# * \\a - special characters
# * \\xHH
# * \\uHHHH
# * \\OOO (1, 2, or 3 digits)
# A backslash followed by any other character decodes to that character.
def decode_char(input)
  input.mark
  first = input.next
  # only read the second character when we are looking at an escape
  second = input.next if first == "\\"
  if second.nil?
    input.reset
    return nil
  end

  # single-character escapes
  case second
  when "b"  then return 0x08.chr
  when "t"  then return 0x09.chr
  when "n"  then return 0x0a.chr
  when "v"  then return 0x0b.chr
  when "f"  then return 0x0c.chr
  when "r"  then return 0x0d.chr
  when "\"" then return 0x22.chr
  when "\'" then return 0x27.chr
  when "\\" then return 0x5c.chr
  end

  kind = second.downcase
  if kind == "x" || kind == "u"
    # \xHH takes exactly two hex digits, \uHHHH exactly four
    width = kind == "x" ? 2 : 4
    digits = ''
    width.times do
      h = input.next_hex
      if h.nil?
        input.reset
        return nil
      end
      digits << h
    end
    code = digits.hex
  elsif input.octal?(second)
    # up to two more octal digits may follow; dup so the character handed
    # out by the input is not modified in place
    digits = second.dup
    2.times do
      c = input.next
      unless input.octal?(c)
        input.push(c)
        break
      end
      digits << c
    end
    code = digits.to_i(8)
  else
    # not a numeric escape: the escaped character stands for itself
    return second
  end

  begin
    return code.chr(Encoding::UTF_8) if code >= START_CODE_POINT && code <= END_CODE_POINT
  rescue RangeError
    # e.g. a lone UTF-16 surrogate, which Integer#chr rejects for UTF-8
    input.reset
    return nil
  end
  second
end
116
+ end
117
+ end
118
+ end
119
+ end