owasp-esapi-ruby 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/AUTHORS +5 -0
- data/ChangeLog +69 -0
- data/ISSUES +0 -0
- data/LICENSE +24 -0
- data/README +51 -0
- data/Rakefile +63 -0
- data/VERSION +1 -0
- data/lib/codec/base_codec.rb +99 -0
- data/lib/codec/css_codec.rb +101 -0
- data/lib/codec/encoder.rb +330 -0
- data/lib/codec/html_codec.rb +424 -0
- data/lib/codec/javascript_codec.rb +119 -0
- data/lib/codec/mysql_codec.rb +131 -0
- data/lib/codec/oracle_codec.rb +46 -0
- data/lib/codec/os_codec.rb +78 -0
- data/lib/codec/percent_codec.rb +53 -0
- data/lib/codec/pushable_string.rb +114 -0
- data/lib/codec/vbscript_codec.rb +64 -0
- data/lib/codec/xml_codec.rb +173 -0
- data/lib/esapi.rb +68 -0
- data/lib/exceptions.rb +37 -0
- data/lib/executor.rb +20 -0
- data/lib/owasp-esapi-ruby.rb +13 -0
- data/lib/sanitizer/xss.rb +59 -0
- data/lib/validator/base_rule.rb +90 -0
- data/lib/validator/date_rule.rb +92 -0
- data/lib/validator/email.rb +29 -0
- data/lib/validator/float_rule.rb +76 -0
- data/lib/validator/generic_validator.rb +26 -0
- data/lib/validator/integer_rule.rb +61 -0
- data/lib/validator/string_rule.rb +146 -0
- data/lib/validator/validator_error_list.rb +48 -0
- data/lib/validator/zipcode.rb +27 -0
- data/spec/codec/css_codec_spec.rb +61 -0
- data/spec/codec/html_codec_spec.rb +87 -0
- data/spec/codec/javascript_codec_spec.rb +45 -0
- data/spec/codec/mysql_codec_spec.rb +44 -0
- data/spec/codec/oracle_codec_spec.rb +23 -0
- data/spec/codec/os_codec_spec.rb +51 -0
- data/spec/codec/percent_codec_spec.rb +34 -0
- data/spec/codec/vbcript_codec_spec.rb +23 -0
- data/spec/codec/xml_codec_spec.rb +83 -0
- data/spec/owasp_esapi_encoder_spec.rb +226 -0
- data/spec/owasp_esapi_executor_spec.rb +9 -0
- data/spec/owasp_esapi_ruby_email_validator_spec.rb +39 -0
- data/spec/owasp_esapi_ruby_xss_sanitizer_spec.rb +66 -0
- data/spec/owasp_esapi_ruby_zipcode_validator_spec.rb +42 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/validator/base_rule_spec.rb +29 -0
- data/spec/validator/date_rule_spec.rb +40 -0
- data/spec/validator/float_rule_spec.rb +31 -0
- data/spec/validator/integer_rule_spec.rb +51 -0
- data/spec/validator/string_rule_spec.rb +103 -0
- data/spec/validator_skeleton.rb +150 -0
- metadata +235 -0
@@ -0,0 +1,424 @@
|
|
1
|
+
# Implementation of the Codec interface for HTML entity encoding.
|
2
|
+
module Owasp
|
3
|
+
module Esapi
|
4
|
+
module Codec
|
5
|
+
class HtmlCodec < BaseCodec
|
6
|
+
# Builds the lookup state used when decoding named entities:
# * @longest_key - length of the longest entity name in ENTITY_MAP, used to
#   bound how far named_entity reads ahead.
# * @lookup_map  - down-cased entity name => entity name as spelled in
#   ENTITY_MAP. When two names differ only by case the later one wins.
def initialize
  @longest_key = 0
  @lookup_map = {}
  ENTITY_MAP.each_key do |name|
    # Record the actual length; incrementing by one only counted how many
    # times a longer key was encountered.
    @longest_key = name.size if name.size > @longest_key
    @lookup_map[name.downcase] = name
  end
end
|
16
|
+
|
17
|
+
# Encodes a single character for safe use in an HTML entity field.
#
# immune - collection of characters that are returned untouched.
# input  - the character to encode.
#
# Returns +input+ unchanged when it is immune or when the inherited +hex+
# helper returns nil for it. Control characters (other than tab, line feed
# and carriage return) and the 0x7f-0x9f range are emitted as the
# replacement entity. Otherwise the named entity from ENTITY_MAP is used
# when one exists, falling back to a hexadecimal numeric entity.
def encode_char(immune, input)
  return input if immune.include?(input)

  hex_value = hex(input)
  return input if hex_value.nil?

  code = input.ord
  # Double-quoted literals are required here: '\t' in single quotes is a
  # backslash followed by "t" and would never equal a real tab.
  plain_whitespace = input == "\t" || input == "\n" || input == "\r"
  if (code <= 0x1f && !plain_whitespace) || (code >= 0x7f && code <= 0x9f)
    return "&#x#{REPLACEMENT_HEX};"
  end

  # Hash#key returns the first name mapped to this code point, or nil.
  name = ENTITY_MAP.key(code)
  return "&#{name};" if name

  "&#x#{hex_value};"
end
|
36
|
+
|
37
|
+
# Decodes the HTML entity that starts at the current position of +input+.
# Returns the decoded character, or nil when nothing could be decoded, in
# which case +input+ is reset to the position it had on entry.
# Accepted forms (per the original notes: with or without the trailing
# semi-colon, upper or lower case):
# * &#dddd;
# * &#xhhhh;
# * &name;
def decode_char(input)
  # remember where we started so a failed decode can be undone
  input.mark

  # anything other than '&' (including end of input) is not an entity
  lead = input.next
  unless lead == '&'
    input.reset
    return nil
  end

  marker = input.next
  decoded =
    if marker.nil?
      nil
    elsif marker == '#'
      # numeric form: &#ddd; or &#xhh;
      numeric_entity(input)
    elsif marker =~ /[a-zA-Z]/
      # named form: hand the letter back so named_entity sees the whole name
      input.push(marker)
      named_entity(input)
    end
  return decoded unless decoded.nil?

  input.reset
  nil
end
|
75
|
+
|
76
|
+
# Decodes the numeric part of an entity, i.e. what follows "&#".
# A leading "x" or "X" selects hexadecimal parsing, anything else is
# handed to the decimal parser. Returns nil at end of input.
def numeric_entity(input) #:nodoc:
  lookahead = input.peek
  return nil if lookahead.nil?
  return parse_number(input) unless lookahead.downcase.eql?("x")

  # consume the radix marker before reading the hex digits
  input.next
  parse_hex(input)
end
|
86
|
+
|
87
|
+
# Decodes a named entity (for example "amp" in "&amp;").
#
# Reads ahead up to the length of the longest known entity name and keeps
# the longest prefix that names an entity. A prefix spelled exactly as in
# ENTITY_MAP wins, so case-distinct entities such as "Aacute"/"aacute" or
# "Prime"/"prime" decode to their own code points; otherwise the name is
# matched case-insensitively through @lookup_map.
#
# Returns the decoded character, or nil when no entity name matches. On
# success the input is rewound with +reset+ and then advanced past the
# entity, which relies on the mark sitting just before the leading "&"
# (decode_char in this class sets it there).
def named_entity(input)#:nodoc:
  candidate = ''
  len = min(input.remainder.size, @longest_key)
  input.next if input.peek?("&")

  best = ''
  # we have to find the longest match so we dont stop at a shorter name
  # that is a prefix of a longer one (e.g. "sup" vs "sup1")
  (0..len).each do
    candidate << input.next if input.next?
    # dup: candidate keeps growing on later iterations
    match = ENTITY_MAP.key?(candidate) ? candidate.dup : @lookup_map[candidate.downcase]
    best = match if match
  end
  # no matches found
  return nil if best.empty?

  # rewind and consume the "&" plus the matched name (hence size + 1 reads)
  input.reset
  (0..best.size).each { input.next }
  code_point = ENTITY_MAP[best]
  # the terminating semi-colon is optional
  input.next if input.peek?(';')
  code_point.chr(Encoding::UTF_8)
end
|
115
|
+
# Parses a decimal number from the stream and converts it to a character.
#
# Consumes consecutive digits and, if present, the terminating semi-colon.
# Returns the UTF-8 character for the parsed code point, or nil when there
# were no digits, the value lies outside START_CODE_POINT..END_CODE_POINT,
# or the code point cannot be represented in UTF-8.
def parse_number(input)#:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if c =~ /\d/
      digits << c
      input.next
    else
      # a semi-colon terminates the entity and is consumed; anything else
      # terminates it and is left in the stream
      input.next if c == ';'
      break
    end
  end

  # ''.to_i is 0, which would otherwise decode "&#;" to a NUL character
  return nil if digits.empty?

  code_point = digits.to_i
  return nil unless code_point >= START_CODE_POINT && code_point <= END_CODE_POINT

  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are invalid in UTF-8 (surrogates)
    nil
  end
end
|
138
|
+
# Parses a hexadecimal number from the stream and converts it to a character.
#
# Consumes consecutive hex digits (either case) and, if present, the
# terminating semi-colon. Returns the UTF-8 character for the parsed code
# point, or nil when there were no digits, the value lies outside
# START_CODE_POINT..END_CODE_POINT, or the code point cannot be represented
# in UTF-8.
def parse_hex(input)#:nodoc:
  digits = ''
  while input.next?
    c = input.peek
    if "0123456789ABCDEFabcdef".include?(c)
      digits << c
      input.next
    else
      # a semi-colon terminates the entity and is consumed; anything else
      # terminates it and is left in the stream
      input.next if c == ";"
      break
    end
  end

  # ''.hex is 0, which would otherwise decode "&#x;" to a NUL character
  return nil if digits.empty?

  code_point = digits.hex
  return nil unless code_point >= START_CODE_POINT && code_point <= END_CODE_POINT

  begin
    code_point.chr(Encoding::UTF_8)
  rescue RangeError
    # Integer#chr rejects code points that are invalid in UTF-8 (surrogates)
    nil
  end
end
|
160
|
+
|
161
|
+
# Hex digits of U+FFFD (REPLACEMENT CHARACTER); encode_char emits them as
# "&#xfffd;" in place of control characters.
REPLACEMENT_HEX = "fffd".freeze
# The replacement character itself. The literal must be double-quoted:
# inside single quotes \u is not an escape, so '\ufffd' is the six
# characters backslash, u, f, f, f, d rather than U+FFFD.
REPLACEMENT_CHAR = "\ufffd".freeze
|
165
|
+
|
166
|
+
# Map of HTML entity names to their numeric code points.
# Names are case-sensitive ('Aacute' and 'aacute' are different characters).
# Frozen so the shared table cannot be modified at runtime.
ENTITY_MAP = {
  'Aacute' => 193,
  'aacute' => 225,
  'Acirc' => 194,
  'acirc' => 226,
  'acute' => 180,
  'AElig' => 198,
  'aelig' => 230,
  'Agrave' => 192,
  'agrave' => 224,
  'alefsym' => 8501,
  'Alpha' => 913,
  'alpha' => 945,
  'amp' => 38,
  'and' => 8743,
  'ang' => 8736,
  'Aring' => 197,
  'aring' => 229,
  'asymp' => 8776,
  'Atilde' => 195,
  'atilde' => 227,
  'Auml' => 196,
  'auml' => 228,
  'bdquo' => 8222,
  'Beta' => 914,
  'beta' => 946,
  'brvbar' => 166,
  'bull' => 8226,
  'cap' => 8745,
  'Ccedil' => 199,
  'ccedil' => 231,
  'cedil' => 184,
  'cent' => 162,
  'Chi' => 935,
  'chi' => 967,
  'circ' => 710,
  'clubs' => 9827,
  'cong' => 8773,
  'copy' => 169,
  'crarr' => 8629,
  'cup' => 8746,
  'curren' => 164,
  'Dagger' => 8225,
  'dagger' => 8224,
  'dArr' => 8659,
  'darr' => 8595,
  'deg' => 176,
  'Delta' => 916,
  'delta' => 948,
  'diams' => 9830,
  'divide' => 247,
  'Eacute' => 201,
  'eacute' => 233,
  'Ecirc' => 202,
  'ecirc' => 234,
  'Egrave' => 200,
  'egrave' => 232,
  'empty' => 8709,
  'emsp' => 8195,
  'ensp' => 8194,
  'Epsilon' => 917,
  'epsilon' => 949,
  'equiv' => 8801,
  'Eta' => 919,
  'eta' => 951,
  'ETH' => 208,
  'eth' => 240,
  'Euml' => 203,
  'euml' => 235,
  'euro' => 8364,
  'exist' => 8707,
  'fnof' => 402,
  'forall' => 8704,
  'frac12' => 189,
  'frac14' => 188,
  'frac34' => 190,
  'frasl' => 8260,
  'Gamma' => 915,
  'gamma' => 947,
  'ge' => 8805,
  'gt' => 62,
  'hArr' => 8660,
  'harr' => 8596,
  'hearts' => 9829,
  'hellip' => 8230,
  'Iacute' => 205,
  'iacute' => 237,
  'Icirc' => 206,
  'icirc' => 238,
  'iexcl' => 161,
  'Igrave' => 204,
  'igrave' => 236,
  'image' => 8465,
  'infin' => 8734,
  'int' => 8747,
  'Iota' => 921,
  'iota' => 953,
  'iquest' => 191,
  'isin' => 8712,
  'Iuml' => 207,
  'iuml' => 239,
  'Kappa' => 922,
  'kappa' => 954,
  'Lambda' => 923,
  'lambda' => 955,
  'lang' => 9001,
  'laquo' => 171,
  'lArr' => 8656,
  'larr' => 8592,
  'lceil' => 8968,
  'ldquo' => 8220,
  'le' => 8804,
  'lfloor' => 8970,
  'lowast' => 8727,
  'loz' => 9674,
  'lrm' => 8206,
  'lsaquo' => 8249,
  'lsquo' => 8216,
  'lt' => 60,
  'macr' => 175,
  'mdash' => 8212,
  'micro' => 181,
  'middot' => 183,
  'minus' => 8722,
  'Mu' => 924,
  'mu' => 956,
  'nabla' => 8711,
  'nbsp' => 160,
  'ndash' => 8211,
  'ne' => 8800,
  'ni' => 8715,
  'not' => 172,
  'notin' => 8713,
  'nsub' => 8836,
  'Ntilde' => 209,
  'ntilde' => 241,
  'Nu' => 925,
  'nu' => 957,
  'Oacute' => 211,
  'oacute' => 243,
  'Ocirc' => 212,
  'ocirc' => 244,
  'OElig' => 338,
  'oelig' => 339,
  'Ograve' => 210,
  'ograve' => 242,
  'oline' => 8254,
  'Omega' => 937,
  'omega' => 969,
  'Omicron' => 927,
  'omicron' => 959,
  'oplus' => 8853,
  'or' => 8744,
  'ordf' => 170,
  'ordm' => 186,
  'Oslash' => 216,
  'oslash' => 248,
  'Otilde' => 213,
  'otilde' => 245,
  'otimes' => 8855,
  'Ouml' => 214,
  'ouml' => 246,
  'para' => 182,
  'part' => 8706,
  'permil' => 8240,
  'perp' => 8869,
  'Phi' => 934,
  'phi' => 966,
  'Pi' => 928,
  'pi' => 960,
  'piv' => 982,
  'plusmn' => 177,
  'pound' => 163,
  'Prime' => 8243,
  'prime' => 8242,
  'prod' => 8719,
  'prop' => 8733,
  'Psi' => 936,
  'psi' => 968,
  'quot' => 34,
  'radic' => 8730,
  'rang' => 9002,
  'raquo' => 187,
  'rArr' => 8658,
  'rarr' => 8594,
  'rceil' => 8969,
  'rdquo' => 8221,
  'real' => 8476,
  'reg' => 174,
  'rfloor' => 8971,
  'Rho' => 929,
  'rho' => 961,
  'rlm' => 8207,
  'rsaquo' => 8250,
  'rsquo' => 8217,
  'sbquo' => 8218,
  'Scaron' => 352,
  'scaron' => 353,
  'sdot' => 8901,
  'sect' => 167,
  'shy' => 173,
  'Sigma' => 931,
  'sigma' => 963,
  'sigmaf' => 962,
  'sim' => 8764,
  'spades' => 9824,
  'sub' => 8834,
  'sube' => 8838,
  'sum' => 8721,
  'sup' => 8835,
  'sup1' => 185,
  'sup2' => 178,
  'sup3' => 179,
  'supe' => 8839,
  'szlig' => 223,
  'Tau' => 932,
  'tau' => 964,
  'there4' => 8756,
  'Theta' => 920,
  'theta' => 952,
  'thetasym' => 977,
  'thinsp' => 8201,
  'THORN' => 222,
  'thorn' => 254,
  'tilde' => 732,
  'times' => 215,
  'trade' => 8482,
  'Uacute' => 218,
  'uacute' => 250,
  'uArr' => 8657,
  'uarr' => 8593,
  'Ucirc' => 219,
  'ucirc' => 251,
  'Ugrave' => 217,
  'ugrave' => 249,
  'uml' => 168,
  'upsih' => 978,
  'Upsilon' => 933,
  'upsilon' => 965,
  'Uuml' => 220,
  'uuml' => 252,
  'weierp' => 8472,
  'Xi' => 926,
  'xi' => 958,
  'Yacute' => 221,
  'yacute' => 253,
  'yen' => 165,
  'Yuml' => 376,
  'yuml' => 255,
  'Zeta' => 918,
  'zeta' => 950,
  'zwj' => 8205,
  'zwnj' => 8204
}.freeze
|
421
|
+
end
|
422
|
+
end
|
423
|
+
end
|
424
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Owasp
|
2
|
+
module Esapi
|
3
|
+
module Codec
|
4
|
+
class JavascriptCodec < BaseCodec
|
5
|
+
|
6
|
+
# Returns backslash encoded numeric format. Does not use backslash character escapes
# such as, \" or \' as these may cause parsing problems. For example, if a javascript
# attribute, such as onmouseover, contains a \" that will close the entire attribute and
# allow an attacker to inject another script attribute.
#
# immune - collection of characters that are returned untouched.
# input  - the character to encode.
#
# Characters for which the inherited +hex+ helper returns nil are returned
# unchanged. Code points below 256 become \xHH, everything else \uHHHH,
# with upper-case hex digits zero-padded to the minimum width.
def encode_char(immune, input)
  return input if immune.include?(input)

  hex_digits = hex(input)
  return input if hex_digits.nil?

  if hex_digits.hex < 256
    "\\x#{hex_digits.upcase.rjust(2, '0')}"
  else
    # NOTE(review): code points above 0xFFFF yield more than four digits
    # here (unchanged from the previous behaviour); JavaScript would need a
    # surrogate pair for those.
    "\\u#{hex_digits.upcase.rjust(4, '0')}"
  end
end
|
19
|
+
|
20
|
+
# Returns the decoded version of the character starting at the current
# position of +input+, or nil if no decoding is possible (in which case
# +input+ is reset to the position it had on entry).
# See http://www.planetpdf.com/codecuts/pdfs/tutorial/jsspec.*pdf*
# Formats all are legal both upper/lower case:
# * \\a - special characters
# * \\xHH
# * \\uHHHH
# * \\OOO (1, 2, or 3 digits)
# Any other escaped character is returned as-is, without the backslash.
def decode_char(input)
  input.mark

  # anything other than a backslash (including end of input) is not encoded
  unless input.next == "\\"
    input.reset
    return nil
  end

  second = input.next
  if second.nil?
    input.reset
    return nil
  end

  # single-character escapes
  simple = case second
           when "b"  then 0x08
           when "t"  then 0x09
           when "n"  then 0x0a
           when "v"  then 0x0b
           when "f"  then 0x0c
           when "r"  then 0x0d
           when "\"" then 0x22
           when "\'" then 0x27
           when "\\" then 0x5c
           end
  return simple.chr if simple

  # \xHH takes exactly two hex digits, \uHHHH exactly four
  width = { "x" => 2, "u" => 4 }[second.downcase]
  if width
    digits = ''
    width.times do
      hex_digit = input.next_hex
      if hex_digit.nil?
        input.reset
        return nil
      end
      digits << hex_digit
    end
    code_point = digits.hex
  elsif input.octal?(second)
    # up to two further octal digits; the first non-octal character is
    # handed back to the stream. digits deliberately is the same object as
    # second, as before.
    digits = second
    2.times do
      c = input.next
      unless input.octal?(c)
        input.push(c)
        break
      end
      digits << c
    end
    code_point = digits.to_i(8)
  else
    # unrecognised escape: the backslash is simply dropped
    return second
  end

  begin
    return code_point.chr(Encoding::UTF_8) if code_point >= START_CODE_POINT && code_point <= END_CODE_POINT
  rescue RangeError
    # Integer#chr rejects code points that are invalid in UTF-8 (surrogates)
    input.reset
    return nil
  end
  second
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|