addressable 2.2.3 → 2.7.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of addressable might be problematic. Click here for more details.

@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ # encoding:utf-8
4
+ #--
5
+ # Copyright (C) Bob Aman
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #++
19
+
20
+
21
+ require "idn"
22
+
23
module Addressable
  # IDNA helpers that delegate to the +IDN+ constant made available by
  # <tt>require "idn"</tt>.
  module IDNA
    # Punycode-encodes +value+ (coerced with +to_s+) via IDN::Punycode.
    def self.punycode_encode(value)
      IDN::Punycode.encode(value.to_s)
    end

    # Punycode-decodes +value+ (coerced with +to_s+) via IDN::Punycode.
    def self.punycode_decode(value)
      IDN::Punycode.decode(value.to_s)
    end

    # Applies NFKC normalization to +value+ (coerced with +to_s+) via
    # IDN::Stringprep.
    def self.unicode_normalize_kc(value)
      IDN::Stringprep.nfkc_normalize(value.to_s)
    end

    # Converts every dot-separated label of +value+ with IDN::Idna.toASCII.
    #
    # @param [#to_s] value The domain name to convert.
    # @return [String] The converted name; empty labels and labels of 64 or
    #   more characters are passed through unchanged.
    def self.to_ascii(value)
      convert_labels(value) do |label|
        IDN::Idna.toASCII(label, IDN::Idna::ALLOW_UNASSIGNED)
      end
    end

    # Converts every dot-separated label of +value+ with IDN::Idna.toUnicode.
    #
    # @param [#to_s] value The domain name to convert.
    # @return [String] The converted name; empty labels and labels of 64 or
    #   more characters are passed through unchanged.
    def self.to_unicode(value)
      convert_labels(value) do |label|
        IDN::Idna.toUnicode(label, IDN::Idna::ALLOW_UNASSIGNED)
      end
    end

    # Splits +value+ on dots, yields each label whose size is between 1 and
    # 63, and joins the results with dots again.
    #
    # The -1 limit makes +split+ keep trailing empty labels, so a trailing
    # dot in the input survives the round trip.
    def self.convert_labels(value)
      value.to_s.split('.', -1).map do |label|
        label.empty? || label.size >= 64 ? label : yield(label)
      end.join('.')
    end
    private_class_method :convert_labels
  end
end
@@ -0,0 +1,676 @@
1
+ # frozen_string_literal: true
2
+
3
+ # encoding:utf-8
4
+ #--
5
+ # Copyright (C) Bob Aman
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #++
19
+
20
+
21
+ module Addressable
22
+ module IDNA
23
+ # This module is loosely based on idn_actionmailer by Mick Staugaard,
24
+ # the unicode library by Yoshida Masato, and the punycode implementation
25
+ # by Kazuhiro Nishiyama. Most of the code was copied verbatim, but
26
+ # some reformatting was done, and some translation from C was done.
27
+ #
28
+ # Without their code to work from as a base, we'd all still be relying
29
+ # on the presence of libidn. Which nobody ever seems to have installed.
30
+ #
31
+ # Original sources:
32
+ # http://github.com/staugaard/idn_actionmailer
33
+ # http://www.yoshidam.net/Ruby.html#unicode
34
+ # http://rubyforge.org/frs/?group_id=2550
35
+
36
+
37
+ UNICODE_TABLE = File.expand_path(
38
+ File.join(File.dirname(__FILE__), '../../..', 'data/unicode.data')
39
+ )
40
+
41
+ ACE_PREFIX = "xn--"
42
+
43
+ UTF8_REGEX = /\A(?:
44
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
45
+ | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
46
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
47
+ | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
48
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
49
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
50
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
51
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
52
+ )*\z/mnx
53
+
54
+ UTF8_REGEX_MULTIBYTE = /(?:
55
+ [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
56
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
57
+ | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
58
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
59
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
60
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
61
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
62
+ )/mnx
63
+
64
+ # :startdoc:
65
+
66
# Converts from a Unicode internationalized domain name to an ASCII
# domain name as described in RFC 3490.
#
# Input that is not valid UTF-8, or that contains no multibyte sequence,
# is returned as a binary-encoded copy without further processing.
#
# @param [String, #to_s] input The domain name to convert.
# @return [String] The ASCII form of the domain name.
def self.to_ascii(input)
  input = input.to_s unless input.is_a?(String)
  input = input.dup
  # The UTF8_* patterns are byte-oriented, so match against raw bytes.
  input.force_encoding(Encoding::ASCII_8BIT) if input.respond_to?(:force_encoding)
  return input unless input =~ UTF8_REGEX && input =~ UTF8_REGEX_MULTIBYTE

  # A limit of -1 keeps trailing empty labels, so a trailing root dot is
  # preserved here just as it is on the ASCII-only path above.
  labels = unicode_downcase(input).split('.', -1)
  labels.map! do |label|
    label.force_encoding(Encoding::ASCII_8BIT) if label.respond_to?(:force_encoding)
    if label =~ UTF8_REGEX && label =~ UTF8_REGEX_MULTIBYTE
      ACE_PREFIX + punycode_encode(unicode_normalize_kc(label))
    else
      label
    end
  end
  labels.join('.')
end
91
+
92
# Converts from an ASCII domain name to a Unicode internationalized
# domain name as described in RFC 3490.
#
# Labels that do not start with ACE_PREFIX, or whose payload cannot be
# decoded, are returned unchanged: toUnicode is explicitly defined as
# never-fails by the spec.
#
# @param [String, #to_s] input The domain name to convert.
# @return [String] The Unicode form of the domain name, tagged as UTF-8.
def self.to_unicode(input)
  input = input.to_s unless input.is_a?(String)
  # Anchored to the whole label so nothing outside the match is discarded.
  ace_label = /\A#{ACE_PREFIX}(.+)\z/m

  # A limit of -1 keeps trailing empty labels (e.g. a trailing root dot).
  labels = input.split('.', -1).map do |label|
    encoded = label[ace_label, 1]
    next label unless encoded

    begin
      punycode_decode(encoded)
    rescue Addressable::IDNA::PunycodeBadInput,
           Addressable::IDNA::PunycodeBigOutput,
           Addressable::IDNA::PunycodeOverflow,
           RangeError
      # Any decoding failure leaves the label untouched; RangeError covers
      # a decoded value that cannot be packed as a code point.
      label
    end
  end

  output = labels.join('.')
  output.force_encoding(Encoding::UTF_8) if output.respond_to?(:force_encoding)
  output
end
115
+
116
# Unicode normalization form KC.
#
# Runs the code points of +input+ through unicode_decompose,
# unicode_sort_canonical and unicode_compose, in that order.
#
# @param [String, #to_s] input The text to normalize.
# @return [String] The code points produced by the pipeline, re-packed.
def self.unicode_normalize_kc(input)
  text = input.is_a?(String) ? input : input.to_s
  decomposed = unicode_decompose(text.unpack("U*"))
  reordered = unicode_sort_canonical(decomposed)
  unicode_compose(reordered).pack("U*")
end
124
+
125
##
# Unicode aware downcase method.
#
# Each code point is replaced by whatever lookup_unicode_lowercase
# returns for it.
#
# @api private
# @param [String] input
#   The input string.
# @return [String] The downcased result.
def self.unicode_downcase(input)
  text = input.is_a?(String) ? input : input.to_s
  text.unpack("U*").map { |cp| lookup_unicode_lowercase(cp) }.pack("U*")
end
138
+ private_class_method :unicode_downcase
139
+
140
# Composes adjacent code points into precomposed code points.
#
# Walks the list once, keeping a current "starter". While the starter has
# combining class 0, each following code point is offered to
# unicode_compose_pair; a successful composition replaces the starter,
# otherwise the starter is emitted and the following code point becomes
# the new starter.
#
# @api private
# @param [Array<Integer>] unpacked Code points, already decomposed and
#   canonically ordered by the caller.
# @return [Array<Integer>] The composed code points; an empty input is
#   returned as is.
def self.unicode_compose(unpacked)
  return unpacked if unpacked.empty?

  composed = []
  starter = unpacked.first
  starter_cc = lookup_unicode_combining_class(starter)
  unpacked.drop(1).each do |ch|
    composite = unicode_compose_pair(starter, ch) if starter_cc == 0
    if composite
      starter = composite
    else
      composed << starter
      starter = ch
      # Re-evaluate the combining class for the new starter. Without this,
      # input that begins with a combining mark would never compose
      # anything that follows it.
      starter_cc = lookup_unicode_combining_class(ch)
    end
  end
  composed << starter
end
163
+ private_class_method :unicode_compose
164
+
165
# Composes two code points into one, if a composition exists.
#
# Hangul syllables are composed arithmetically; every other pair is
# looked up via lookup_unicode_composition, keyed by the UTF-8 bytes of
# the two code points.
#
# @api private
# @param [Integer] ch_one The leading code point.
# @param [Integer] ch_two The code point that follows it.
# @return [Integer, nil] The composed code point, or whatever the table
#   lookup returns when the pair has no entry.
def self.unicode_compose_pair(ch_one, ch_two)
  l_index = ch_one - HANGUL_LBASE
  v_index = ch_two - HANGUL_VBASE
  if l_index >= 0 && l_index < HANGUL_LCOUNT &&
     v_index >= 0 && v_index < HANGUL_VCOUNT
    # Hangul L + V
    return HANGUL_SBASE + (l_index * HANGUL_VCOUNT + v_index) * HANGUL_TCOUNT
  end

  s_index = ch_one - HANGUL_SBASE
  t_index = ch_two - HANGUL_TBASE
  # t_index must be strictly positive: HANGUL_TBASE itself stands for
  # "no trailing consonant", and composing with it would silently drop
  # ch_two from the output.
  if s_index >= 0 && s_index < HANGUL_SCOUNT &&
     s_index % HANGUL_TCOUNT == 0 &&
     t_index > 0 && t_index < HANGUL_TCOUNT
    # Hangul LV + T
    return ch_one + t_index
  end

  # pack("U*") emits the same 1-6 byte sequences the previous hand-written
  # encoder produced for values in 0...0x80000000.
  lookup_unicode_composition([ch_one, ch_two].pack("U*").unpack("C*"))
end
217
+ private_class_method :unicode_compose_pair
218
+
219
# Reorders runs of combining marks by ascending combining class.
#
# Works on a copy of the input. Whenever two neighbours both have a
# non-zero combining class and the earlier one has the larger class, they
# are swapped and the scan steps back one position to re-check.
#
# @api private
# @param [Array<Integer>] unpacked The code points to reorder.
# @return [Array<Integer>] A new array holding the reordered code points.
def self.unicode_sort_canonical(unpacked)
  ordered = unpacked.dup
  index = 1
  while index < ordered.length
    previous_cc = lookup_unicode_combining_class(ordered[index - 1])
    current_cc = lookup_unicode_combining_class(ordered[index])
    if current_cc != 0 && previous_cc != 0 && previous_cc > current_cc
      ordered[index - 1], ordered[index] = ordered[index], ordered[index - 1]
      index -= 1 if index > 1
    else
      index += 1
    end
  end
  ordered
end
241
+ private_class_method :unicode_sort_canonical
242
+
243
# Expands code points into their decomposed form.
#
# Code points in the precomposed Hangul range are split with
# unicode_decompose_hangul. Any other code point for which
# lookup_unicode_compatibility returns a mapping is replaced by the
# recursive decomposition of that mapping. Everything else is copied
# through unchanged.
#
# @api private
# @param [Array<Integer>] unpacked The code points to decompose.
# @return [Array<Integer>] A new array of decomposed code points.
def self.unicode_decompose(unpacked)
  unpacked.each_with_object([]) do |cp, result|
    if cp >= HANGUL_SBASE && cp < HANGUL_SBASE + HANGUL_SCOUNT
      # The vowel and trailing parts may be nil; only present jamo are kept.
      result.concat(unicode_decompose_hangul(cp).compact)
    elsif (mapping = lookup_unicode_compatibility(cp))
      result.concat(unicode_decompose(mapping.unpack("U*")))
    else
      result << cp
    end
  end
end
262
+ private_class_method :unicode_decompose
263
+
264
# Splits a precomposed Hangul syllable into its jamo.
#
# @api private
# @param [Integer] codepoint The code point to split.
# @return [Array] A three-element array: leading, vowel, trailing. The
#   trailing element is nil when the syllable has none. A code point
#   outside the syllable range yields [codepoint, nil, nil].
def self.unicode_decompose_hangul(codepoint)
  syllable_index = codepoint - HANGUL_SBASE
  unless syllable_index >= 0 && syllable_index < HANGUL_SCOUNT
    return [codepoint, nil, nil]
  end

  leading = HANGUL_LBASE + syllable_index / HANGUL_NCOUNT
  vowel = HANGUL_VBASE + (syllable_index % HANGUL_NCOUNT) / HANGUL_TCOUNT
  trailing = HANGUL_TBASE + syllable_index % HANGUL_TCOUNT
  trailing = nil if trailing == HANGUL_TBASE
  [leading, vowel, trailing]
end
279
+ private_class_method :unicode_decompose_hangul
280
+
281
# Looks up the combining class stored for a code point.
#
# @api private
# @param [Integer] codepoint The code point to look up.
# @return [Integer] The stored combining class, or 0 when the table has no
#   entry for the code point or the entry has no combining class.
def self.lookup_unicode_combining_class(codepoint)
  entry = UNICODE_DATA[codepoint]
  return 0 unless entry

  entry[UNICODE_DATA_COMBINING_CLASS] || 0
end
287
+ private_class_method :lookup_unicode_combining_class
288
+
289
# Looks up the compatibility mapping stored for a code point.
#
# @api private
# @param [Integer] codepoint The code point to look up.
# @return [Object, nil] The stored compatibility field, or nil when the
#   table has no entry for the code point.
def self.lookup_unicode_compatibility(codepoint)
  entry = UNICODE_DATA[codepoint]
  return nil unless entry

  entry[UNICODE_DATA_COMPATIBILITY]
end
294
+ private_class_method :lookup_unicode_compatibility
295
+
296
# Looks up the lowercase mapping stored for a code point.
#
# @api private
# @param [Integer] codepoint The code point to look up.
# @return [Integer] The stored lowercase field, or +codepoint+ itself when
#   the table has no entry or the entry has no lowercase field.
def self.lookup_unicode_lowercase(codepoint)
  entry = UNICODE_DATA[codepoint]
  return codepoint unless entry

  entry[UNICODE_DATA_LOWERCASE] || codepoint
end
302
+ private_class_method :lookup_unicode_lowercase
303
+
304
# Looks up the composed code point for a byte-array key.
#
# @api private
# @param [Array<Integer>] unpacked The key to look up in COMPOSITION_TABLE.
# @return [Integer, nil] The value stored in COMPOSITION_TABLE under that key.
def self.lookup_unicode_composition(unpacked)
  COMPOSITION_TABLE[unpacked]
end
307
+ private_class_method :lookup_unicode_composition
308
+
309
+ HANGUL_SBASE = 0xac00
310
+ HANGUL_LBASE = 0x1100
311
+ HANGUL_LCOUNT = 19
312
+ HANGUL_VBASE = 0x1161
313
+ HANGUL_VCOUNT = 21
314
+ HANGUL_TBASE = 0x11a7
315
+ HANGUL_TCOUNT = 28
316
+ HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT # 588
317
+ HANGUL_SCOUNT = HANGUL_LCOUNT * HANGUL_NCOUNT # 11172
318
+
319
+ UNICODE_DATA_COMBINING_CLASS = 0
320
+ UNICODE_DATA_EXCLUSION = 1
321
+ UNICODE_DATA_CANONICAL = 2
322
+ UNICODE_DATA_COMPATIBILITY = 3
323
+ UNICODE_DATA_UPPERCASE = 4
324
+ UNICODE_DATA_LOWERCASE = 5
325
+ UNICODE_DATA_TITLECASE = 6
326
+
327
+ begin # Load the Unicode table from the file at UNICODE_TABLE, switching FakeFS off around the read when it is defined.
328
+ if defined?(FakeFS)
329
+ fakefs_state = FakeFS.activated? # remember whether FakeFS was active so it can be restored in the ensure clause
330
+ FakeFS.deactivate! # presumably so File.open below reads the real file rather than a faked filesystem
331
+ end
332
+ # This is a sparse Unicode table. Codepoints without entries are
333
+ # assumed to have the value: [0, 0, nil, nil, nil, nil, nil]
334
+ UNICODE_DATA = File.open(UNICODE_TABLE, "rb") do |file|
335
+ Marshal.load(file.read) # NOTE(review): Marshal.load is only safe while the file at UNICODE_TABLE is trusted
336
+ end
337
+ ensure
338
+ if defined?(FakeFS)
339
+ FakeFS.activate! if fakefs_state # re-enable FakeFS only if it was active before the load
340
+ end
341
+ end
342
+
343
+ COMPOSITION_TABLE = {} # filled below: byte array of a canonical field => the codepoint that field belongs to
344
+ UNICODE_DATA.each do |codepoint, data|
345
+ canonical = data[UNICODE_DATA_CANONICAL]
346
+ exclusion = data[UNICODE_DATA_EXCLUSION]
347
+
348
+ if canonical && exclusion == 0 # only entries that have a canonical field and an exclusion field of 0 are registered
349
+ COMPOSITION_TABLE[canonical.unpack("C*")] = codepoint # the key is the raw bytes of the canonical string
350
+ end
351
+ end
352
+
353
+ UNICODE_MAX_LENGTH = 256
354
+ ACE_MAX_LENGTH = 256
355
+
356
+ PUNYCODE_BASE = 36
357
+ PUNYCODE_TMIN = 1
358
+ PUNYCODE_TMAX = 26
359
+ PUNYCODE_SKEW = 38
360
+ PUNYCODE_DAMP = 700
361
+ PUNYCODE_INITIAL_BIAS = 72
362
+ PUNYCODE_INITIAL_N = 0x80
363
+ PUNYCODE_DELIMITER = 0x2D
364
+
365
+ PUNYCODE_MAXINT = 1 << 64
366
+
367
+ PUNYCODE_PRINT_ASCII =
368
+ "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
369
+ "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
370
+ " !\"\#$%&'()*+,-./" +
371
+ "0123456789:;<=>?" +
372
+ "@ABCDEFGHIJKLMNO" +
373
+ "PQRSTUVWXYZ[\\]^_" +
374
+ "`abcdefghijklmno" +
375
+ "pqrstuvwxyz{|}~\n"
376
+
377
+ # Input is invalid.
378
+ class PunycodeBadInput < StandardError; end
379
+ # Output would exceed the space provided.
380
+ class PunycodeBigOutput < StandardError; end
381
+ # Input needs wider integers to process.
382
+ class PunycodeOverflow < StandardError; end
383
+
384
# Encodes a string as Punycode (the algorithm of RFC 3492).
#
# @api private
# @param [String, #to_s] unicode The text to encode.
# @return [String] The Punycode form, without any ACE prefix.
# @raise [PunycodeBigOutput] if the output would exceed ACE_MAX_LENGTH.
# @raise [PunycodeOverflow] if delta would exceed PUNYCODE_MAXINT.
def self.punycode_encode(unicode)
  unicode = unicode.to_s unless unicode.is_a?(String)
  input = unicode.unpack("U*")
  max_out = ACE_MAX_LENGTH

  # Handle the basic code points: they are copied through verbatim.
  output = []
  input.each do |cp|
    next unless punycode_basic?(cp)

    if max_out - output.length < 2
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end
    output << cp
  end

  # basic_count is the number of basic code points; handled is the number
  # of code points that have been encoded so far.
  basic_count = output.length
  handled = basic_count
  output << PUNYCODE_DELIMITER if basic_count > 0

  n = PUNYCODE_INITIAL_N
  delta = 0
  bias = PUNYCODE_INITIAL_BIAS

  # Main encoding loop:
  while handled < input.length
    # All non-basic code points < n have been handled already.
    # Find the next larger one:
    m = input.select { |cp| cp >= n }.min

    # Increase delta enough to advance the decoder's <n,i> state to
    # <m,0>, but guard against overflow:
    if m - n > (PUNYCODE_MAXINT - delta) / (handled + 1)
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    delta += (m - n) * (handled + 1)
    n = m

    input.each do |cp|
      delta += 1 if cp < n
      next unless cp == n

      # Represent delta as a generalized variable-length integer:
      q = delta
      k = PUNYCODE_BASE
      loop do
        if output.length >= max_out
          raise PunycodeBigOutput, "Output would exceed the space provided."
        end
        # Threshold for this digit position: k - bias clamped to TMIN..TMAX.
        t = [[k - bias, PUNYCODE_TMIN].max, PUNYCODE_TMAX].min
        break if q < t

        output << punycode_encode_digit(t + (q - t) % (PUNYCODE_BASE - t))
        q = (q - t) / (PUNYCODE_BASE - t)
        k += PUNYCODE_BASE
      end

      output << punycode_encode_digit(q)
      bias = punycode_adapt(delta, handled + 1, handled == basic_count)
      delta = 0
      handled += 1
    end

    delta += 1
    n += 1
  end

  output.map { |x| x.chr }.join("")
end
503
+ private_class_method :punycode_encode
504
+
505
# Decodes a Punycode string (the algorithm of RFC 3492).
#
# @api private
# @param [String] punycode The Punycode text, without any ACE prefix.
# @return [String] The decoded text, built with pack("U*").
# @raise [PunycodeBadInput] if the input contains a non-ASCII byte, an
#   invalid digit, ends in the middle of a number, or decodes to a value
#   above U+10FFFF.
# @raise [PunycodeBigOutput] if the input is longer than
#   ACE_MAX_LENGTH * 2 or the output would exceed UNICODE_MAX_LENGTH.
# @raise [PunycodeOverflow] if an intermediate value would exceed
#   PUNYCODE_MAXINT.
def self.punycode_decode(punycode)
  if ACE_MAX_LENGTH * 2 < punycode.size
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end

  input = punycode.unpack("C*")
  unless input.all? { |c| c <= 127 }
    raise PunycodeBadInput, "Input is invalid."
  end

  max_out = UNICODE_MAX_LENGTH
  n = PUNYCODE_INITIAL_N
  i = 0
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points: let b be the number of input code
  # points before the last delimiter, or 0 if there is none, then copy
  # the first b code points to the output.
  b = input.rindex { |c| punycode_delimiter?(c) } || 0
  if b > max_out
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end
  output = input.first(b)
  unless output.all? { |c| punycode_basic?(c) }
    raise PunycodeBadInput, "Input is invalid."
  end

  # Main decoding loop: start just after the last delimiter if any basic
  # code points were copied; start at the beginning otherwise.
  pos = b > 0 ? b + 1 : 0
  while pos < input.length
    # Decode a generalized variable-length integer into delta, which gets
    # added to i. The overflow checking is easier if we increase i as we
    # go, then subtract off its starting value at the end to obtain delta.
    oldi = i
    w = 1
    k = PUNYCODE_BASE
    loop do
      raise PunycodeBadInput, "Input is invalid." if pos >= input.length

      digit = punycode_decode_digit(input[pos])
      pos += 1
      if digit < 0 || digit >= PUNYCODE_BASE
        raise PunycodeBadInput, "Input is invalid."
      end
      if digit > (PUNYCODE_MAXINT - i) / w
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      i += digit * w
      # Threshold for this digit position: k - bias clamped to TMIN..TMAX.
      t = [[k - bias, PUNYCODE_TMIN].max, PUNYCODE_TMAX].min
      break if digit < t

      if w > PUNYCODE_MAXINT / (PUNYCODE_BASE - t)
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      w *= PUNYCODE_BASE - t
      k += PUNYCODE_BASE
    end

    out = output.length
    bias = punycode_adapt(i - oldi, out + 1, oldi == 0)

    # i was supposed to wrap around from out + 1 to 0, incrementing n
    # each time, so we'll fix that now:
    if i / (out + 1) > PUNYCODE_MAXINT - n
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    n += i / (out + 1)
    i %= out + 1

    if out >= max_out
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end
    # Values above U+10FFFF are not code points; reject them instead of
    # packing them into the output.
    raise PunycodeBadInput, "Input is invalid." if n > 0x10FFFF

    # Insert n at position i of the output:
    output.insert(i, n)
    i += 1
  end

  output.pack("U*")
end
625
+ private_class_method :punycode_decode
626
+
627
# Tells whether a code point is a basic (below 0x80) code point.
#
# @api private
# @param [Integer] codepoint The code point to test.
# @return [Boolean] true when +codepoint+ is less than 128.
def self.punycode_basic?(codepoint)
  codepoint < 128
end
630
+ private_class_method :punycode_basic?
631
+
632
# Tells whether a code point is the Punycode delimiter.
#
# @api private
# @param [Integer] codepoint The code point to test.
# @return [Boolean] true when +codepoint+ equals PUNYCODE_DELIMITER.
def self.punycode_delimiter?(codepoint)
  PUNYCODE_DELIMITER == codepoint
end
635
+ private_class_method :punycode_delimiter?
636
+
637
# Returns the basic code point that represents a digit value.
#
# Values 0..25 map to 'a'..'z' (97..122); values 26..35 map to
# '0'..'9' (48..57).
#
# @api private
# @param [Integer] d The digit value.
# @return [Integer] The code point for that digit.
def self.punycode_encode_digit(d)
  d < 26 ? d + 97 : d + 22
end
640
+ private_class_method :punycode_encode_digit
641
+
642
# Returns the numeric value of a basic codepoint
# (for use in representing integers) in the range 0 to
# base - 1, or PUNYCODE_BASE if codepoint does not represent a value.
#
# Explicit ranges are used because Ruby integers do not wrap around: a
# test such as "codepoint - 48 < 10" would also accept every code point
# below '0'.
#
# @api private
# @param [Integer] codepoint The code point of the digit character.
# @return [Integer] The digit value, or PUNYCODE_BASE for a non-digit.
def self.punycode_decode_digit(codepoint)
  case codepoint
  when 48..57 then codepoint - 22   # '0'..'9' => 26..35
  when 65..90 then codepoint - 65   # 'A'..'Z' => 0..25
  when 97..122 then codepoint - 97  # 'a'..'z' => 0..25
  else PUNYCODE_BASE
  end
end
656
+ private_class_method :punycode_decode_digit
657
+
658
# Bias adaptation method
#
# @api private
# @param [Integer] delta The delta that was just encoded or decoded.
# @param [Integer] numpoints Divisor applied to the scaled delta.
# @param [Boolean] firsttime When truthy, delta is divided by
#   PUNYCODE_DAMP; otherwise it is halved.
# @return [Integer] The new bias.
def self.punycode_adapt(delta, numpoints, firsttime)
  scaled = firsttime ? delta / PUNYCODE_DAMP : delta >> 1
  scaled += scaled / numpoints

  span = PUNYCODE_BASE - PUNYCODE_TMIN
  ceiling = (span * PUNYCODE_TMAX) / 2
  offset = 0
  until scaled <= ceiling
    scaled /= span
    offset += PUNYCODE_BASE
  end

  offset + (span + 1) * scaled / (scaled + PUNYCODE_SKEW)
end
673
+ private_class_method :punycode_adapt
674
+ end
675
+ # :startdoc:
676
+ end