simpleidn 0.0.7 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENCE +2 -2
- data/README.rdoc +7 -7
- data/lib/simpleidn.rb +56 -83
- data/simpleidn.gemspec +2 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91419f6658f1aa0e2efd5f1774879994f822a3d7
|
4
|
+
data.tar.gz: 60663039b50c49baa983dc7d51179dae820d839d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1eca7fc205bdd66663334b91786029b645637d501acb8b61e8661b2298c751a2df39c4854b6f926f29dec505b5530f86c1159b7a3e0a6a8ffa4ef411fd59600d
|
7
|
+
data.tar.gz: 5ca7274d2115e9e3a65f992721df2ec748719754e2ceccce3e03b1f4328c0a795a8fa85f90010d208fbb6f87d119b7f1154ae3d92e099b09d28957e0599e9a68
|
data/LICENCE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2011-
|
3
|
+
Copyright (c) 2011-2017 Morten Møller Riis
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|
21
|
+
THE SOFTWARE.
|
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@ This gem allows easy conversion from punycode ACE strings to unicode UTF-8 strin
|
|
4
4
|
|
5
5
|
The implementation is heavily based on the RFC3492 C example implementation but simplified since it does not preserve case.
|
6
6
|
|
7
|
-
This gem works with Ruby 1.
|
7
|
+
This gem works with Ruby 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
|
8
8
|
|
9
9
|
* http://www.whatastruggle.com
|
10
10
|
|
@@ -18,13 +18,13 @@ In your Ruby script you can now.
|
|
18
18
|
|
19
19
|
require 'rubygems'
|
20
20
|
require 'simpleidn'
|
21
|
-
|
21
|
+
|
22
22
|
SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
|
23
|
-
=> "møllerriis.com"
|
24
|
-
|
23
|
+
=> "møllerriis.com"
|
24
|
+
|
25
25
|
SimpleIDN.to_ascii("møllerriis.com")
|
26
|
-
=> "xn--mllerriis-l8a.com"
|
27
|
-
|
26
|
+
=> "xn--mllerriis-l8a.com"
|
27
|
+
|
28
28
|
== Testing / RSpec
|
29
29
|
|
30
30
|
In order to run the test suite you must have <tt>rspec</tt> installed.
|
@@ -36,4 +36,4 @@ http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
|
|
36
36
|
|
37
37
|
Does not preserve uppercase. So if, for some reason, you use uppercase characters (eg. Ø instead of ø), please take note of that.
|
38
38
|
|
39
|
-
Please report any issues!
|
39
|
+
Please report any issues!
|
data/lib/simpleidn.rb
CHANGED
@@ -1,24 +1,5 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
if RUBY_VERSION =~ /^1\.8/
|
3
|
-
$KCODE = "UTF-8"
|
4
|
-
class String
|
5
|
-
def ord
|
6
|
-
self[0]
|
7
|
-
end
|
8
|
-
end
|
9
|
-
else
|
10
|
-
Encoding.default_internal = "UTF-8"
|
11
|
-
end
|
12
|
-
|
13
|
-
class Integer
|
14
|
-
def to_utf8_character
|
15
|
-
[self].pack("U*")
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
1
|
module SimpleIDN
|
20
|
-
|
21
|
-
VERSION = "0.0.7"
|
2
|
+
VERSION = "0.0.9"
|
22
3
|
|
23
4
|
# The ConversionError is raised when an error occurs during a
|
24
5
|
# Punycode <-> Unicode conversion.
|
@@ -26,7 +7,6 @@ module SimpleIDN
|
|
26
7
|
end
|
27
8
|
|
28
9
|
module Punycode
|
29
|
-
|
30
10
|
INITIAL_N = 0x80
|
31
11
|
INITIAL_BIAS = 72
|
32
12
|
DELIMITER = 0x2D
|
@@ -36,6 +16,9 @@ module SimpleIDN
|
|
36
16
|
TMAX = 26
|
37
17
|
SKEW = 38
|
38
18
|
MAXINT = 0x7FFFFFFF
|
19
|
+
ASCII_MAX = 0x7F
|
20
|
+
|
21
|
+
EMPTY = ''.encode(Encoding::UTF_8).freeze
|
39
22
|
|
40
23
|
module_function
|
41
24
|
|
@@ -46,14 +29,12 @@ module SimpleIDN
|
|
46
29
|
cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : cp - 97 < 26 ? cp - 97 : BASE
|
47
30
|
end
|
48
31
|
|
49
|
-
# encode_digit(d
|
32
|
+
# encode_digit(d) returns the basic code point whose value
|
50
33
|
# (when used for representing integers) is d, which needs to be in
|
51
|
-
# the range 0 to base-1.
|
52
|
-
# nonzero, in which case the uppercase form is used. The behavior
|
53
|
-
# is undefined if flag is nonzero and digit d has no uppercase form.
|
34
|
+
# the range 0 to base-1.
|
54
35
|
def encode_digit(d)
|
55
36
|
d + 22 + 75 * (d < 26 ? 1 : 0)
|
56
|
-
# 0..25 map to ASCII a..z
|
37
|
+
# 0..25 map to ASCII a..z
|
57
38
|
# 26..35 map to ASCII 0..9
|
58
39
|
end
|
59
40
|
|
@@ -63,24 +44,17 @@ module SimpleIDN
|
|
63
44
|
delta += (delta / numpoints)
|
64
45
|
|
65
46
|
k = 0
|
66
|
-
while delta > (((BASE - TMIN) * TMAX) / 2)
|
47
|
+
while delta > (((BASE - TMIN) * TMAX) / 2)
|
67
48
|
delta /= BASE - TMIN
|
68
49
|
k += BASE
|
69
50
|
end
|
70
|
-
|
71
|
-
end
|
72
|
-
|
73
|
-
# encode_basic(bcp,flag) forces a basic code point to lowercase if flag is zero,
|
74
|
-
# uppercase if flag is nonzero, and returns the resulting code point.
|
75
|
-
# The code point is unchanged if it is caseless.
|
76
|
-
# The behavior is undefined if bcp is not a basic code point.
|
77
|
-
def encode_basic(bcp, flag)
|
78
|
-
bcp -= (bcp - 97 < 26 ? 1 : 0) << 5
|
79
|
-
return bcp + ((!flag && (bcp - 65 < 26 ? 1 : 0)) << 5)
|
51
|
+
k + (BASE - TMIN + 1) * delta / (delta + SKEW)
|
80
52
|
end
|
81
53
|
|
82
54
|
# Main decode
|
83
55
|
def decode(input)
|
56
|
+
input_encoding = input.encoding
|
57
|
+
input = input.encode(Encoding::UTF_8).codepoints.to_a
|
84
58
|
output = []
|
85
59
|
|
86
60
|
# Initialize the state:
|
@@ -91,18 +65,18 @@ module SimpleIDN
|
|
91
65
|
# Handle the basic code points: Let basic be the number of input code
|
92
66
|
# points before the last delimiter, or 0 if there is none, then
|
93
67
|
# copy the first basic code points to the output.
|
94
|
-
basic = input.rindex(DELIMITER
|
68
|
+
basic = input.rindex(DELIMITER) || 0
|
95
69
|
|
96
|
-
input
|
97
|
-
raise(ConversionError, "Illegal input >= 0x80") if char
|
98
|
-
output << char
|
70
|
+
input[0, basic].each do |char|
|
71
|
+
raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX
|
72
|
+
output << char
|
99
73
|
end
|
100
74
|
|
101
75
|
# Main decoding loop: Start just after the last delimiter if any
|
102
76
|
# basic code points were copied; start at the beginning otherwise.
|
103
77
|
|
104
78
|
ic = basic > 0 ? basic + 1 : 0
|
105
|
-
while ic < input.length
|
79
|
+
while ic < input.length
|
106
80
|
# ic is the index of the next character to be consumed,
|
107
81
|
|
108
82
|
# Decode a generalized variable-length integer into delta,
|
@@ -112,10 +86,10 @@ module SimpleIDN
|
|
112
86
|
oldi = i
|
113
87
|
w = 1
|
114
88
|
k = BASE
|
115
|
-
|
89
|
+
loop do
|
116
90
|
raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length
|
117
91
|
|
118
|
-
digit = decode_digit(input[ic]
|
92
|
+
digit = decode_digit(input[ic])
|
119
93
|
ic += 1
|
120
94
|
|
121
95
|
raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
|
@@ -142,16 +116,17 @@ module SimpleIDN
|
|
142
116
|
i %= out
|
143
117
|
|
144
118
|
# Insert n at position i of the output:
|
145
|
-
output.insert(i, n
|
119
|
+
output.insert(i, n)
|
146
120
|
i += 1
|
147
121
|
end
|
148
122
|
|
149
|
-
|
123
|
+
output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
|
150
124
|
end
|
151
125
|
|
152
126
|
# Main encode function
|
153
127
|
def encode(input)
|
154
|
-
|
128
|
+
input_encoding = input.encoding
|
129
|
+
input = input.encode(Encoding::UTF_8).codepoints.to_a
|
155
130
|
output = []
|
156
131
|
|
157
132
|
# Initialize the state:
|
@@ -160,9 +135,7 @@ module SimpleIDN
|
|
160
135
|
bias = INITIAL_BIAS
|
161
136
|
|
162
137
|
# Handle the basic code points:
|
163
|
-
output = input.select
|
164
|
-
char if char < 0x80
|
165
|
-
end
|
138
|
+
output = input.select { |char| char <= ASCII_MAX }
|
166
139
|
|
167
140
|
h = b = output.length
|
168
141
|
|
@@ -172,7 +145,7 @@ module SimpleIDN
|
|
172
145
|
output << DELIMITER if b > 0
|
173
146
|
|
174
147
|
# Main encoding loop:
|
175
|
-
while h < input.length
|
148
|
+
while h < input.length
|
176
149
|
# All non-basic code points < n have been
|
177
150
|
# handled already. Find the next larger one:
|
178
151
|
|
@@ -190,38 +163,42 @@ module SimpleIDN
|
|
190
163
|
delta += (m - n) * (h + 1)
|
191
164
|
n = m
|
192
165
|
|
193
|
-
input.each_with_index do |char,
|
166
|
+
input.each_with_index do |char, _|
|
194
167
|
if char < n
|
195
168
|
delta += 1
|
196
169
|
raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
|
197
170
|
end
|
198
171
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
output << encode_digit(q)
|
211
|
-
bias = adapt(delta, h + 1, h == b)
|
212
|
-
delta = 0
|
213
|
-
h += 1
|
172
|
+
next unless char == n
|
173
|
+
|
174
|
+
# Represent delta as a generalized variable-length integer:
|
175
|
+
q = delta
|
176
|
+
k = BASE
|
177
|
+
loop do
|
178
|
+
t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
|
179
|
+
break if q < t
|
180
|
+
output << encode_digit(t + (q - t) % (BASE - t))
|
181
|
+
q = ((q - t) / (BASE - t)).floor
|
182
|
+
k += BASE
|
214
183
|
end
|
184
|
+
output << encode_digit(q)
|
185
|
+
bias = adapt(delta, h + 1, h == b)
|
186
|
+
delta = 0
|
187
|
+
h += 1
|
215
188
|
end
|
216
189
|
|
217
190
|
delta += 1
|
218
191
|
n += 1
|
219
192
|
end
|
220
|
-
|
193
|
+
output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
|
221
194
|
end
|
222
|
-
|
223
195
|
end
|
224
196
|
|
197
|
+
ACE_PREFIX = 'xn--'.encode(Encoding::UTF_8).freeze
|
198
|
+
ASCII_MAX = 0x7F
|
199
|
+
DOT = 0x2E.chr(Encoding::UTF_8).freeze
|
200
|
+
LABEL_SEPERATOR_RE = /[\u002e]/
|
201
|
+
|
225
202
|
module_function
|
226
203
|
|
227
204
|
# Converts a UTF-8 unicode string to a punycode ACE string.
|
@@ -229,16 +206,14 @@ module SimpleIDN
|
|
229
206
|
# SimpleIDN.to_ascii("møllerriis.com")
|
230
207
|
# => "xn--mllerriis-l8a.com"
|
231
208
|
def to_ascii(domain)
|
232
|
-
|
209
|
+
return nil if domain.nil?
|
210
|
+
domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
|
233
211
|
return domain if domain_array.length == 0
|
234
212
|
out = []
|
235
|
-
|
236
|
-
|
237
|
-
s = domain_array[i]
|
238
|
-
out << (s =~ /[^A-Z0-9@\-*_]/i ? "xn--" + Punycode.encode(s) : s)
|
239
|
-
i += 1
|
213
|
+
domain_array.each do |s|
|
214
|
+
out << (s.codepoints.any? { |cp| cp > ASCII_MAX } ? ACE_PREFIX + Punycode.encode(s) : s)
|
240
215
|
end
|
241
|
-
|
216
|
+
out.join(DOT).encode(domain.encoding)
|
242
217
|
end
|
243
218
|
|
244
219
|
# Converts a punycode ACE string to a UTF-8 unicode string.
|
@@ -246,15 +221,13 @@ module SimpleIDN
|
|
246
221
|
# SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
|
247
222
|
# => "møllerriis.com"
|
248
223
|
def to_unicode(domain)
|
249
|
-
|
224
|
+
return nil if domain.nil?
|
225
|
+
domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
|
250
226
|
return domain if domain_array.length == 0
|
251
227
|
out = []
|
252
|
-
|
253
|
-
|
254
|
-
s = domain_array[i]
|
255
|
-
out << (s =~ /^xn\-\-/i ? Punycode.decode(s.gsub('xn--','')) : s)
|
256
|
-
i += 1
|
228
|
+
domain_array.each do |s|
|
229
|
+
out << (s.downcase.start_with?(ACE_PREFIX) ? Punycode.decode(s[ACE_PREFIX.length..-1]) : s)
|
257
230
|
end
|
258
|
-
|
231
|
+
out.join(DOT).encode(domain.encoding)
|
259
232
|
end
|
260
233
|
end
|
data/simpleidn.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'simpleidn'
|
@@ -20,4 +19,6 @@ Gem::Specification.new do |spec|
|
|
20
19
|
spec.add_development_dependency "bundler", "~> 1.11"
|
21
20
|
spec.add_development_dependency "rake", "~> 10.0"
|
22
21
|
spec.add_development_dependency "rspec", "~> 3.0"
|
22
|
+
|
23
|
+
spec.required_ruby_version = '>1.9'
|
23
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simpleidn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Morten Møller Riis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -77,9 +77,9 @@ require_paths:
|
|
77
77
|
- lib
|
78
78
|
required_ruby_version: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '1.9'
|
83
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
84
|
requirements:
|
85
85
|
- - ">="
|
@@ -87,7 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
87
|
version: '0'
|
88
88
|
requirements: []
|
89
89
|
rubyforge_project:
|
90
|
-
rubygems_version: 2.
|
90
|
+
rubygems_version: 2.5.1
|
91
91
|
signing_key:
|
92
92
|
specification_version: 4
|
93
93
|
summary: Punycode ACE to unicode UTF-8 (and vice-versa) string conversion.
|