simpleidn 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENCE +2 -2
- data/README.rdoc +7 -7
- data/lib/simpleidn.rb +56 -83
- data/simpleidn.gemspec +2 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91419f6658f1aa0e2efd5f1774879994f822a3d7
|
4
|
+
data.tar.gz: 60663039b50c49baa983dc7d51179dae820d839d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1eca7fc205bdd66663334b91786029b645637d501acb8b61e8661b2298c751a2df39c4854b6f926f29dec505b5530f86c1159b7a3e0a6a8ffa4ef411fd59600d
|
7
|
+
data.tar.gz: 5ca7274d2115e9e3a65f992721df2ec748719754e2ceccce3e03b1f4328c0a795a8fa85f90010d208fbb6f87d119b7f1154ae3d92e099b09d28957e0599e9a68
|
data/LICENCE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License
|
2
2
|
|
3
|
-
Copyright (c) 2011-
|
3
|
+
Copyright (c) 2011-2017 Morten Møller Riis
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|
21
|
+
THE SOFTWARE.
|
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@ This gem allows easy conversion from punycode ACE strings to unicode UTF-8 strin
|
|
4
4
|
|
5
5
|
The implementation is heavily based on the RFC3492 C example implementation but simplified since it does not preserve case.
|
6
6
|
|
7
|
-
This gem works with Ruby 1.
|
7
|
+
This gem works with Ruby 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
|
8
8
|
|
9
9
|
* http://www.whatastruggle.com
|
10
10
|
|
@@ -18,13 +18,13 @@ In your Ruby script you can now.
|
|
18
18
|
|
19
19
|
require 'rubygems'
|
20
20
|
require 'simpleidn'
|
21
|
-
|
21
|
+
|
22
22
|
SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
|
23
|
-
=> "møllerriis.com"
|
24
|
-
|
23
|
+
=> "møllerriis.com"
|
24
|
+
|
25
25
|
SimpleIDN.to_ascii("møllerriis.com")
|
26
|
-
=> "xn--mllerriis-l8a.com"
|
27
|
-
|
26
|
+
=> "xn--mllerriis-l8a.com"
|
27
|
+
|
28
28
|
== Testing / RSpec
|
29
29
|
|
30
30
|
In order to run the test suite you must have <tt>rspec</tt> installed.
|
@@ -36,4 +36,4 @@ http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
|
|
36
36
|
|
37
37
|
Does not preserve uppercase. So if, for some reason, you use uppercase characters (eg. Ø instead of ø), please take note of that.
|
38
38
|
|
39
|
-
Please report any issues!
|
39
|
+
Please report any issues!
|
data/lib/simpleidn.rb
CHANGED
@@ -1,24 +1,5 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
if RUBY_VERSION =~ /^1\.8/
|
3
|
-
$KCODE = "UTF-8"
|
4
|
-
class String
|
5
|
-
def ord
|
6
|
-
self[0]
|
7
|
-
end
|
8
|
-
end
|
9
|
-
else
|
10
|
-
Encoding.default_internal = "UTF-8"
|
11
|
-
end
|
12
|
-
|
13
|
-
class Integer
|
14
|
-
def to_utf8_character
|
15
|
-
[self].pack("U*")
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
1
|
module SimpleIDN
|
20
|
-
|
21
|
-
VERSION = "0.0.7"
|
2
|
+
VERSION = "0.0.9"
|
22
3
|
|
23
4
|
# The ConversionError is raised when an error occurs during a
|
24
5
|
# Punycode <-> Unicode conversion.
|
@@ -26,7 +7,6 @@ module SimpleIDN
|
|
26
7
|
end
|
27
8
|
|
28
9
|
module Punycode
|
29
|
-
|
30
10
|
INITIAL_N = 0x80
|
31
11
|
INITIAL_BIAS = 72
|
32
12
|
DELIMITER = 0x2D
|
@@ -36,6 +16,9 @@ module SimpleIDN
|
|
36
16
|
TMAX = 26
|
37
17
|
SKEW = 38
|
38
18
|
MAXINT = 0x7FFFFFFF
|
19
|
+
ASCII_MAX = 0x7F
|
20
|
+
|
21
|
+
EMPTY = ''.encode(Encoding::UTF_8).freeze
|
39
22
|
|
40
23
|
module_function
|
41
24
|
|
@@ -46,14 +29,12 @@ module SimpleIDN
|
|
46
29
|
cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : cp - 97 < 26 ? cp - 97 : BASE
|
47
30
|
end
|
48
31
|
|
49
|
-
# encode_digit(d,flag) returns the basic code point whose value
|
32
|
+
# encode_digit(d) returns the basic code point whose value
|
50
33
|
# (when used for representing integers) is d, which needs to be in
|
51
|
-
# the range 0 to base-1. The lowercase form is used unless flag is
|
52
|
-
# nonzero, in which case the uppercase form is used. The behavior
|
53
|
-
# is undefined if flag is nonzero and digit d has no uppercase form.
|
34
|
+
# the range 0 to base-1.
|
54
35
|
def encode_digit(d)
|
55
36
|
d + 22 + 75 * (d < 26 ? 1 : 0)
|
56
|
-
# 0..25 map to ASCII a..z
|
37
|
+
# 0..25 map to ASCII a..z
|
57
38
|
# 26..35 map to ASCII 0..9
|
58
39
|
end
|
59
40
|
|
@@ -63,24 +44,17 @@ module SimpleIDN
|
|
63
44
|
delta += (delta / numpoints)
|
64
45
|
|
65
46
|
k = 0
|
66
|
-
while delta > (((BASE - TMIN) * TMAX) / 2)
|
47
|
+
while delta > (((BASE - TMIN) * TMAX) / 2)
|
67
48
|
delta /= BASE - TMIN
|
68
49
|
k += BASE
|
69
50
|
end
|
70
|
-
|
71
|
-
end
|
72
|
-
|
73
|
-
# encode_basic(bcp,flag) forces a basic code point to lowercase if flag is zero,
|
74
|
-
# uppercase if flag is nonzero, and returns the resulting code point.
|
75
|
-
# The code point is unchanged if it is caseless.
|
76
|
-
# The behavior is undefined if bcp is not a basic code point.
|
77
|
-
def encode_basic(bcp, flag)
|
78
|
-
bcp -= (bcp - 97 < 26 ? 1 : 0) << 5
|
79
|
-
return bcp + ((!flag && (bcp - 65 < 26 ? 1 : 0)) << 5)
|
51
|
+
k + (BASE - TMIN + 1) * delta / (delta + SKEW)
|
80
52
|
end
|
81
53
|
|
82
54
|
# Main decode
|
83
55
|
def decode(input)
|
56
|
+
input_encoding = input.encoding
|
57
|
+
input = input.encode(Encoding::UTF_8).codepoints.to_a
|
84
58
|
output = []
|
85
59
|
|
86
60
|
# Initialize the state:
|
@@ -91,18 +65,18 @@ module SimpleIDN
|
|
91
65
|
# Handle the basic code points: Let basic be the number of input code
|
92
66
|
# points before the last delimiter, or 0 if there is none, then
|
93
67
|
# copy the first basic code points to the output.
|
94
|
-
basic = input.rindex(DELIMITER
|
68
|
+
basic = input.rindex(DELIMITER) || 0
|
95
69
|
|
96
|
-
input
|
97
|
-
raise(ConversionError, "Illegal input >= 0x80") if char
|
98
|
-
output << char
|
70
|
+
input[0, basic].each do |char|
|
71
|
+
raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX
|
72
|
+
output << char
|
99
73
|
end
|
100
74
|
|
101
75
|
# Main decoding loop: Start just after the last delimiter if any
|
102
76
|
# basic code points were copied; start at the beginning otherwise.
|
103
77
|
|
104
78
|
ic = basic > 0 ? basic + 1 : 0
|
105
|
-
while ic < input.length
|
79
|
+
while ic < input.length
|
106
80
|
# ic is the index of the next character to be consumed,
|
107
81
|
|
108
82
|
# Decode a generalized variable-length integer into delta,
|
@@ -112,10 +86,10 @@ module SimpleIDN
|
|
112
86
|
oldi = i
|
113
87
|
w = 1
|
114
88
|
k = BASE
|
115
|
-
|
89
|
+
loop do
|
116
90
|
raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length
|
117
91
|
|
118
|
-
digit = decode_digit(input[ic]
|
92
|
+
digit = decode_digit(input[ic])
|
119
93
|
ic += 1
|
120
94
|
|
121
95
|
raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
|
@@ -142,16 +116,17 @@ module SimpleIDN
|
|
142
116
|
i %= out
|
143
117
|
|
144
118
|
# Insert n at position i of the output:
|
145
|
-
output.insert(i, n
|
119
|
+
output.insert(i, n)
|
146
120
|
i += 1
|
147
121
|
end
|
148
122
|
|
149
|
-
|
123
|
+
output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
|
150
124
|
end
|
151
125
|
|
152
126
|
# Main encode function
|
153
127
|
def encode(input)
|
154
|
-
|
128
|
+
input_encoding = input.encoding
|
129
|
+
input = input.encode(Encoding::UTF_8).codepoints.to_a
|
155
130
|
output = []
|
156
131
|
|
157
132
|
# Initialize the state:
|
@@ -160,9 +135,7 @@ module SimpleIDN
|
|
160
135
|
bias = INITIAL_BIAS
|
161
136
|
|
162
137
|
# Handle the basic code points:
|
163
|
-
output = input.select
|
164
|
-
char if char < 0x80
|
165
|
-
end
|
138
|
+
output = input.select { |char| char <= ASCII_MAX }
|
166
139
|
|
167
140
|
h = b = output.length
|
168
141
|
|
@@ -172,7 +145,7 @@ module SimpleIDN
|
|
172
145
|
output << DELIMITER if b > 0
|
173
146
|
|
174
147
|
# Main encoding loop:
|
175
|
-
while h < input.length
|
148
|
+
while h < input.length
|
176
149
|
# All non-basic code points < n have been
|
177
150
|
# handled already. Find the next larger one:
|
178
151
|
|
@@ -190,38 +163,42 @@ module SimpleIDN
|
|
190
163
|
delta += (m - n) * (h + 1)
|
191
164
|
n = m
|
192
165
|
|
193
|
-
input.each_with_index do |char,
|
166
|
+
input.each_with_index do |char, _|
|
194
167
|
if char < n
|
195
168
|
delta += 1
|
196
169
|
raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
|
197
170
|
end
|
198
171
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
output << encode_digit(q)
|
211
|
-
bias = adapt(delta, h + 1, h == b)
|
212
|
-
delta = 0
|
213
|
-
h += 1
|
172
|
+
next unless char == n
|
173
|
+
|
174
|
+
# Represent delta as a generalized variable-length integer:
|
175
|
+
q = delta
|
176
|
+
k = BASE
|
177
|
+
loop do
|
178
|
+
t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
|
179
|
+
break if q < t
|
180
|
+
output << encode_digit(t + (q - t) % (BASE - t))
|
181
|
+
q = ((q - t) / (BASE - t)).floor
|
182
|
+
k += BASE
|
214
183
|
end
|
184
|
+
output << encode_digit(q)
|
185
|
+
bias = adapt(delta, h + 1, h == b)
|
186
|
+
delta = 0
|
187
|
+
h += 1
|
215
188
|
end
|
216
189
|
|
217
190
|
delta += 1
|
218
191
|
n += 1
|
219
192
|
end
|
220
|
-
|
193
|
+
output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
|
221
194
|
end
|
222
|
-
|
223
195
|
end
|
224
196
|
|
197
|
+
ACE_PREFIX = 'xn--'.encode(Encoding::UTF_8).freeze
|
198
|
+
ASCII_MAX = 0x7F
|
199
|
+
DOT = 0x2E.chr(Encoding::UTF_8).freeze
|
200
|
+
LABEL_SEPERATOR_RE = /[\u002e]/
|
201
|
+
|
225
202
|
module_function
|
226
203
|
|
227
204
|
# Converts a UTF-8 unicode string to a punycode ACE string.
|
@@ -229,16 +206,14 @@ module SimpleIDN
|
|
229
206
|
# SimpleIDN.to_ascii("møllerriis.com")
|
230
207
|
# => "xn--mllerriis-l8a.com"
|
231
208
|
def to_ascii(domain)
|
232
|
-
|
209
|
+
return nil if domain.nil?
|
210
|
+
domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
|
233
211
|
return domain if domain_array.length == 0
|
234
212
|
out = []
|
235
|
-
|
236
|
-
|
237
|
-
s = domain_array[i]
|
238
|
-
out << (s =~ /[^A-Z0-9@\-*_]/i ? "xn--" + Punycode.encode(s) : s)
|
239
|
-
i += 1
|
213
|
+
domain_array.each do |s|
|
214
|
+
out << (s.codepoints.any? { |cp| cp > ASCII_MAX } ? ACE_PREFIX + Punycode.encode(s) : s)
|
240
215
|
end
|
241
|
-
|
216
|
+
out.join(DOT).encode(domain.encoding)
|
242
217
|
end
|
243
218
|
|
244
219
|
# Converts a punycode ACE string to a UTF-8 unicode string.
|
@@ -246,15 +221,13 @@ module SimpleIDN
|
|
246
221
|
# SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
|
247
222
|
# => "møllerriis.com"
|
248
223
|
def to_unicode(domain)
|
249
|
-
|
224
|
+
return nil if domain.nil?
|
225
|
+
domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
|
250
226
|
return domain if domain_array.length == 0
|
251
227
|
out = []
|
252
|
-
|
253
|
-
|
254
|
-
s = domain_array[i]
|
255
|
-
out << (s =~ /^xn\-\-/i ? Punycode.decode(s.gsub('xn--','')) : s)
|
256
|
-
i += 1
|
228
|
+
domain_array.each do |s|
|
229
|
+
out << (s.downcase.start_with?(ACE_PREFIX) ? Punycode.decode(s[ACE_PREFIX.length..-1]) : s)
|
257
230
|
end
|
258
|
-
|
231
|
+
out.join(DOT).encode(domain.encoding)
|
259
232
|
end
|
260
233
|
end
|
data/simpleidn.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'simpleidn'
|
@@ -20,4 +19,6 @@ Gem::Specification.new do |spec|
|
|
20
19
|
spec.add_development_dependency "bundler", "~> 1.11"
|
21
20
|
spec.add_development_dependency "rake", "~> 10.0"
|
22
21
|
spec.add_development_dependency "rspec", "~> 3.0"
|
22
|
+
|
23
|
+
spec.required_ruby_version = '>1.9'
|
23
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simpleidn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Morten Møller Riis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -77,9 +77,9 @@ require_paths:
|
|
77
77
|
- lib
|
78
78
|
required_ruby_version: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - ">"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '1.9'
|
83
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
84
|
requirements:
|
85
85
|
- - ">="
|
@@ -87,7 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
87
|
version: '0'
|
88
88
|
requirements: []
|
89
89
|
rubyforge_project:
|
90
|
-
rubygems_version: 2.
|
90
|
+
rubygems_version: 2.5.1
|
91
91
|
signing_key:
|
92
92
|
specification_version: 4
|
93
93
|
summary: Punycode ACE to unicode UTF-8 (and vice-versa) string conversion.
|