simpleidn 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bbff0e8f2c02135040f6ce79aa7400f1fb4e4329
4
- data.tar.gz: 755dfe2f0c621b57080bea4c5793816e92ced4c9
3
+ metadata.gz: 91419f6658f1aa0e2efd5f1774879994f822a3d7
4
+ data.tar.gz: 60663039b50c49baa983dc7d51179dae820d839d
5
5
  SHA512:
6
- metadata.gz: 670cbaa7ada6f97efcfa5170e64cde78d5d43ae054ab32fec1bd77c669f32888c69f4af54d8109ae6ce29afc1282b2e325174f2d14aee6960991c13498416696
7
- data.tar.gz: 023b37009fe2128e99da467cb9771a195dcb8523430b7008c4417450e8dc1a9cef53caa1e2d37441e54ab4c6a2c4f71f28746e86cef24c059030b7e9f241efc6
6
+ metadata.gz: 1eca7fc205bdd66663334b91786029b645637d501acb8b61e8661b2298c751a2df39c4854b6f926f29dec505b5530f86c1159b7a3e0a6a8ffa4ef411fd59600d
7
+ data.tar.gz: 5ca7274d2115e9e3a65f992721df2ec748719754e2ceccce3e03b1f4328c0a795a8fa85f90010d208fbb6f87d119b7f1154ae3d92e099b09d28957e0599e9a68
data/LICENCE CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License
2
2
 
3
- Copyright (c) 2011-2013 Morten Møller Riis
3
+ Copyright (c) 2011-2017 Morten Møller Riis
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
- THE SOFTWARE.
21
+ THE SOFTWARE.
data/README.rdoc CHANGED
@@ -4,7 +4,7 @@ This gem allows easy conversion from punycode ACE strings to unicode UTF-8 strin
4
4
 
5
5
  The implementation is heavily based on the RFC3492 C example implementation but simplified since it does not preserve case.
6
6
 
7
- This gem works with Ruby 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
7
+ This gem works with Ruby 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
8
8
 
9
9
  * http://www.whatastruggle.com
10
10
 
@@ -18,13 +18,13 @@ In your Ruby script you can now.
18
18
 
19
19
  require 'rubygems'
20
20
  require 'simpleidn'
21
-
21
+
22
22
  SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
23
- => "møllerriis.com"
24
-
23
+ => "møllerriis.com"
24
+
25
25
  SimpleIDN.to_ascii("møllerriis.com")
26
- => "xn--mllerriis-l8a.com"
27
-
26
+ => "xn--mllerriis-l8a.com"
27
+
28
28
  == Testing / RSpec
29
29
 
30
30
  In order to run the test suite you must have <tt>rspec</tt> installed.
@@ -36,4 +36,4 @@ http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
36
36
 
37
37
  Does not preserve uppercase. So if, for some reason, you use uppercase characters (eg. Ø instead of ø), please take note of that.
38
38
 
39
- Please report any issues!
39
+ Please report any issues!
data/lib/simpleidn.rb CHANGED
@@ -1,24 +1,5 @@
1
- # encoding: UTF-8
2
- if RUBY_VERSION =~ /^1\.8/
3
- $KCODE = "UTF-8"
4
- class String
5
- def ord
6
- self[0]
7
- end
8
- end
9
- else
10
- Encoding.default_internal = "UTF-8"
11
- end
12
-
13
- class Integer
14
- def to_utf8_character
15
- [self].pack("U*")
16
- end
17
- end
18
-
19
1
  module SimpleIDN
20
-
21
- VERSION = "0.0.7"
2
+ VERSION = "0.0.9"
22
3
 
23
4
  # The ConversionError is raised when an error occurs during a
24
5
  # Punycode <-> Unicode conversion.
@@ -26,7 +7,6 @@ module SimpleIDN
26
7
  end
27
8
 
28
9
  module Punycode
29
-
30
10
  INITIAL_N = 0x80
31
11
  INITIAL_BIAS = 72
32
12
  DELIMITER = 0x2D
@@ -36,6 +16,9 @@ module SimpleIDN
36
16
  TMAX = 26
37
17
  SKEW = 38
38
18
  MAXINT = 0x7FFFFFFF
19
+ ASCII_MAX = 0x7F
20
+
21
+ EMPTY = ''.encode(Encoding::UTF_8).freeze
39
22
 
40
23
  module_function
41
24
 
@@ -46,14 +29,12 @@ module SimpleIDN
46
29
  cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : cp - 97 < 26 ? cp - 97 : BASE
47
30
  end
48
31
 
49
- # encode_digit(d,flag) returns the basic code point whose value
32
+ # encode_digit(d) returns the basic code point whose value
50
33
  # (when used for representing integers) is d, which needs to be in
51
- # the range 0 to base-1. The lowercase form is used unless flag is
52
- # nonzero, in which case the uppercase form is used. The behavior
53
- # is undefined if flag is nonzero and digit d has no uppercase form.
34
+ # the range 0 to base-1.
54
35
  def encode_digit(d)
55
36
  d + 22 + 75 * (d < 26 ? 1 : 0)
56
- # 0..25 map to ASCII a..z or A..Z
37
+ # 0..25 map to ASCII a..z
57
38
  # 26..35 map to ASCII 0..9
58
39
  end
59
40
 
@@ -63,24 +44,17 @@ module SimpleIDN
63
44
  delta += (delta / numpoints)
64
45
 
65
46
  k = 0
66
- while delta > (((BASE - TMIN) * TMAX) / 2) do
47
+ while delta > (((BASE - TMIN) * TMAX) / 2)
67
48
  delta /= BASE - TMIN
68
49
  k += BASE
69
50
  end
70
- return k + (BASE - TMIN + 1) * delta / (delta + SKEW)
71
- end
72
-
73
- # encode_basic(bcp,flag) forces a basic code point to lowercase if flag is zero,
74
- # uppercase if flag is nonzero, and returns the resulting code point.
75
- # The code point is unchanged if it is caseless.
76
- # The behavior is undefined if bcp is not a basic code point.
77
- def encode_basic(bcp, flag)
78
- bcp -= (bcp - 97 < 26 ? 1 : 0) << 5
79
- return bcp + ((!flag && (bcp - 65 < 26 ? 1 : 0)) << 5)
51
+ k + (BASE - TMIN + 1) * delta / (delta + SKEW)
80
52
  end
81
53
 
82
54
  # Main decode
83
55
  def decode(input)
56
+ input_encoding = input.encoding
57
+ input = input.encode(Encoding::UTF_8).codepoints.to_a
84
58
  output = []
85
59
 
86
60
  # Initialize the state:
@@ -91,18 +65,18 @@ module SimpleIDN
91
65
  # Handle the basic code points: Let basic be the number of input code
92
66
  # points before the last delimiter, or 0 if there is none, then
93
67
  # copy the first basic code points to the output.
94
- basic = input.rindex(DELIMITER.to_utf8_character) || 0
68
+ basic = input.rindex(DELIMITER) || 0
95
69
 
96
- input.unpack("U*")[0, basic].each do |char|
97
- raise(ConversionError, "Illegal input >= 0x80") if char >= 0x80
98
- output << char.chr # to_utf8_character not needed her because ord < 0x80 (128) which is within US-ASCII.
70
+ input[0, basic].each do |char|
71
+ raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX
72
+ output << char
99
73
  end
100
74
 
101
75
  # Main decoding loop: Start just after the last delimiter if any
102
76
  # basic code points were copied; start at the beginning otherwise.
103
77
 
104
78
  ic = basic > 0 ? basic + 1 : 0
105
- while ic < input.length do
79
+ while ic < input.length
106
80
  # ic is the index of the next character to be consumed,
107
81
 
108
82
  # Decode a generalized variable-length integer into delta,
@@ -112,10 +86,10 @@ module SimpleIDN
112
86
  oldi = i
113
87
  w = 1
114
88
  k = BASE
115
- while true do
89
+ loop do
116
90
  raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length
117
91
 
118
- digit = decode_digit(input[ic].ord)
92
+ digit = decode_digit(input[ic])
119
93
  ic += 1
120
94
 
121
95
  raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
@@ -142,16 +116,17 @@ module SimpleIDN
142
116
  i %= out
143
117
 
144
118
  # Insert n at position i of the output:
145
- output.insert(i, n.to_utf8_character)
119
+ output.insert(i, n)
146
120
  i += 1
147
121
  end
148
122
 
149
- return output.join
123
+ output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
150
124
  end
151
125
 
152
126
  # Main encode function
153
127
  def encode(input)
154
- input = input.unpack("U*")
128
+ input_encoding = input.encoding
129
+ input = input.encode(Encoding::UTF_8).codepoints.to_a
155
130
  output = []
156
131
 
157
132
  # Initialize the state:
@@ -160,9 +135,7 @@ module SimpleIDN
160
135
  bias = INITIAL_BIAS
161
136
 
162
137
  # Handle the basic code points:
163
- output = input.select do |char|
164
- char if char < 0x80
165
- end
138
+ output = input.select { |char| char <= ASCII_MAX }
166
139
 
167
140
  h = b = output.length
168
141
 
@@ -172,7 +145,7 @@ module SimpleIDN
172
145
  output << DELIMITER if b > 0
173
146
 
174
147
  # Main encoding loop:
175
- while h < input.length do
148
+ while h < input.length
176
149
  # All non-basic code points < n have been
177
150
  # handled already. Find the next larger one:
178
151
 
@@ -190,38 +163,42 @@ module SimpleIDN
190
163
  delta += (m - n) * (h + 1)
191
164
  n = m
192
165
 
193
- input.each_with_index do |char, j|
166
+ input.each_with_index do |char, _|
194
167
  if char < n
195
168
  delta += 1
196
169
  raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
197
170
  end
198
171
 
199
- if (char == n)
200
- # Represent delta as a generalized variable-length integer:
201
- q = delta
202
- k = BASE
203
- while true do
204
- t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
205
- break if q < t
206
- output << encode_digit(t + (q - t) % (BASE - t))
207
- q = ( (q - t) / (BASE - t) ).floor
208
- k += BASE
209
- end
210
- output << encode_digit(q)
211
- bias = adapt(delta, h + 1, h == b)
212
- delta = 0
213
- h += 1
172
+ next unless char == n
173
+
174
+ # Represent delta as a generalized variable-length integer:
175
+ q = delta
176
+ k = BASE
177
+ loop do
178
+ t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
179
+ break if q < t
180
+ output << encode_digit(t + (q - t) % (BASE - t))
181
+ q = ((q - t) / (BASE - t)).floor
182
+ k += BASE
214
183
  end
184
+ output << encode_digit(q)
185
+ bias = adapt(delta, h + 1, h == b)
186
+ delta = 0
187
+ h += 1
215
188
  end
216
189
 
217
190
  delta += 1
218
191
  n += 1
219
192
  end
220
- return output.collect {|c| c.to_utf8_character}.join
193
+ output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
221
194
  end
222
-
223
195
  end
224
196
 
197
+ ACE_PREFIX = 'xn--'.encode(Encoding::UTF_8).freeze
198
+ ASCII_MAX = 0x7F
199
+ DOT = 0x2E.chr(Encoding::UTF_8).freeze
200
+ LABEL_SEPERATOR_RE = /[\u002e]/
201
+
225
202
  module_function
226
203
 
227
204
  # Converts a UTF-8 unicode string to a punycode ACE string.
@@ -229,16 +206,14 @@ module SimpleIDN
229
206
  # SimpleIDN.to_ascii("møllerriis.com")
230
207
  # => "xn--mllerriis-l8a.com"
231
208
  def to_ascii(domain)
232
- domain_array = domain.split(".") rescue []
209
+ return nil if domain.nil?
210
+ domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
233
211
  return domain if domain_array.length == 0
234
212
  out = []
235
- i = 0
236
- while i < domain_array.length
237
- s = domain_array[i]
238
- out << (s =~ /[^A-Z0-9@\-*_]/i ? "xn--" + Punycode.encode(s) : s)
239
- i += 1
213
+ domain_array.each do |s|
214
+ out << (s.codepoints.any? { |cp| cp > ASCII_MAX } ? ACE_PREFIX + Punycode.encode(s) : s)
240
215
  end
241
- return out.join(".")
216
+ out.join(DOT).encode(domain.encoding)
242
217
  end
243
218
 
244
219
  # Converts a punycode ACE string to a UTF-8 unicode string.
@@ -246,15 +221,13 @@ module SimpleIDN
246
221
  # SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
247
222
  # => "møllerriis.com"
248
223
  def to_unicode(domain)
249
- domain_array = domain.split(".") rescue []
224
+ return nil if domain.nil?
225
+ domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
250
226
  return domain if domain_array.length == 0
251
227
  out = []
252
- i = 0
253
- while i < domain_array.length
254
- s = domain_array[i]
255
- out << (s =~ /^xn\-\-/i ? Punycode.decode(s.gsub('xn--','')) : s)
256
- i += 1
228
+ domain_array.each do |s|
229
+ out << (s.downcase.start_with?(ACE_PREFIX) ? Punycode.decode(s[ACE_PREFIX.length..-1]) : s)
257
230
  end
258
- return out.join(".")
231
+ out.join(DOT).encode(domain.encoding)
259
232
  end
260
233
  end
data/simpleidn.gemspec CHANGED
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'simpleidn'
@@ -20,4 +19,6 @@ Gem::Specification.new do |spec|
20
19
  spec.add_development_dependency "bundler", "~> 1.11"
21
20
  spec.add_development_dependency "rake", "~> 10.0"
22
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
+
23
+ spec.required_ruby_version = '>1.9'
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simpleidn
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Morten Møller Riis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-12 00:00:00.000000000 Z
11
+ date: 2017-06-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -77,9 +77,9 @@ require_paths:
77
77
  - lib
78
78
  required_ruby_version: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - ">"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '1.9'
83
83
  required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  requirements:
85
85
  - - ">="
@@ -87,7 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
87
87
  version: '0'
88
88
  requirements: []
89
89
  rubyforge_project:
90
- rubygems_version: 2.4.5
90
+ rubygems_version: 2.5.1
91
91
  signing_key:
92
92
  specification_version: 4
93
93
  summary: Punycode ACE to unicode UTF-8 (and vice-versa) string conversion.