idnx 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 931f92a4d37e74baad51a046058aa31aa0054563936e40e451f77eecd8eaf1b1
4
- data.tar.gz: bed5e05b59d801504c2110d851723e09b302dc4a48169334ddebb4a493ed07d1
3
+ metadata.gz: 7e9eef708b35b383a3270b44ca3670e72d92bd3c29e89ecf2df9ba0ded94a144
4
+ data.tar.gz: 4c08ebc4dc52aa8ccf6fe5b1c53c17e7eb2a6e07d11c7d689f049d757d6de379
5
5
  SHA512:
6
- metadata.gz: 905cf486978554c95bb7c7b912338439cb4acd431bb50d11cbe6afaee8047a9b53e28f522b4986873817ca8c76523eab7a5b784109f877001846228e7212ebfa
7
- data.tar.gz: c94fd02aaf52386076a9a022f922d1962c1ce813b82a403f35f0d50fd4c68bb719a7def9a401a34a1aca62f8ac8443695e9dbadba5ed7bf8316459ea0b001a16
6
+ metadata.gz: 960e68d02443bf62b55290187f1785e16b9d16005107ce3b3f7db4ee016c1c91dd867de57d04f8cb92db18081a7b01499ac20fa8082f76ba884e8702806ff8ea
7
+ data.tar.gz: 0be4f68aa311208b5652e2773192487cec450e6ff604d04a1d72f662136309928bd405a095f36eb4dc6f9180dafa10d1f90fe4f847225dd6288b5a2d4aeab29f
data/LICENSE.txt CHANGED
@@ -191,7 +191,7 @@
191
191
  limitations under the License.
192
192
 
193
193
 
194
- * lib/httpx/domain_name.rb
194
+ * lib/idnx/ruby.rb
195
195
 
196
196
  This file is derived from the implementation of punycode available at
197
197
  here:
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Idnx
2
2
 
3
+ [![CI](https://github.com/HoneyryderChuck/idnx/actions/workflows/test.yml/badge.svg)](https://github.com/HoneyryderChuck/idnx/actions/workflows/test.yml)
4
+
5
+
3
6
 
4
7
  `idnx` provides a Ruby API for encoding Internationalized Domain Names (IDNs) into Punycode.
5
8
 
@@ -7,6 +10,7 @@ It provides multi-platform support by using the most approriate strategy based o
7
10
 
8
11
  * It uses (and requires the installation of) [libidn2](https://github.com/libidn/libidn2) in Linux / MacOS;
9
12
  * It uses [the appropriate winnls APIs](https://docs.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-idntoascii) in Windows;
13
+ * It falls back to a pure ruby Punycode 2003 implementation;
10
14
 
11
15
  ## Installation
12
16
 
@@ -41,6 +45,16 @@ require "idnx"
41
45
  Idnx.to_punycode("bücher.de") #=> "xn--bcher-kva.de"
42
46
  ```
43
47
 
48
+ ## Ruby Support Policy
49
+
50
+ This library supports Ruby 2.4 and above. It also supports both JRuby and TruffleRuby.
51
+
52
+ ## Known Issues
53
+
54
+ ### JRuby on MacOS
55
+
56
+ `idnx` won't work on macOS when using JRuby 9.2 or lower, because JRuby's FFI does not use the same path lookup logic as its CRuby counterpart and therefore does not find a `brew`-installed `libidn2`. This has been fixed since JRuby 9.3.
57
+
44
58
  ## Development
45
59
 
46
60
  If you want to contribute, fork this project, and submit changes via a PR on github.
data/lib/idnx/idn2.rb CHANGED
@@ -21,12 +21,12 @@ module Idnx
21
21
  IDN2_NONTRANSITIONAL = 8
22
22
 
23
23
  FLAGS = if Gem::Version.new(VERSION) >= Gem::Version.new("0.14.0")
24
- IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL
25
- else
26
- IDN2_NFC_INPUT
27
- end
24
+ IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL
25
+ else
26
+ IDN2_NFC_INPUT
27
+ end
28
28
 
29
- attach_function :idn2_lookup_ul, [:string, :pointer, :int], :int
29
+ attach_function :idn2_lookup_ul, %i[string pointer int], :int
30
30
  attach_function :idn2_strerror, [:int], :string
31
31
  attach_function :idn2_free, [:pointer], :void
32
32
 
@@ -36,9 +36,7 @@ module Idnx
36
36
  string_ptr = FFI::MemoryPointer.new(:pointer)
37
37
  result = idn2_lookup_ul(hostname, string_ptr, FLAGS)
38
38
 
39
- if result != IDN2_OK
40
- result = idn2_lookup_ul(hostname, string_ptr, IDN2_TRANSITIONAL)
41
- end
39
+ result = idn2_lookup_ul(hostname, string_ptr, IDN2_TRANSITIONAL) if result != IDN2_OK
42
40
 
43
41
  if result != IDN2_OK
44
42
  string_ptr.free
data/lib/idnx/ruby.rb ADDED
@@ -0,0 +1,299 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Idnx
4
+ module Ruby
5
+ module_function
6
+
7
+ def lookup(hostname)
8
+ Punycode.encode_hostname(hostname)
9
+ end
10
+
11
+ # :nocov:
12
+ # -*- coding: utf-8 -*-
13
+ #--
14
+ # punycode.rb - PunyCode encoder for the Domain Name library
15
+ #
16
+ # Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
17
+ #
18
+ # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
19
+ # Library.
20
+ #
21
+ # Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
22
+ #
23
+ # Redistribution and use in source and binary forms, with or
24
+ # without modification, are permitted provided that the following
25
+ # conditions are met:
26
+ #
27
+ # 1) Redistributions of source code must retain the above copyright
28
+ # notice, this list of conditions and the following disclaimer.
29
+ #
30
+ # 2) Redistributions in binary form must reproduce the above copyright
31
+ # notice, this list of conditions and the following disclaimer in
32
+ # the documentation and/or other materials provided with the
33
+ # distribution.
34
+ #
35
+ # 3) Neither the name of the VeriSign Inc. nor the names of its
36
+ # contributors may be used to endorse or promote products derived
37
+ # from this software without specific prior written permission.
38
+ #
39
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
40
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
41
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
42
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
43
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
44
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
45
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
46
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
47
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
49
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
50
+ # POSSIBILITY OF SUCH DAMAGE.
51
+ #
52
+ # This software is licensed under the BSD open source license. For more
53
+ # information visit www.opensource.org.
54
+ #
55
+ # Authors:
56
+ # John Colosi (VeriSign)
57
+ # Srikanth Veeramachaneni (VeriSign)
58
+ # Nagesh Chigurupati (Verisign)
59
+ # Praveen Srinivasan(Verisign)
60
+ #++
61
+ module Punycode
62
+ BASE = 36
63
+ TMIN = 1
64
+ TMAX = 26
65
+ SKEW = 38
66
+ DAMP = 700
67
+ INITIAL_BIAS = 72
68
+ INITIAL_N = 0x80
69
+ DELIMITER = "-"
70
+
71
+ MAXINT = (1 << 32) - 1
72
+
73
+ LOBASE = BASE - TMIN
74
+ CUTOFF = LOBASE * TMAX / 2
75
+
76
+ RE_NONBASIC = /[^\x00-\x7f]/.freeze
77
+
78
+ # Returns the numeric value of a basic code point (for use in
79
+ # representing integers) in the range 0 to base-1, or nil if cp
80
+ # does not represent a value.
81
+ DECODE_DIGIT = {}.tap do |map|
82
+ # ASCII A..Z map to 0..25
83
+ # ASCII a..z map to 0..25
84
+ (0..25).each { |i| map[65 + i] = map[97 + i] = i }
85
+ # ASCII 0..9 map to 26..35
86
+ (26..35).each { |i| map[22 + i] = i }
87
+ end
88
+
89
+ # Returns the basic code point whose value (when used for
90
+ # representing integers) is d, which must be in the range 0 to
91
+ # BASE-1. The lowercase form is used unless flag is true, in
92
+ # which case the uppercase form is used. The behavior is
93
+ # undefined if flag is nonzero and digit d has no uppercase
94
+ # form.
95
+ ENCODE_DIGIT = proc { |d, flag|
96
+ (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
97
+ # 0..25 map to ASCII a..z or A..Z
98
+ # 26..35 map to ASCII 0..9
99
+ }
100
+
101
+ DOT = "."
102
+ PREFIX = "xn--"
103
+
104
+ # Most errors we raise are basically kind of ArgumentError.
105
+ class ArgumentError < ::ArgumentError; end
106
+ class BufferOverflowError < ArgumentError; end
107
+
108
+ module_function
109
+
110
+ # Encode a +string+ in Punycode
111
+ def encode(string)
112
+ input = string.unpack("U*")
113
+ output = +""
114
+
115
+ # Initialize the state
116
+ n = INITIAL_N
117
+ delta = 0
118
+ bias = INITIAL_BIAS
119
+
120
+ # Handle the basic code points
121
+ input.each { |cp| output << cp.chr if cp < 0x80 }
122
+
123
+ h = b = output.length
124
+
125
+ # h is the number of code points that have been handled, b is the
126
+ # number of basic code points, and out is the number of characters
127
+ # that have been output.
128
+
129
+ output << DELIMITER if b > 0
130
+
131
+ # Main encoding loop
132
+
133
+ while h < input.length
134
+ # All non-basic code points < n have been handled already. Find
135
+ # the next larger one
136
+
137
+ m = MAXINT
138
+ input.each do |cp|
139
+ m = cp if (n...m) === cp
140
+ end
141
+
142
+ # Increase delta enough to advance the decoder's <n,i> state to
143
+ # <m,0>, but guard against overflow
144
+
145
+ delta += (m - n) * (h + 1)
146
+ raise BufferOverflowError if delta > MAXINT
147
+
148
+ n = m
149
+
150
+ input.each do |cp|
151
+ # AMC-ACE-Z can use this simplified version instead
152
+ if cp < n
153
+ delta += 1
154
+ raise BufferOverflowError if delta > MAXINT
155
+ elsif cp == n
156
+ # Represent delta as a generalized variable-length integer
157
+ q = delta
158
+ k = BASE
159
+ loop do
160
+ t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
161
+ break if q < t
162
+
163
+ q, r = (q - t).divmod(BASE - t)
164
+ output << ENCODE_DIGIT[t + r, false]
165
+ k += BASE
166
+ end
167
+
168
+ output << ENCODE_DIGIT[q, false]
169
+
170
+ # Adapt the bias
171
+ delta = h == b ? delta / DAMP : delta >> 1
172
+ delta += delta / (h + 1)
173
+ bias = 0
174
+ while delta > CUTOFF
175
+ delta /= LOBASE
176
+ bias += BASE
177
+ end
178
+ bias += (LOBASE + 1) * delta / (delta + SKEW)
179
+
180
+ delta = 0
181
+ h += 1
182
+ end
183
+ end
184
+
185
+ delta += 1
186
+ n += 1
187
+ end
188
+
189
+ output
190
+ end
191
+
192
+ # Encode a hostname using IDN/Punycode algorithms
193
+ def encode_hostname(hostname)
194
+ hostname.match(RE_NONBASIC) || (return hostname)
195
+
196
+ hostname.split(DOT).map do |name|
197
+ if name.match(RE_NONBASIC)
198
+ PREFIX + encode(name)
199
+ else
200
+ name
201
+ end
202
+ end.join(DOT)
203
+ end
204
+
205
+ # Decode a +string+ encoded in Punycode
206
+ def decode(string)
207
+ # Initialize the state
208
+ n = INITIAL_N
209
+ i = 0
210
+ bias = INITIAL_BIAS
211
+
212
+ if j = string.rindex(DELIMITER)
213
+ b = string[0...j]
214
+
215
+ b.match(RE_NONBASIC) &&
216
+ raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}")
217
+
218
+ # Handle the basic code points
219
+
220
+ output = b.unpack("U*")
221
+ u = string[(j + 1)..-1]
222
+ else
223
+ output = []
224
+ u = string
225
+ end
226
+
227
+ # Main decoding loop: Start just after the last delimiter if any
228
+ # basic code points were copied; start at the beginning
229
+ # otherwise.
230
+
231
+ input = u.unpack("C*")
232
+ input_length = input.length
233
+ h = 0
234
+ out = output.length
235
+
236
+ while h < input_length
237
+ # Decode a generalized variable-length integer into delta,
238
+ # which gets added to i. The overflow checking is easier
239
+ # if we increase i as we go, then subtract off its starting
240
+ # value at the end to obtain delta.
241
+
242
+ oldi = i
243
+ w = 1
244
+ k = BASE
245
+
246
+ loop do
247
+ (digit = DECODE_DIGIT[input[h]]) ||
248
+ raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}")
249
+ h += 1
250
+ i += digit * w
251
+ raise BufferOverflowError if i > MAXINT
252
+
253
+ t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
254
+ break if digit < t
255
+
256
+ w *= BASE - t
257
+ raise BufferOverflowError if w > MAXINT
258
+
259
+ k += BASE
260
+ (h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}")
261
+ end
262
+
263
+ # Adapt the bias
264
+ delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
265
+ delta += delta / (out + 1)
266
+ bias = 0
267
+ while delta > CUTOFF
268
+ delta /= LOBASE
269
+ bias += BASE
270
+ end
271
+ bias += (LOBASE + 1) * delta / (delta + SKEW)
272
+
273
+ # i was supposed to wrap around from out+1 to 0, incrementing
274
+ # n each time, so we'll fix that now:
275
+
276
+ q, i = i.divmod(out + 1)
277
+ n += q
278
+ raise BufferOverflowError if n > MAXINT
279
+
280
+ # Insert n at position i of the output:
281
+
282
+ output[i, 0] = n
283
+
284
+ out += 1
285
+ i += 1
286
+ end
287
+ output.pack("U*")
288
+ end
289
+
290
+ # Decode a hostname using IDN/Punycode algorithms
291
+ def decode_hostname(hostname)
292
+ hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do
293
+ Regexp.last_match(1) << decode(Regexp.last_match(2))
294
+ end
295
+ end
296
+ end
297
+ # :nocov:
298
+ end
299
+ end
data/lib/idnx/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Idnx
4
- VERSION = "0.0.1"
4
+ VERSION = "0.1.0"
5
5
  end
data/lib/idnx/windows.rb CHANGED
@@ -45,9 +45,9 @@ module Idnx
45
45
  # LPCCH lpDefaultChar,
46
46
  # LPBOOL lpUsedDefaultChar
47
47
  # );
48
- attach_function :IdnToAscii, [:uint, :pointer, :int, :pointer, :int], :int
49
- attach_function :MultiByteToWideChar, [:uint, :uint, :string, :int, :pointer, :int], :int
50
- attach_function :WideCharToMultiByte, [:uint, :uint, :pointer, :int, :pointer, :int, :pointer, :pointer], :int
48
+ attach_function :IdnToAscii, %i[uint pointer int pointer int], :int
49
+ attach_function :MultiByteToWideChar, %i[uint uint string int pointer int], :int
50
+ attach_function :WideCharToMultiByte, %i[uint uint pointer int pointer int pointer pointer], :int
51
51
 
52
52
  module_function
53
53
 
@@ -55,6 +55,7 @@ module Idnx
55
55
  # turn to wchar
56
56
  wchar_len = MultiByteToWideChar(WIN32OLE::CP_UTF8, MB_ERR_INVALID_CHARS, hostname, -1, nil, 0)
57
57
  raise Error, "Failed to convert \"#{hostname}\" to wchar" if wchar_len.zero?
58
+
58
59
  wchar_ptr = FFI::MemoryPointer.new(:wchar_t, wchar_len)
59
60
  wchar_len = MultiByteToWideChar(WIN32OLE::CP_UTF8, 0, hostname, -1, wchar_ptr, wchar_len)
60
61
  raise Error, "Failed to convert \"#{hostname}\" to wchar" if wchar_len.zero?
@@ -64,27 +65,27 @@ module Idnx
64
65
  punycode_len = IdnToAscii(0, wchar_ptr, -1, punycode, IDN_MAX_LENGTH)
65
66
  wchar_ptr.free
66
67
 
67
- if punycode_len == 0
68
+ if punycode_len.zero?
68
69
  last_error = FFI::LastError.error
69
70
 
70
71
  # operation completed successfully, hostname is not an IDN
71
72
  # return hostname if last_error == 0
72
73
 
73
74
  message = case last_error
74
- when ERROR_INSUFFICIENT_BUFFER
75
- "The supplied buffer size was not large enough, or it was incorrectly set to NULL"
76
- when ERROR_INVALID_FLAGS
77
- "The values supplied for flags were not valid"
78
- when ERROR_INVALID_NAME
79
- "An invalid name was supplied to the function"
80
- when ERROR_INVALID_PARAMETER
81
- "Any of the parameter values was invalid"
82
- when ERROR_NO_UNICODE_TRANSLATION
83
- "An invalid Unicode was found in a string"
84
- else
85
- "Failed to convert \"#{hostname}\"; (error: #{last_error})" \
86
- "\n\nhttps://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes#system-error-codes-1"
87
- end
75
+ when ERROR_INSUFFICIENT_BUFFER
76
+ "The supplied buffer size was not large enough, or it was incorrectly set to NULL"
77
+ when ERROR_INVALID_FLAGS
78
+ "The values supplied for flags were not valid"
79
+ when ERROR_INVALID_NAME
80
+ "An invalid name was supplied to the function"
81
+ when ERROR_INVALID_PARAMETER
82
+ "Any of the parameter values was invalid"
83
+ when ERROR_NO_UNICODE_TRANSLATION
84
+ "An invalid Unicode was found in a string"
85
+ else
86
+ "Failed to convert \"#{hostname}\"; (error: #{last_error})" \
87
+ "\n\nhttps://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes#system-error-codes-1"
88
+ end
88
89
  punycode.free
89
90
  raise Error, message
90
91
  end
@@ -92,9 +93,11 @@ module Idnx
92
93
  # turn to unicode
93
94
  unicode_len = WideCharToMultiByte(WIN32OLE::CP_UTF8, 0, punycode, -1, nil, 0, nil, nil)
94
95
  raise Error, "Failed to convert \"#{hostname}\" to utf8" if unicode_len.zero?
96
+
95
97
  utf8_ptr = FFI::MemoryPointer.new(:char, unicode_len)
96
98
  unicode_len = WideCharToMultiByte(WIN32OLE::CP_UTF8, 0, punycode, -1, utf8_ptr, unicode_len, nil, nil)
97
99
  raise Error, "Failed to convert \"#{hostname}\" to utf8" if unicode_len.zero?
100
+
98
101
  unicode = utf8_ptr.read_string(utf8_ptr.size)
99
102
  unicode.strip! # remove null-byte
100
103
  end
data/lib/idnx.rb CHANGED
@@ -4,7 +4,7 @@ require_relative "idnx/version"
4
4
  require "ffi"
5
5
 
6
6
  module Idnx
7
- Error = Class.new(StandardError)
7
+ class Error < StandardError; end
8
8
 
9
9
  module_function
10
10
 
@@ -16,5 +16,11 @@ end
16
16
  if FFI::Platform.windows?
17
17
  require "idnx/windows"
18
18
  else
19
- require "idnx/idn2"
19
+ begin
20
+ require "idnx/idn2"
21
+ rescue LoadError
22
+ # fallback to pure ruby punycode 2003 implementation
23
+ require "idnx/ruby"
24
+ Idnx::Lib = Idnx::Ruby
25
+ end
20
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idnx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - HoneyryderChuck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-11 00:00:00.000000000 Z
11
+ date: 2021-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -39,6 +39,7 @@ files:
39
39
  - README.md
40
40
  - lib/idnx.rb
41
41
  - lib/idnx/idn2.rb
42
+ - lib/idnx/ruby.rb
42
43
  - lib/idnx/version.rb
43
44
  - lib/idnx/windows.rb
44
45
  homepage: https://github.com/honeyryderchuck/idnx