idnx 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 931f92a4d37e74baad51a046058aa31aa0054563936e40e451f77eecd8eaf1b1
4
- data.tar.gz: bed5e05b59d801504c2110d851723e09b302dc4a48169334ddebb4a493ed07d1
3
+ metadata.gz: 7e9eef708b35b383a3270b44ca3670e72d92bd3c29e89ecf2df9ba0ded94a144
4
+ data.tar.gz: 4c08ebc4dc52aa8ccf6fe5b1c53c17e7eb2a6e07d11c7d689f049d757d6de379
5
5
  SHA512:
6
- metadata.gz: 905cf486978554c95bb7c7b912338439cb4acd431bb50d11cbe6afaee8047a9b53e28f522b4986873817ca8c76523eab7a5b784109f877001846228e7212ebfa
7
- data.tar.gz: c94fd02aaf52386076a9a022f922d1962c1ce813b82a403f35f0d50fd4c68bb719a7def9a401a34a1aca62f8ac8443695e9dbadba5ed7bf8316459ea0b001a16
6
+ metadata.gz: 960e68d02443bf62b55290187f1785e16b9d16005107ce3b3f7db4ee016c1c91dd867de57d04f8cb92db18081a7b01499ac20fa8082f76ba884e8702806ff8ea
7
+ data.tar.gz: 0be4f68aa311208b5652e2773192487cec450e6ff604d04a1d72f662136309928bd405a095f36eb4dc6f9180dafa10d1f90fe4f847225dd6288b5a2d4aeab29f
data/LICENSE.txt CHANGED
@@ -191,7 +191,7 @@
191
191
  limitations under the License.
192
192
 
193
193
 
194
- * lib/httpx/domain_name.rb
194
+ * lib/idnx/ruby.rb
195
195
 
196
196
  This file is derived from the implementation of punycode available at
197
197
  here:
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Idnx
2
2
 
3
+ [![CI](https://github.com/HoneyryderChuck/idnx/actions/workflows/test.yml/badge.svg)](https://github.com/HoneyryderChuck/idnx/actions/workflows/test.yml)
4
+
5
+
3
6
 
4
7
  `idnx` provides a Ruby API for encoding internationalized domain names (IDNs) into Punycode.
5
8
 
@@ -7,6 +10,7 @@ It provides multi-platform support by using the most approriate strategy based o
7
10
 
8
11
  * It uses (and requires the installation of) [libidn2](https://github.com/libidn/libidn2) in Linux / MacOS;
9
12
  * It uses [the appropriate winnls APIs](https://docs.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-idntoascii) in Windows;
13
+ * It falls back to a pure Ruby Punycode 2003 implementation when the `libidn2` binding cannot be loaded.
10
14
 
11
15
  ## Installation
12
16
 
@@ -41,6 +45,16 @@ require "idnx"
41
45
  Idnx.to_punycode("bücher.de") #=> "xn--bcher-kva.de"
42
46
  ```
43
47
 
48
+ ## Ruby Support Policy
49
+
50
+ This library supports Ruby 2.4 and later. It also supports both JRuby and TruffleRuby.
51
+
52
+ ## Known Issues
53
+
54
+ ### JRuby on MacOS
55
+
56
+ `idnx` won't work on macOS when using JRuby 9.2 or lower, because JRuby's FFI does not use the same library path lookup logic as its CRuby counterpart and therefore does not find a `brew`-installed `libidn2`. This has been fixed since JRuby 9.3.
57
+
44
58
  ## Development
45
59
 
46
60
  If you want to contribute, fork this project, and submit changes via a PR on github.
data/lib/idnx/idn2.rb CHANGED
@@ -21,12 +21,12 @@ module Idnx
21
21
  IDN2_NONTRANSITIONAL = 8
22
22
 
23
23
  FLAGS = if Gem::Version.new(VERSION) >= Gem::Version.new("0.14.0")
24
- IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL
25
- else
26
- IDN2_NFC_INPUT
27
- end
24
+ IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL
25
+ else
26
+ IDN2_NFC_INPUT
27
+ end
28
28
 
29
- attach_function :idn2_lookup_ul, [:string, :pointer, :int], :int
29
+ attach_function :idn2_lookup_ul, %i[string pointer int], :int
30
30
  attach_function :idn2_strerror, [:int], :string
31
31
  attach_function :idn2_free, [:pointer], :void
32
32
 
@@ -36,9 +36,7 @@ module Idnx
36
36
  string_ptr = FFI::MemoryPointer.new(:pointer)
37
37
  result = idn2_lookup_ul(hostname, string_ptr, FLAGS)
38
38
 
39
- if result != IDN2_OK
40
- result = idn2_lookup_ul(hostname, string_ptr, IDN2_TRANSITIONAL)
41
- end
39
+ result = idn2_lookup_ul(hostname, string_ptr, IDN2_TRANSITIONAL) if result != IDN2_OK
42
40
 
43
41
  if result != IDN2_OK
44
42
  string_ptr.free
data/lib/idnx/ruby.rb ADDED
@@ -0,0 +1,299 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Idnx
4
+ module Ruby
5
+ module_function
6
+
7
+ def lookup(hostname)
8
+ Punycode.encode_hostname(hostname)
9
+ end
10
+
11
+ # :nocov:
12
+ # -*- coding: utf-8 -*-
13
+ #--
14
+ # punycode.rb - PunyCode encoder for the Domain Name library
15
+ #
16
+ # Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
17
+ #
18
+ # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
19
+ # Library.
20
+ #
21
+ # Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
22
+ #
23
+ # Redistribution and use in source and binary forms, with or
24
+ # without modification, are permitted provided that the following
25
+ # conditions are met:
26
+ #
27
+ # 1) Redistributions of source code must retain the above copyright
28
+ # notice, this list of conditions and the following disclaimer.
29
+ #
30
+ # 2) Redistributions in binary form must reproduce the above copyright
31
+ # notice, this list of conditions and the following disclaimer in
32
+ # the documentation and/or other materials provided with the
33
+ # distribution.
34
+ #
35
+ # 3) Neither the name of the VeriSign Inc. nor the names of its
36
+ # contributors may be used to endorse or promote products derived
37
+ # from this software without specific prior written permission.
38
+ #
39
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
40
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
41
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
42
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
43
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
44
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
45
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
46
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
47
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
49
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
50
+ # POSSIBILITY OF SUCH DAMAGE.
51
+ #
52
+ # This software is licensed under the BSD open source license. For more
53
+ # information visit www.opensource.org.
54
+ #
55
+ # Authors:
56
+ # John Colosi (VeriSign)
57
+ # Srikanth Veeramachaneni (VeriSign)
58
+ # Nagesh Chigurupati (Verisign)
59
+ # Praveen Srinivasan(Verisign)
60
+ #++
61
+ module Punycode
62
+ BASE = 36
63
+ TMIN = 1
64
+ TMAX = 26
65
+ SKEW = 38
66
+ DAMP = 700
67
+ INITIAL_BIAS = 72
68
+ INITIAL_N = 0x80
69
+ DELIMITER = "-"
70
+
71
+ MAXINT = (1 << 32) - 1
72
+
73
+ LOBASE = BASE - TMIN
74
+ CUTOFF = LOBASE * TMAX / 2
75
+
76
+ RE_NONBASIC = /[^\x00-\x7f]/.freeze
77
+
78
+ # Returns the numeric value of a basic code point (for use in
79
+ # representing integers) in the range 0 to base-1, or nil if cp
80
+ # does not represent a value.
81
+ DECODE_DIGIT = {}.tap do |map|
82
+ # ASCII A..Z map to 0..25
83
+ # ASCII a..z map to 0..25
84
+ (0..25).each { |i| map[65 + i] = map[97 + i] = i }
85
+ # ASCII 0..9 map to 26..35
86
+ (26..35).each { |i| map[22 + i] = i }
87
+ end
88
+
89
+ # Returns the basic code point whose value (when used for
90
+ # representing integers) is d, which must be in the range 0 to
91
+ # BASE-1. The lowercase form is used unless flag is true, in
92
+ # which case the uppercase form is used. The behavior is
93
+ # undefined if flag is true and digit d has no uppercase
94
+ # form.
95
+ ENCODE_DIGIT = proc { |d, flag|
96
+ (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
97
+ # 0..25 map to ASCII a..z or A..Z
98
+ # 26..35 map to ASCII 0..9
99
+ }
100
+
101
+ DOT = "."
102
+ PREFIX = "xn--"
103
+
104
+ # Most errors we raise are basically kind of ArgumentError.
105
+ class ArgumentError < ::ArgumentError; end
106
+ class BufferOverflowError < ArgumentError; end
107
+
108
+ module_function
109
+
110
+ # Encode a +string+ in Punycode
111
+ def encode(string)
112
+ input = string.unpack("U*")
113
+ output = +""
114
+
115
+ # Initialize the state
116
+ n = INITIAL_N
117
+ delta = 0
118
+ bias = INITIAL_BIAS
119
+
120
+ # Handle the basic code points
121
+ input.each { |cp| output << cp.chr if cp < 0x80 }
122
+
123
+ h = b = output.length
124
+
125
+ # h is the number of code points that have been handled, b is the
126
+ # number of basic code points, and out is the number of characters
127
+ # that have been output.
128
+
129
+ output << DELIMITER if b > 0
130
+
131
+ # Main encoding loop
132
+
133
+ while h < input.length
134
+ # All non-basic code points < n have been handled already. Find
135
+ # the next larger one
136
+
137
+ m = MAXINT
138
+ input.each do |cp|
139
+ m = cp if (n...m) === cp
140
+ end
141
+
142
+ # Increase delta enough to advance the decoder's <n,i> state to
143
+ # <m,0>, but guard against overflow
144
+
145
+ delta += (m - n) * (h + 1)
146
+ raise BufferOverflowError if delta > MAXINT
147
+
148
+ n = m
149
+
150
+ input.each do |cp|
151
+ # AMC-ACE-Z can use this simplified version instead
152
+ if cp < n
153
+ delta += 1
154
+ raise BufferOverflowError if delta > MAXINT
155
+ elsif cp == n
156
+ # Represent delta as a generalized variable-length integer
157
+ q = delta
158
+ k = BASE
159
+ loop do
160
+ t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
161
+ break if q < t
162
+
163
+ q, r = (q - t).divmod(BASE - t)
164
+ output << ENCODE_DIGIT[t + r, false]
165
+ k += BASE
166
+ end
167
+
168
+ output << ENCODE_DIGIT[q, false]
169
+
170
+ # Adapt the bias
171
+ delta = h == b ? delta / DAMP : delta >> 1
172
+ delta += delta / (h + 1)
173
+ bias = 0
174
+ while delta > CUTOFF
175
+ delta /= LOBASE
176
+ bias += BASE
177
+ end
178
+ bias += (LOBASE + 1) * delta / (delta + SKEW)
179
+
180
+ delta = 0
181
+ h += 1
182
+ end
183
+ end
184
+
185
+ delta += 1
186
+ n += 1
187
+ end
188
+
189
+ output
190
+ end
191
+
192
+ # Encode a hostname using IDN/Punycode algorithms
193
+ def encode_hostname(hostname)
194
+ hostname.match(RE_NONBASIC) || (return hostname)
195
+
196
+ hostname.split(DOT).map do |name|
197
+ if name.match(RE_NONBASIC)
198
+ PREFIX + encode(name)
199
+ else
200
+ name
201
+ end
202
+ end.join(DOT)
203
+ end
204
+
205
+ # Decode a +string+ encoded in Punycode
206
+ def decode(string)
207
+ # Initialize the state
208
+ n = INITIAL_N
209
+ i = 0
210
+ bias = INITIAL_BIAS
211
+
212
+ if j = string.rindex(DELIMITER)
213
+ b = string[0...j]
214
+
215
+ b.match(RE_NONBASIC) &&
216
+ raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}")
217
+
218
+ # Handle the basic code points
219
+
220
+ output = b.unpack("U*")
221
+ u = string[(j + 1)..-1]
222
+ else
223
+ output = []
224
+ u = string
225
+ end
226
+
227
+ # Main decoding loop: Start just after the last delimiter if any
228
+ # basic code points were copied; start at the beginning
229
+ # otherwise.
230
+
231
+ input = u.unpack("C*")
232
+ input_length = input.length
233
+ h = 0
234
+ out = output.length
235
+
236
+ while h < input_length
237
+ # Decode a generalized variable-length integer into delta,
238
+ # which gets added to i. The overflow checking is easier
239
+ # if we increase i as we go, then subtract off its starting
240
+ # value at the end to obtain delta.
241
+
242
+ oldi = i
243
+ w = 1
244
+ k = BASE
245
+
246
+ loop do
247
+ (digit = DECODE_DIGIT[input[h]]) ||
248
+ raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}")
249
+ h += 1
250
+ i += digit * w
251
+ raise BufferOverflowError if i > MAXINT
252
+
253
+ t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
254
+ break if digit < t
255
+
256
+ w *= BASE - t
257
+ raise BufferOverflowError if w > MAXINT
258
+
259
+ k += BASE
260
+ (h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}")
261
+ end
262
+
263
+ # Adapt the bias
264
+ delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
265
+ delta += delta / (out + 1)
266
+ bias = 0
267
+ while delta > CUTOFF
268
+ delta /= LOBASE
269
+ bias += BASE
270
+ end
271
+ bias += (LOBASE + 1) * delta / (delta + SKEW)
272
+
273
+ # i was supposed to wrap around from out+1 to 0, incrementing
274
+ # n each time, so we'll fix that now:
275
+
276
+ q, i = i.divmod(out + 1)
277
+ n += q
278
+ raise BufferOverflowError if n > MAXINT
279
+
280
+ # Insert n at position i of the output:
281
+
282
+ output[i, 0] = n
283
+
284
+ out += 1
285
+ i += 1
286
+ end
287
+ output.pack("U*")
288
+ end
289
+
290
+ # Decode a hostname using IDN/Punycode algorithms
291
+ def decode_hostname(hostname)
292
+ hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do
293
+ Regexp.last_match(1) << decode(Regexp.last_match(2))
294
+ end
295
+ end
296
+ end
297
+ # :nocov:
298
+ end
299
+ end
data/lib/idnx/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Idnx
4
- VERSION = "0.0.1"
4
+ VERSION = "0.1.0"
5
5
  end
data/lib/idnx/windows.rb CHANGED
@@ -45,9 +45,9 @@ module Idnx
45
45
  # LPCCH lpDefaultChar,
46
46
  # LPBOOL lpUsedDefaultChar
47
47
  # );
48
- attach_function :IdnToAscii, [:uint, :pointer, :int, :pointer, :int], :int
49
- attach_function :MultiByteToWideChar, [:uint, :uint, :string, :int, :pointer, :int], :int
50
- attach_function :WideCharToMultiByte, [:uint, :uint, :pointer, :int, :pointer, :int, :pointer, :pointer], :int
48
+ attach_function :IdnToAscii, %i[uint pointer int pointer int], :int
49
+ attach_function :MultiByteToWideChar, %i[uint uint string int pointer int], :int
50
+ attach_function :WideCharToMultiByte, %i[uint uint pointer int pointer int pointer pointer], :int
51
51
 
52
52
  module_function
53
53
 
@@ -55,6 +55,7 @@ module Idnx
55
55
  # turn to wchar
56
56
  wchar_len = MultiByteToWideChar(WIN32OLE::CP_UTF8, MB_ERR_INVALID_CHARS, hostname, -1, nil, 0)
57
57
  raise Error, "Failed to convert \"#{hostname}\" to wchar" if wchar_len.zero?
58
+
58
59
  wchar_ptr = FFI::MemoryPointer.new(:wchar_t, wchar_len)
59
60
  wchar_len = MultiByteToWideChar(WIN32OLE::CP_UTF8, 0, hostname, -1, wchar_ptr, wchar_len)
60
61
  raise Error, "Failed to convert \"#{hostname}\" to wchar" if wchar_len.zero?
@@ -64,27 +65,27 @@ module Idnx
64
65
  punycode_len = IdnToAscii(0, wchar_ptr, -1, punycode, IDN_MAX_LENGTH)
65
66
  wchar_ptr.free
66
67
 
67
- if punycode_len == 0
68
+ if punycode_len.zero?
68
69
  last_error = FFI::LastError.error
69
70
 
70
71
  # operation completed successfully, hostname is not an IDN
71
72
  # return hostname if last_error == 0
72
73
 
73
74
  message = case last_error
74
- when ERROR_INSUFFICIENT_BUFFER
75
- "The supplied buffer size was not large enough, or it was incorrectly set to NULL"
76
- when ERROR_INVALID_FLAGS
77
- "The values supplied for flags were not valid"
78
- when ERROR_INVALID_NAME
79
- "An invalid name was supplied to the function"
80
- when ERROR_INVALID_PARAMETER
81
- "Any of the parameter values was invalid"
82
- when ERROR_NO_UNICODE_TRANSLATION
83
- "An invalid Unicode was found in a string"
84
- else
85
- "Failed to convert \"#{hostname}\"; (error: #{last_error})" \
86
- "\n\nhttps://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes#system-error-codes-1"
87
- end
75
+ when ERROR_INSUFFICIENT_BUFFER
76
+ "The supplied buffer size was not large enough, or it was incorrectly set to NULL"
77
+ when ERROR_INVALID_FLAGS
78
+ "The values supplied for flags were not valid"
79
+ when ERROR_INVALID_NAME
80
+ "An invalid name was supplied to the function"
81
+ when ERROR_INVALID_PARAMETER
82
+ "Any of the parameter values was invalid"
83
+ when ERROR_NO_UNICODE_TRANSLATION
84
+ "An invalid Unicode was found in a string"
85
+ else
86
+ "Failed to convert \"#{hostname}\"; (error: #{last_error})" \
87
+ "\n\nhttps://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes#system-error-codes-1"
88
+ end
88
89
  punycode.free
89
90
  raise Error, message
90
91
  end
@@ -92,9 +93,11 @@ module Idnx
92
93
  # turn to unicode
93
94
  unicode_len = WideCharToMultiByte(WIN32OLE::CP_UTF8, 0, punycode, -1, nil, 0, nil, nil)
94
95
  raise Error, "Failed to convert \"#{hostname}\" to utf8" if unicode_len.zero?
96
+
95
97
  utf8_ptr = FFI::MemoryPointer.new(:char, unicode_len)
96
98
  unicode_len = WideCharToMultiByte(WIN32OLE::CP_UTF8, 0, punycode, -1, utf8_ptr, unicode_len, nil, nil)
97
99
  raise Error, "Failed to convert \"#{hostname}\" to utf8" if unicode_len.zero?
100
+
98
101
  unicode = utf8_ptr.read_string(utf8_ptr.size)
99
102
  unicode.strip! # remove null-byte
100
103
  end
data/lib/idnx.rb CHANGED
@@ -4,7 +4,7 @@ require_relative "idnx/version"
4
4
  require "ffi"
5
5
 
6
6
  module Idnx
7
- Error = Class.new(StandardError)
7
+ class Error < StandardError; end
8
8
 
9
9
  module_function
10
10
 
@@ -16,5 +16,11 @@ end
16
16
  if FFI::Platform.windows?
17
17
  require "idnx/windows"
18
18
  else
19
- require "idnx/idn2"
19
+ begin
20
+ require "idnx/idn2"
21
+ rescue LoadError
22
+ # fall back to the pure Ruby Punycode 2003 implementation
23
+ require "idnx/ruby"
24
+ Idnx::Lib = Idnx::Ruby
25
+ end
20
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: idnx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - HoneyryderChuck
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-11 00:00:00.000000000 Z
11
+ date: 2021-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -39,6 +39,7 @@ files:
39
39
  - README.md
40
40
  - lib/idnx.rb
41
41
  - lib/idnx/idn2.rb
42
+ - lib/idnx/ruby.rb
42
43
  - lib/idnx/version.rb
43
44
  - lib/idnx/windows.rb
44
45
  homepage: https://github.com/honeyryderchuck/idnx