domain_name 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
class DomainName
  # Returns the effective-TLD table as a Hash.  The Hash literal below is
  # filled in when this template is rendered, from the +etld_data+ variable
  # of the rendering binding; at runtime the table is built on the first
  # call and cached in a class variable for subsequent calls.
  def self.etld_data
    @@etld_data ||= {
<% etld_data.each_pair do |name, rule| %>      <%= name.inspect %> => <%= rule.inspect %>,
<% end %>    }
  end
end
@@ -0,0 +1,182 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # punycode.rb - PunyCode encoder for the Domain Name library
4
+ #
5
+ # Copyright (C) 2011 Akinori MUSHA, All rights reserved.
6
+ #
7
+ # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
8
+ # Library.
9
+ #
10
+ # Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
11
+ #
12
+ # Redistribution and use in source and binary forms, with or
13
+ # without modification, are permitted provided that the following
14
+ # conditions are met:
15
+ #
16
+ # 1) Redistributions of source code must retain the above copyright
17
+ # notice, this list of conditions and the following disclaimer.
18
+ #
19
+ # 2) Redistributions in binary form must reproduce the above copyright
20
+ # notice, this list of conditions and the following disclaimer in
21
+ # the documentation and/or other materials provided with the
22
+ # distribution.
23
+ #
24
+ # 3) Neither the name of the VeriSign Inc. nor the names of its
25
+ # contributors may be used to endorse or promote products derived
26
+ # from this software without specific prior written permission.
27
+ #
28
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
35
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
36
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39
+ # POSSIBILITY OF SUCH DAMAGE.
40
+ #
41
+ # This software is licensed under the BSD open source license. For more
42
+ # information visit www.opensource.org.
43
+ #
44
+ # Authors:
45
+ # John Colosi (VeriSign)
46
+ # Srikanth Veeramachaneni (VeriSign)
47
+ # Nagesh Chigurupati (Verisign)
48
+ # Praveen Srinivasan(Verisign)
49
+
50
class DomainName
  # Punycode (RFC 3492) encoder.  Converts Unicode labels and hostnames
  # into their ASCII-compatible form; decoding is not implemented here.
  module Punycode
    # Bootstring parameters for Punycode.
    BASE = 36
    TMIN = 1
    TMAX = 26
    SKEW = 38
    DAMP = 700
    INITIAL_BIAS = 72
    INITIAL_N = 0x80
    DELIMITER = '-'

    # Upper bound (2**64 - 1) allowed for delta.  Ruby integers never wrap
    # around, so the overflow guards in #encode compare against this
    # value explicitly.
    MAXINT = (1 << 64) - 1

    # Used in the calculation of bias:
    LOBASE = BASE - TMIN

    # Used in the calculation of bias:
    CUTOFF = LOBASE * TMAX / 2

    class Error < StandardError; end
    class BufferOverflowError < Error; end

    # Returns the basic code point (a one-character String) whose value,
    # when used for representing integers, is +d+.  +d+ must be in the
    # range 0 to BASE-1:
    #
    #    0..25 map to ASCII a..z (or A..Z when +flag+ is true)
    #   26..35 map to ASCII 0..9
    #
    # The lowercase form is used unless +flag+ is true.  The behavior is
    # undefined if +flag+ is true and digit +d+ has no uppercase form.
    def encode_digit(d, flag)
      # 22 + 75 == 'a'.ord for letters, 26 + 22 == '0'.ord for digits;
      # subtracting 1 << 5 switches a letter to uppercase.
      (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
    end
    module_function :encode_digit

    # Encodes +string+ (a UTF-8 String holding a single label) to
    # Punycode and returns the encoded String, without any "xn--" prefix.
    #
    # Raises BufferOverflowError if delta would exceed MAXINT.
    def encode(string)
      input = string.unpack('U*')
      # Start from an unfrozen copy so that appending also works when
      # string literals are frozen.
      output = ''.dup

      # Initialize the state
      n = INITIAL_N
      delta = 0
      bias = INITIAL_BIAS

      # Handle the basic code points
      input.each { |cp| output << cp.chr if cp < 0x80 }

      # h is the number of code points that have been handled, b is the
      # number of basic code points.
      h = b = output.length

      output << DELIMITER if b > 0

      # Main encoding loop
      while h < input.length
        # All non-basic code points < n have been handled already.  Find
        # the next larger one.
        m = MAXINT
        input.each { |cp| m = cp if cp >= n && cp < m }

        # Increase delta enough to advance the decoder's <n,i> state to
        # <m,0>, but guard against overflow.
        if m - n > (MAXINT - delta) / (h + 1)
          raise BufferOverflowError
        end
        delta += (m - n) * (h + 1)
        n = m

        input.each { |cp|
          if cp < n
            delta += 1
            # The C original relied on unsigned wrap-around to zero;
            # here the bound has to be checked explicitly.
            raise BufferOverflowError if delta > MAXINT
          end

          next unless cp == n

          # Represent delta as a generalized variable-length integer
          q = delta
          k = BASE
          loop {
            # Threshold for this digit: k - bias clamped to TMIN..TMAX.
            t = [[k - bias, TMIN].max, TMAX].min
            break if q < t
            output << encode_digit(t + (q - t) % (BASE - t), false)
            q = (q - t) / (BASE - t)
            k += BASE
          }
          output << encode_digit(q, false)

          # Adapt the bias
          delta = h == b ? delta / DAMP : delta >> 1
          delta += delta / (h + 1)
          bias = 0
          while delta > CUTOFF
            delta /= LOBASE
            bias += BASE
          end
          bias += (LOBASE + 1) * delta / (delta + SKEW)

          delta = 0
          h += 1
        }

        delta += 1
        n += 1
      end

      output
    end
    module_function :encode

    # Converts every label of +hostname+ that contains a non-ASCII
    # character to its "xn--" Punycode form and returns the resulting
    # hostname.  A hostname consisting only of ASCII characters is
    # returned as is.
    def encode_hostname(hostname)
      hostname =~ /[^\x00-\x7f]/ or return hostname

      # A negative limit keeps trailing empty fields, so the trailing
      # dot of a fully qualified name is preserved.
      hostname.split('.', -1).map { |name|
        if name =~ /[^\x00-\x7f]/
          'xn--' + encode(name)
        else
          name
        end
      }.join('.')
    end
    module_function :encode_hostname
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
# Shared bootstrap for the test suite: activates the bundled gems, loads
# the test frameworks and makes both lib/ and test/ requirable.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  # Report what is missing and exit with the status Bundler suggests.
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'test/unit'
require 'shoulda'

# The test directory ends up first on the load path, followed by lib/.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir, File.join(test_dir, '..', 'lib'))
require 'domain_name'

class Test::Unit::TestCase
end
@@ -0,0 +1,96 @@
1
+ require 'helper'
2
+
3
class TestDomainName < Test::Unit::TestCase
  should "encode labels just as listed in RFC 3492 #7.1 (slightly modified)" do
    # Each vector is [title, code points of the input label, expected
    # Punycode output].
    vectors = [
      ['(A) Arabic (Egyptian)',
       [0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
        0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F],
       'egbpdaj6bu4bxfgehfvwxn'],
      ['(B) Chinese (simplified)',
       [0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587],
       'ihqwcrb4cv8a8dqg056pqjye'],
      ['(C) Chinese (traditional)',
       [0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587],
       'ihqwctvzc91f659drss3x8bo0yb'],
      ['(D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky',
       [0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
        0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
        0x0065, 0x0073, 0x006B, 0x0079],
       'Proprostnemluvesky-uyb24dma41a'],
      ['(E) Hebrew',
       [0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
        0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
        0x05D1, 0x05E8, 0x05D9, 0x05EA],
       '4dbcagdahymbxekheh6e0a7fei0b'],
      ['(F) Hindi (Devanagari)',
       [0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
        0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
        0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
        0x0939, 0x0948, 0x0902],
       'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'],
      ['(G) Japanese (kanji and hiragana)',
       [0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
        0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B],
       'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'],
      ['(H) Korean (Hangul syllables)',
       [0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
        0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
        0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C],
       '989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j' +
       'psd879ccm6fea98c'],
      ['(I) Russian (Cyrillic)',
       [0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
        0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
        0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
        0x0438],
       'b1abfaaepdrnnbgefbadotcwatmq2g4l'],
      ['(J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol',
       [0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
        0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
        0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
        0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
        0x0061, 0x00F1, 0x006F, 0x006C],
       'PorqunopuedensimplementehablarenEspaol-fmd56a'],
      ['(K) Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch' +
       '<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t',
       [0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
        0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
        0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
        0x0056, 0x0069, 0x1EC7, 0x0074],
       'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'],
      ['(L) 3<nen>B<gumi><kinpachi><sensei>',
       [0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F],
       '3B-ww4c5e180e575a65lsy2b'],
      ['(M) <amuro><namie>-with-SUPER-MONKEYS',
       [0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
        0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
        0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053],
       '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'],
      ['(N) Hello-Another-Way-<sorezore><no><basho>',
       [0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
        0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
        0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240],
       'Hello-Another-Way--fc4qua05auwb3674vfr0b'],
      ['(O) <hitotsu><yane><no><shita>2',
       [0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032],
       '2-u9tlzr9756bt3uc0v'],
      ['(P) Maji<de>Koi<suru>5<byou><mae>',
       [0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
        0x308B, 0x0035, 0x79D2, 0x524D],
       'MajiKoi5-783gue6qz075azm5e'],
      ['(Q) <pafii>de<runba>',
       [0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0],
       'de-jg4avhby1noc0d'],
      ['(R) <sono><supiido><de>',
       [0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067],
       'd9juau41awczczp'],
      ['(S) -> $1.00 <-',
       [0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020,
        0x003C, 0x002D],
       '-> $1.00 <--']
    ]

    vectors.each do |title, code_points, expected|
      label = code_points.pack('U*')
      assert_equal expected, DomainName::Punycode.encode(label), title
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'helper'
2
+
3
class TestDomainName < Test::Unit::TestCase
  should "raise ArgumentError if hostname starts with a dot" do
    # Leading dot.
    dotted_hostnames = [
      '.com',
      '.example',
      '.example.com',
      '.example.example',
    ]

    dotted_hostnames.each do |hostname|
      assert_raises(ArgumentError) { DomainName.new(hostname) }
    end
  end

  should "parse canonical domain names correctly" do
    # Pairs of [hostname, expected value of DomainName#domain].
    expectations = [
      # Mixed case.
      ['COM', nil],
      ['example.COM', 'example.com'],
      ['WwW.example.COM', 'example.com'],
      # Unlisted TLD.
      ['example', 'example'],
      ['example.example', 'example.example'],
      ['b.example.example', 'example.example'],
      ['a.b.example.example', 'example.example'],
      # Listed, but non-Internet, TLD.
      ['local', 'local'],
      ['example.local', 'example.local'],
      ['b.example.local', 'example.local'],
      ['a.b.example.local', 'example.local'],
      # TLD with only 1 rule.
      ['biz', nil],
      ['domain.biz', 'domain.biz'],
      ['b.domain.biz', 'domain.biz'],
      ['a.b.domain.biz', 'domain.biz'],
      # TLD with some 2-level rules.
      ['com', nil],
      ['example.com', 'example.com'],
      ['b.example.com', 'example.com'],
      ['a.b.example.com', 'example.com'],
      ['uk.com', nil],
      ['example.uk.com', 'example.uk.com'],
      ['b.example.uk.com', 'example.uk.com'],
      ['a.b.example.uk.com', 'example.uk.com'],
      ['test.ac', 'test.ac'],
      # TLD with only 1 (wildcard) rule.
      ['cy', nil],
      ['c.cy', nil],
      ['b.c.cy', 'b.c.cy'],
      ['a.b.c.cy', 'b.c.cy'],
      # More complex TLD.
      ['jp', nil],
      ['test.jp', 'test.jp'],
      ['www.test.jp', 'test.jp'],
      ['ac.jp', nil],
      ['test.ac.jp', 'test.ac.jp'],
      ['www.test.ac.jp', 'test.ac.jp'],
      ['kyoto.jp', nil],
      ['c.kyoto.jp', nil],
      ['b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['a.b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['pref.kyoto.jp', 'pref.kyoto.jp'],     # Exception rule.
      ['www.pref.kyoto.jp', 'pref.kyoto.jp'], # Exception rule.
      ['city.kyoto.jp', 'city.kyoto.jp'],     # Exception rule.
      ['www.city.kyoto.jp', 'city.kyoto.jp'], # Exception rule.
      # TLD with a wildcard rule and exceptions.
      ['om', nil],
      ['test.om', nil],
      ['b.test.om', 'b.test.om'],
      ['a.b.test.om', 'b.test.om'],
      ['songfest.om', 'songfest.om'],
      ['www.songfest.om', 'songfest.om'],
      # US K12.
      ['us', nil],
      ['test.us', 'test.us'],
      ['www.test.us', 'test.us'],
      ['ak.us', nil],
      ['test.ak.us', 'test.ak.us'],
      ['www.test.ak.us', 'test.ak.us'],
      ['k12.ak.us', nil],
      ['test.k12.ak.us', 'test.k12.ak.us'],
      ['www.test.k12.ak.us', 'test.k12.ak.us'],
    ]

    expectations.each do |hostname, expected_domain|
      parsed = DomainName.new(hostname)
      assert_equal(expected_domain, parsed.domain)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ $basedir = Pathname.new(__FILE__).dirname.parent
6
+ $LOAD_PATH.unshift $basedir + 'lib'
7
+ require 'domain_name'
8
+ require 'set'
9
+ require 'erb'
10
+
11
# Regenerates lib/domain_name/etld_data.rb under $basedir: parses
# data/effective_tld_names.dat with #parse and renders the result
# through the etld_data.rb.erb template.
#
# The template is read and rendered before the output file is opened for
# writing, so a missing template or a rendering error does not leave a
# truncated etld_data.rb behind.
def main
  dat_file = $basedir + 'data' + 'effective_tld_names.dat'
  dir      = $basedir + 'lib' + 'domain_name'
  erb_file = dir + 'etld_data.rb.erb'
  rb_file  = dir + 'etld_data.rb'

  # NOTE: the template refers to this local by name through +binding+,
  # so it must keep the name +etld_data+.
  etld_data = File.open(dat_file, 'r:utf-8') { |dat| parse(dat) }
  template  = File.open(erb_file, 'r:utf-8') { |erb| erb.read }
  rendered  = ERB.new(template).result(binding)

  File.open(rb_file, 'w:utf-8') { |rb| rb.print rendered }
end
26
+
27
# Returns +domain+ in normalized form, as computed by
# DomainName.normalize.  Used by #parse for every rule it reads.
def normalize_hostname(domain)
  DomainName.normalize(domain)
end
30
+
31
# Parses effective-TLD rules from +f+ (anything responding to
# +each_line+) and returns a Hash mapping normalized names to an Integer:
#
#    0  for a plain rule        ("example")
#   -1  for a wildcard rule     ("*.example", keyed by "example")
#    1  for an exception rule   ("!www.example", keyed by "www.example")
#
# The last label of every rule is also entered with the value 1 unless
# it already has an entry.  "//" comments and blank lines are skipped,
# and the "local" rule is ignored.
#
# Raises RuntimeError ("syntax error: ...") for a line matching none of
# the rule forms above.
def parse(f)
  table = {}

  f.each_line { |line|
    # Drop the comment and surrounding whitespace without mutating the
    # string handed over by the caller's IO.
    rule = line.sub(%r{//.*}, '').strip
    next if rule.empty?

    case rule
    when /^local$/
      # ignore .local
      next
    when /^([^!*]+)$/
      value = 0
    when /^\*\.([^!*]+)$/
      value = -1
    when /^\!([^!*]+)$/
      value = 1
    else
      raise "syntax error: #{rule}"
    end

    # Group 1 of whichever pattern matched holds the name without its
    # "*." or "!" marker.
    domain = normalize_hostname(Regexp.last_match(1))

    tld = domain.match(/(?:^|\.)([^.]+)$/)[1]
    table[tld] ||= 1
    table[domain] = value
  }

  table
end
60
+
61
+ main()