domain_name 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +107 -0
- data/README.md +66 -0
- data/Rakefile +77 -0
- data/VERSION +1 -0
- data/data/effective_tld_names.dat +5189 -0
- data/domain_name.gemspec +67 -0
- data/lib/domain_name.rb +175 -0
- data/lib/domain_name/etld_data.rb +4299 -0
- data/lib/domain_name/etld_data.rb.erb +7 -0
- data/lib/domain_name/punycode.rb +182 -0
- data/test/helper.rb +18 -0
- data/test/test_domain_name-punycode.rb +96 -0
- data/test/test_domain_name.rb +89 -0
- data/tool/gen_etld_data.rb +61 -0
- metadata +161 -0
@@ -0,0 +1,182 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# punycode.rb - PunyCode encoder for the Domain Name library
|
4
|
+
#
|
5
|
+
# Copyright (C) 2011 Akinori MUSHA, All rights reserved.
|
6
|
+
#
|
7
|
+
# Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
|
8
|
+
# Library.
|
9
|
+
#
|
10
|
+
# Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
|
11
|
+
#
|
12
|
+
# Redistribution and use in source and binary forms, with or
|
13
|
+
# without modification, are permitted provided that the following
|
14
|
+
# conditions are met:
|
15
|
+
#
|
16
|
+
# 1) Redistributions of source code must retain the above copyright
|
17
|
+
# notice, this list of conditions and the following disclaimer.
|
18
|
+
#
|
19
|
+
# 2) Redistributions in binary form must reproduce the above copyright
|
20
|
+
# notice, this list of conditions and the following disclaimer in
|
21
|
+
# the documentation and/or other materials provided with the
|
22
|
+
# distribution.
|
23
|
+
#
|
24
|
+
# 3) Neither the name of the VeriSign Inc. nor the names of its
|
25
|
+
# contributors may be used to endorse or promote products derived
|
26
|
+
# from this software without specific prior written permission.
|
27
|
+
#
|
28
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
29
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
30
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
31
|
+
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
32
|
+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
33
|
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
34
|
+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
35
|
+
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
36
|
+
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
37
|
+
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
38
|
+
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
39
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
40
|
+
#
|
41
|
+
# This software is licensed under the BSD open source license. For more
|
42
|
+
# information visit www.opensource.org.
|
43
|
+
#
|
44
|
+
# Authors:
|
45
|
+
# John Colosi (VeriSign)
|
46
|
+
# Srikanth Veeramachaneni (VeriSign)
|
47
|
+
# Nagesh Chigurupati (Verisign)
|
48
|
+
# Praveen Srinivasan(Verisign)
|
49
|
+
|
50
|
+
class DomainName
  # Punycode encoder used to turn internationalized host name labels into
  # their ASCII ("xn--") form.  Every method is exposed as a module
  # function, e.g. DomainName::Punycode.encode(label).
  module Punycode
    # Encoding parameters.
    BASE = 36
    TMIN = 1
    TMAX = 26
    SKEW = 38
    DAMP = 700
    INITIAL_BIAS = 72
    INITIAL_N = 0x80
    DELIMITER = '-'

    # Largest delta accepted before BufferOverflowError is raised
    # (2**64 - 1).  Ruby integers never wrap around, so this bound stands
    # in for the fixed-width integer limit of the C code this was ported
    # from.
    MAXINT = (1 << 64) - 1

    # Used in the calculation of bias:
    LOBASE = BASE - TMIN

    # Used in the calculation of bias:
    CUTOFF = LOBASE * TMAX / 2

    # Base class of all errors raised by this module.
    class Error < StandardError; end

    # Raised when delta would grow beyond MAXINT while encoding.
    class BufferOverflowError < Error; end

    # Returns the basic code point (a one-character String) whose value,
    # when used for representing integers, is d.  d must be in the range
    # 0 to BASE-1:
    #
    #    0..25 map to ASCII a..z (or A..Z when flag is true)
    #   26..35 map to ASCII 0..9
    #
    # The result is unspecified if flag is true and digit d has no
    # uppercase form.
    def encode_digit(d, flag)
      (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
    end
    module_function :encode_digit

    # Encodes a single label.
    #
    # string - a UTF-8 String (it is unpacked with 'U*').
    #
    # Returns a new String made of the basic (ASCII) code points of the
    # input, followed by DELIMITER if there was at least one of them,
    # followed by the encoded non-basic code points.  The "xn--" prefix is
    # not added here; see encode_hostname.
    #
    # Raises BufferOverflowError if delta would exceed MAXINT.
    def encode(string)
      input = string.unpack('U*')
      # Start from a fresh, mutable buffer rather than appending to a
      # string literal.
      output = ''.dup

      # Initialize the state
      n = INITIAL_N
      delta = 0
      bias = INITIAL_BIAS

      # Handle the basic code points
      input.each { |cp| output << cp.chr if cp < 0x80 }

      # h is the number of code points that have been handled, b is the
      # number of basic code points.
      h = b = output.length

      output << DELIMITER if b > 0

      # Main encoding loop
      while h < input.length
        # All non-basic code points < n have been handled already.  Find
        # the next larger one.
        m = MAXINT
        input.each { |cp| m = cp if cp >= n && cp < m }

        # Increase delta enough to advance the decoder's <n,i> state to
        # <m,0>, but guard against overflow
        if m - n > (MAXINT - delta) / (h + 1)
          raise BufferOverflowError
        end
        delta += (m - n) * (h + 1)
        n = m

        input.each { |cp|
          if cp < n
            delta += 1
            # The C original detected wrap-around with `++delta == 0`;
            # Ruby integers do not wrap, so compare against the limit.
            raise BufferOverflowError if delta > MAXINT
          end

          next unless cp == n

          # Represent delta as a generalized variable-length integer
          q = delta
          k = BASE
          loop {
            # Threshold for this digit: k - bias clamped to TMIN..TMAX.
            t = [[k - bias, TMIN].max, TMAX].min
            break if q < t
            output << encode_digit(t + (q - t) % (BASE - t), false)
            q = (q - t) / (BASE - t)
            k += BASE
          }

          output << encode_digit(q, false)

          # Adapt the bias
          delta = h == b ? delta / DAMP : delta >> 1
          delta += delta / (h + 1)
          bias = 0
          while delta > CUTOFF
            delta /= LOBASE
            bias += BASE
          end
          bias += (LOBASE + 1) * delta / (delta + SKEW)

          delta = 0
          h += 1
        }

        delta += 1
        n += 1
      end

      output
    end
    module_function :encode

    # Encodes every label of hostname that contains a non-ASCII character
    # as "xn--" followed by its Punycode form; other labels are kept
    # verbatim.
    #
    # A hostname without any non-ASCII character is returned as is (the
    # same object).  Empty labels, including the one produced by a
    # trailing dot, are preserved, so "<idn>.example." keeps its final
    # dot just like an all-ASCII name does.
    def encode_hostname(hostname)
      return hostname unless hostname.match(/[^\x00-\x7f]/)

      # A negative limit keeps trailing empty fields, i.e. a trailing dot.
      labels = hostname.split('.', -1).map { |name|
        if name.match(/[^\x00-\x7f]/)
          'xn--' + encode(name)
        else
          name
        end
      }
      labels.join('.')
    end
    module_function :encode_hostname
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Shared bootstrap for the test suite: activates the bundled gems, loads
# the test frameworks and puts the library and the test directory on the
# load path before requiring the library under test.
require 'rubygems'
require 'bundler'
begin
  # Activate the gems of the :default and :development groups.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the problem and exit with the status code carried by the error.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'
require 'shoulda'

# Make ../lib (the library) and this directory (so that test files can
# `require 'helper'`) loadable.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'domain_name'

# Reopened without adding anything.
class Test::Unit::TestCase
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Checks DomainName::Punycode.encode against a table of sample strings.
class TestDomainName < Test::Unit::TestCase
  should "encode labels just as listed in RFC 3492 #7.1 (slightly modified)" do
    # Each entry is [title, code points of the input, expected encoding].
    [
      ['(A) Arabic (Egyptian)',
       [0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
        0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F],
       'egbpdaj6bu4bxfgehfvwxn'],
      ['(B) Chinese (simplified)',
       [0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587],
       'ihqwcrb4cv8a8dqg056pqjye'],
      ['(C) Chinese (traditional)',
       [0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587],
       'ihqwctvzc91f659drss3x8bo0yb'],
      ['(D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky',
       [0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
        0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
        0x0065, 0x0073, 0x006B, 0x0079],
       'Proprostnemluvesky-uyb24dma41a'],
      ['(E) Hebrew',
       [0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
        0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
        0x05D1, 0x05E8, 0x05D9, 0x05EA],
       '4dbcagdahymbxekheh6e0a7fei0b'],
      ['(F) Hindi (Devanagari)',
       [0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
        0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
        0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
        0x0939, 0x0948, 0x0902],
       'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'],
      ['(G) Japanese (kanji and hiragana)',
       [0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
        0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B],
       'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'],
      ['(H) Korean (Hangul syllables)',
       [0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
        0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
        0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C],
       '989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j' +
       'psd879ccm6fea98c'],
      ['(I) Russian (Cyrillic)',
       [0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
        0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
        0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
        0x0438],
       'b1abfaaepdrnnbgefbadotcwatmq2g4l'],
      ['(J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol',
       [0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
        0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
        0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
        0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
        0x0061, 0x00F1, 0x006F, 0x006C],
       'PorqunopuedensimplementehablarenEspaol-fmd56a'],
      ['(K) Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch' +
       '<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t',
       [0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
        0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
        0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
        0x0056, 0x0069, 0x1EC7, 0x0074],
       'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'],
      ['(L) 3<nen>B<gumi><kinpachi><sensei>',
       [0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F],
       '3B-ww4c5e180e575a65lsy2b'],
      ['(M) <amuro><namie>-with-SUPER-MONKEYS',
       [0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
        0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
        0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053],
       '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'],
      ['(N) Hello-Another-Way-<sorezore><no><basho>',
       [0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
        0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
        0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240],
       'Hello-Another-Way--fc4qua05auwb3674vfr0b'],
      ['(O) <hitotsu><yane><no><shita>2',
       [0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032],
       '2-u9tlzr9756bt3uc0v'],
      ['(P) Maji<de>Koi<suru>5<byou><mae>',
       [0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
        0x308B, 0x0035, 0x79D2, 0x524D],
       'MajiKoi5-783gue6qz075azm5e'],
      ['(Q) <pafii>de<runba>',
       [0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0],
       'de-jg4avhby1noc0d'],
      ['(R) <sono><supiido><de>',
       [0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067],
       'd9juau41awczczp'],
      ['(S) -> $1.00 <-',
       [0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020,
        0x003C, 0x002D],
       '-> $1.00 <--']
    ].each { |title, cps, punycode|
      # The title doubles as the failure message.
      assert_equal punycode, DomainName::Punycode.encode(cps.pack('U*')), title
    }
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Checks DomainName.new argument validation and the value of
# DomainName#domain for a table of host names.
class TestDomainName < Test::Unit::TestCase
  should "raise ArgumentError if hostname starts with a dot" do
    [
      # Leading dot.
      '.com',
      '.example',
      '.example.com',
      '.example.example',
    ].each { |hostname|
      assert_raises(ArgumentError) { DomainName.new(hostname) }
    }
  end

  should "parse canonical domain names correctly" do
    # Each entry is [hostname, expected value of #domain]; nil is the
    # expected value where no domain is reported for the host name.
    [
      # Mixed case.
      ['COM', nil],
      ['example.COM', 'example.com'],
      ['WwW.example.COM', 'example.com'],
      # Unlisted TLD.
      ['example', 'example'],
      ['example.example', 'example.example'],
      ['b.example.example', 'example.example'],
      ['a.b.example.example', 'example.example'],
      # Listed, but non-Internet, TLD.
      ['local', 'local'],
      ['example.local', 'example.local'],
      ['b.example.local', 'example.local'],
      ['a.b.example.local', 'example.local'],
      # TLD with only 1 rule.
      ['biz', nil],
      ['domain.biz', 'domain.biz'],
      ['b.domain.biz', 'domain.biz'],
      ['a.b.domain.biz', 'domain.biz'],
      # TLD with some 2-level rules.
      ['com', nil],
      ['example.com', 'example.com'],
      ['b.example.com', 'example.com'],
      ['a.b.example.com', 'example.com'],
      ['uk.com', nil],
      ['example.uk.com', 'example.uk.com'],
      ['b.example.uk.com', 'example.uk.com'],
      ['a.b.example.uk.com', 'example.uk.com'],
      ['test.ac', 'test.ac'],
      # TLD with only 1 (wildcard) rule.
      ['cy', nil],
      ['c.cy', nil],
      ['b.c.cy', 'b.c.cy'],
      ['a.b.c.cy', 'b.c.cy'],
      # More complex TLD.
      ['jp', nil],
      ['test.jp', 'test.jp'],
      ['www.test.jp', 'test.jp'],
      ['ac.jp', nil],
      ['test.ac.jp', 'test.ac.jp'],
      ['www.test.ac.jp', 'test.ac.jp'],
      ['kyoto.jp', nil],
      ['c.kyoto.jp', nil],
      ['b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['a.b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['pref.kyoto.jp', 'pref.kyoto.jp'],     # Exception rule.
      ['www.pref.kyoto.jp', 'pref.kyoto.jp'], # Exception rule.
      ['city.kyoto.jp', 'city.kyoto.jp'],     # Exception rule.
      ['www.city.kyoto.jp', 'city.kyoto.jp'], # Exception rule.
      # TLD with a wildcard rule and exceptions.
      ['om', nil],
      ['test.om', nil],
      ['b.test.om', 'b.test.om'],
      ['a.b.test.om', 'b.test.om'],
      ['songfest.om', 'songfest.om'],
      ['www.songfest.om', 'songfest.om'],
      # US K12.
      ['us', nil],
      ['test.us', 'test.us'],
      ['www.test.us', 'test.us'],
      ['ak.us', nil],
      ['test.ak.us', 'test.ak.us'],
      ['www.test.ak.us', 'test.ak.us'],
      ['k12.ak.us', nil],
      ['test.k12.ak.us', 'test.k12.ak.us'],
      ['www.test.k12.ak.us', 'test.k12.ak.us'],
    ].each { |hostname, domain|
      dn = DomainName.new(hostname)
      assert_equal(domain, dn.domain)
    }
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Script preamble: locates the project root relative to this file and
# makes the project's lib directory loadable before requiring the library.
require 'rubygems'
require 'pathname'
# Parent of the directory that contains this script.
$basedir = Pathname.new(__FILE__).dirname.parent
$LOAD_PATH.unshift $basedir + 'lib'
require 'domain_name'
require 'set'
require 'erb'
|
10
|
+
|
11
|
+
# Regenerates lib/domain_name/etld_data.rb: parses
# data/effective_tld_names.dat and writes the result of rendering the ERB
# template lib/domain_name/etld_data.rb.erb.  All paths are relative to
# $basedir and all files are opened as UTF-8.
def main
  dat_file = $basedir + 'data' + 'effective_tld_names.dat'
  dir = $basedir + 'lib' + 'domain_name'
  erb_file = dir + 'etld_data.rb.erb'
  rb_file = dir + 'etld_data.rb'

  File.open(dat_file, 'r:utf-8') { |dat|
    # Not read again in this method, but visible to the template through
    # `binding` below (presumably the template refers to it by this
    # name; the template is not shown here), so it must not be renamed.
    etld_data = parse(dat)
    File.open(rb_file, 'w:utf-8') { |rb|
      File.open(erb_file, 'r:utf-8') { |erb|
        rb.print ERB.new(erb.read).result(binding)
      }
    }
  }
end
|
26
|
+
|
27
|
+
# Returns domain as normalized by DomainName.normalize.  Used by parse
# for every rule it reads.
def normalize_hostname(domain)
  DomainName.normalize(domain)
end
|
30
|
+
|
31
|
+
# Parses a rule list and returns a Hash mapping normalized names to rule
# values.
#
# f - an object responding to each_line (e.g. an open File).
#
# Everything from "//" to the end of a line is a comment; blank lines and
# the line "local" are ignored.  Each remaining line must have one of
# these forms and stores the given value under the normalized NAME:
#
#   NAME    =>  0  (plain rule)
#   *.NAME  => -1  (wildcard rule)
#   !NAME   =>  1  (exception rule)
#
# In addition, the last label of every NAME is given the value 1 unless
# it already has an entry; a later rule for that label overwrites it.
#
# Raises RuntimeError ("syntax error: <line>") for any other line.
def parse(f)
  table = {}
  f.each_line { |line|
    line.sub!(%r{//.*}, '')
    line.strip!
    next if line.empty?

    case line
    when /^local$/
      # ignore .local
      next
    when /^([^!*]+)$/
      domain = normalize_hostname($1)
      value = 0
    when /^\*\.([^!*]+)$/
      domain = normalize_hostname($1)
      value = -1
    when /^\!([^!*]+)$/
      domain = normalize_hostname($1)
      value = 1
    else
      raise "syntax error: #{line}"
    end

    # Make sure the top-level label has an entry before storing the rule.
    tld = domain.match(/(?:^|\.)([^.]+)$/)[1]
    table[tld] ||= 1
    table[domain] = value
  }
  table
end
|
60
|
+
|
61
|
+
# Script entry point: run the generator as soon as the file is loaded.
main()
|