domain_name 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
class DomainName
  # Returns the effective-TLD table, building the hash literal on the first
  # call and caching it in a class variable for later calls.  The entries
  # below are emitted from the `etld_data` local supplied by the template
  # binding.
  def self.etld_data
    @@etld_data ||= {
<% etld_data.each_pair do |key, value| %>      <%= key.inspect %> => <%= value.inspect %>,
<% end %>    }
  end
end
@@ -0,0 +1,182 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # punycode.rb - PunyCode encoder for the Domain Name library
4
+ #
5
+ # Copyright (C) 2011 Akinori MUSHA, All rights reserved.
6
+ #
7
+ # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
8
+ # Library.
9
+ #
10
+ # Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
11
+ #
12
+ # Redistribution and use in source and binary forms, with or
13
+ # without modification, are permitted provided that the following
14
+ # conditions are met:
15
+ #
16
+ # 1) Redistributions of source code must retain the above copyright
17
+ # notice, this list of conditions and the following disclaimer.
18
+ #
19
+ # 2) Redistributions in binary form must reproduce the above copyright
20
+ # notice, this list of conditions and the following disclaimer in
21
+ # the documentation and/or other materials provided with the
22
+ # distribution.
23
+ #
24
+ # 3) Neither the name of the VeriSign Inc. nor the names of its
25
+ # contributors may be used to endorse or promote products derived
26
+ # from this software without specific prior written permission.
27
+ #
28
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
35
+ # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
36
+ # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39
+ # POSSIBILITY OF SUCH DAMAGE.
40
+ #
41
+ # This software is licensed under the BSD open source license. For more
42
+ # information visit www.opensource.org.
43
+ #
44
+ # Authors:
45
+ # John Colosi (VeriSign)
46
+ # Srikanth Veeramachaneni (VeriSign)
47
+ # Nagesh Chigurupati (Verisign)
48
+ # Praveen Srinivasan(Verisign)
49
+
50
class DomainName
  # Punycode encoder (RFC 3492) used to turn internationalized host name
  # labels into their ASCII-compatible form.
  module Punycode
    # Bootstring parameters for Punycode.
    BASE         = 36
    TMIN         = 1
    TMAX         = 26
    SKEW         = 38
    DAMP         = 700
    INITIAL_BIAS = 72
    INITIAL_N    = 0x80
    DELIMITER    = '-'

    # Upper bound for delta (2**64 - 1).  Ruby integers never wrap around,
    # so the overflow guards below compare against this limit explicitly.
    MAXINT = (1 << 64) - 1

    # Used in the calculation of bias:
    LOBASE = BASE - TMIN

    # Used in the calculation of bias:
    CUTOFF = LOBASE * TMAX / 2

    class Error < StandardError; end
    class BufferOverflowError < Error; end

    # Returns the basic code point (a one-character String) whose digit
    # value is d, which must be in the range 0 to BASE-1.
    #
    #   0..25  map to ASCII a..z (A..Z when flag is true)
    #   26..35 map to ASCII 0..9
    #
    # The result is meaningless when flag is true and d has no uppercase
    # form (d >= 26).
    def encode_digit(d, flag)
      (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
    end
    module_function :encode_digit

    # Computes the next bias from the delta that was just encoded.
    #
    # delta      - the delta that was just written out
    # numpoints  - number of code points handled so far, including this one
    # first_time - true when this was the first non-basic code point
    def adapt(delta, numpoints, first_time)
      delta = first_time ? delta / DAMP : delta >> 1
      delta += delta / numpoints
      bias = 0
      while delta > CUTOFF
        delta /= LOBASE
        bias += BASE
      end
      bias + (LOBASE + 1) * delta / (delta + SKEW)
    end
    module_function :adapt
    private_class_method :adapt

    # Encodes a single label.
    #
    # string - a UTF-8 String; it is unpacked with 'U*', so invalid UTF-8
    #          makes String#unpack raise.
    #
    # Returns the Punycode form as a String, without any "xn--" prefix.
    # Basic (ASCII) code points are copied through unchanged, followed by
    # the delimiter if there were any.
    # Raises BufferOverflowError when delta would exceed MAXINT.
    def encode(string)
      input  = string.unpack('U*')
      output = ''.dup # unfrozen buffer, safe under frozen string literals

      # Initialize the state
      n     = INITIAL_N
      delta = 0
      bias  = INITIAL_BIAS

      # Handle the basic code points
      input.each { |cp| output << cp.chr if cp < 0x80 }

      # h is the number of code points that have been handled, b is the
      # number of basic code points.
      h = b = output.length

      output << DELIMITER if b > 0

      # Main encoding loop
      while h < input.length
        # All non-basic code points < n have been handled already.  Find
        # the next larger one.
        m = MAXINT
        input.each { |cp| m = cp if cp >= n && cp < m }

        # Increase delta enough to advance the decoder's <n,i> state to
        # <m,0>, but guard against overflow.
        if m - n > (MAXINT - delta) / (h + 1)
          raise BufferOverflowError
        end
        delta += (m - n) * (h + 1)
        n = m

        input.each { |cp|
          if cp < n
            delta += 1
            # The C original detected wraparound with "== 0"; that can
            # never happen here, so check the limit directly.
            raise BufferOverflowError if delta > MAXINT
          elsif cp == n
            # Represent delta as a generalized variable-length integer
            q = delta
            k = BASE
            loop {
              # Threshold for this digit position: k - bias clamped to
              # the range TMIN..TMAX.
              t = [[k - bias, TMIN].max, TMAX].min
              break if q < t
              output << encode_digit(t + (q - t) % (BASE - t), false)
              q = (q - t) / (BASE - t)
              k += BASE
            }

            output << encode_digit(q, false)

            bias  = adapt(delta, h + 1, h == b)
            delta = 0
            h += 1
          end
        }

        delta += 1
        n += 1
      end

      output
    end
    module_function :encode

    # Encodes every label of hostname that contains a non-ASCII character,
    # prefixing each encoded label with "xn--".  A hostname made only of
    # ASCII characters is returned as is (the same object).
    #
    # Empty labels are kept, including the one after a trailing dot, so a
    # trailing root dot survives the round trip through split/join.
    def encode_hostname(hostname)
      return hostname unless hostname.match(/[^\x00-\x7f]/)

      # A negative limit keeps trailing empty fields.
      labels = hostname.split('.', -1).map { |label|
        label.match(/[^\x00-\x7f]/) ? 'xn--' + encode(label) : label
      }
      labels.join('.')
    end
    module_function :encode_hostname
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; if Bundler cannot set
# them up, report why and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end

require 'test/unit'
require 'shoulda'

# Put lib/ and then this test directory at the front of the load path
# (the test directory ends up first), then load the library under test.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each do |path|
  $LOAD_PATH.unshift(path)
end
require 'domain_name'

class Test::Unit::TestCase
end
@@ -0,0 +1,96 @@
1
+ require 'helper'
2
+
3
# Checks DomainName::Punycode.encode against the sample strings of
# RFC 3492 section 7.1.  Each entry is [title, code points, expected].
class TestDomainName < Test::Unit::TestCase
  should "encode labels just as listed in RFC 3492 #7.1 (slightly modified)" do
    vectors = [
      ['(A) Arabic (Egyptian)',
       [0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
        0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F],
       'egbpdaj6bu4bxfgehfvwxn'],
      ['(B) Chinese (simplified)',
       [0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587],
       'ihqwcrb4cv8a8dqg056pqjye'],
      ['(C) Chinese (traditional)',
       [0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587],
       'ihqwctvzc91f659drss3x8bo0yb'],
      ['(D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky',
       [0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
        0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
        0x0065, 0x0073, 0x006B, 0x0079],
       'Proprostnemluvesky-uyb24dma41a'],
      ['(E) Hebrew',
       [0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
        0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
        0x05D1, 0x05E8, 0x05D9, 0x05EA],
       '4dbcagdahymbxekheh6e0a7fei0b'],
      ['(F) Hindi (Devanagari)',
       [0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
        0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
        0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
        0x0939, 0x0948, 0x0902],
       'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'],
      ['(G) Japanese (kanji and hiragana)',
       [0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
        0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B],
       'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'],
      ['(H) Korean (Hangul syllables)',
       [0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
        0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
        0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C],
       '989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j' <<
       'psd879ccm6fea98c'],
      ['(I) Russian (Cyrillic)',
       [0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
        0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
        0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
        0x0438],
       'b1abfaaepdrnnbgefbadotcwatmq2g4l'],
      ['(J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol',
       [0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
        0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
        0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
        0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
        0x0061, 0x00F1, 0x006F, 0x006C],
       'PorqunopuedensimplementehablarenEspaol-fmd56a'],
      ['(K) Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch' <<
       '<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t',
       [0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
        0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
        0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
        0x0056, 0x0069, 0x1EC7, 0x0074],
       'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'],
      ['(L) 3<nen>B<gumi><kinpachi><sensei>',
       [0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F],
       '3B-ww4c5e180e575a65lsy2b'],
      ['(M) <amuro><namie>-with-SUPER-MONKEYS',
       [0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
        0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
        0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053],
       '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'],
      ['(N) Hello-Another-Way-<sorezore><no><basho>',
       [0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
        0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
        0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240],
       'Hello-Another-Way--fc4qua05auwb3674vfr0b'],
      ['(O) <hitotsu><yane><no><shita>2',
       [0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032],
       '2-u9tlzr9756bt3uc0v'],
      ['(P) Maji<de>Koi<suru>5<byou><mae>',
       [0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
        0x308B, 0x0035, 0x79D2, 0x524D],
       'MajiKoi5-783gue6qz075azm5e'],
      ['(Q) <pafii>de<runba>',
       [0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0],
       'de-jg4avhby1noc0d'],
      ['(R) <sono><supiido><de>',
       [0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067],
       'd9juau41awczczp'],
      ['(S) -> $1.00 <-',
       [0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020,
        0x003C, 0x002D],
       '-> $1.00 <--']
    ]

    vectors.each do |title, code_points, expected|
      label = code_points.pack('U*')
      assert_equal expected, DomainName::Punycode.encode(label), title
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'helper'
2
+
3
# Exercises DomainName.new and DomainName#domain with a table of host
# names and the registrable domain expected for each (nil when none).
class TestDomainName < Test::Unit::TestCase
  should "raise ArgumentError if hostname starts with a dot" do
    # Every entry has a leading dot.
    %w[.com .example .example.com .example.example].each do |hostname|
      assert_raises(ArgumentError) { DomainName.new(hostname) }
    end
  end

  should "parse canonical domain names correctly" do
    expectations = [
      # Mixed case.
      ['COM', nil],
      ['example.COM', 'example.com'],
      ['WwW.example.COM', 'example.com'],
      # Unlisted TLD.
      ['example', 'example'],
      ['example.example', 'example.example'],
      ['b.example.example', 'example.example'],
      ['a.b.example.example', 'example.example'],
      # Listed, but non-Internet, TLD.
      ['local', 'local'],
      ['example.local', 'example.local'],
      ['b.example.local', 'example.local'],
      ['a.b.example.local', 'example.local'],
      # TLD with only 1 rule.
      ['biz', nil],
      ['domain.biz', 'domain.biz'],
      ['b.domain.biz', 'domain.biz'],
      ['a.b.domain.biz', 'domain.biz'],
      # TLD with some 2-level rules.
      ['com', nil],
      ['example.com', 'example.com'],
      ['b.example.com', 'example.com'],
      ['a.b.example.com', 'example.com'],
      ['uk.com', nil],
      ['example.uk.com', 'example.uk.com'],
      ['b.example.uk.com', 'example.uk.com'],
      ['a.b.example.uk.com', 'example.uk.com'],
      ['test.ac', 'test.ac'],
      # TLD with only 1 (wildcard) rule.
      ['cy', nil],
      ['c.cy', nil],
      ['b.c.cy', 'b.c.cy'],
      ['a.b.c.cy', 'b.c.cy'],
      # More complex TLD.
      ['jp', nil],
      ['test.jp', 'test.jp'],
      ['www.test.jp', 'test.jp'],
      ['ac.jp', nil],
      ['test.ac.jp', 'test.ac.jp'],
      ['www.test.ac.jp', 'test.ac.jp'],
      ['kyoto.jp', nil],
      ['c.kyoto.jp', nil],
      ['b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['a.b.c.kyoto.jp', 'b.c.kyoto.jp'],
      ['pref.kyoto.jp', 'pref.kyoto.jp'],     # Exception rule.
      ['www.pref.kyoto.jp', 'pref.kyoto.jp'], # Exception rule.
      ['city.kyoto.jp', 'city.kyoto.jp'],     # Exception rule.
      ['www.city.kyoto.jp', 'city.kyoto.jp'], # Exception rule.
      # TLD with a wildcard rule and exceptions.
      ['om', nil],
      ['test.om', nil],
      ['b.test.om', 'b.test.om'],
      ['a.b.test.om', 'b.test.om'],
      ['songfest.om', 'songfest.om'],
      ['www.songfest.om', 'songfest.om'],
      # US K12.
      ['us', nil],
      ['test.us', 'test.us'],
      ['www.test.us', 'test.us'],
      ['ak.us', nil],
      ['test.ak.us', 'test.ak.us'],
      ['www.test.ak.us', 'test.ak.us'],
      ['k12.ak.us', nil],
      ['test.k12.ak.us', 'test.k12.ak.us'],
      ['www.test.k12.ak.us', 'test.k12.ak.us'],
    ]

    expectations.each do |hostname, domain|
      assert_equal(domain, DomainName.new(hostname).domain)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ $basedir = Pathname.new(__FILE__).dirname.parent
6
+ $LOAD_PATH.unshift $basedir + 'lib'
7
+ require 'domain_name'
8
+ require 'set'
9
+ require 'erb'
10
+
11
# Regenerates lib/domain_name/etld_data.rb under $basedir.
#
# Reads data/effective_tld_names.dat, parses it with parse, and renders
# lib/domain_name/etld_data.rb.erb with the resulting table available to
# the template as the local variable `etld_data`.
#
# The template is rendered completely before the output file is opened,
# so an error while parsing or rendering leaves any existing
# etld_data.rb untouched instead of truncated.
#
# Returns nil.  Errors from file access, parse or the template propagate.
def main
  dat_file = $basedir + 'data' + 'effective_tld_names.dat'
  dir      = $basedir + 'lib' + 'domain_name'
  erb_file = dir + 'etld_data.rb.erb'
  rb_file  = dir + 'etld_data.rb'

  # `etld_data` must keep this name: the template reads it via binding.
  etld_data = File.open(dat_file, 'r:utf-8') { |dat| parse(dat) }
  template  = File.open(erb_file, 'r:utf-8') { |erb| erb.read }
  rendered  = ERB.new(template).result(binding)

  File.open(rb_file, 'w:utf-8') { |rb| rb.print rendered }
end
26
+
27
# Returns whatever DomainName.normalize produces for the given domain;
# a thin wrapper so parse has a single place to canonicalize rule names.
def normalize_hostname(domain)
  DomainName.normalize domain
end
30
+
31
# Parses a public-suffix rule list into a lookup table.
#
# f - an object responding to each_line that yields the lines of the
#     list (an open File, a StringIO, ...).
#
# For every line, anything from "//" onwards is dropped as a comment and
# only the text up to the first whitespace is read as the rule; blank
# lines are skipped.  The rule "local" is ignored.
#
# Returns a Hash mapping normalized domain => Integer:
#    0  plain rule        ("com")
#   -1  wildcard rule     ("*.cy" is stored under "cy")
#    1  exception rule    ("!city.kyoto.jp" is stored without the "!")
# In addition, the last label of every rule is entered with the value 1
# unless it already has an entry.
#
# Raises RuntimeError ("syntax error: ...") for a rule that has "!" or
# "*" anywhere other than as the leading "!" or "*." marker.
def parse(f)
  table = {}

  f.each_line { |raw|
    # Strip the comment, then keep only the first whitespace-delimited
    # token; nil means the line held no rule at all.
    rule = raw.sub(%r{//.*}, '')[/\S+/]
    next if rule.nil?

    case rule
    when 'local'
      # ignore .local
      next
    when /\A([^!*]+)\z/
      value = 0
    when /\A\*\.([^!*]+)\z/
      value = -1
    when /\A!([^!*]+)\z/
      value = 1
    else
      raise "syntax error: #{rule}"
    end
    domain = normalize_hostname(Regexp.last_match(1))

    # Make sure the rule's top-level label has an entry of its own.
    tld = domain.match(/(?:^|\.)([^.]+)$/)[1]
    table[tld] ||= 1
    table[domain] = value
  }

  table
end
60
+
61
# Run the generator when this script is executed.
main