public_suffix 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +36 -0
- data/.rubocop_defaults.yml +179 -0
- data/.ruby-gemset +1 -0
- data/.travis.yml +31 -0
- data/.yardopts +1 -0
- data/2.0-Upgrade.md +52 -0
- data/CHANGELOG.md +353 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +202 -0
- data/Rakefile +51 -0
- data/bin/console +15 -0
- data/data/list.txt +12966 -0
- data/lib/public_suffix.rb +179 -0
- data/lib/public_suffix/domain.rb +235 -0
- data/lib/public_suffix/errors.rb +41 -0
- data/lib/public_suffix/list.rb +247 -0
- data/lib/public_suffix/rule.rb +350 -0
- data/lib/public_suffix/version.rb +13 -0
- data/public_suffix.gemspec +25 -0
- data/test/.empty +2 -0
- data/test/acceptance_test.rb +129 -0
- data/test/benchmarks/bm_find.rb +66 -0
- data/test/benchmarks/bm_find_all.rb +102 -0
- data/test/benchmarks/bm_names.rb +91 -0
- data/test/benchmarks/bm_select.rb +26 -0
- data/test/benchmarks/bm_select_incremental.rb +25 -0
- data/test/benchmarks/bm_valid.rb +101 -0
- data/test/profilers/domain_profiler.rb +12 -0
- data/test/profilers/find_profiler.rb +12 -0
- data/test/profilers/find_profiler_jp.rb +12 -0
- data/test/profilers/initialization_profiler.rb +11 -0
- data/test/profilers/list_profsize.rb +11 -0
- data/test/profilers/object_binsize.rb +57 -0
- data/test/psl_test.rb +52 -0
- data/test/test_helper.rb +18 -0
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +106 -0
- data/test/unit/errors_test.rb +25 -0
- data/test/unit/list_test.rb +241 -0
- data/test/unit/public_suffix_test.rb +188 -0
- data/test/unit/rule_test.rb +222 -0
- metadata +151 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# = Public Suffix
|
5
|
+
#
|
6
|
+
# Domain name parser based on the Public Suffix List.
|
7
|
+
#
|
8
|
+
# Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
|
9
|
+
|
10
|
+
module PublicSuffix
  # Version string of this release of the library.
  VERSION = "3.1.1"
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "public_suffix/version"
|
4
|
+
|
5
|
+
# Gem packaging metadata. The version is taken from PublicSuffix::VERSION and
# the file lists are read from the output of `git ls-files`.
Gem::Specification.new do |spec|
  spec.name        = "public_suffix"
  spec.version     = PublicSuffix::VERSION
  spec.authors     = ["Simone Carletti"]
  spec.email       = ["weppos@weppos.net"]
  spec.homepage    = "https://simonecarletti.com/code/publicsuffix-ruby"
  spec.summary     = "Domain name parser based on the Public Suffix List."
  spec.description = "PublicSuffix can parse and decompose a domain name into top level domain, domain and subdomains."
  spec.licenses    = ["MIT"]

  spec.required_ruby_version = ">= 2.1"

  spec.require_paths    = ["lib"]
  # Both lists shell out to git, one path per output line.
  spec.files            = `git ls-files`.split("\n")
  spec.test_files       = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.extra_rdoc_files = ["LICENSE.txt"]

  # Development-only dependencies, declared in the original order.
  %w[rake mocha yard].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
data/test/.empty
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "test_helper"
|
4
|
+
|
5
|
+
# Table-driven acceptance tests for the top-level PublicSuffix API
# (PublicSuffix.parse, PublicSuffix.domain and PublicSuffix.valid?).
class AcceptanceTest < Minitest::Test

  # Each entry is [input, expected PublicSuffix.domain(input), [trd, sld, tld]].
  VALID_CASES = [
    ["example.com",             "example.com",        [nil, "example", "com"]],
    ["foo.example.com",         "example.com",        ["foo", "example", "com"]],

    ["verybritish.co.uk",       "verybritish.co.uk",  [nil, "verybritish", "co.uk"]],
    ["foo.verybritish.co.uk",   "verybritish.co.uk",  ["foo", "verybritish", "co.uk"]],

    ["parliament.uk",           "parliament.uk",      [nil, "parliament", "uk"]],
    ["foo.parliament.uk",       "parliament.uk",      ["foo", "parliament", "uk"]],
  ].freeze

  # Every valid input must parse into the expected parts, yield the expected
  # domain, and be reported as valid.
  def test_valid
    VALID_CASES.each do |input, domain, results|
      parsed = PublicSuffix.parse(input)
      trd, sld, tld = results
      # The messages interpolate `input`: a bare `name` here would resolve to
      # Minitest's own test name rather than the value under test.
      assert_equal tld, parsed.tld, "Invalid tld for `#{input}`"
      assert_equal sld, parsed.sld, "Invalid sld for `#{input}`"
      if trd.nil?
        assert_nil parsed.trd, "Invalid trd for `#{input}`"
      else
        assert_equal trd, parsed.trd, "Invalid trd for `#{input}`"
      end

      assert_equal domain, PublicSuffix.domain(input)
      assert PublicSuffix.valid?(input)
    end
  end


  # Each entry is [input, error class PublicSuffix.parse is expected to raise].
  INVALID_CASES = [
    ["nic.bd",                  PublicSuffix::DomainNotAllowed],
    [nil,                       PublicSuffix::DomainInvalid],
    ["",                        PublicSuffix::DomainInvalid],
    ["  ",                      PublicSuffix::DomainInvalid],
  ].freeze

  # Invalid inputs must raise the listed error from parse and be reported as
  # not valid.
  def test_invalid
    INVALID_CASES.each do |(name, error)|
      assert_raises(error) { PublicSuffix.parse(name) }
      refute PublicSuffix.valid?(name)
    end
  end


  # Each entry is [input, expected return value of PublicSuffix.valid?(input)].
  REJECTED_CASES = [
    ["www. .com",               true],
    ["foo.co..uk",              true],
    ["goo,gle.com",             true],
    ["-google.com",             true],
    ["google-.com",             true],

    # This case was covered in GH-15.
    # I decided to cover this case because it's not easily reproducible with URI.parse
    # and can lead to several false positives.
    ["http://google.com",       false],
  ].freeze

  # Checks PublicSuffix.valid? against the expected flag and asserts that the
  # URI-based helper #valid_domain? rejects every one of these names.
  def test_rejected
    REJECTED_CASES.each do |name, expected|
      assert_equal expected, PublicSuffix.valid?(name),
                   "Expected %s to be %s" % [name.inspect, expected.inspect]
      refute valid_domain?(name),
             "#{name} expected to be invalid"
    end
  end


  # Each entry is [mixed-case input, expected [trd, sld, tld] in lower case].
  CASE_CASES = [
    ["Www.google.com", %w( www google com )],
    ["www.Google.com", %w( www google com )],
    ["www.google.Com", %w( www google com )],
  ].freeze

  # Parsing must return lower-case parts regardless of the input's case.
  def test_ignore_case
    CASE_CASES.each do |name, results|
      domain = PublicSuffix.parse(name)
      trd, sld, tld = results
      assert_equal tld, domain.tld, "Invalid tld for `#{name}'"
      assert_equal sld, domain.sld, "Invalid sld for `#{name}'"
      assert_equal trd, domain.trd, "Invalid trd for `#{name}'"
      assert PublicSuffix.valid?(name)
    end
  end


  # Each entry is [input, value passed as ignore_private:, expected
  # PublicSuffix.domain result (nil when no domain is expected)].
  INCLUDE_PRIVATE_CASES = [
    ["blogspot.com", true, "blogspot.com"],
    ["blogspot.com", false, nil],
    ["subdomain.blogspot.com", true, "blogspot.com"],
    ["subdomain.blogspot.com", false, "subdomain.blogspot.com"],
  ].freeze

  # Exercises the ignore_private: option of PublicSuffix.domain and
  # PublicSuffix.valid?.
  def test_ignore_private
    # test domain and parse
    INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
      if expected.nil?
        assert_nil PublicSuffix.domain(given, ignore_private: ignore_private)
      else
        assert_equal expected, PublicSuffix.domain(given, ignore_private: ignore_private)
      end
    end
    # test valid?
    INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
      assert_equal !expected.nil?, PublicSuffix.valid?(given, ignore_private: ignore_private)
    end
  end


  # Returns true when URI.parse(name) yields a URI with a host; returns false
  # when the host is nil or when parsing raises a StandardError.
  def valid_uri?(name)
    uri = URI.parse(name)
    !uri.host.nil?
  rescue
    false
  end

  # Returns true only when URI.parse(name) yields a URI that has a host and no
  # scheme; returns false otherwise, including when parsing raises.
  def valid_domain?(name)
    uri = URI.parse(name)
    !uri.host.nil? && uri.scheme.nil?
  rescue
    false
  end

end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require_relative "../../lib/public_suffix"
|
3
|
+
|
4
|
+
# Sample names looked up by the benchmark below.
NAME_SHORT  = "example.de"
NAME_MEDIUM = "www.subdomain.example.de"
NAME_LONG   = "one.two.three.four.five.example.de"
NAME_WILD   = "one.two.three.four.five.example.bd"
NAME_EXCP   = "one.two.three.four.five.www.ck"

IAAA = "www.example.ac"
IZZZ = "www.example.zone"

PAAA = "one.two.three.four.five.example.beep.pl"
PZZZ = "one.two.three.four.five.example.now.sh"

JP  = "www.yokoshibahikari.chiba.jp"
IT  = "www.example.it"
COM = "www.example.com"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize
# One lookup is performed before timing starts (presumably to load and warm
# the default list -- confirm against PublicSuffix::List).
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.find("example.com")

# Report label => name to look up, in reporting order.
benchmarked_names = {
  "NAME_SHORT"  => NAME_SHORT,
  "NAME_MEDIUM" => NAME_MEDIUM,
  "NAME_LONG"   => NAME_LONG,
  "NAME_WILD"   => NAME_WILD,
  "NAME_EXCP"   => NAME_EXCP,
  "IAAA"        => IAAA,
  "IZZZ"        => IZZZ,
  "PAAA"        => PAAA,
  "PZZZ"        => PZZZ,
  "JP"          => JP,
  "IT"          => IT,
  "COM"         => COM,
}

Benchmark.bmbm(25) do |x|
  benchmarked_names.each do |label, domain|
    x.report(label) do
      # The comparison result is discarded; it is kept so the timed
      # expression is unchanged.
      TIMES.times { PublicSuffixList.find(domain) != nil }
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require_relative "../../lib/public_suffix"
|
3
|
+
|
4
|
+
# Sample names looked up by the benchmark below.
NAME_SHORT  = "example.de"
NAME_MEDIUM = "www.subdomain.example.de"
NAME_LONG   = "one.two.three.four.five.example.de"
NAME_WILD   = "one.two.three.four.five.example.bd"
NAME_EXCP   = "one.two.three.four.five.www.ck"

IAAA = "www.example.ac"
IZZZ = "www.example.zone"

PAAA = "one.two.three.four.five.example.beep.pl"
PZZZ = "one.two.three.four.five.example.now.sh"

JP  = "www.yokoshibahikari.chiba.jp"
IT  = "www.example.it"
COM = "www.example.com"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize
# One lookup is performed before timing starts (presumably to load and warm
# the default list -- confirm against PublicSuffix::List).
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.find("example.com")

# Report label => name to look up, in reporting order.
benchmarked_names = {
  "NAME_SHORT"  => NAME_SHORT,
  "NAME_MEDIUM" => NAME_MEDIUM,
  "NAME_LONG"   => NAME_LONG,
  "NAME_WILD"   => NAME_WILD,
  "NAME_EXCP"   => NAME_EXCP,
  "IAAA"        => IAAA,
  "IZZZ"        => IZZZ,
  "PAAA"        => PAAA,
  "PZZZ"        => PZZZ,
  "JP"          => JP,
  "IT"          => IT,
  "COM"         => COM,
}

Benchmark.bmbm(25) do |x|
  # Each name is timed twice: with default options, then with
  # ignore_private: true (reported under the "(noprivate)" label).
  benchmarked_names.each do |label, domain|
    x.report(label) do
      TIMES.times { PublicSuffixList.find(domain) != nil }
    end
    x.report("#{label} (noprivate)") do
      TIMES.times { PublicSuffixList.find(domain, ignore_private: true) != nil }
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'benchmark/ips'
|
2
|
+
|
3
|
+
# Input handed to every tokenizer in this benchmark.
STRING = "www.subdomain.example.com".freeze

# Expected tokenizer output for STRING: its dot-separated suffixes, shortest
# first. Frozen so the shared fixtures cannot be mutated between runs.
ARRAY = %w[
  com
  example.com
  subdomain.example.com
  www.subdomain.example.com
].freeze
|
10
|
+
|
11
|
+
# Returns the dot-separated suffixes of +string+, shortest first
# (e.g. "a.b.c" => ["c", "b.c", "a.b.c"]).
#
# Strategy: split on ".", reverse the labels in place, then walk forward
# while prepending each label to the running suffix.
def tokenizer1(string)
  labels = string.split(".").reverse!
  suffix = labels[0]
  result = [suffix]

  pos = 1
  while pos < labels.size
    suffix = labels[pos] + "." + suffix
    result << suffix
    pos += 1
  end
  result
end
|
26
|
+
|
27
|
+
# Returns the dot-separated suffixes of +string+, shortest first
# (e.g. "a.b.c" => ["c", "b.c", "a.b.c"]).
#
# Strategy: split on "." and walk the labels from the last index down to 0,
# prepending each label to the running suffix (no array reversal).
def tokenizer2(string)
  labels = string.split(".")
  suffix = labels[labels.size - 1]
  result = [suffix]

  (labels.size - 2).downto(0) do |pos|
    suffix = labels[pos] + "." + suffix
    result << suffix
  end
  result
end
|
42
|
+
|
43
|
+
# Returns the dot-separated suffixes of +string+, shortest first
# (e.g. "a.b.c" => ["c", "b.c", "a.b.c"]).
#
# Strategy: no splitting; scan backwards for "." with String#rindex and slice
# each suffix out of the original string using a (start, length) pair.
def tokenizer3(string)
  last = string.size - 1
  cut = string.size
  result = []

  loop do
    found = string.rindex(".", cut - 1)
    # -1 stands for "no earlier dot", so the slice below starts at index 0.
    cut = found.nil? ? -1 : found
    result.push(string[cut + 1, last - cut])

    return result unless cut > 0
  end
end
|
56
|
+
|
57
|
+
# Returns the dot-separated suffixes of +string+, shortest first
# (e.g. "a.b.c" => ["c", "b.c", "a.b.c"]).
#
# Strategy: same backwards String#rindex scan as tokenizer3, but each suffix
# is sliced with a Range instead of a (start, length) pair.
def tokenizer4(string)
  last = string.size - 1
  cut = string.size
  result = []

  loop do
    found = string.rindex(".", cut - 1)
    # -1 stands for "no earlier dot", so the slice below starts at index 0.
    cut = found.nil? ? -1 : found
    result.push(string[(cut + 1)..last])

    return result unless cut > 0
  end
end
|
70
|
+
|
71
|
+
# Sanity check: abort before benchmarking if any tokenizer does not produce
# ARRAY for STRING.
%i[tokenizer1 tokenizer2 tokenizer3 tokenizer4].each do |tokenizer|
  output = send(tokenizer, STRING)
  raise "#{tokenizer} failed: #{output.inspect}" unless output == ARRAY
end

# Compare iterations per second of the four tokenizers. Each report calls its
# tokenizer directly so that only the tokenizer itself is measured.
Benchmark.ips do |bench|
  bench.report("tokenizer1") { tokenizer1(STRING).is_a?(Array) }
  bench.report("tokenizer2") { tokenizer2(STRING).is_a?(Array) }
  bench.report("tokenizer3") { tokenizer3(STRING).is_a?(Array) }
  bench.report("tokenizer4") { tokenizer4(STRING).is_a?(Array) }

  bench.compare!
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require_relative "../../lib/public_suffix"
|
3
|
+
|
4
|
+
# Name looked up by both reports below.
JP = "www.yokoshibahikari.chiba.jp"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize
# Make List#select callable from this script (presumably it is not public in
# the library -- confirm against PublicSuffix::List).
PublicSuffix::List.send(:public, :select)
PublicSuffixList = PublicSuffix::List.default
# One call to each method is made before timing starts.
PublicSuffixList.select("example.jp")
PublicSuffixList.find("example.jp")

Benchmark.bmbm(25) do |bench|
  bench.report("JP select") { TIMES.times { PublicSuffixList.select(JP) } }
  bench.report("JP find")   { TIMES.times { PublicSuffixList.find(JP) } }
  # bench.report("JP (noprivate)") do
  #   TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil }
  # end
end
|