public_suffix 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +36 -0
  4. data/.rubocop_defaults.yml +179 -0
  5. data/.ruby-gemset +1 -0
  6. data/.travis.yml +31 -0
  7. data/.yardopts +1 -0
  8. data/2.0-Upgrade.md +52 -0
  9. data/CHANGELOG.md +353 -0
  10. data/Gemfile +12 -0
  11. data/LICENSE.txt +22 -0
  12. data/README.md +202 -0
  13. data/Rakefile +51 -0
  14. data/bin/console +15 -0
  15. data/data/list.txt +12966 -0
  16. data/lib/public_suffix.rb +179 -0
  17. data/lib/public_suffix/domain.rb +235 -0
  18. data/lib/public_suffix/errors.rb +41 -0
  19. data/lib/public_suffix/list.rb +247 -0
  20. data/lib/public_suffix/rule.rb +350 -0
  21. data/lib/public_suffix/version.rb +13 -0
  22. data/public_suffix.gemspec +25 -0
  23. data/test/.empty +2 -0
  24. data/test/acceptance_test.rb +129 -0
  25. data/test/benchmarks/bm_find.rb +66 -0
  26. data/test/benchmarks/bm_find_all.rb +102 -0
  27. data/test/benchmarks/bm_names.rb +91 -0
  28. data/test/benchmarks/bm_select.rb +26 -0
  29. data/test/benchmarks/bm_select_incremental.rb +25 -0
  30. data/test/benchmarks/bm_valid.rb +101 -0
  31. data/test/profilers/domain_profiler.rb +12 -0
  32. data/test/profilers/find_profiler.rb +12 -0
  33. data/test/profilers/find_profiler_jp.rb +12 -0
  34. data/test/profilers/initialization_profiler.rb +11 -0
  35. data/test/profilers/list_profsize.rb +11 -0
  36. data/test/profilers/object_binsize.rb +57 -0
  37. data/test/psl_test.rb +52 -0
  38. data/test/test_helper.rb +18 -0
  39. data/test/tests.txt +98 -0
  40. data/test/unit/domain_test.rb +106 -0
  41. data/test/unit/errors_test.rb +25 -0
  42. data/test/unit/list_test.rb +241 -0
  43. data/test/unit/public_suffix_test.rb +188 -0
  44. data/test/unit/rule_test.rb +222 -0
  45. metadata +151 -0
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # = Public Suffix
5
+ #
6
+ # Domain name parser based on the Public Suffix List.
7
+ #
8
+ # Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
9
+
10
module PublicSuffix
  # Version string of this release of the library.
  VERSION = "3.1.1"
end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $LOAD_PATH.push File.expand_path("../lib", __FILE__)
3
+ require "public_suffix/version"
4
+
5
# Gem specification for public_suffix. The version comes from
# PublicSuffix::VERSION, and the packaged file lists are taken from the
# output of `git ls-files`.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "public_suffix"
  spec.version     = PublicSuffix::VERSION
  spec.authors     = ["Simone Carletti"]
  spec.email       = ["weppos@weppos.net"]
  spec.homepage    = "https://simonecarletti.com/code/publicsuffix-ruby"
  spec.summary     = "Domain name parser based on the Public Suffix List."
  spec.description = "PublicSuffix can parse and decompose a domain name into top level domain, domain and subdomains."
  spec.licenses    = ["MIT"]

  spec.required_ruby_version = ">= 2.1"

  # Packaged content: one path per line of git output.
  spec.require_paths    = ["lib"]
  spec.files            = `git ls-files`.split("\n")
  spec.test_files       = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.extra_rdoc_files = ["LICENSE.txt"]

  # Development-only dependencies, registered in the original order.
  %w[rake mocha yard].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
@@ -0,0 +1,2 @@
1
+ # This is an empty file I use to force a non-empty commit when I only need to store notes
2
+ ..
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
# Acceptance tests that drive the public PublicSuffix entry points
# (parse, domain, valid?) with table-driven cases.
class AcceptanceTest < Minitest::Test

  # Each entry: [input, expected registrable domain, [trd, sld, tld]].
  VALID_CASES = [
    ["example.com", "example.com", [nil, "example", "com"]],
    ["foo.example.com", "example.com", ["foo", "example", "com"]],

    ["verybritish.co.uk", "verybritish.co.uk", [nil, "verybritish", "co.uk"]],
    ["foo.verybritish.co.uk", "verybritish.co.uk", ["foo", "verybritish", "co.uk"]],

    ["parliament.uk", "parliament.uk", [nil, "parliament", "uk"]],
    ["foo.parliament.uk", "parliament.uk", ["foo", "parliament", "uk"]],
  ].freeze

  # Every valid case must parse into the expected parts, resolve to the
  # expected domain and be reported as valid.
  def test_valid
    VALID_CASES.each do |input, domain, results|
      parsed = PublicSuffix.parse(input)
      trd, sld, tld = results
      # The messages interpolate `input`: a bare `name` here would resolve to
      # the Minitest test name, not the domain that failed.
      assert_equal tld, parsed.tld, "Invalid tld for `#{input}`"
      assert_equal sld, parsed.sld, "Invalid sld for `#{input}`"
      if trd.nil?
        assert_nil parsed.trd, "Invalid trd for `#{input}`"
      else
        assert_equal trd, parsed.trd, "Invalid trd for `#{input}`"
      end

      assert_equal domain, PublicSuffix.domain(input)
      assert PublicSuffix.valid?(input)
    end
  end


  # Each entry: [input, error class PublicSuffix.parse is expected to raise].
  INVALID_CASES = [
    ["nic.bd", PublicSuffix::DomainNotAllowed],
    [nil, PublicSuffix::DomainInvalid],
    ["", PublicSuffix::DomainInvalid],
    ["  ", PublicSuffix::DomainInvalid],
  ].freeze

  # Every invalid case must raise from parse and be reported as not valid.
  def test_invalid
    INVALID_CASES.each do |(name, error)|
      assert_raises(error) { PublicSuffix.parse(name) }
      refute PublicSuffix.valid?(name)
    end
  end


  # Each entry: [input, value PublicSuffix.valid? is expected to return].
  # Every input must additionally be rejected by #valid_domain?.
  REJECTED_CASES = [
    ["www. .com", true],
    ["foo.co..uk", true],
    ["goo,gle.com", true],
    ["-google.com", true],
    ["google-.com", true],

    # This case was covered in GH-15.
    # I decided to cover this case because it's not easily reproducible with URI.parse
    # and can lead to several false positives.
    ["http://google.com", false],
  ].freeze

  # Compares PublicSuffix.valid? with the expected flag and checks that the
  # URI-based helper refuses each input.
  def test_rejected
    REJECTED_CASES.each do |name, expected|
      assert_equal expected, PublicSuffix.valid?(name),
                   format("Expected %s to be %s", name.inspect, expected.inspect)
      refute valid_domain?(name),
             "#{name} expected to be invalid"
    end
  end


  # Each entry: [mixed-case input, [trd, sld, tld] expected in lower case].
  CASE_CASES = [
    ["Www.google.com", %w( www google com )],
    ["www.Google.com", %w( www google com )],
    ["www.google.Com", %w( www google com )],
  ].freeze

  # Parsing must yield the lower-case parts regardless of the input's case.
  def test_ignore_case
    CASE_CASES.each do |name, results|
      domain = PublicSuffix.parse(name)
      trd, sld, tld = results
      assert_equal tld, domain.tld, "Invalid tld for `#{name}'"
      assert_equal sld, domain.sld, "Invalid sld for `#{name}'"
      assert_equal trd, domain.trd, "Invalid trd for `#{name}'"
      assert PublicSuffix.valid?(name)
    end
  end


  # Each entry: [input, ignore_private flag, expected domain or nil].
  INCLUDE_PRIVATE_CASES = [
    ["blogspot.com", true, "blogspot.com"],
    ["blogspot.com", false, nil],
    ["subdomain.blogspot.com", true, "blogspot.com"],
    ["subdomain.blogspot.com", false, "subdomain.blogspot.com"],
  ].freeze

  # Checks PublicSuffix.domain and PublicSuffix.valid? with the
  # ignore_private option for each case.
  def test_ignore_private
    # test domain and parse
    INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
      if expected.nil?
        assert_nil PublicSuffix.domain(given, ignore_private: ignore_private)
      else
        assert_equal expected, PublicSuffix.domain(given, ignore_private: ignore_private)
      end
    end
    # test valid?
    INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
      assert_equal !expected.nil?, PublicSuffix.valid?(given, ignore_private: ignore_private)
    end
  end


  # Returns true when URI.parse accepts +name+ and finds a host in it;
  # any StandardError raised while parsing is treated as "not valid".
  def valid_uri?(name)
    uri = URI.parse(name)
    !uri.host.nil?
  rescue
    false
  end

  # Returns true when URI.parse finds a host in +name+ and no scheme;
  # any StandardError raised while parsing is treated as "not valid".
  def valid_domain?(name)
    uri = URI.parse(name)
    !uri.host.nil? && uri.scheme.nil?
  rescue
    false
  end

end
@@ -0,0 +1,66 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
# Sample host names used as lookup inputs.
# NOTE(review): NAME_WILD / NAME_EXCP presumably target wildcard and exception
# rules, and I*/P* the first/last entries of two list sections — confirm
# against the bundled list.
NAME_SHORT  = "example.de"
NAME_MEDIUM = "www.subdomain.example.de"
NAME_LONG   = "one.two.three.four.five.example.de"
NAME_WILD   = "one.two.three.four.five.example.bd"
NAME_EXCP   = "one.two.three.four.five.www.ck"

IAAA = "www.example.ac"
IZZZ = "www.example.zone"

PAAA = "one.two.three.four.five.example.beep.pl"
PZZZ = "one.two.three.four.five.example.now.sh"

JP  = "www.yokoshibahikari.chiba.jp"
IT  = "www.example.it"
COM = "www.example.com"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize: fetch the default list and run one lookup before timing starts.
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.find("example.com")

# Report label => host name, in the order the reports are printed.
samples = [
  ["NAME_SHORT", NAME_SHORT],
  ["NAME_MEDIUM", NAME_MEDIUM],
  ["NAME_LONG", NAME_LONG],
  ["NAME_WILD", NAME_WILD],
  ["NAME_EXCP", NAME_EXCP],
  ["IAAA", IAAA],
  ["IZZZ", IZZZ],
  ["PAAA", PAAA],
  ["PZZZ", PZZZ],
  ["JP", JP],
  ["IT", IT],
  ["COM", COM],
]

Benchmark.bmbm(25) do |bench|
  samples.each do |label, hostname|
    bench.report(label) do
      TIMES.times { PublicSuffixList.find(hostname) != nil }
    end
  end
end
@@ -0,0 +1,102 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
# Sample host names used as lookup inputs.
# NOTE(review): NAME_WILD / NAME_EXCP presumably target wildcard and exception
# rules, and I*/P* the first/last entries of two list sections — confirm
# against the bundled list.
NAME_SHORT  = "example.de"
NAME_MEDIUM = "www.subdomain.example.de"
NAME_LONG   = "one.two.three.four.five.example.de"
NAME_WILD   = "one.two.three.four.five.example.bd"
NAME_EXCP   = "one.two.three.four.five.www.ck"

IAAA = "www.example.ac"
IZZZ = "www.example.zone"

PAAA = "one.two.three.four.five.example.beep.pl"
PZZZ = "one.two.three.four.five.example.now.sh"

JP  = "www.yokoshibahikari.chiba.jp"
IT  = "www.example.it"
COM = "www.example.com"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize: fetch the default list and run one lookup before timing starts.
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.find("example.com")

# Report label => host name, in the order the reports are printed.
samples = [
  ["NAME_SHORT", NAME_SHORT],
  ["NAME_MEDIUM", NAME_MEDIUM],
  ["NAME_LONG", NAME_LONG],
  ["NAME_WILD", NAME_WILD],
  ["NAME_EXCP", NAME_EXCP],
  ["IAAA", IAAA],
  ["IZZZ", IZZZ],
  ["PAAA", PAAA],
  ["PZZZ", PZZZ],
  ["JP", JP],
  ["IT", IT],
  ["COM", COM],
]

Benchmark.bmbm(25) do |bench|
  samples.each do |label, hostname|
    # Default lookup first, then the same lookup with ignore_private: true.
    bench.report(label) do
      TIMES.times { PublicSuffixList.find(hostname) != nil }
    end
    bench.report("#{label} (noprivate)") do
      TIMES.times { PublicSuffixList.find(hostname, ignore_private: true) != nil }
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'benchmark/ips'
2
+
3
# Input fed to every tokenizer.
STRING = "www.subdomain.example.com"

# Expected tokenizer output for STRING: each dot-separated suffix, from the
# shortest to the full name.
ARRAY = [
  "com",
  "example.com",
  "subdomain.example.com",
  "www.subdomain.example.com",
]
10
+
11
# Variant 1: split on ".", reverse the labels in place, then walk forward,
# prepending one label at a time to the running suffix.
#
# @param string [String] dot-separated name
# @return [Array] suffixes from the last label up to the full name
def tokenizer1(string)
  labels = string.split(".").reverse!
  cursor = 0
  suffix = labels[cursor]
  collected = []

  loop do
    collected << suffix

    cursor += 1
    break if cursor >= labels.size
    suffix = labels[cursor] + "." + suffix
  end
  collected
end
26
+
27
# Variant 2: split on "." and walk the labels backwards by index (no
# reverse), prepending one label at a time to the running suffix.
#
# @param string [String] dot-separated name
# @return [Array] suffixes from the last label up to the full name
def tokenizer2(string)
  labels = string.split(".")
  cursor = labels.size - 1
  suffix = labels[cursor]
  collected = []

  loop do
    collected << suffix

    cursor -= 1
    break if cursor < 0
    suffix = labels[cursor] + "." + suffix
  end
  collected
end
42
+
43
# Variant 3: no split; scan for "." from the right with rindex and cut each
# suffix out of the original string using the (start, length) slice form.
#
# @param string [String] dot-separated name
# @return [Array<String>] suffixes from the last label up to the full name
def tokenizer3(string)
  dot_at = string.size
  last_index = string.size - 1
  collected = []

  loop do
    # -1 once no dot is left, so the final slice starts at index 0.
    dot_at = string.rindex(".", dot_at - 1) || -1
    collected << string[dot_at + 1, last_index - dot_at]

    break if dot_at <= 0
  end
  collected
end
56
+
57
# Variant 4: same right-to-left rindex scan as variant 3, but each suffix is
# cut out with a range slice instead of (start, length).
#
# @param string [String] dot-separated name
# @return [Array<String>] suffixes from the last label up to the full name
def tokenizer4(string)
  dot_at = string.size
  last_index = string.size - 1
  collected = []

  loop do
    # -1 once no dot is left, so the final slice starts at index 0.
    dot_at = string.rindex(".", dot_at - 1) || -1
    collected << string[(dot_at + 1)..last_index]

    break if dot_at <= 0
  end
  collected
end
70
+
71
# Sanity check: abort before benchmarking if any variant does not produce
# ARRAY for STRING.
%i[tokenizer1 tokenizer2 tokenizer3 tokenizer4].each do |variant|
  actual = send(variant, STRING)
  raise("#{variant} failed: #{actual.inspect}") unless actual == ARRAY
end

# Each report calls its variant directly so no extra dispatch is measured.
Benchmark.ips do |bench|
  bench.report("tokenizer1") { tokenizer1(STRING).is_a?(Array) }
  bench.report("tokenizer2") { tokenizer2(STRING).is_a?(Array) }
  bench.report("tokenizer3") { tokenizer3(STRING).is_a?(Array) }
  bench.report("tokenizer4") { tokenizer4(STRING).is_a?(Array) }

  bench.compare!
end
@@ -0,0 +1,26 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
# Host name looked up by both reports.
JP = "www.yokoshibahikari.chiba.jp"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize: make List#select callable from outside the class (presumably it
# is not public in the library — confirm), then fetch the default list and
# run one call of each method before timing starts.
PublicSuffix::List.send(:public, :select)
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.select("example.jp")
PublicSuffixList.find("example.jp")

Benchmark.bmbm(25) do |bench|
  bench.report("JP select") { TIMES.times { PublicSuffixList.select(JP) } }
  bench.report("JP find") { TIMES.times { PublicSuffixList.find(JP) } }
  # Disabled report, kept for reference:
  # bench.report("JP (noprivate)") do
  #   TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil }
  # end
end