public_suffix 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +36 -0
  4. data/.rubocop_defaults.yml +179 -0
  5. data/.ruby-gemset +1 -0
  6. data/.travis.yml +31 -0
  7. data/.yardopts +1 -0
  8. data/2.0-Upgrade.md +52 -0
  9. data/CHANGELOG.md +353 -0
  10. data/Gemfile +12 -0
  11. data/LICENSE.txt +22 -0
  12. data/README.md +202 -0
  13. data/Rakefile +51 -0
  14. data/bin/console +15 -0
  15. data/data/list.txt +12966 -0
  16. data/lib/public_suffix.rb +179 -0
  17. data/lib/public_suffix/domain.rb +235 -0
  18. data/lib/public_suffix/errors.rb +41 -0
  19. data/lib/public_suffix/list.rb +247 -0
  20. data/lib/public_suffix/rule.rb +350 -0
  21. data/lib/public_suffix/version.rb +13 -0
  22. data/public_suffix.gemspec +25 -0
  23. data/test/.empty +2 -0
  24. data/test/acceptance_test.rb +129 -0
  25. data/test/benchmarks/bm_find.rb +66 -0
  26. data/test/benchmarks/bm_find_all.rb +102 -0
  27. data/test/benchmarks/bm_names.rb +91 -0
  28. data/test/benchmarks/bm_select.rb +26 -0
  29. data/test/benchmarks/bm_select_incremental.rb +25 -0
  30. data/test/benchmarks/bm_valid.rb +101 -0
  31. data/test/profilers/domain_profiler.rb +12 -0
  32. data/test/profilers/find_profiler.rb +12 -0
  33. data/test/profilers/find_profiler_jp.rb +12 -0
  34. data/test/profilers/initialization_profiler.rb +11 -0
  35. data/test/profilers/list_profsize.rb +11 -0
  36. data/test/profilers/object_binsize.rb +57 -0
  37. data/test/psl_test.rb +52 -0
  38. data/test/test_helper.rb +18 -0
  39. data/test/tests.txt +98 -0
  40. data/test/unit/domain_test.rb +106 -0
  41. data/test/unit/errors_test.rb +25 -0
  42. data/test/unit/list_test.rb +241 -0
  43. data/test/unit/public_suffix_test.rb +188 -0
  44. data/test/unit/rule_test.rb +222 -0
  45. metadata +151 -0
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # = Public Suffix
5
+ #
6
+ # Domain name parser based on the Public Suffix List.
7
+ #
8
+ # Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
9
+
10
+ module PublicSuffix
11
+ # The current library version.
12
+ VERSION = "3.1.1" # read by public_suffix.gemspec (s.version = PublicSuffix::VERSION)
13
+ end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $LOAD_PATH.push File.expand_path("../lib", __FILE__)
3
+ require "public_suffix/version"
4
+
5
+ Gem::Specification.new do |s| # gem metadata for public_suffix
6
+ s.name = "public_suffix"
7
+ s.version = PublicSuffix::VERSION # constant comes from public_suffix/version, required above
8
+ s.authors = ["Simone Carletti"]
9
+ s.email = ["weppos@weppos.net"]
10
+ s.homepage = "https://simonecarletti.com/code/publicsuffix-ruby"
11
+ s.summary = "Domain name parser based on the Public Suffix List."
12
+ s.description = "PublicSuffix can parse and decompose a domain name into top level domain, domain and subdomains."
13
+ s.licenses = ["MIT"]
14
+
15
+ s.required_ruby_version = ">= 2.1"
16
+
17
+ s.require_paths = ["lib"]
18
+ s.files = `git ls-files`.split("\n") # shells out to git: the packaged file list is whatever `git ls-files` prints, one path per line
19
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") # same git listing, limited to the {test,spec,features}/* pathspec
20
+ s.extra_rdoc_files = %w( LICENSE.txt )
21
+
22
+ s.add_development_dependency "rake"
23
+ s.add_development_dependency "mocha"
24
+ s.add_development_dependency "yard"
25
+ end
@@ -0,0 +1,2 @@
1
+ # This is an empty file I use to force a non-empty commit when I only need to store notes
2
+ ..
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
+ class AcceptanceTest < Minitest::Test
6
+
7
+ VALID_CASES = [
8
+ ["example.com", "example.com", [nil, "example", "com"]],
9
+ ["foo.example.com", "example.com", ["foo", "example", "com"]],
10
+
11
+ ["verybritish.co.uk", "verybritish.co.uk", [nil, "verybritish", "co.uk"]],
12
+ ["foo.verybritish.co.uk", "verybritish.co.uk", ["foo", "verybritish", "co.uk"]],
13
+
14
+ ["parliament.uk", "parliament.uk", [nil, "parliament", "uk"]],
15
+ ["foo.parliament.uk", "parliament.uk", ["foo", "parliament", "uk"]],
16
+ ].freeze
17
+
18
+ def test_valid
19
+ VALID_CASES.each do |input, domain, results|
20
+ parsed = PublicSuffix.parse(input)
21
+ trd, sld, tld = results
22
+ assert_equal tld, parsed.tld, "Invalid tld for `#{name}`"
23
+ assert_equal sld, parsed.sld, "Invalid sld for `#{name}`"
24
+ if trd.nil?
25
+ assert_nil parsed.trd, "Invalid trd for `#{name}`"
26
+ else
27
+ assert_equal trd, parsed.trd, "Invalid trd for `#{name}`"
28
+ end
29
+
30
+ assert_equal domain, PublicSuffix.domain(input)
31
+ assert PublicSuffix.valid?(input)
32
+ end
33
+ end
34
+
35
+
36
+ INVALID_CASES = [
37
+ ["nic.bd", PublicSuffix::DomainNotAllowed],
38
+ [nil, PublicSuffix::DomainInvalid],
39
+ ["", PublicSuffix::DomainInvalid],
40
+ [" ", PublicSuffix::DomainInvalid],
41
+ ].freeze
42
+
43
+ def test_invalid # Each INVALID_CASES row is [name, expected error class].
44
+ INVALID_CASES.each do |(name, error)|
45
+ assert_raises(error) { PublicSuffix.parse(name) } # parse must raise the error class listed for the row
46
+ assert !PublicSuffix.valid?(name) # valid? must return a falsy value for the same input
47
+ end
48
+ end
49
+
50
+
51
+ REJECTED_CASES = [
52
+ ["www. .com", true],
53
+ ["foo.co..uk", true],
54
+ ["goo,gle.com", true],
55
+ ["-google.com", true],
56
+ ["google-.com", true],
57
+
58
+ # This case was covered in GH-15.
59
+ # I decided to cover this case because it's not easily reproducible with URI.parse
60
+ # and can lead to several false positives.
61
+ ["http://google.com", false],
62
+ ].freeze
63
+
64
+ def test_rejected # Each REJECTED_CASES row is [name, expected result of PublicSuffix.valid?].
65
+ REJECTED_CASES.each do |name, expected| # every name must also be rejected by the URI-based valid_domain? helper
66
+ assert_equal expected, PublicSuffix.valid?(name),
67
+ "Expected %s to be %s" % [name.inspect, expected.inspect]
68
+ assert !valid_domain?(name),
69
+ "#{name} expected to be invalid"
70
+ end
71
+ end
72
+
73
+
74
+ CASE_CASES = [
75
+ ["Www.google.com", %w( www google com )],
76
+ ["www.Google.com", %w( www google com )],
77
+ ["www.google.Com", %w( www google com )],
78
+ ].freeze
79
+
80
+ def test_ignore_case # Each CASE_CASES row is [mixed-case name, expected [trd, sld, tld]]; the expected parts are all lowercase.
81
+ CASE_CASES.each do |name, results|
82
+ domain = PublicSuffix.parse(name)
83
+ trd, sld, tld = results
84
+ assert_equal tld, domain.tld, "Invalid tld for `#{name}'"
85
+ assert_equal sld, domain.sld, "Invalid sld for `#{name}'"
86
+ assert_equal trd, domain.trd, "Invalid trd for `#{name}'"
87
+ assert PublicSuffix.valid?(name) # the mixed-case input itself must be reported valid
88
+ end
89
+ end
90
+
91
+
92
+ INCLUDE_PRIVATE_CASES = [
93
+ ["blogspot.com", true, "blogspot.com"],
94
+ ["blogspot.com", false, nil],
95
+ ["subdomain.blogspot.com", true, "blogspot.com"],
96
+ ["subdomain.blogspot.com", false, "subdomain.blogspot.com"],
97
+ ].freeze
98
+
99
+ def test_ignore_private # Each INCLUDE_PRIVATE_CASES row is [given, ignore_private flag, expected domain or nil].
100
+ # test domain and parse
101
+ INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
102
+ if expected.nil?
103
+ assert_nil PublicSuffix.domain(given, ignore_private: ignore_private) # a nil expectation means domain must return nil
104
+ else
105
+ assert_equal expected, PublicSuffix.domain(given, ignore_private: ignore_private)
106
+ end
107
+ end
108
+ # test valid?
109
+ INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected|
110
+ assert_equal !expected.nil?, PublicSuffix.valid?(given, ignore_private: ignore_private) # valid? must be true exactly when a domain is expected
111
+ end
112
+ end
113
+
114
+
115
+ def valid_uri?(name) # True when URI.parse(name) yields a non-nil host; no caller is visible in this file.
116
+ uri = URI.parse(name)
117
+ !uri.host.nil?
118
+ rescue # bare rescue: any StandardError raised while parsing counts as "not a URI"
119
+ false
120
+ end
121
+
122
+ def valid_domain?(name) # True only when URI.parse(name) yields a host and no scheme; used by test_rejected.
123
+ uri = URI.parse(name)
124
+ !uri.host.nil? && uri.scheme.nil? # NOTE(review): a bare name such as "example.com" appears to parse with a nil host, so this looks false for plain domains - confirm intent
125
+ rescue # bare rescue: any StandardError raised while parsing counts as invalid
126
+ false
127
+ end
128
+
129
+ end
@@ -0,0 +1,66 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
+ NAME_SHORT = "example.de" # benchmark inputs of increasing label count under the same suffix
5
+ NAME_MEDIUM = "www.subdomain.example.de"
6
+ NAME_LONG = "one.two.three.four.five.example.de"
7
+ NAME_WILD = "one.two.three.four.five.example.bd" # presumably exercises a wildcard rule - confirm against data/list.txt
8
+ NAME_EXCP = "one.two.three.four.five.www.ck" # presumably exercises an exception rule - confirm against data/list.txt
9
+
10
+ IAAA = "www.example.ac" # NOTE(review): I*/P* look like ICANN vs private names near the start (AAA) and end (ZZZ) of the list - confirm
11
+ IZZZ = "www.example.zone"
12
+
13
+ PAAA = "one.two.three.four.five.example.beep.pl"
14
+ PZZZ = "one.two.three.four.five.example.now.sh"
15
+
16
+ JP = "www.yokoshibahikari.chiba.jp"
17
+ IT = "www.example.it"
18
+ COM = "www.example.com"
19
+
20
+ TIMES = (ARGV.first || 50_000).to_i # iterations per report; the first command-line argument overrides the 50_000 default
21
+
22
+ # Initialize
23
+ PublicSuffixList = PublicSuffix::List.default # one shared list object used by every report
24
+ PublicSuffixList.find("example.com") # a single lookup before timing starts - presumably a warm-up; confirm
25
+
26
+ Benchmark.bmbm(25) do |x|
27
+ x.report("NAME_SHORT") do
28
+ TIMES.times { PublicSuffixList.find(NAME_SHORT) != nil }
29
+ end
30
+ x.report("NAME_MEDIUM") do
31
+ TIMES.times { PublicSuffixList.find(NAME_MEDIUM) != nil }
32
+ end
33
+ x.report("NAME_LONG") do
34
+ TIMES.times { PublicSuffixList.find(NAME_LONG) != nil }
35
+ end
36
+ x.report("NAME_WILD") do
37
+ TIMES.times { PublicSuffixList.find(NAME_WILD) != nil }
38
+ end
39
+ x.report("NAME_EXCP") do
40
+ TIMES.times { PublicSuffixList.find(NAME_EXCP) != nil }
41
+ end
42
+
43
+ x.report("IAAA") do
44
+ TIMES.times { PublicSuffixList.find(IAAA) != nil }
45
+ end
46
+ x.report("IZZZ") do
47
+ TIMES.times { PublicSuffixList.find(IZZZ) != nil }
48
+ end
49
+
50
+ x.report("PAAA") do
51
+ TIMES.times { PublicSuffixList.find(PAAA) != nil }
52
+ end
53
+ x.report("PZZZ") do
54
+ TIMES.times { PublicSuffixList.find(PZZZ) != nil }
55
+ end
56
+
57
+ x.report("JP") do
58
+ TIMES.times { PublicSuffixList.find(JP) != nil }
59
+ end
60
+ x.report("IT") do
61
+ TIMES.times { PublicSuffixList.find(IT) != nil }
62
+ end
63
+ x.report("COM") do
64
+ TIMES.times { PublicSuffixList.find(COM) != nil }
65
+ end
66
+ end
@@ -0,0 +1,102 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
+ NAME_SHORT = "example.de" # benchmark inputs of increasing label count under the same suffix
5
+ NAME_MEDIUM = "www.subdomain.example.de"
6
+ NAME_LONG = "one.two.three.four.five.example.de"
7
+ NAME_WILD = "one.two.three.four.five.example.bd" # presumably exercises a wildcard rule - confirm against data/list.txt
8
+ NAME_EXCP = "one.two.three.four.five.www.ck" # presumably exercises an exception rule - confirm against data/list.txt
9
+
10
+ IAAA = "www.example.ac" # NOTE(review): I*/P* look like ICANN vs private names near the start (AAA) and end (ZZZ) of the list - confirm
11
+ IZZZ = "www.example.zone"
12
+
13
+ PAAA = "one.two.three.four.five.example.beep.pl"
14
+ PZZZ = "one.two.three.four.five.example.now.sh"
15
+
16
+ JP = "www.yokoshibahikari.chiba.jp"
17
+ IT = "www.example.it"
18
+ COM = "www.example.com"
19
+
20
+ TIMES = (ARGV.first || 50_000).to_i # iterations per report; the first command-line argument overrides the 50_000 default
21
+
22
+ # Initialize
23
+ PublicSuffixList = PublicSuffix::List.default # one shared list object used by every report
24
+ PublicSuffixList.find("example.com") # a single lookup before timing starts - presumably a warm-up; confirm
25
+
26
+ Benchmark.bmbm(25) do |x|
27
+ x.report("NAME_SHORT") do
28
+ TIMES.times { PublicSuffixList.find(NAME_SHORT) != nil }
29
+ end
30
+ x.report("NAME_SHORT (noprivate)") do
31
+ TIMES.times { PublicSuffixList.find(NAME_SHORT, ignore_private: true) != nil }
32
+ end
33
+ x.report("NAME_MEDIUM") do
34
+ TIMES.times { PublicSuffixList.find(NAME_MEDIUM) != nil }
35
+ end
36
+ x.report("NAME_MEDIUM (noprivate)") do
37
+ TIMES.times { PublicSuffixList.find(NAME_MEDIUM, ignore_private: true) != nil }
38
+ end
39
+ x.report("NAME_LONG") do
40
+ TIMES.times { PublicSuffixList.find(NAME_LONG) != nil }
41
+ end
42
+ x.report("NAME_LONG (noprivate)") do
43
+ TIMES.times { PublicSuffixList.find(NAME_LONG, ignore_private: true) != nil }
44
+ end
45
+ x.report("NAME_WILD") do
46
+ TIMES.times { PublicSuffixList.find(NAME_WILD) != nil }
47
+ end
48
+ x.report("NAME_WILD (noprivate)") do
49
+ TIMES.times { PublicSuffixList.find(NAME_WILD, ignore_private: true) != nil }
50
+ end
51
+ x.report("NAME_EXCP") do
52
+ TIMES.times { PublicSuffixList.find(NAME_EXCP) != nil }
53
+ end
54
+ x.report("NAME_EXCP (noprivate)") do
55
+ TIMES.times { PublicSuffixList.find(NAME_EXCP, ignore_private: true) != nil }
56
+ end
57
+
58
+ x.report("IAAA") do
59
+ TIMES.times { PublicSuffixList.find(IAAA) != nil }
60
+ end
61
+ x.report("IAAA (noprivate)") do
62
+ TIMES.times { PublicSuffixList.find(IAAA, ignore_private: true) != nil }
63
+ end
64
+ x.report("IZZZ") do
65
+ TIMES.times { PublicSuffixList.find(IZZZ) != nil }
66
+ end
67
+ x.report("IZZZ (noprivate)") do
68
+ TIMES.times { PublicSuffixList.find(IZZZ, ignore_private: true) != nil }
69
+ end
70
+
71
+ x.report("PAAA") do
72
+ TIMES.times { PublicSuffixList.find(PAAA) != nil }
73
+ end
74
+ x.report("PAAA (noprivate)") do
75
+ TIMES.times { PublicSuffixList.find(PAAA, ignore_private: true) != nil }
76
+ end
77
+ x.report("PZZZ") do
78
+ TIMES.times { PublicSuffixList.find(PZZZ) != nil }
79
+ end
80
+ x.report("PZZZ (noprivate)") do
81
+ TIMES.times { PublicSuffixList.find(PZZZ, ignore_private: true) != nil }
82
+ end
83
+
84
+ x.report("JP") do
85
+ TIMES.times { PublicSuffixList.find(JP) != nil }
86
+ end
87
+ x.report("JP (noprivate)") do
88
+ TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil }
89
+ end
90
+ x.report("IT") do
91
+ TIMES.times { PublicSuffixList.find(IT) != nil }
92
+ end
93
+ x.report("IT (noprivate)") do
94
+ TIMES.times { PublicSuffixList.find(IT, ignore_private: true) != nil }
95
+ end
96
+ x.report("COM") do
97
+ TIMES.times { PublicSuffixList.find(COM) != nil }
98
+ end
99
+ x.report("COM (noprivate)") do
100
+ TIMES.times { PublicSuffixList.find(COM, ignore_private: true) != nil }
101
+ end
102
+ end
@@ -0,0 +1,91 @@
1
+ require 'benchmark/ips'
2
+
3
+ STRING = "www.subdomain.example.com"
4
+ ARRAY = %w(
5
+ com
6
+ example.com
7
+ subdomain.example.com
8
+ www.subdomain.example.com
9
+ )
10
+
11
+ def tokenizer1(string) # Candidate 1: split on ".", reverse in place, then grow the suffix one label per pass; returns suffixes shortest first.
12
+ parts = string.split(".").reverse! # labels ordered right-to-left
13
+ index = 0
14
+ query = parts[index] # starts as the rightmost label (nil when string has no labels)
15
+ names = []
16
+
17
+ loop do
18
+ names << query
19
+
20
+ index += 1
21
+ break if index >= parts.size
22
+ query = parts[index] + "." + query # prepend the next label to the left
23
+ end
24
+ names
25
+ end
26
+
27
+ def tokenizer2(string) # Candidate 2: like tokenizer1 but walks the split labels from the last index down instead of reversing the array.
28
+ parts = string.split(".")
29
+ index = parts.size - 1 # start at the rightmost label
30
+ query = parts[index]
31
+ names = []
32
+
33
+ loop do
34
+ names << query
35
+
36
+ index -= 1
37
+ break if index < 0 # all labels consumed
38
+ query = parts[index] + "." + query # prepend the next label to the left
39
+ end
40
+ names
41
+ end
42
+
43
+ def tokenizer3(string) # Candidate 3: no split; finds dots from the right with rindex and slices each suffix with (start, length).
44
+ isx = string.size # search position; each pass looks for a dot at or before isx - 1
45
+ idx = string.size - 1 # index of the last character, fixed for the whole loop
46
+ names = []
47
+
48
+ loop do
49
+ isx = string.rindex(".", isx - 1) || -1 # no dot left: -1 makes the next slice start at index 0 (whole string)
50
+ names << string[isx + 1, idx - isx] # everything after the dot at isx, up to the end of the string
51
+
52
+ break if isx <= 0 # stop after the whole string was emitted, or when the dot found sits at index 0
53
+ end
54
+ names
55
+ end
56
+
57
+ def tokenizer4(string) # Candidate 4: same right-to-left rindex scan as tokenizer3, but slices each suffix with a range.
58
+ isx = string.size # search position; each pass looks for a dot at or before isx - 1
59
+ idx = string.size - 1 # index of the last character, fixed for the whole loop
60
+ names = []
61
+
62
+ loop do
63
+ isx = string.rindex(".", isx - 1) || -1 # no dot left: -1 makes the next slice start at index 0 (whole string)
64
+ names << string[(isx+1)..idx] # range slice from just after the dot to the last character
65
+
66
+ break if isx <= 0 # stop after the whole string was emitted, or when the dot found sits at index 0
67
+ end
68
+ names
69
+ end
70
+
71
+ (x = tokenizer1(STRING)) == ARRAY or fail("tokenizer1 failed: #{x.inspect}") # sanity checks: abort before benchmarking if a candidate's output differs from ARRAY
72
+ (x = tokenizer2(STRING)) == ARRAY or fail("tokenizer2 failed: #{x.inspect}")
73
+ (x = tokenizer3(STRING)) == ARRAY or fail("tokenizer3 failed: #{x.inspect}")
74
+ (x = tokenizer4(STRING)) == ARRAY or fail("tokenizer4 failed: #{x.inspect}")
75
+
76
+ Benchmark.ips do |x| # one report per candidate, all run on the same STRING
77
+ x.report("tokenizer1") do
78
+ tokenizer1(STRING).is_a?(Array)
79
+ end
80
+ x.report("tokenizer2") do
81
+ tokenizer2(STRING).is_a?(Array)
82
+ end
83
+ x.report("tokenizer3") do
84
+ tokenizer3(STRING).is_a?(Array)
85
+ end
86
+ x.report("tokenizer4") do
87
+ tokenizer4(STRING).is_a?(Array)
88
+ end
89
+
90
+ x.compare! # print the relative comparison between the reports
91
+ end
@@ -0,0 +1,26 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
+ JP = "www.yokoshibahikari.chiba.jp" # the single name timed by both reports
5
+
6
+ TIMES = (ARGV.first || 50_000).to_i # iterations per report; the first command-line argument overrides the 50_000 default
7
+
8
+ # Initialize
9
+ class PublicSuffix::List
10
+ public :select # make List#select callable from outside for this benchmark - presumably it is not public in the library; confirm
11
+ end
12
+ PublicSuffixList = PublicSuffix::List.default
13
+ PublicSuffixList.select("example.jp") # one call to each method before timing starts - presumably a warm-up; confirm
14
+ PublicSuffixList.find("example.jp")
15
+
16
+ Benchmark.bmbm(25) do |x| # times select and find on the same name
17
+ x.report("JP select") do
18
+ TIMES.times { PublicSuffixList.select(JP) }
19
+ end
20
+ x.report("JP find") do
21
+ TIMES.times { PublicSuffixList.find(JP) }
22
+ end
23
+ # x.report("JP (noprivate)") do
24
+ # TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil }
25
+ # end
26
+ end