public_suffix 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +36 -0
  4. data/.rubocop_defaults.yml +179 -0
  5. data/.ruby-gemset +1 -0
  6. data/.travis.yml +31 -0
  7. data/.yardopts +1 -0
  8. data/2.0-Upgrade.md +52 -0
  9. data/CHANGELOG.md +353 -0
  10. data/Gemfile +12 -0
  11. data/LICENSE.txt +22 -0
  12. data/README.md +202 -0
  13. data/Rakefile +51 -0
  14. data/bin/console +15 -0
  15. data/data/list.txt +12966 -0
  16. data/lib/public_suffix.rb +179 -0
  17. data/lib/public_suffix/domain.rb +235 -0
  18. data/lib/public_suffix/errors.rb +41 -0
  19. data/lib/public_suffix/list.rb +247 -0
  20. data/lib/public_suffix/rule.rb +350 -0
  21. data/lib/public_suffix/version.rb +13 -0
  22. data/public_suffix.gemspec +25 -0
  23. data/test/.empty +2 -0
  24. data/test/acceptance_test.rb +129 -0
  25. data/test/benchmarks/bm_find.rb +66 -0
  26. data/test/benchmarks/bm_find_all.rb +102 -0
  27. data/test/benchmarks/bm_names.rb +91 -0
  28. data/test/benchmarks/bm_select.rb +26 -0
  29. data/test/benchmarks/bm_select_incremental.rb +25 -0
  30. data/test/benchmarks/bm_valid.rb +101 -0
  31. data/test/profilers/domain_profiler.rb +12 -0
  32. data/test/profilers/find_profiler.rb +12 -0
  33. data/test/profilers/find_profiler_jp.rb +12 -0
  34. data/test/profilers/initialization_profiler.rb +11 -0
  35. data/test/profilers/list_profsize.rb +11 -0
  36. data/test/profilers/object_binsize.rb +57 -0
  37. data/test/psl_test.rb +52 -0
  38. data/test/test_helper.rb +18 -0
  39. data/test/tests.txt +98 -0
  40. data/test/unit/domain_test.rb +106 -0
  41. data/test/unit/errors_test.rb +25 -0
  42. data/test/unit/list_test.rb +241 -0
  43. data/test/unit/public_suffix_test.rb +188 -0
  44. data/test/unit/rule_test.rb +222 -0
  45. metadata +151 -0
@@ -0,0 +1,25 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
# Sample multi-label name under .jp; not referenced by the reports below.
JP = "www.yokoshibahikari.chiba.jp"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize
# Make List#select callable from outside the class so it can be benchmarked directly.
PublicSuffix::List.send(:public, :select)
PublicSuffixList = PublicSuffix::List.default
PublicSuffixList.select("example.jp")

# Benchmark.bmbm runs a rehearsal pass followed by the measured pass;
# 25 is the width reserved for the report labels.
Benchmark.bmbm(25) do |bench|
  bench.report("select jp") { TIMES.times { PublicSuffixList.select("jp") } }
  bench.report("select example.jp") { TIMES.times { PublicSuffixList.select("example.jp") } }
  bench.report("select www.example.jp") { TIMES.times { PublicSuffixList.select("www.example.jp") } }
end
@@ -0,0 +1,101 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
NAME_SHORT  = "example.de"
NAME_MEDIUM = "www.subdomain.example.de"
NAME_LONG   = "one.two.three.four.five.example.de"
NAME_WILD   = "one.two.three.four.five.example.bd"
NAME_EXCP   = "one.two.three.four.five.www.ck"

IAAA = "www.example.ac"
IZZZ = "www.example.zone"

PAAA = "one.two.three.four.five.example.beep.pl"
PZZZ = "one.two.three.four.five.example.now.sh"

JP  = "www.yokoshibahikari.chiba.jp"
IT  = "www.example.it"
COM = "www.example.com"

# Iterations per report; the first command-line argument overrides the default.
TIMES = (ARGV.first || 50_000).to_i

# Initialize
PublicSuffix.valid?("example.com")

# Report label => name to validate, listed in the order the reports are emitted.
subjects = {
  "NAME_SHORT"  => NAME_SHORT,
  "NAME_MEDIUM" => NAME_MEDIUM,
  "NAME_LONG"   => NAME_LONG,
  "NAME_WILD"   => NAME_WILD,
  "NAME_EXCP"   => NAME_EXCP,
  "IAAA"        => IAAA,
  "IZZZ"        => IZZZ,
  "PAAA"        => PAAA,
  "PZZZ"        => PZZZ,
  "JP"          => JP,
  "IT"          => IT,
  "COM"         => COM,
}

Benchmark.bmbm(25) do |bench|
  subjects.each do |label, name|
    # Each name is measured twice: with the default options, then with ignore_private: true.
    bench.report(label) do
      TIMES.times { PublicSuffix.valid?(name) == true }
    end
    bench.report("#{label} (noprivate)") do
      TIMES.times { PublicSuffix.valid?(name, ignore_private: true) == true }
    end
  end
end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
# Touch the default list before profiling (presumably so that building it is
# not counted against the profiled call -- TODO confirm).
PublicSuffix::List.default

# Print the memory report for a single PublicSuffix.domain call.
MemoryProfiler.report { PublicSuffix.domain("www.example.com") }.pretty_print
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
# Touch the default list before profiling (presumably so that building it is
# not counted against the profiled call -- TODO confirm).
PublicSuffix::List.default

# Print the memory report for a single List#find call.
MemoryProfiler.report { PublicSuffix::List.default.find("www.example.com") }.pretty_print
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
# Touch the default list before profiling (presumably so that building it is
# not counted against the profiled call -- TODO confirm).
PublicSuffix::List.default

# Print the memory report for a single List#find call on a deep .jp name.
MemoryProfiler.report { PublicSuffix::List.default.find("a.b.ide.kyoto.jp") }.pretty_print
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
# Profile a call to PublicSuffix::List.default and print the memory report.
profile = MemoryProfiler.report { PublicSuffix::List.default }

profile.pretty_print
# profile.pretty_print(to_file: 'profiler-%s-%d.txt' % [ARGV[0], Time.now.to_i])
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require_relative "object_binsize"
4
+ require "public_suffix"
5
+
6
# Print the number of rules, then the marshalled size of the whole list
# and of its @rules instance variable.
default_list = PublicSuffix::List.default
puts "#{default_list.size} rules:"

binsize = ObjectBinsize.new
binsize.report(PublicSuffix::List.default, label: "PublicSuffix::List size")

# @rules has no reader used here, so it is read straight off the instance.
rules = PublicSuffix::List.default.instance_variable_get(:@rules)
binsize.report(rules, label: "Size of rules")
@@ -0,0 +1,57 @@
1
+ require 'tempfile'
2
+
3
# A very simple memory profiler that checks the full size of a variable
# by serializing it into a binary file.
#
# Yes, I know this is very rough, but there are cases where ObjectSpace.memsize_of
# doesn't cooperate, and this is one of the possible workarounds.
#
# For certain cases, it works (TM).
class ObjectBinsize

  # Serializes +var+ with Marshal into a temporary file.
  #
  # @param var [Object] the object to dump; it must be Marshal-able
  # @param label [String, nil] used as the basename prefix of the temp file
  # @return [Tempfile] the (already closed) temp file holding the dump
  def measure(var, label: nil)
    dump(var, label: label)
  end

  # Prints the size of the Marshal dump of +var+, with thousands separators,
  # followed by a name for the row.
  #
  # @param var [Object] the object to measure
  # @param label [String, nil] row name; defaults to the temp file's basename
  # @param padding [Integer] minimum width of the right-aligned size column
  # @return [nil]
  def report(var, label: nil, padding: 10)
    file = measure(var, label: label)

    size = format_integer(file.size)
    name = label || File.basename(file.path)
    printf("%#{padding}s %s\n", size, name)
  end

  private

  # Writes the Marshal dump of +var+ to a new Tempfile and closes it.
  # The Tempfile object is returned so the caller can read its size and path.
  def dump(var, **args)
    file = Tempfile.new(args[:label].to_s)
    # Marshal output is binary: avoid any newline translation so that the
    # file size always equals the payload size.
    file.binmode
    file.write(Marshal.dump(var))
    file
  ensure
    # file is nil when Tempfile.new itself raised; don't mask that error.
    file.close if file
  end

  # Inserts a comma between every group of three digits: 1234567 => "1,234,567".
  # Only digit runs are grouped, so a leading minus sign is left alone.
  def format_integer(int)
    int.to_s.reverse.gsub(/\d{3}(?=\d)/, '\&,').reverse
  end

end
39
+
40
# When this file is executed directly, print the marshalled size of a few
# reference values as a quick sanity check.
if __FILE__ == $0
  prof = ObjectBinsize.new

  # [value, label] rows, reported in order.
  samples = [
    [nil, "nil"],
    [false, "false"],
    [true, "true"],
    [0, "integer"],
    ["", "empty string"],
    [{}, "empty hash"],
    # This row previously measured {} and so duplicated the "empty hash" row.
    [[], "empty array"],

    [{ foo: "1" }, "hash 1 item (symbol)"],
    [{ foo: "1", bar: 2 }, "hash 2 items (symbol)"],
    [{ "foo" => "1" }, "hash 1 item (string)"],
    [{ "foo" => "1", "bar" => 2 }, "hash 2 items (string)"],

    ["big string" * 200, "big string * 200"],
  ]

  samples.each { |value, label| prof.report(value, label: label) }
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+ require "public_suffix"
5
+
6
# This test runs against the current PSL file and ensures
# the definitions satisfy the test suite.
class PslTest < Minitest::Test

  ROOT = File.expand_path("..", __dir__)

  # Loads the expectations from test/tests.txt.
  #
  # Blank lines and lines starting with // are skipped. Every other line is
  # split on ", " into an input and an expected output, each either a quoted
  # string or the bare word null.
  #
  # NOTE(review): the two values are decoded with eval. That is tolerable only
  # because tests.txt ships with the repository; never feed it untrusted data.
  #
  # @return [Array<Array>] [input, output] pairs, without placeholder entries
  #   for the skipped lines
  #
  # rubocop:disable Security/Eval
  def self.tests
    pairs = File.readlines(File.join(ROOT, "test/tests.txt")).map do |line|
      line = line.strip
      next if line.empty? || line.start_with?("//")

      # handle the case of eval("null"), it must be eval("nil")
      input, output = line.split(", ").map { |token| token == "null" ? "nil" : token }

      [eval(input), eval(output)]
    end

    # `next` leaves a nil for every skipped line; drop them so callers only
    # see real test cases.
    pairs.compact
  end
  # rubocop:enable Security/Eval


  # Replaces the default list with a fresh parse of the bundled PSL file and
  # checks every expectation, reporting all mismatches in a single failure.
  def test_valid
    # Parse the PSL and run the tests
    data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
    PublicSuffix::List.default = PublicSuffix::List.parse(data)

    failures = []
    self.class.tests.each do |input, output|
      # Punycode domains are not supported ATM
      next if input =~ /xn\-\-/

      # Any error raised while resolving the name counts as "no domain".
      domain = begin
        PublicSuffix.domain(input)
      rescue StandardError
        nil
      end
      failures << [input, output, domain] if output != domain
    end

    details = failures.map do |i, o, d|
      format("Expected %s to be %s, got %s\n", i.inspect, o.inspect, d.inspect)
    end
    message = "The following #{failures.size} tests fail:\n" + details.join
    assert_equal 0, failures.size, message
  end

end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ if ENV["COVERAGE"]
4
+ require "simplecov"
5
+ SimpleCov.start
6
+
7
+ require "codecov"
8
+ SimpleCov.formatter = SimpleCov::Formatter::Codecov
9
+ end
10
+
11
+ require "minitest/autorun"
12
+ require "minitest/reporters"
13
+ require "mocha/setup"
14
+
15
+ Minitest::Reporters.use! Minitest::Reporters::DefaultReporter.new(color: true)
16
+
17
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
18
+ require "public_suffix"
@@ -0,0 +1,98 @@
1
+ // Any copyright is dedicated to the Public Domain.
2
+ // http://creativecommons.org/publicdomain/zero/1.0/
3
+
4
+ // null input
5
+ null, null
6
+ // Mixed case
7
+ 'COM', null
8
+ 'example.COM', 'example.com'
9
+ 'WwW.example.COM', 'example.com'
10
+ // Leading dot
11
+ '.com', null
12
+ '.example', null
13
+ '.example.com', null
14
+ '.example.example', null
15
+ // Unlisted TLD
16
+ 'example', null
17
+ 'example.example', 'example.example'
18
+ 'b.example.example', 'example.example'
19
+ 'a.b.example.example', 'example.example'
20
+ // Listed, but non-Internet, TLD
21
+ //'local', null
22
+ //'example.local', null
23
+ //'b.example.local', null
24
+ //'a.b.example.local', null
25
+ // TLD with only 1 rule
26
+ 'biz', null
27
+ 'domain.biz', 'domain.biz'
28
+ 'b.domain.biz', 'domain.biz'
29
+ 'a.b.domain.biz', 'domain.biz'
30
+ // TLD with some 2-level rules
31
+ 'com', null
32
+ 'example.com', 'example.com'
33
+ 'b.example.com', 'example.com'
34
+ 'a.b.example.com', 'example.com'
35
+ 'uk.com', null
36
+ 'example.uk.com', 'example.uk.com'
37
+ 'b.example.uk.com', 'example.uk.com'
38
+ 'a.b.example.uk.com', 'example.uk.com'
39
+ 'test.ac', 'test.ac'
40
+ // TLD with only 1 (wildcard) rule
41
+ 'mm', null
42
+ 'c.mm', null
43
+ 'b.c.mm', 'b.c.mm'
44
+ 'a.b.c.mm', 'b.c.mm'
45
+ // More complex TLD
46
+ 'jp', null
47
+ 'test.jp', 'test.jp'
48
+ 'www.test.jp', 'test.jp'
49
+ 'ac.jp', null
50
+ 'test.ac.jp', 'test.ac.jp'
51
+ 'www.test.ac.jp', 'test.ac.jp'
52
+ 'kyoto.jp', null
53
+ 'test.kyoto.jp', 'test.kyoto.jp'
54
+ 'ide.kyoto.jp', null
55
+ 'b.ide.kyoto.jp', 'b.ide.kyoto.jp'
56
+ 'a.b.ide.kyoto.jp', 'b.ide.kyoto.jp'
57
+ 'c.kobe.jp', null
58
+ 'b.c.kobe.jp', 'b.c.kobe.jp'
59
+ 'a.b.c.kobe.jp', 'b.c.kobe.jp'
60
+ 'city.kobe.jp', 'city.kobe.jp'
61
+ 'www.city.kobe.jp', 'city.kobe.jp'
62
+ // TLD with a wildcard rule and exceptions
63
+ 'ck', null
64
+ 'test.ck', null
65
+ 'b.test.ck', 'b.test.ck'
66
+ 'a.b.test.ck', 'b.test.ck'
67
+ 'www.ck', 'www.ck'
68
+ 'www.www.ck', 'www.ck'
69
+ // US K12
70
+ 'us', null
71
+ 'test.us', 'test.us'
72
+ 'www.test.us', 'test.us'
73
+ 'ak.us', null
74
+ 'test.ak.us', 'test.ak.us'
75
+ 'www.test.ak.us', 'test.ak.us'
76
+ 'k12.ak.us', null
77
+ 'test.k12.ak.us', 'test.k12.ak.us'
78
+ 'www.test.k12.ak.us', 'test.k12.ak.us'
79
+ // IDN labels
80
+ '食狮.com.cn', '食狮.com.cn'
81
+ '食狮.公司.cn', '食狮.公司.cn'
82
+ 'www.食狮.公司.cn', '食狮.公司.cn'
83
+ 'shishi.公司.cn', 'shishi.公司.cn'
84
+ '公司.cn', null
85
+ '食狮.中国', '食狮.中国'
86
+ 'www.食狮.中国', '食狮.中国'
87
+ 'shishi.中国', 'shishi.中国'
88
+ '中国', null
89
+ // Same as above, but punycoded
90
+ 'xn--85x722f.com.cn', 'xn--85x722f.com.cn'
91
+ 'xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'
92
+ 'www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'
93
+ 'shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn'
94
+ 'xn--55qx5d.cn', null
95
+ 'xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'
96
+ 'www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'
97
+ 'shishi.xn--fiqs8s', 'shishi.xn--fiqs8s'
98
+ 'xn--fiqs8s', null
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
# Unit tests for PublicSuffix::Domain: label tokenizing, construction from
# tld/sld/trd, and the derived readers.
class PublicSuffix::DomainTest < Minitest::Test

  def setup
    @klass = PublicSuffix::Domain
  end

  # Tokenizes given input into labels.
  def test_self_name_to_labels
    {
      "someone.spaces.live.com" => %w[someone spaces live com],
      "leontina23samiko.wiki.zoho.com" => %w[leontina23samiko wiki zoho com],
    }.each do |name, labels|
      assert_equal labels, PublicSuffix::Domain.name_to_labels(name)
    end
  end

  # Converts input into String.
  def test_self_name_to_labels_converts_input_to_string
    labels = PublicSuffix::Domain.name_to_labels(:"someone.spaces.live.com")
    assert_equal %w[someone spaces live com], labels
  end


  def test_initialize_with_tld
    assert_parts @klass.new("com"), "com", nil, nil
  end

  def test_initialize_with_tld_and_sld
    assert_parts @klass.new("com", "google"), "com", "google", nil
  end

  def test_initialize_with_tld_and_sld_and_trd
    assert_parts @klass.new("com", "google", "www"), "com", "google", "www"
  end


  def test_to_s
    assert_reader(:to_s, [
      [%w[com], "com"],
      [%w[com google], "google.com"],
      [%w[com google www], "www.google.com"],
    ])
  end

  def test_to_a
    assert_reader(:to_a, [
      [%w[com], [nil, nil, "com"]],
      [%w[com google], [nil, "google", "com"]],
      [%w[com google www], ["www", "google", "com"]],
    ])
  end


  def test_tld
    assert_reader(:tld, [[%w[com google www], "com"]])
  end

  def test_sld
    assert_reader(:sld, [[%w[com google www], "google"]])
  end

  def test_trd
    assert_reader(:trd, [[%w[com google www], "www"]])
  end


  def test_name
    assert_reader(:name, [
      [%w[com], "com"],
      [%w[com google], "google.com"],
      [%w[com google www], "www.google.com"],
    ])
  end

  def test_domain
    assert_reader(:domain, [
      [%w[com], nil],
      [%w[tldnotlisted], nil],
      [%w[com google], "google.com"],
      [%w[tldnotlisted google], "google.tldnotlisted"],
      [%w[com google www], "google.com"],
      [%w[tldnotlisted google www], "google.tldnotlisted"],
    ])
  end

  def test_subdomain
    assert_reader(:subdomain, [
      [%w[com], nil],
      [%w[tldnotlisted], nil],
      [%w[com google], nil],
      [%w[tldnotlisted google], nil],
      [%w[com google www], "www.google.com"],
      [%w[tldnotlisted google www], "www.google.tldnotlisted"],
    ])
  end


  def test_domain_question
    refute @klass.new("com").domain?

    [%w[com example], %w[com example www], %w[tldnotlisted example]].each do |parts|
      assert @klass.new(*parts).domain?
    end
  end

  private

  # Checks the three parts of +domain+, in tld, sld, trd order.
  def assert_parts(domain, tld, sld, trd)
    assert_value tld, domain.tld
    assert_value sld, domain.sld
    assert_value trd, domain.trd
  end

  # For each [constructor arguments, expected] row, builds a domain and checks
  # the value returned by +reader+. Rows are checked in the order given.
  def assert_reader(reader, rows)
    rows.each do |parts, expected|
      assert_value expected, @klass.new(*parts).public_send(reader)
    end
  end

  # assert_nil when nil is expected, assert_equal otherwise.
  def assert_value(expected, actual)
    if expected.nil?
      assert_nil actual
    else
      assert_equal expected, actual
    end
  end

end