public_suffix 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +36 -0
  4. data/.rubocop_defaults.yml +179 -0
  5. data/.ruby-gemset +1 -0
  6. data/.travis.yml +31 -0
  7. data/.yardopts +1 -0
  8. data/2.0-Upgrade.md +52 -0
  9. data/CHANGELOG.md +353 -0
  10. data/Gemfile +12 -0
  11. data/LICENSE.txt +22 -0
  12. data/README.md +202 -0
  13. data/Rakefile +51 -0
  14. data/bin/console +15 -0
  15. data/data/list.txt +12966 -0
  16. data/lib/public_suffix.rb +179 -0
  17. data/lib/public_suffix/domain.rb +235 -0
  18. data/lib/public_suffix/errors.rb +41 -0
  19. data/lib/public_suffix/list.rb +247 -0
  20. data/lib/public_suffix/rule.rb +350 -0
  21. data/lib/public_suffix/version.rb +13 -0
  22. data/public_suffix.gemspec +25 -0
  23. data/test/.empty +2 -0
  24. data/test/acceptance_test.rb +129 -0
  25. data/test/benchmarks/bm_find.rb +66 -0
  26. data/test/benchmarks/bm_find_all.rb +102 -0
  27. data/test/benchmarks/bm_names.rb +91 -0
  28. data/test/benchmarks/bm_select.rb +26 -0
  29. data/test/benchmarks/bm_select_incremental.rb +25 -0
  30. data/test/benchmarks/bm_valid.rb +101 -0
  31. data/test/profilers/domain_profiler.rb +12 -0
  32. data/test/profilers/find_profiler.rb +12 -0
  33. data/test/profilers/find_profiler_jp.rb +12 -0
  34. data/test/profilers/initialization_profiler.rb +11 -0
  35. data/test/profilers/list_profsize.rb +11 -0
  36. data/test/profilers/object_binsize.rb +57 -0
  37. data/test/psl_test.rb +52 -0
  38. data/test/test_helper.rb +18 -0
  39. data/test/tests.txt +98 -0
  40. data/test/unit/domain_test.rb +106 -0
  41. data/test/unit/errors_test.rb +25 -0
  42. data/test/unit/list_test.rb +241 -0
  43. data/test/unit/public_suffix_test.rb +188 -0
  44. data/test/unit/rule_test.rb +222 -0
  45. metadata +151 -0
@@ -0,0 +1,25 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
+ JP = "www.yokoshibahikari.chiba.jp"
5
+
6
+ TIMES = (ARGV.first || 50_000).to_i
7
+
8
+ # Initialize
9
+ class PublicSuffix::List
10
+ public :select
11
+ end
12
+ PublicSuffixList = PublicSuffix::List.default
13
+ PublicSuffixList.select("example.jp")
14
+
15
+ Benchmark.bmbm(25) do |x|
16
+ x.report("select jp") do
17
+ TIMES.times { PublicSuffixList.select("jp") }
18
+ end
19
+ x.report("select example.jp") do
20
+ TIMES.times { PublicSuffixList.select("example.jp") }
21
+ end
22
+ x.report("select www.example.jp") do
23
+ TIMES.times { PublicSuffixList.select("www.example.jp") }
24
+ end
25
+ end
@@ -0,0 +1,101 @@
1
+ require 'benchmark'
2
+ require_relative "../../lib/public_suffix"
3
+
4
+ NAME_SHORT = "example.de"
5
+ NAME_MEDIUM = "www.subdomain.example.de"
6
+ NAME_LONG = "one.two.three.four.five.example.de"
7
+ NAME_WILD = "one.two.three.four.five.example.bd"
8
+ NAME_EXCP = "one.two.three.four.five.www.ck"
9
+
10
+ IAAA = "www.example.ac"
11
+ IZZZ = "www.example.zone"
12
+
13
+ PAAA = "one.two.three.four.five.example.beep.pl"
14
+ PZZZ = "one.two.three.four.five.example.now.sh"
15
+
16
+ JP = "www.yokoshibahikari.chiba.jp"
17
+ IT = "www.example.it"
18
+ COM = "www.example.com"
19
+
20
+ TIMES = (ARGV.first || 50_000).to_i
21
+
22
+ # Initialize
23
+ PublicSuffix.valid?("example.com")
24
+
25
+ Benchmark.bmbm(25) do |x|
26
+ x.report("NAME_SHORT") do
27
+ TIMES.times { PublicSuffix.valid?(NAME_SHORT) == true }
28
+ end
29
+ x.report("NAME_SHORT (noprivate)") do
30
+ TIMES.times { PublicSuffix.valid?(NAME_SHORT, ignore_private: true) == true }
31
+ end
32
+ x.report("NAME_MEDIUM") do
33
+ TIMES.times { PublicSuffix.valid?(NAME_MEDIUM) == true }
34
+ end
35
+ x.report("NAME_MEDIUM (noprivate)") do
36
+ TIMES.times { PublicSuffix.valid?(NAME_MEDIUM, ignore_private: true) == true }
37
+ end
38
+ x.report("NAME_LONG") do
39
+ TIMES.times { PublicSuffix.valid?(NAME_LONG) == true }
40
+ end
41
+ x.report("NAME_LONG (noprivate)") do
42
+ TIMES.times { PublicSuffix.valid?(NAME_LONG, ignore_private: true) == true }
43
+ end
44
+ x.report("NAME_WILD") do
45
+ TIMES.times { PublicSuffix.valid?(NAME_WILD) == true }
46
+ end
47
+ x.report("NAME_WILD (noprivate)") do
48
+ TIMES.times { PublicSuffix.valid?(NAME_WILD, ignore_private: true) == true }
49
+ end
50
+ x.report("NAME_EXCP") do
51
+ TIMES.times { PublicSuffix.valid?(NAME_EXCP) == true }
52
+ end
53
+ x.report("NAME_EXCP (noprivate)") do
54
+ TIMES.times { PublicSuffix.valid?(NAME_EXCP, ignore_private: true) == true }
55
+ end
56
+
57
+ x.report("IAAA") do
58
+ TIMES.times { PublicSuffix.valid?(IAAA) == true }
59
+ end
60
+ x.report("IAAA (noprivate)") do
61
+ TIMES.times { PublicSuffix.valid?(IAAA, ignore_private: true) == true }
62
+ end
63
+ x.report("IZZZ") do
64
+ TIMES.times { PublicSuffix.valid?(IZZZ) == true }
65
+ end
66
+ x.report("IZZZ (noprivate)") do
67
+ TIMES.times { PublicSuffix.valid?(IZZZ, ignore_private: true) == true }
68
+ end
69
+
70
+ x.report("PAAA") do
71
+ TIMES.times { PublicSuffix.valid?(PAAA) == true }
72
+ end
73
+ x.report("PAAA (noprivate)") do
74
+ TIMES.times { PublicSuffix.valid?(PAAA, ignore_private: true) == true }
75
+ end
76
+ x.report("PZZZ") do
77
+ TIMES.times { PublicSuffix.valid?(PZZZ) == true }
78
+ end
79
+ x.report("PZZZ (noprivate)") do
80
+ TIMES.times { PublicSuffix.valid?(PZZZ, ignore_private: true) == true }
81
+ end
82
+
83
+ x.report("JP") do
84
+ TIMES.times { PublicSuffix.valid?(JP) == true }
85
+ end
86
+ x.report("JP (noprivate)") do
87
+ TIMES.times { PublicSuffix.valid?(JP, ignore_private: true) == true }
88
+ end
89
+ x.report("IT") do
90
+ TIMES.times { PublicSuffix.valid?(IT) == true }
91
+ end
92
+ x.report("IT (noprivate)") do
93
+ TIMES.times { PublicSuffix.valid?(IT, ignore_private: true) == true }
94
+ end
95
+ x.report("COM") do
96
+ TIMES.times { PublicSuffix.valid?(COM) == true }
97
+ end
98
+ x.report("COM (noprivate)") do
99
+ TIMES.times { PublicSuffix.valid?(COM, ignore_private: true) == true }
100
+ end
101
+ end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
+ PublicSuffix::List.default
7
+
8
+ report = MemoryProfiler.report do
9
+ PublicSuffix.domain("www.example.com")
10
+ end
11
+
12
+ report.pretty_print
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
+ PublicSuffix::List.default
7
+
8
+ report = MemoryProfiler.report do
9
+ PublicSuffix::List.default.find("www.example.com")
10
+ end
11
+
12
+ report.pretty_print
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
+ PublicSuffix::List.default
7
+
8
+ report = MemoryProfiler.report do
9
+ PublicSuffix::List.default.find("a.b.ide.kyoto.jp")
10
+ end
11
+
12
+ report.pretty_print
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require "memory_profiler"
4
+ require "public_suffix"
5
+
6
+ report = MemoryProfiler.report do
7
+ PublicSuffix::List.default
8
+ end
9
+
10
+ report.pretty_print
11
+ # report.pretty_print(to_file: 'profiler-%s-%d.txt' % [ARGV[0], Time.now.to_i])
@@ -0,0 +1,11 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
2
+
3
+ require_relative "object_binsize"
4
+ require "public_suffix"
5
+
6
+ list = PublicSuffix::List.default
7
+ puts "#{list.size} rules:"
8
+
9
+ prof = ObjectBinsize.new
10
+ prof.report(PublicSuffix::List.default, label: "PublicSuffix::List size")
11
+ prof.report(PublicSuffix::List.default.instance_variable_get(:@rules), label: "Size of rules")
@@ -0,0 +1,57 @@
1
+ require 'tempfile'
2
+
3
+ # A very simple memory profiler that checks the full size of a variable
4
+ # by serializing into a binary file.
5
+ #
6
+ # Yes, I know this is very rough, but there are cases where ObjectSpace.memsize_of
7
+ # doesn't cooperate, and this is one of the possible workarounds.
8
+ #
9
+ # For certain cases, it works (TM).
10
+ class ObjectBinsize
11
+
12
+ def measure(var, label: nil)
13
+ dump(var, label: label)
14
+ end
15
+
16
+ def report(var, label: nil, padding: 10)
17
+ file = measure(var, label: label)
18
+
19
+ size = format_integer(file.size)
20
+ name = label || File.basename(file.path)
21
+ printf("%#{padding}s %s\n", size, name)
22
+ end
23
+
24
+ private
25
+
26
+ def dump(var, **args)
27
+ file = Tempfile.new(args[:label].to_s)
28
+ file.write(Marshal.dump(var))
29
+ file
30
+ ensure
31
+ file.close
32
+ end
33
+
34
+ def format_integer(int)
35
+ int.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse
36
+ end
37
+
38
+ end
39
+
40
+ if __FILE__ == $0
41
+ prof = ObjectBinsize.new
42
+
43
+ prof.report(nil, label: "nil")
44
+ prof.report(false, label: "false")
45
+ prof.report(true, label: "true")
46
+ prof.report(0, label: "integer")
47
+ prof.report("", label: "empty string")
48
+ prof.report({}, label: "empty hash")
49
+ prof.report({}, label: "empty array")
50
+
51
+ prof.report({ foo: "1" }, label: "hash 1 item (symbol)")
52
+ prof.report({ foo: "1", bar: 2 }, label: "hash 2 items (symbol)")
53
+ prof.report({ "foo" => "1" }, label: "hash 1 item (string)")
54
+ prof.report({ "foo" => "1", "bar" => 2 }, label: "hash 2 items (string)")
55
+
56
+ prof.report("big string" * 200, label: "big string * 200")
57
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+ require "public_suffix"
5
+
6
+ # This test runs against the current PSL file and ensures
7
+ # the definitions satisfy the test suite.
8
+ class PslTest < Minitest::Test
9
+
10
+ ROOT = File.expand_path("..", __dir__)
11
+
12
+ # rubocop:disable Security/Eval
13
+ def self.tests
14
+ File.readlines(File.join(ROOT, "test/tests.txt")).map do |line|
15
+ line = line.strip
16
+ next if line.empty?
17
+ next if line.start_with?("//")
18
+
19
+ input, output = line.split(", ")
20
+
21
+ # handle the case of eval("null"), it must be eval("nil")
22
+ input = "nil" if input == "null"
23
+ output = "nil" if output == "null"
24
+
25
+ input = eval(input)
26
+ output = eval(output)
27
+ [input, output]
28
+ end
29
+ end
30
+ # rubocop:enable Security/Eval
31
+
32
+
33
+ def test_valid
34
+ # Parse the PSL and run the tests
35
+ data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
36
+ PublicSuffix::List.default = PublicSuffix::List.parse(data)
37
+
38
+ failures = []
39
+ self.class.tests.each do |input, output|
40
+ # Punycode domains are not supported ATM
41
+ next if input =~ /xn\-\-/
42
+
43
+ domain = PublicSuffix.domain(input) rescue nil
44
+ failures << [input, output, domain] if output != domain
45
+ end
46
+
47
+ message = "The following #{failures.size} tests fail:\n"
48
+ failures.each { |i, o, d| message += "Expected %s to be %s, got %s\n" % [i.inspect, o.inspect, d.inspect] }
49
+ assert_equal 0, failures.size, message
50
+ end
51
+
52
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ if ENV["COVERAGE"]
4
+ require "simplecov"
5
+ SimpleCov.start
6
+
7
+ require "codecov"
8
+ SimpleCov.formatter = SimpleCov::Formatter::Codecov
9
+ end
10
+
11
+ require "minitest/autorun"
12
+ require "minitest/reporters"
13
+ require "mocha/setup"
14
+
15
+ Minitest::Reporters.use! Minitest::Reporters::DefaultReporter.new(color: true)
16
+
17
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
18
+ require "public_suffix"
@@ -0,0 +1,98 @@
1
+ // Any copyright is dedicated to the Public Domain.
2
+ // http://creativecommons.org/publicdomain/zero/1.0/
3
+
4
+ // null input
5
+ null, null
6
+ // Mixed case
7
+ 'COM', null
8
+ 'example.COM', 'example.com'
9
+ 'WwW.example.COM', 'example.com'
10
+ // Leading dot
11
+ '.com', null
12
+ '.example', null
13
+ '.example.com', null
14
+ '.example.example', null
15
+ // Unlisted TLD
16
+ 'example', null
17
+ 'example.example', 'example.example'
18
+ 'b.example.example', 'example.example'
19
+ 'a.b.example.example', 'example.example'
20
+ // Listed, but non-Internet, TLD
21
+ //'local', null
22
+ //'example.local', null
23
+ //'b.example.local', null
24
+ //'a.b.example.local', null
25
+ // TLD with only 1 rule
26
+ 'biz', null
27
+ 'domain.biz', 'domain.biz'
28
+ 'b.domain.biz', 'domain.biz'
29
+ 'a.b.domain.biz', 'domain.biz'
30
+ // TLD with some 2-level rules
31
+ 'com', null
32
+ 'example.com', 'example.com'
33
+ 'b.example.com', 'example.com'
34
+ 'a.b.example.com', 'example.com'
35
+ 'uk.com', null
36
+ 'example.uk.com', 'example.uk.com'
37
+ 'b.example.uk.com', 'example.uk.com'
38
+ 'a.b.example.uk.com', 'example.uk.com'
39
+ 'test.ac', 'test.ac'
40
+ // TLD with only 1 (wildcard) rule
41
+ 'mm', null
42
+ 'c.mm', null
43
+ 'b.c.mm', 'b.c.mm'
44
+ 'a.b.c.mm', 'b.c.mm'
45
+ // More complex TLD
46
+ 'jp', null
47
+ 'test.jp', 'test.jp'
48
+ 'www.test.jp', 'test.jp'
49
+ 'ac.jp', null
50
+ 'test.ac.jp', 'test.ac.jp'
51
+ 'www.test.ac.jp', 'test.ac.jp'
52
+ 'kyoto.jp', null
53
+ 'test.kyoto.jp', 'test.kyoto.jp'
54
+ 'ide.kyoto.jp', null
55
+ 'b.ide.kyoto.jp', 'b.ide.kyoto.jp'
56
+ 'a.b.ide.kyoto.jp', 'b.ide.kyoto.jp'
57
+ 'c.kobe.jp', null
58
+ 'b.c.kobe.jp', 'b.c.kobe.jp'
59
+ 'a.b.c.kobe.jp', 'b.c.kobe.jp'
60
+ 'city.kobe.jp', 'city.kobe.jp'
61
+ 'www.city.kobe.jp', 'city.kobe.jp'
62
+ // TLD with a wildcard rule and exceptions
63
+ 'ck', null
64
+ 'test.ck', null
65
+ 'b.test.ck', 'b.test.ck'
66
+ 'a.b.test.ck', 'b.test.ck'
67
+ 'www.ck', 'www.ck'
68
+ 'www.www.ck', 'www.ck'
69
+ // US K12
70
+ 'us', null
71
+ 'test.us', 'test.us'
72
+ 'www.test.us', 'test.us'
73
+ 'ak.us', null
74
+ 'test.ak.us', 'test.ak.us'
75
+ 'www.test.ak.us', 'test.ak.us'
76
+ 'k12.ak.us', null
77
+ 'test.k12.ak.us', 'test.k12.ak.us'
78
+ 'www.test.k12.ak.us', 'test.k12.ak.us'
79
+ // IDN labels
80
+ '食狮.com.cn', '食狮.com.cn'
81
+ '食狮.公司.cn', '食狮.公司.cn'
82
+ 'www.食狮.公司.cn', '食狮.公司.cn'
83
+ 'shishi.公司.cn', 'shishi.公司.cn'
84
+ '公司.cn', null
85
+ '食狮.中国', '食狮.中国'
86
+ 'www.食狮.中国', '食狮.中国'
87
+ 'shishi.中国', 'shishi.中国'
88
+ '中国', null
89
+ // Same as above, but punycoded
90
+ 'xn--85x722f.com.cn', 'xn--85x722f.com.cn'
91
+ 'xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'
92
+ 'www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn'
93
+ 'shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn'
94
+ 'xn--55qx5d.cn', null
95
+ 'xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'
96
+ 'www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s'
97
+ 'shishi.xn--fiqs8s', 'shishi.xn--fiqs8s'
98
+ 'xn--fiqs8s', null
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
+ class PublicSuffix::DomainTest < Minitest::Test
6
+
7
+ def setup
8
+ @klass = PublicSuffix::Domain
9
+ end
10
+
11
+ # Tokenizes given input into labels.
12
+ def test_self_name_to_labels
13
+ assert_equal %w( someone spaces live com ),
14
+ PublicSuffix::Domain.name_to_labels("someone.spaces.live.com")
15
+ assert_equal %w( leontina23samiko wiki zoho com ),
16
+ PublicSuffix::Domain.name_to_labels("leontina23samiko.wiki.zoho.com")
17
+ end
18
+
19
+ # Converts input into String.
20
+ def test_self_name_to_labels_converts_input_to_string
21
+ assert_equal %w( someone spaces live com ),
22
+ PublicSuffix::Domain.name_to_labels(:"someone.spaces.live.com")
23
+ end
24
+
25
+
26
+ def test_initialize_with_tld
27
+ domain = @klass.new("com")
28
+ assert_equal "com", domain.tld
29
+ assert_nil domain.sld
30
+ assert_nil domain.trd
31
+ end
32
+
33
+ def test_initialize_with_tld_and_sld
34
+ domain = @klass.new("com", "google")
35
+ assert_equal "com", domain.tld
36
+ assert_equal "google", domain.sld
37
+ assert_nil domain.trd
38
+ end
39
+
40
+ def test_initialize_with_tld_and_sld_and_trd
41
+ domain = @klass.new("com", "google", "www")
42
+ assert_equal "com", domain.tld
43
+ assert_equal "google", domain.sld
44
+ assert_equal "www", domain.trd
45
+ end
46
+
47
+
48
+ def test_to_s
49
+ assert_equal "com", @klass.new("com").to_s
50
+ assert_equal "google.com", @klass.new("com", "google").to_s
51
+ assert_equal "www.google.com", @klass.new("com", "google", "www").to_s
52
+ end
53
+
54
+ def test_to_a
55
+ assert_equal [nil, nil, "com"], @klass.new("com").to_a
56
+ assert_equal [nil, "google", "com"], @klass.new("com", "google").to_a
57
+ assert_equal ["www", "google", "com"], @klass.new("com", "google", "www").to_a
58
+ end
59
+
60
+
61
+ def test_tld
62
+ assert_equal "com", @klass.new("com", "google", "www").tld
63
+ end
64
+
65
+ def test_sld
66
+ assert_equal "google", @klass.new("com", "google", "www").sld
67
+ end
68
+
69
+ def test_trd
70
+ assert_equal "www", @klass.new("com", "google", "www").trd
71
+ end
72
+
73
+
74
+ def test_name
75
+ assert_equal "com", @klass.new("com").name
76
+ assert_equal "google.com", @klass.new("com", "google").name
77
+ assert_equal "www.google.com", @klass.new("com", "google", "www").name
78
+ end
79
+
80
+ def test_domain
81
+ assert_nil @klass.new("com").domain
82
+ assert_nil @klass.new("tldnotlisted").domain
83
+ assert_equal "google.com", @klass.new("com", "google").domain
84
+ assert_equal "google.tldnotlisted", @klass.new("tldnotlisted", "google").domain
85
+ assert_equal "google.com", @klass.new("com", "google", "www").domain
86
+ assert_equal "google.tldnotlisted", @klass.new("tldnotlisted", "google", "www").domain
87
+ end
88
+
89
+ def test_subdomain
90
+ assert_nil @klass.new("com").subdomain
91
+ assert_nil @klass.new("tldnotlisted").subdomain
92
+ assert_nil @klass.new("com", "google").subdomain
93
+ assert_nil @klass.new("tldnotlisted", "google").subdomain
94
+ assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain
95
+ assert_equal "www.google.tldnotlisted", @klass.new("tldnotlisted", "google", "www").subdomain
96
+ end
97
+
98
+
99
+ def test_domain_question
100
+ assert !@klass.new("com").domain?
101
+ assert @klass.new("com", "example").domain?
102
+ assert @klass.new("com", "example", "www").domain?
103
+ assert @klass.new("tldnotlisted", "example").domain?
104
+ end
105
+
106
+ end