gman 7.0.0 → 7.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +14 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8454 -168
  20. data/config/vendor/academic.txt +6 -7
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +4 -2
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +25 -9
  31. data/lib/gman/identifier.rb +57 -19
  32. data/lib/gman/importer.rb +31 -21
  33. data/lib/gman/locality.rb +8 -6
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor-federal-de +2 -1
  43. data/script/vendor-municipal-de +2 -1
  44. data/script/vendor-nl +2 -0
  45. data/script/vendor-public-suffix +6 -4
  46. data/script/vendor-se +2 -1
  47. data/script/vendor-swot +3 -1
  48. data/script/vendor-us +5 -3
  49. data/spec/fixtures/domains.txt +4 -0
  50. data/{test → spec}/fixtures/obama.txt +0 -0
  51. data/spec/gman/bin_spec.rb +101 -0
  52. data/spec/gman/country_code_spec.rb +39 -0
  53. data/spec/gman/domain_list_spec.rb +110 -0
  54. data/spec/gman/domains_spec.rb +25 -0
  55. data/spec/gman/identifier_spec.rb +218 -0
  56. data/spec/gman/importer_spec.rb +236 -0
  57. data/spec/gman/locality_spec.rb +24 -0
  58. data/spec/gman_spec.rb +74 -0
  59. data/spec/spec_helper.rb +31 -0
  60. metadata +89 -81
  61. data/.rake_tasks +0 -0
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -48
  66. data/test/test_gman.rb +0 -56
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domain_list.rb +0 -112
  70. data/test/test_gman_domains.rb +0 -32
  71. data/test/test_gman_filter.rb +0 -17
  72. data/test/test_gman_identifier.rb +0 -106
  73. data/test/test_gman_importer.rb +0 -244
  74. data/test/test_gman_locality.rb +0 -10
File without changes
data/CONTRIBUTING.md DELETED
@@ -1,22 +0,0 @@
1
- # Contributing to Gman
2
-
3
- ## How to contribute
4
-
5
- 1. Fork the project
6
- 2. Create a descriptive branch
7
- 3. Make your change
8
- 4. Submit a pull request
9
-
10
- ## Code
11
-
12
- Open an issue, or submit a pull request
13
-
14
- ## Domains
15
-
16
- Domains live in `./config/domains.txt` as a list of TLDs and SLD+TLDs.
17
-
18
- Right now, the only valid government top level domains (TLDs), represent the US government and are `.gov`, and `.mil`.
19
-
20
- Secondary domains (e.g., `gov.uk`, or `mil.au`) detect non-US government entities.
21
-
22
- To add or remove a domain from the list of known government domains, simply edit the `domains.txt` file.
data/Rakefile DELETED
@@ -1,22 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
10
- require 'rake'
11
-
12
- require 'rake/testtask'
13
- Rake::TestTask.new(:test) do |test|
14
- test.libs << 'lib' << 'test'
15
- test.pattern = 'test/**/test_gman*.rb'
16
- test.verbose = true
17
- end
18
-
19
- desc 'Open console with gman loaded'
20
- task :console do
21
- exec 'irb -r ./lib/gman.rb'
22
- end
data/test/fixtures/domains.txt DELETED
@@ -1,2 +0,0 @@
1
- // test
2
- gov
data/test/helper.rb DELETED
@@ -1,48 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- require 'minitest/autorun'
4
- require 'parallel'
5
- require 'open3'
6
-
7
- begin
8
- Bundler.setup(:default, :development)
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts 'Run `bundle install` to install missing gems'
12
- exit e.status_code
13
- end
14
-
15
- require 'shoulda'
16
-
17
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
18
- $LOAD_PATH.unshift(File.dirname(__FILE__))
19
- require_relative '../lib/gman'
20
- require_relative '../lib/gman/domain_list'
21
- require_relative '../lib/gman/importer'
22
-
23
- def bin_path(cmd = 'gman')
24
- File.expand_path "../bin/#{cmd}", File.dirname(__FILE__)
25
- end
26
-
27
- def test_bin(*args)
28
- Open3.capture2e('bundle', 'exec', bin_path, *args)
29
- end
30
-
31
- def fixture_path(fixture)
32
- File.expand_path "./fixtures/#{fixture}", File.dirname(__FILE__)
33
- end
34
-
35
- def with_env(key, value)
36
- old_env = ENV[key]
37
- ENV[key] = value
38
- yield
39
- ENV[key] = old_env
40
- end
41
-
42
- def stubbed_list_path
43
- File.expand_path './fixtures/domains.txt', File.dirname(__FILE__)
44
- end
45
-
46
- def stubbed_list
47
- Gman::DomainList.new(path: stubbed_list_path)
48
- end
data/test/test_gman.rb DELETED
@@ -1,56 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- VALID = ['foo.gov',
4
- 'http://foo.mil',
5
- 'foo@bar.gc.ca',
6
- 'foo.gov.au',
7
- 'https://www.foo.gouv.fr',
8
- 'foo@ci.champaign.il.us',
9
- 'foo.bar.baz.gov.au',
10
- 'foo@bar.gov.uk',
11
- 'foo.gov',
12
- 'foo.fed.us',
13
- 'foo.state.il.us',
14
- 'state.il.us',
15
- 'foo@af.mil',
16
- 'foo.gov.in'
17
- ].freeze
18
-
19
- INVALID = ['foo.bar.com',
20
- 'bar@foo.biz',
21
- 'http://www.foo.biz',
22
- 'foo.uk',
23
- 'gov',
24
- 'foo@k12.champaign.il.us',
25
- 'foo@kii.gov.by',
26
- 'foo',
27
- '',
28
- nil,
29
- ' ',
30
- 'foo.city.il.us',
31
- 'foo.ci.il.us',
32
- 'foo.zx.us',
33
- 'foo@mail.gov.ua'
34
- ].freeze
35
-
36
- class TestGman < Minitest::Test
37
- VALID.each do |domain|
38
- should "recognize #{domain} as a government domain" do
39
- assert Gman.valid?(domain)
40
- end
41
- end
42
-
43
- INVALID.each do |domain|
44
- should "recognize #{domain} as a non-government domain" do
45
- refute Gman.valid?(domain)
46
- end
47
- end
48
-
49
- should 'not allow educational domains' do
50
- assert_equal false, Gman.valid?('foo@gwu.edu')
51
- end
52
-
53
- should 'returns the path to domains.txt' do
54
- assert_equal true, File.exist?(Gman.list_path)
55
- end
56
- end
data/test/test_gman_bin.rb DELETED
@@ -1,75 +0,0 @@
1
- require_relative 'helper'
2
-
3
- class TestGmanBin < Minitest::Test
4
- def setup
5
- @output, @status = test_bin('whitehouse.gov')
6
- end
7
-
8
- should 'parse the domain' do
9
- output, = test_bin('bar.gov')
10
- assert_match(/Domain : bar.gov/, output)
11
-
12
- output, = test_bin('foo@bar.gov')
13
- assert_match(/Domain : bar.gov/, output)
14
-
15
- output, = test_bin('http://bar.gov/foo')
16
- assert_match(/Domain : bar.gov/, output)
17
- end
18
-
19
- should 'err on invalid domains' do
20
- output, status = test_bin('foo.invalid')
21
- assert_equal 1, status.exitstatus
22
- assert_match(/Invalid domain/, output)
23
- end
24
-
25
- should 'err on non-government domains' do
26
- output, status = test_bin('github.com')
27
- assert_equal 1, status.exitstatus
28
- assert_match(/Not a government domain/, output)
29
- end
30
-
31
- should 'know the type' do
32
- assert_match(/federal/, @output)
33
- assert_equal 0, @status.exitstatus
34
- end
35
-
36
- should 'know the agency' do
37
- assert_match(/Executive Office of the President/, @output)
38
- assert_equal 0, @status.exitstatus
39
- end
40
-
41
- should 'know the country' do
42
- assert_match(/United States/, @output)
43
- assert_equal 0, @status.exitstatus
44
- end
45
-
46
- should 'know the city' do
47
- assert_match(/Washington/, @output)
48
- assert_equal 0, @status.exitstatus
49
- end
50
-
51
- should 'know the state' do
52
- assert_match(/DC/, @output)
53
- assert_equal 0, @status.exitstatus
54
- end
55
-
56
- should 'allow you to disable colorization' do
57
- output, = test_bin('whitehouse.gov', '--no-color')
58
- refute_match(/\e\[32m/, output)
59
- end
60
-
61
- should 'color by default' do
62
- assert_match(/\e\[32m/, @output)
63
- end
64
-
65
- should 'show help text' do
66
- output, = test_bin
67
- assert_match(/Usage/i, output)
68
-
69
- output, = test_bin('')
70
- assert_match(/Usage/i, output)
71
-
72
- output, = test_bin('--no-color')
73
- assert_match(/Usage/i, output)
74
- end
75
- end
data/test/test_gman_country_codes.rb DELETED
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
data/test/test_gman_domain_list.rb DELETED
@@ -1,112 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomainList < Minitest::Test
4
- INIT_TYPES = [:path, :contents, :data].freeze
5
-
6
- def setup
7
- @original_domain_list = File.read(stubbed_list_path)
8
- end
9
-
10
- def teardown
11
- File.write stubbed_list_path, @original_domain_list
12
- end
13
-
14
- def domain_list(type)
15
- case type
16
- when :path
17
- Gman::DomainList.new(path: Gman.list_path)
18
- when :contents
19
- contents = File.read(Gman.list_path)
20
- Gman::DomainList.new(contents: contents)
21
- when :data
22
- data = Gman::DomainList.new(path: Gman.list_path).to_h
23
- Gman::DomainList.new(data: data)
24
- end
25
- end
26
-
27
- INIT_TYPES.each do |type|
28
- context "when initalized with #{type}" do
29
- should 'store the init vars' do
30
- refute domain_list(type).public_send(type).nil?
31
- end
32
-
33
- should 'return the domain data' do
34
- list = domain_list(type)
35
- assert list.data.key? 'Canada federal'
36
- assert list.data.any? { |_key, values| values.include? 'gov' }
37
- end
38
-
39
- should 'return the list contents' do
40
- list = domain_list(type)
41
- assert_match(/^gov$/, list.contents)
42
- end
43
-
44
- should 'return the list path' do
45
- list = domain_list(type)
46
- assert_equal list.path, Gman.list_path
47
- end
48
-
49
- should 'return the public suffix parsed list' do
50
- list = domain_list(type)
51
- assert list.public_suffix_list.class == PublicSuffix::List
52
- end
53
-
54
- should 'know if a domain is valid' do
55
- list = domain_list(type)
56
- assert list.valid? 'whitehouse.gov'
57
- end
58
-
59
- should 'know if a domain is invalid' do
60
- list = domain_list(type)
61
- refute list.valid? 'example.com'
62
- end
63
-
64
- should 'return the domain groups' do
65
- list = domain_list(type)
66
- assert list.groups.include?('Canada federal')
67
- end
68
-
69
- should 'return the domains' do
70
- list = domain_list(type)
71
- assert list.domains.include?('gov')
72
- end
73
-
74
- should 'return the domain count' do
75
- list = domain_list(type)
76
- assert list.count.is_a?(Integer)
77
- assert list.count > 100
78
- end
79
-
80
- should 'alphabetize the list' do
81
- list = domain_list(type)
82
- list.data['Canada municipal'].shuffle!
83
- assert list.data['Canada municipal'].first != '100milehouse.com'
84
- list.alphabetize
85
- assert list.data['Canada municipal'].first == '100milehouse.com'
86
- end
87
-
88
- should 'write the list' do
89
- list = domain_list(type)
90
- list.instance_variable_set('@path', stubbed_list_path)
91
- list.data = { 'foo' => ['bar.gov', 'baz.net'] }
92
- list.write
93
- contents = File.read(stubbed_list_path)
94
- assert_match %r{^// foo$}, contents
95
- expected = "// foo\nbar.gov\nbaz.net"
96
- assert contents.include?(expected)
97
- end
98
-
99
- should 'output the list in public_suffix format' do
100
- list = domain_list(type)
101
- string = list.to_s
102
- assert_match %r{^// Canada federal$}, string
103
- assert string.include? "// Canada federal\ncanada\.ca\n"
104
- end
105
-
106
- should "find a domain's parent" do
107
- list = domain_list(type)
108
- assert_equal 'gov.uk', list.parent_domain('foo.gov.uk')
109
- end
110
- end
111
- end
112
- end
data/test/test_gman_domains.rb DELETED
@@ -1,32 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
24
- Gman.list.to_h.each do |group, domains|
25
- next if WHITELIST.include?(group)
26
- Parallel.each(domains, in_threads: 4) do |domain|
27
- invalid.push(domain) unless importer.valid_domain?(domain, options)
28
- end
29
- end
30
- assert_equal [], invalid.flatten.reject(&:empty?)
31
- end
32
- end
data/test/test_gman_filter.rb DELETED
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
data/test/test_gman_identifier.rb DELETED
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end