gman 7.0.0 → 7.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +14 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8454 -168
  20. data/config/vendor/academic.txt +6 -7
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +4 -2
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +25 -9
  31. data/lib/gman/identifier.rb +57 -19
  32. data/lib/gman/importer.rb +31 -21
  33. data/lib/gman/locality.rb +8 -6
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor-federal-de +2 -1
  43. data/script/vendor-municipal-de +2 -1
  44. data/script/vendor-nl +2 -0
  45. data/script/vendor-public-suffix +6 -4
  46. data/script/vendor-se +2 -1
  47. data/script/vendor-swot +3 -1
  48. data/script/vendor-us +5 -3
  49. data/spec/fixtures/domains.txt +4 -0
  50. data/{test → spec}/fixtures/obama.txt +0 -0
  51. data/spec/gman/bin_spec.rb +101 -0
  52. data/spec/gman/country_code_spec.rb +39 -0
  53. data/spec/gman/domain_list_spec.rb +110 -0
  54. data/spec/gman/domains_spec.rb +25 -0
  55. data/spec/gman/identifier_spec.rb +218 -0
  56. data/spec/gman/importer_spec.rb +236 -0
  57. data/spec/gman/locality_spec.rb +24 -0
  58. data/spec/gman_spec.rb +74 -0
  59. data/spec/spec_helper.rb +31 -0
  60. metadata +89 -81
  61. data/.rake_tasks +0 -0
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -48
  66. data/test/test_gman.rb +0 -56
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domain_list.rb +0 -112
  70. data/test/test_gman_domains.rb +0 -32
  71. data/test/test_gman_filter.rb +0 -17
  72. data/test/test_gman_identifier.rb +0 -106
  73. data/test/test_gman_importer.rb +0 -244
  74. data/test/test_gman_locality.rb +0 -10
data/.rake_tasks: File without changes
data/CONTRIBUTING.md DELETED
@@ -1,22 +0,0 @@
1
- # Contributing to Gman
2
-
3
- ## How to contribute
4
-
5
- 1. Fork the project
6
- 2. Create a descriptive branch
7
- 3. Make your change
8
- 4. Submit a pull request
9
-
10
- ## Code
11
-
12
- Open an issue, or submit a pull request
13
-
14
- ## Domains
15
-
16
- Domains live in `./config/domains.txt` as a list of TLDs and SLD+TLDs.
17
-
18
- Right now, the only valid government top level domains (TLDs), represent the US government and are `.gov`, and `.mil`.
19
-
20
- Secondary domains (e.g., `gov.uk`, or `mil.au`) detect non-US government entities.
21
-
22
- To add or remove a domain from the list of known government domains, simply edit the `domains.txt` file.
data/Rakefile DELETED
@@ -1,22 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
10
- require 'rake'
11
-
12
- require 'rake/testtask'
13
- Rake::TestTask.new(:test) do |test|
14
- test.libs << 'lib' << 'test'
15
- test.pattern = 'test/**/test_gman*.rb'
16
- test.verbose = true
17
- end
18
-
19
- desc 'Open console with gman loaded'
20
- task :console do
21
- exec 'irb -r ./lib/gman.rb'
22
- end
data/test/fixtures/domains.txt DELETED
@@ -1,2 +0,0 @@
1
- // test
2
- gov
data/test/helper.rb DELETED
@@ -1,48 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- require 'minitest/autorun'
4
- require 'parallel'
5
- require 'open3'
6
-
7
- begin
8
- Bundler.setup(:default, :development)
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts 'Run `bundle install` to install missing gems'
12
- exit e.status_code
13
- end
14
-
15
- require 'shoulda'
16
-
17
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
18
- $LOAD_PATH.unshift(File.dirname(__FILE__))
19
- require_relative '../lib/gman'
20
- require_relative '../lib/gman/domain_list'
21
- require_relative '../lib/gman/importer'
22
-
23
- def bin_path(cmd = 'gman')
24
- File.expand_path "../bin/#{cmd}", File.dirname(__FILE__)
25
- end
26
-
27
- def test_bin(*args)
28
- Open3.capture2e('bundle', 'exec', bin_path, *args)
29
- end
30
-
31
- def fixture_path(fixture)
32
- File.expand_path "./fixtures/#{fixture}", File.dirname(__FILE__)
33
- end
34
-
35
- def with_env(key, value)
36
- old_env = ENV[key]
37
- ENV[key] = value
38
- yield
39
- ENV[key] = old_env
40
- end
41
-
42
- def stubbed_list_path
43
- File.expand_path './fixtures/domains.txt', File.dirname(__FILE__)
44
- end
45
-
46
- def stubbed_list
47
- Gman::DomainList.new(path: stubbed_list_path)
48
- end
data/test/test_gman.rb DELETED
@@ -1,56 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- VALID = ['foo.gov',
4
- 'http://foo.mil',
5
- 'foo@bar.gc.ca',
6
- 'foo.gov.au',
7
- 'https://www.foo.gouv.fr',
8
- 'foo@ci.champaign.il.us',
9
- 'foo.bar.baz.gov.au',
10
- 'foo@bar.gov.uk',
11
- 'foo.gov',
12
- 'foo.fed.us',
13
- 'foo.state.il.us',
14
- 'state.il.us',
15
- 'foo@af.mil',
16
- 'foo.gov.in'
17
- ].freeze
18
-
19
- INVALID = ['foo.bar.com',
20
- 'bar@foo.biz',
21
- 'http://www.foo.biz',
22
- 'foo.uk',
23
- 'gov',
24
- 'foo@k12.champaign.il.us',
25
- 'foo@kii.gov.by',
26
- 'foo',
27
- '',
28
- nil,
29
- ' ',
30
- 'foo.city.il.us',
31
- 'foo.ci.il.us',
32
- 'foo.zx.us',
33
- 'foo@mail.gov.ua'
34
- ].freeze
35
-
36
- class TestGman < Minitest::Test
37
- VALID.each do |domain|
38
- should "recognize #{domain} as a government domain" do
39
- assert Gman.valid?(domain)
40
- end
41
- end
42
-
43
- INVALID.each do |domain|
44
- should "recognize #{domain} as a non-government domain" do
45
- refute Gman.valid?(domain)
46
- end
47
- end
48
-
49
- should 'not allow educational domains' do
50
- assert_equal false, Gman.valid?('foo@gwu.edu')
51
- end
52
-
53
- should 'returns the path to domains.txt' do
54
- assert_equal true, File.exist?(Gman.list_path)
55
- end
56
- end
data/test/test_gman_bin.rb DELETED
@@ -1,75 +0,0 @@
1
- require_relative 'helper'
2
-
3
- class TestGmanBin < Minitest::Test
4
- def setup
5
- @output, @status = test_bin('whitehouse.gov')
6
- end
7
-
8
- should 'parse the domain' do
9
- output, = test_bin('bar.gov')
10
- assert_match(/Domain : bar.gov/, output)
11
-
12
- output, = test_bin('foo@bar.gov')
13
- assert_match(/Domain : bar.gov/, output)
14
-
15
- output, = test_bin('http://bar.gov/foo')
16
- assert_match(/Domain : bar.gov/, output)
17
- end
18
-
19
- should 'err on invalid domains' do
20
- output, status = test_bin('foo.invalid')
21
- assert_equal 1, status.exitstatus
22
- assert_match(/Invalid domain/, output)
23
- end
24
-
25
- should 'err on non-government domains' do
26
- output, status = test_bin('github.com')
27
- assert_equal 1, status.exitstatus
28
- assert_match(/Not a government domain/, output)
29
- end
30
-
31
- should 'know the type' do
32
- assert_match(/federal/, @output)
33
- assert_equal 0, @status.exitstatus
34
- end
35
-
36
- should 'know the agency' do
37
- assert_match(/Executive Office of the President/, @output)
38
- assert_equal 0, @status.exitstatus
39
- end
40
-
41
- should 'know the country' do
42
- assert_match(/United States/, @output)
43
- assert_equal 0, @status.exitstatus
44
- end
45
-
46
- should 'know the city' do
47
- assert_match(/Washington/, @output)
48
- assert_equal 0, @status.exitstatus
49
- end
50
-
51
- should 'know the state' do
52
- assert_match(/DC/, @output)
53
- assert_equal 0, @status.exitstatus
54
- end
55
-
56
- should 'allow you to disable colorization' do
57
- output, = test_bin('whitehouse.gov', '--no-color')
58
- refute_match(/\e\[32m/, output)
59
- end
60
-
61
- should 'color by default' do
62
- assert_match(/\e\[32m/, @output)
63
- end
64
-
65
- should 'show help text' do
66
- output, = test_bin
67
- assert_match(/Usage/i, output)
68
-
69
- output, = test_bin('')
70
- assert_match(/Usage/i, output)
71
-
72
- output, = test_bin('--no-color')
73
- assert_match(/Usage/i, output)
74
- end
75
- end
data/test/test_gman_country_codes.rb DELETED
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
data/test/test_gman_domain_list.rb DELETED
@@ -1,112 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomainList < Minitest::Test
4
- INIT_TYPES = [:path, :contents, :data].freeze
5
-
6
- def setup
7
- @original_domain_list = File.read(stubbed_list_path)
8
- end
9
-
10
- def teardown
11
- File.write stubbed_list_path, @original_domain_list
12
- end
13
-
14
- def domain_list(type)
15
- case type
16
- when :path
17
- Gman::DomainList.new(path: Gman.list_path)
18
- when :contents
19
- contents = File.read(Gman.list_path)
20
- Gman::DomainList.new(contents: contents)
21
- when :data
22
- data = Gman::DomainList.new(path: Gman.list_path).to_h
23
- Gman::DomainList.new(data: data)
24
- end
25
- end
26
-
27
- INIT_TYPES.each do |type|
28
- context "when initalized with #{type}" do
29
- should 'store the init vars' do
30
- refute domain_list(type).public_send(type).nil?
31
- end
32
-
33
- should 'return the domain data' do
34
- list = domain_list(type)
35
- assert list.data.key? 'Canada federal'
36
- assert list.data.any? { |_key, values| values.include? 'gov' }
37
- end
38
-
39
- should 'return the list contents' do
40
- list = domain_list(type)
41
- assert_match(/^gov$/, list.contents)
42
- end
43
-
44
- should 'return the list path' do
45
- list = domain_list(type)
46
- assert_equal list.path, Gman.list_path
47
- end
48
-
49
- should 'return the public suffix parsed list' do
50
- list = domain_list(type)
51
- assert list.public_suffix_list.class == PublicSuffix::List
52
- end
53
-
54
- should 'know if a domain is valid' do
55
- list = domain_list(type)
56
- assert list.valid? 'whitehouse.gov'
57
- end
58
-
59
- should 'know if a domain is invalid' do
60
- list = domain_list(type)
61
- refute list.valid? 'example.com'
62
- end
63
-
64
- should 'return the domain groups' do
65
- list = domain_list(type)
66
- assert list.groups.include?('Canada federal')
67
- end
68
-
69
- should 'return the domains' do
70
- list = domain_list(type)
71
- assert list.domains.include?('gov')
72
- end
73
-
74
- should 'return the domain count' do
75
- list = domain_list(type)
76
- assert list.count.is_a?(Integer)
77
- assert list.count > 100
78
- end
79
-
80
- should 'alphabetize the list' do
81
- list = domain_list(type)
82
- list.data['Canada municipal'].shuffle!
83
- assert list.data['Canada municipal'].first != '100milehouse.com'
84
- list.alphabetize
85
- assert list.data['Canada municipal'].first == '100milehouse.com'
86
- end
87
-
88
- should 'write the list' do
89
- list = domain_list(type)
90
- list.instance_variable_set('@path', stubbed_list_path)
91
- list.data = { 'foo' => ['bar.gov', 'baz.net'] }
92
- list.write
93
- contents = File.read(stubbed_list_path)
94
- assert_match %r{^// foo$}, contents
95
- expected = "// foo\nbar.gov\nbaz.net"
96
- assert contents.include?(expected)
97
- end
98
-
99
- should 'output the list in public_suffix format' do
100
- list = domain_list(type)
101
- string = list.to_s
102
- assert_match %r{^// Canada federal$}, string
103
- assert string.include? "// Canada federal\ncanada\.ca\n"
104
- end
105
-
106
- should "find a domain's parent" do
107
- list = domain_list(type)
108
- assert_equal 'gov.uk', list.parent_domain('foo.gov.uk')
109
- end
110
- end
111
- end
112
- end
data/test/test_gman_domains.rb DELETED
@@ -1,32 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
24
- Gman.list.to_h.each do |group, domains|
25
- next if WHITELIST.include?(group)
26
- Parallel.each(domains, in_threads: 4) do |domain|
27
- invalid.push(domain) unless importer.valid_domain?(domain, options)
28
- end
29
- end
30
- assert_equal [], invalid.flatten.reject(&:empty?)
31
- end
32
- end
data/test/test_gman_filter.rb DELETED
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
data/test/test_gman_identifier.rb DELETED
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end