gman 7.0.1 → 7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/config/domains.txt +8259 -42
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5634 -5560
- data/contributing.json +32 -0
- data/gman.gemspec +3 -5
- data/lib/gman.rb +1 -1
- data/lib/gman/domain_list.rb +18 -6
- data/lib/gman/identifier.rb +2 -2
- data/lib/gman/importer.rb +1 -1
- data/lib/gman/version.rb +1 -1
- data/script/cibuild +1 -1
- data/script/dedupe +1 -1
- data/script/vendor-swot +1 -1
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +99 -0
- data/spec/gman/country_code_spec.rb +36 -0
- data/spec/gman/domain_list_spec.rb +108 -0
- data/spec/gman/domains_spec.rb +22 -0
- data/spec/gman/identifier_spec.rb +182 -0
- data/spec/gman/importer_spec.rb +227 -0
- data/spec/gman/locality_spec.rb +22 -0
- data/spec/gman_spec.rb +72 -0
- data/spec/spec_helper.rb +29 -0
- metadata +52 -83
- data/.rake_tasks +0 -0
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
data/contributing.json
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
{
|
2
|
+
"commit": {
|
3
|
+
"subject_cannot_be_empty": true,
|
4
|
+
"subject_must_be_longer_than": 4,
|
5
|
+
"subject_must_be_shorter_than": 101,
|
6
|
+
"subject_lines_must_be_shorter_than": 51,
|
7
|
+
"subject_must_be_single_line": true,
|
8
|
+
"subject_must_be_in_tense": "imperative",
|
9
|
+
"subject_must_start_with_case": "lower",
|
10
|
+
"subject_must_not_end_with_dot": true,
|
11
|
+
"body_lines_must_be_shorter_than": 73
|
12
|
+
},
|
13
|
+
"pull_request": {
|
14
|
+
"subject_cannot_be_empty": true,
|
15
|
+
"subject_must_be_longer_than": 4,
|
16
|
+
"subject_must_be_shorter_than": 101,
|
17
|
+
"subject_must_be_in_tense": "imperative",
|
18
|
+
"subject_must_start_with_case": "upper",
|
19
|
+
"subject_must_not_end_with_dot": true,
|
20
|
+
"body_cannot_be_empty": true
|
21
|
+
},
|
22
|
+
"issue": {
|
23
|
+
"subject_cannot_be_empty": true,
|
24
|
+
"subject_must_be_longer_than": 4,
|
25
|
+
"subject_must_be_shorter_than": 101,
|
26
|
+
"subject_must_be_in_tense": "imperative",
|
27
|
+
"subject_must_start_with_case": "upper",
|
28
|
+
"subject_must_not_end_with_dot": true,
|
29
|
+
"body_cannot_be_empty": true,
|
30
|
+
"body_must_include_reproduction_steps": true
|
31
|
+
}
|
32
|
+
}
|
data/gman.gemspec
CHANGED
@@ -26,18 +26,16 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.required_ruby_version = '~> 2.0'
|
27
27
|
|
28
28
|
s.add_dependency('iso_country_codes', '~> 0.6')
|
29
|
-
s.add_dependency('naughty_or_nice', '
|
29
|
+
s.add_dependency('naughty_or_nice', '= 2.1')
|
30
30
|
s.add_dependency('colored', '~> 1.2')
|
31
|
+
s.add_dependency('public_suffix', '~> 2.0')
|
31
32
|
|
32
33
|
s.add_development_dependency('swot', '~> 1.0')
|
33
|
-
s.add_development_dependency('rake', '~> 10.4')
|
34
|
-
s.add_development_dependency('shoulda', '~> 3.5')
|
35
|
-
s.add_development_dependency('rdoc', '~> 4.2')
|
36
|
-
s.add_development_dependency('bundler', '~> 1.10')
|
37
34
|
s.add_development_dependency('pry', '~> 0.10')
|
38
35
|
s.add_development_dependency('parallel', '~> 1.6')
|
39
36
|
s.add_development_dependency('mechanize', '~> 2.7')
|
40
37
|
s.add_development_dependency('addressable', '~> 2.3')
|
41
38
|
s.add_development_dependency('ruby-prof', '~> 0.15')
|
42
39
|
s.add_development_dependency('rubocop', '~> 0.37')
|
40
|
+
s.add_development_dependency('rspec', '~> 3.5')
|
43
41
|
end
|
data/lib/gman.rb
CHANGED
data/lib/gman/domain_list.rb
CHANGED
@@ -58,12 +58,10 @@ class Gman
|
|
58
58
|
@public_suffix_list ||= PublicSuffix::List.parse(contents)
|
59
59
|
end
|
60
60
|
|
61
|
-
# domain is on the domain list
|
62
|
-
# domain is not explicitly blacklisted and
|
63
|
-
# domain matches a standard public suffix list rule
|
61
|
+
# domain is on the domain list
|
64
62
|
def valid?(domain)
|
65
|
-
rule = public_suffix_list.find(domain)
|
66
|
-
!rule.nil?
|
63
|
+
rule = public_suffix_list.find(domain, default: nil)
|
64
|
+
!(rule.nil? || rule.is_a?(PublicSuffix::Rule::Exception))
|
67
65
|
end
|
68
66
|
|
69
67
|
# Returns an array of strings representing the list groups
|
@@ -82,9 +80,13 @@ class Gman
|
|
82
80
|
end
|
83
81
|
|
84
82
|
# Alphabetize groups and domains within each group
|
83
|
+
# We need to ensure exceptions appear after their corresponding rules
|
85
84
|
def alphabetize
|
86
85
|
@data = data.sort_by { |k, _v| k.downcase }.to_h
|
87
|
-
@data.
|
86
|
+
@data.map do |_group, domains|
|
87
|
+
domains.sort! { |a, b| sort_with_exceptions(a, b) }
|
88
|
+
domains.uniq!
|
89
|
+
end
|
88
90
|
end
|
89
91
|
|
90
92
|
# Write the domain list to disk
|
@@ -149,5 +151,15 @@ class Gman
|
|
149
151
|
hash[key] ||= []
|
150
152
|
hash[key].push value
|
151
153
|
end
|
154
|
+
|
155
|
+
def sort_with_exceptions(a, b)
|
156
|
+
if a.start_with?('!') && !b.start_with?('!')
|
157
|
+
1
|
158
|
+
elsif b.start_with?('!') && !a.start_with?('!')
|
159
|
+
-1
|
160
|
+
else
|
161
|
+
a <=> b
|
162
|
+
end
|
163
|
+
end
|
152
164
|
end
|
153
165
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -76,7 +76,7 @@ class Gman
|
|
76
76
|
@list_category ||= begin
|
77
77
|
match = Gman.list.public_suffix_list.find(domain.to_s)
|
78
78
|
return unless match
|
79
|
-
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.
|
79
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
|
80
80
|
matches = Gman.list.contents.match(regex)
|
81
81
|
matches[1] if matches
|
82
82
|
end
|
@@ -91,7 +91,7 @@ class Gman
|
|
91
91
|
return @dotgov_listing if defined? @dotgov_listing
|
92
92
|
return unless dotgov?
|
93
93
|
@dotgov_listing = Gman.dotgov_list.find do |listing|
|
94
|
-
listing['Domain Name'].casecmp("#{domain.sld}.gov")
|
94
|
+
listing['Domain Name'].casecmp("#{domain.sld}.gov").zero?
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
data/lib/gman/importer.rb
CHANGED
@@ -125,7 +125,7 @@ class Gman
|
|
125
125
|
return false if domain.empty?
|
126
126
|
if BLACKLIST.include?(domain)
|
127
127
|
reject(domain, 'blacklist')
|
128
|
-
elsif !PublicSuffix.valid?(".#{domain}")
|
128
|
+
elsif !PublicSuffix.valid?("foo.#{domain}")
|
129
129
|
reject(domain, 'invalid')
|
130
130
|
elsif Swot.is_academic?(domain)
|
131
131
|
reject(domain, 'academic')
|
data/lib/gman/version.rb
CHANGED
data/script/cibuild
CHANGED
data/script/dedupe
CHANGED
@@ -12,7 +12,7 @@ puts "Current list contains #{current.count} domains..."
|
|
12
12
|
|
13
13
|
dupe = current.count - current.domains.uniq.count
|
14
14
|
puts "Found #{dupe} duplicate domains"
|
15
|
-
exit 0 if dupe
|
15
|
+
exit 0 if dupe.zero?
|
16
16
|
|
17
17
|
dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }
|
18
18
|
|
data/script/vendor-swot
CHANGED
File without changes
|
@@ -0,0 +1,99 @@
|
|
1
|
+
RSpec.describe 'Gman bin' do
|
2
|
+
let(:domain) { 'whitehouse.gov' }
|
3
|
+
let(:args) { [domain] }
|
4
|
+
let(:command) { 'gman' }
|
5
|
+
let(:bin_path) do
|
6
|
+
File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
|
7
|
+
end
|
8
|
+
let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
|
9
|
+
let(:output) { response_parts[0] }
|
10
|
+
let(:status) { response_parts[1] }
|
11
|
+
let(:exit_code) { status.exitstatus }
|
12
|
+
|
13
|
+
context 'a valid domain' do
|
14
|
+
it 'parses the domain' do
|
15
|
+
expect(output).to match('Domain : whitehouse.gov')
|
16
|
+
end
|
17
|
+
|
18
|
+
it "knows it's valid" do
|
19
|
+
expect(output).to match('Valid government domain')
|
20
|
+
expect(exit_code).to eql(0)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'knows the type' do
|
24
|
+
expect(output).to match('federal')
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'knows the agency' do
|
28
|
+
expect(output).to match('Executive Office of the President')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'knows the country' do
|
32
|
+
expect(output).to match('United States')
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'knows the city' do
|
36
|
+
expect(output).to match('Washington')
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'knows the state' do
|
40
|
+
expect(output).to match('DC')
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'colors by default' do
|
44
|
+
expect(output).to match(/\e\[32m/)
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with colorization disabled' do
|
48
|
+
let(:args) { [domain, '--no-color'] }
|
49
|
+
|
50
|
+
it "doesn't color" do
|
51
|
+
expect(output).to_not match(/\e\[32m/)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with no args' do
|
57
|
+
let(:args) { [] }
|
58
|
+
|
59
|
+
it 'displays the help text' do
|
60
|
+
expect(output).to match('USAGE')
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
context 'an invalid domain' do
|
65
|
+
let(:domain) { 'foo.invalid' }
|
66
|
+
|
67
|
+
it 'knows the domain is invalid' do
|
68
|
+
expect(output).to match('Invalid domain')
|
69
|
+
expect(exit_code).to eql(1)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
context 'a non-government domain' do
|
74
|
+
let(:domain) { 'github.com' }
|
75
|
+
|
76
|
+
it "knows it's not a government domain" do
|
77
|
+
expect(output).to match('Not a government domain')
|
78
|
+
expect(exit_code).to eql(1)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
context 'filtering' do
|
83
|
+
let(:command) { 'gman_filter' }
|
84
|
+
let(:txt_path) do
|
85
|
+
File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
|
86
|
+
end
|
87
|
+
let(:args) { [txt_path] }
|
88
|
+
|
89
|
+
it 'returns only government domains' do
|
90
|
+
expected = <<-EOS
|
91
|
+
mr.senator@obama.senate.gov
|
92
|
+
president@whitehouse.gov
|
93
|
+
commander.in.chief@us.army.mil
|
94
|
+
EOS
|
95
|
+
|
96
|
+
expect(output).to eql(expected)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
RSpec.describe 'Gman Country Codes' do
|
2
|
+
{
|
3
|
+
'whitehouse.gov' => 'United States of America',
|
4
|
+
'foo.gov.uk' => 'United Kingdom of Great Britain and Northern Ireland',
|
5
|
+
'army.mil' => 'United States of America',
|
6
|
+
'foo.gc.ca' => 'Canada',
|
7
|
+
'foo.eu' => nil
|
8
|
+
}.each do |domain, expected_country|
|
9
|
+
context "given #{domain.inspect}" do
|
10
|
+
subject { Gman.new(domain) }
|
11
|
+
let(:country) { subject.country }
|
12
|
+
|
13
|
+
it 'knows the country' do
|
14
|
+
if expected_country.nil?
|
15
|
+
expect(country).to be_nil
|
16
|
+
else
|
17
|
+
expect(country.name).to eql(expected_country)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'knows the alpha2' do
|
22
|
+
expected = case expected_country
|
23
|
+
when 'United States of America'
|
24
|
+
'us'
|
25
|
+
when 'Canada'
|
26
|
+
'ca'
|
27
|
+
when 'United Kingdom of Great Britain and Northern Ireland'
|
28
|
+
'gb'
|
29
|
+
else
|
30
|
+
'eu'
|
31
|
+
end
|
32
|
+
expect(subject.alpha2).to eql(expected)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
RSpec.describe Gman::DomainList do
|
2
|
+
let(:data) { subject.data }
|
3
|
+
let(:canada) { data['Canada municipal'] }
|
4
|
+
|
5
|
+
[:path, :contents, :data].each do |type|
|
6
|
+
context "when initialized by #{type}" do
|
7
|
+
subject do
|
8
|
+
case type
|
9
|
+
when :path
|
10
|
+
described_class.new(path: Gman.list_path)
|
11
|
+
when :contents
|
12
|
+
contents = File.read(Gman.list_path)
|
13
|
+
described_class.new(contents: contents)
|
14
|
+
when :data
|
15
|
+
data = described_class.new(path: Gman.list_path).to_h
|
16
|
+
described_class.new(data: data)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'stores the init var' do
|
21
|
+
expect(subject.send(type)).to_not be_nil
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'returns the domain data' do
|
25
|
+
expect(data).to have_key('Canada federal')
|
26
|
+
expect(data.values.flatten).to include('gov')
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns the list contents' do
|
30
|
+
expect(subject.contents).to match(/^gov$/)
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'knows the list path' do
|
34
|
+
expect(subject.path).to eql(Gman.list_path)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns the PublicSuffix list' do
|
38
|
+
expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows if a domain is valid' do
|
42
|
+
expect(subject.valid?('whitehouse.gov')).to eql(true)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'knows if a domain is invalid' do
|
46
|
+
expect(subject.valid?('example.com')).to eql(false)
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'returns the domain groups' do
|
50
|
+
expect(subject.groups).to include('Canada federal')
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns the domains' do
|
54
|
+
expect(subject.domains).to include('gov')
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'returns the domain count' do
|
58
|
+
expect(subject.count).to be_a(Integer)
|
59
|
+
expect(subject.count).to be > 100
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'alphabetizes the list' do
|
63
|
+
canada.shuffle!
|
64
|
+
expect(canada.first).to_not eql('100milehouse.com')
|
65
|
+
subject.alphabetize
|
66
|
+
expect(canada.first).to eql('100milehouse.com')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'outputs public suffix format' do
|
70
|
+
expect(subject.to_s).to match("// Canada federal\ncanada\.ca\n")
|
71
|
+
end
|
72
|
+
|
73
|
+
it "finds a domain's parent" do
|
74
|
+
expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
|
75
|
+
end
|
76
|
+
|
77
|
+
context 'with the list path stubbed' do
|
78
|
+
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
79
|
+
|
80
|
+
before do
|
81
|
+
subject.instance_variable_set('@path', stubbed_list_path)
|
82
|
+
end
|
83
|
+
|
84
|
+
context 'with list data stubbed' do
|
85
|
+
before do
|
86
|
+
subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
|
87
|
+
end
|
88
|
+
|
89
|
+
context 'alphabetizing' do
|
90
|
+
before { subject.alphabetize }
|
91
|
+
|
92
|
+
it 'puts exceptions last' do
|
93
|
+
expect(subject.data['foo'].last).to eql('!mail.bar.gov')
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
context 'writing' do
|
98
|
+
before { subject.write }
|
99
|
+
|
100
|
+
it 'writes the contents' do
|
101
|
+
expect(stubbed_file_contents).to match("// foo\nbar.gov\nbaz.net")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
RSpec.describe 'Gman domains' do
|
2
|
+
let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
|
3
|
+
let(:importer) { Gman::Importer.new({}) }
|
4
|
+
let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }
|
5
|
+
|
6
|
+
Gman.list.to_h.each do |group, domains|
|
7
|
+
next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
|
8
|
+
|
9
|
+
context "the #{group} group" do
|
10
|
+
it 'only contains valid domains' do
|
11
|
+
invalid_domains = []
|
12
|
+
|
13
|
+
Parallel.each(domains, in_threads: 4) do |domain|
|
14
|
+
next if importer.valid_domain?(domain, options)
|
15
|
+
invalid_domains.push domain
|
16
|
+
end
|
17
|
+
|
18
|
+
expect(invalid_domains).to be_empty
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|