gman 7.0.1 → 7.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/config/domains.txt +8259 -42
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5634 -5560
- data/contributing.json +32 -0
- data/gman.gemspec +3 -5
- data/lib/gman.rb +1 -1
- data/lib/gman/domain_list.rb +18 -6
- data/lib/gman/identifier.rb +2 -2
- data/lib/gman/importer.rb +1 -1
- data/lib/gman/version.rb +1 -1
- data/script/cibuild +1 -1
- data/script/dedupe +1 -1
- data/script/vendor-swot +1 -1
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +99 -0
- data/spec/gman/country_code_spec.rb +36 -0
- data/spec/gman/domain_list_spec.rb +108 -0
- data/spec/gman/domains_spec.rb +22 -0
- data/spec/gman/identifier_spec.rb +182 -0
- data/spec/gman/importer_spec.rb +227 -0
- data/spec/gman/locality_spec.rb +22 -0
- data/spec/gman_spec.rb +72 -0
- data/spec/spec_helper.rb +29 -0
- metadata +52 -83
- data/.rake_tasks +0 -0
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
data/contributing.json
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
{
|
2
|
+
"commit": {
|
3
|
+
"subject_cannot_be_empty": true,
|
4
|
+
"subject_must_be_longer_than": 4,
|
5
|
+
"subject_must_be_shorter_than": 101,
|
6
|
+
"subject_lines_must_be_shorter_than": 51,
|
7
|
+
"subject_must_be_single_line": true,
|
8
|
+
"subject_must_be_in_tense": "imperative",
|
9
|
+
"subject_must_start_with_case": "lower",
|
10
|
+
"subject_must_not_end_with_dot": true,
|
11
|
+
"body_lines_must_be_shorter_than": 73
|
12
|
+
},
|
13
|
+
"pull_request": {
|
14
|
+
"subject_cannot_be_empty": true,
|
15
|
+
"subject_must_be_longer_than": 4,
|
16
|
+
"subject_must_be_shorter_than": 101,
|
17
|
+
"subject_must_be_in_tense": "imperative",
|
18
|
+
"subject_must_start_with_case": "upper",
|
19
|
+
"subject_must_not_end_with_dot": true,
|
20
|
+
"body_cannot_be_empty": true
|
21
|
+
},
|
22
|
+
"issue": {
|
23
|
+
"subject_cannot_be_empty": true,
|
24
|
+
"subject_must_be_longer_than": 4,
|
25
|
+
"subject_must_be_shorter_than": 101,
|
26
|
+
"subject_must_be_in_tense": "imperative",
|
27
|
+
"subject_must_start_with_case": "upper",
|
28
|
+
"subject_must_not_end_with_dot": true,
|
29
|
+
"body_cannot_be_empty": true,
|
30
|
+
"body_must_include_reproduction_steps": true
|
31
|
+
}
|
32
|
+
}
|
data/gman.gemspec
CHANGED
@@ -26,18 +26,16 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.required_ruby_version = '~> 2.0'
|
27
27
|
|
28
28
|
s.add_dependency('iso_country_codes', '~> 0.6')
|
29
|
-
s.add_dependency('naughty_or_nice', '
|
29
|
+
s.add_dependency('naughty_or_nice', '= 2.1')
|
30
30
|
s.add_dependency('colored', '~> 1.2')
|
31
|
+
s.add_dependency('public_suffix', '~> 2.0')
|
31
32
|
|
32
33
|
s.add_development_dependency('swot', '~> 1.0')
|
33
|
-
s.add_development_dependency('rake', '~> 10.4')
|
34
|
-
s.add_development_dependency('shoulda', '~> 3.5')
|
35
|
-
s.add_development_dependency('rdoc', '~> 4.2')
|
36
|
-
s.add_development_dependency('bundler', '~> 1.10')
|
37
34
|
s.add_development_dependency('pry', '~> 0.10')
|
38
35
|
s.add_development_dependency('parallel', '~> 1.6')
|
39
36
|
s.add_development_dependency('mechanize', '~> 2.7')
|
40
37
|
s.add_development_dependency('addressable', '~> 2.3')
|
41
38
|
s.add_development_dependency('ruby-prof', '~> 0.15')
|
42
39
|
s.add_development_dependency('rubocop', '~> 0.37')
|
40
|
+
s.add_development_dependency('rspec', '~> 3.5')
|
43
41
|
end
|
data/lib/gman.rb
CHANGED
data/lib/gman/domain_list.rb
CHANGED
@@ -58,12 +58,10 @@ class Gman
|
|
58
58
|
@public_suffix_list ||= PublicSuffix::List.parse(contents)
|
59
59
|
end
|
60
60
|
|
61
|
-
# domain is on the domain list
|
62
|
-
# domain is not explicitly blacklisted and
|
63
|
-
# domain matches a standard public suffix list rule
|
61
|
+
# domain is on the domain list
|
64
62
|
def valid?(domain)
|
65
|
-
rule = public_suffix_list.find(domain)
|
66
|
-
!rule.nil?
|
63
|
+
rule = public_suffix_list.find(domain, default: nil)
|
64
|
+
!(rule.nil? || rule.is_a?(PublicSuffix::Rule::Exception))
|
67
65
|
end
|
68
66
|
|
69
67
|
# Returns an array of strings representing the list groups
|
@@ -82,9 +80,13 @@ class Gman
|
|
82
80
|
end
|
83
81
|
|
84
82
|
# Alphabetize groups and domains within each group
|
83
|
+
# We need to ensure exceptions appear after their corresponding rules
|
85
84
|
def alphabetize
|
86
85
|
@data = data.sort_by { |k, _v| k.downcase }.to_h
|
87
|
-
@data.
|
86
|
+
@data.map do |_group, domains|
|
87
|
+
domains.sort! { |a, b| sort_with_exceptions(a, b) }
|
88
|
+
domains.uniq!
|
89
|
+
end
|
88
90
|
end
|
89
91
|
|
90
92
|
# Write the domain list to disk
|
@@ -149,5 +151,15 @@ class Gman
|
|
149
151
|
hash[key] ||= []
|
150
152
|
hash[key].push value
|
151
153
|
end
|
154
|
+
|
155
|
+
def sort_with_exceptions(a, b)
|
156
|
+
if a.start_with?('!') && !b.start_with?('!')
|
157
|
+
1
|
158
|
+
elsif b.start_with?('!') && !a.start_with?('!')
|
159
|
+
-1
|
160
|
+
else
|
161
|
+
a <=> b
|
162
|
+
end
|
163
|
+
end
|
152
164
|
end
|
153
165
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -76,7 +76,7 @@ class Gman
|
|
76
76
|
@list_category ||= begin
|
77
77
|
match = Gman.list.public_suffix_list.find(domain.to_s)
|
78
78
|
return unless match
|
79
|
-
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.
|
79
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
|
80
80
|
matches = Gman.list.contents.match(regex)
|
81
81
|
matches[1] if matches
|
82
82
|
end
|
@@ -91,7 +91,7 @@ class Gman
|
|
91
91
|
return @dotgov_listing if defined? @dotgov_listing
|
92
92
|
return unless dotgov?
|
93
93
|
@dotgov_listing = Gman.dotgov_list.find do |listing|
|
94
|
-
listing['Domain Name'].casecmp("#{domain.sld}.gov")
|
94
|
+
listing['Domain Name'].casecmp("#{domain.sld}.gov").zero?
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
data/lib/gman/importer.rb
CHANGED
@@ -125,7 +125,7 @@ class Gman
|
|
125
125
|
return false if domain.empty?
|
126
126
|
if BLACKLIST.include?(domain)
|
127
127
|
reject(domain, 'blacklist')
|
128
|
-
elsif !PublicSuffix.valid?(".#{domain}")
|
128
|
+
elsif !PublicSuffix.valid?("foo.#{domain}")
|
129
129
|
reject(domain, 'invalid')
|
130
130
|
elsif Swot.is_academic?(domain)
|
131
131
|
reject(domain, 'academic')
|
data/lib/gman/version.rb
CHANGED
data/script/cibuild
CHANGED
data/script/dedupe
CHANGED
@@ -12,7 +12,7 @@ puts "Current list contains #{current.count} domains..."
|
|
12
12
|
|
13
13
|
dupe = current.count - current.domains.uniq.count
|
14
14
|
puts "Found #{dupe} duplicate domains"
|
15
|
-
exit 0 if dupe
|
15
|
+
exit 0 if dupe.zero?
|
16
16
|
|
17
17
|
dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }
|
18
18
|
|
data/script/vendor-swot
CHANGED
File without changes
|
@@ -0,0 +1,99 @@
|
|
1
|
+
RSpec.describe 'Gman bin' do
|
2
|
+
let(:domain) { 'whitehouse.gov' }
|
3
|
+
let(:args) { [domain] }
|
4
|
+
let(:command) { 'gman' }
|
5
|
+
let(:bin_path) do
|
6
|
+
File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
|
7
|
+
end
|
8
|
+
let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
|
9
|
+
let(:output) { response_parts[0] }
|
10
|
+
let(:status) { response_parts[1] }
|
11
|
+
let(:exit_code) { status.exitstatus }
|
12
|
+
|
13
|
+
context 'a valid domain' do
|
14
|
+
it 'parses the domain' do
|
15
|
+
expect(output).to match('Domain : whitehouse.gov')
|
16
|
+
end
|
17
|
+
|
18
|
+
it "knows it's valid" do
|
19
|
+
expect(output).to match('Valid government domain')
|
20
|
+
expect(exit_code).to eql(0)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'knows the type' do
|
24
|
+
expect(output).to match('federal')
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'knows the agency' do
|
28
|
+
expect(output).to match('Executive Office of the President')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'knows the country' do
|
32
|
+
expect(output).to match('United States')
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'knows the city' do
|
36
|
+
expect(output).to match('Washington')
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'knows the state' do
|
40
|
+
expect(output).to match('DC')
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'colors by default' do
|
44
|
+
expect(output).to match(/\e\[32m/)
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with colorization disabled' do
|
48
|
+
let(:args) { [domain, '--no-color'] }
|
49
|
+
|
50
|
+
it "doesn't color" do
|
51
|
+
expect(output).to_not match(/\e\[32m/)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with no args' do
|
57
|
+
let(:args) { [] }
|
58
|
+
|
59
|
+
it 'displays the help text' do
|
60
|
+
expect(output).to match('USAGE')
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
context 'an invalid domain' do
|
65
|
+
let(:domain) { 'foo.invalid' }
|
66
|
+
|
67
|
+
it 'knows the domain is invalid' do
|
68
|
+
expect(output).to match('Invalid domain')
|
69
|
+
expect(exit_code).to eql(1)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
context 'a non-government domain' do
|
74
|
+
let(:domain) { 'github.com' }
|
75
|
+
|
76
|
+
it "knows it's not a government domain" do
|
77
|
+
expect(output).to match('Not a government domain')
|
78
|
+
expect(exit_code).to eql(1)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
context 'filtering' do
|
83
|
+
let(:command) { 'gman_filter' }
|
84
|
+
let(:txt_path) do
|
85
|
+
File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
|
86
|
+
end
|
87
|
+
let(:args) { [txt_path] }
|
88
|
+
|
89
|
+
it 'returns only government domains' do
|
90
|
+
expected = <<-EOS
|
91
|
+
mr.senator@obama.senate.gov
|
92
|
+
president@whitehouse.gov
|
93
|
+
commander.in.chief@us.army.mil
|
94
|
+
EOS
|
95
|
+
|
96
|
+
expect(output).to eql(expected)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
RSpec.describe 'Gman Country Codes' do
|
2
|
+
{
|
3
|
+
'whitehouse.gov' => 'United States of America',
|
4
|
+
'foo.gov.uk' => 'United Kingdom of Great Britain and Northern Ireland',
|
5
|
+
'army.mil' => 'United States of America',
|
6
|
+
'foo.gc.ca' => 'Canada',
|
7
|
+
'foo.eu' => nil
|
8
|
+
}.each do |domain, expected_country|
|
9
|
+
context "given #{domain.inspect}" do
|
10
|
+
subject { Gman.new(domain) }
|
11
|
+
let(:country) { subject.country }
|
12
|
+
|
13
|
+
it 'knows the country' do
|
14
|
+
if expected_country.nil?
|
15
|
+
expect(country).to be_nil
|
16
|
+
else
|
17
|
+
expect(country.name).to eql(expected_country)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'knows the alpha2' do
|
22
|
+
expected = case expected_country
|
23
|
+
when 'United States of America'
|
24
|
+
'us'
|
25
|
+
when 'Canada'
|
26
|
+
'ca'
|
27
|
+
when 'United Kingdom of Great Britain and Northern Ireland'
|
28
|
+
'gb'
|
29
|
+
else
|
30
|
+
'eu'
|
31
|
+
end
|
32
|
+
expect(subject.alpha2).to eql(expected)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
RSpec.describe Gman::DomainList do
|
2
|
+
let(:data) { subject.data }
|
3
|
+
let(:canada) { data['Canada municipal'] }
|
4
|
+
|
5
|
+
[:path, :contents, :data].each do |type|
|
6
|
+
context "when initialized by #{type}" do
|
7
|
+
subject do
|
8
|
+
case type
|
9
|
+
when :path
|
10
|
+
described_class.new(path: Gman.list_path)
|
11
|
+
when :contents
|
12
|
+
contents = File.read(Gman.list_path)
|
13
|
+
described_class.new(contents: contents)
|
14
|
+
when :data
|
15
|
+
data = described_class.new(path: Gman.list_path).to_h
|
16
|
+
described_class.new(data: data)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'stores the init var' do
|
21
|
+
expect(subject.send(type)).to_not be_nil
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'returns the domain data' do
|
25
|
+
expect(data).to have_key('Canada federal')
|
26
|
+
expect(data.values.flatten).to include('gov')
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns the list contents' do
|
30
|
+
expect(subject.contents).to match(/^gov$/)
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'knows the list path' do
|
34
|
+
expect(subject.path).to eql(Gman.list_path)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns the PublicSuffix list' do
|
38
|
+
expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows if a domain is valid' do
|
42
|
+
expect(subject.valid?('whitehouse.gov')).to eql(true)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'knows if a domain is invalid' do
|
46
|
+
expect(subject.valid?('example.com')).to eql(false)
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'returns the domain groups' do
|
50
|
+
expect(subject.groups).to include('Canada federal')
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'returns the domains' do
|
54
|
+
expect(subject.domains).to include('gov')
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'returns the domain count' do
|
58
|
+
expect(subject.count).to be_a(Integer)
|
59
|
+
expect(subject.count).to be > 100
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'alphabetizes the list' do
|
63
|
+
canada.shuffle!
|
64
|
+
expect(canada.first).to_not eql('100milehouse.com')
|
65
|
+
subject.alphabetize
|
66
|
+
expect(canada.first).to eql('100milehouse.com')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'outputs public suffix format' do
|
70
|
+
expect(subject.to_s).to match("// Canada federal\ncanada\.ca\n")
|
71
|
+
end
|
72
|
+
|
73
|
+
it "finds a domain's parent" do
|
74
|
+
expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
|
75
|
+
end
|
76
|
+
|
77
|
+
context 'with the list path stubbed' do
|
78
|
+
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
79
|
+
|
80
|
+
before do
|
81
|
+
subject.instance_variable_set('@path', stubbed_list_path)
|
82
|
+
end
|
83
|
+
|
84
|
+
context 'with list data stubbed' do
|
85
|
+
before do
|
86
|
+
subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
|
87
|
+
end
|
88
|
+
|
89
|
+
context 'alphabetizing' do
|
90
|
+
before { subject.alphabetize }
|
91
|
+
|
92
|
+
it 'puts exceptions last' do
|
93
|
+
expect(subject.data['foo'].last).to eql('!mail.bar.gov')
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
context 'writing' do
|
98
|
+
before { subject.write }
|
99
|
+
|
100
|
+
it 'writes the contents' do
|
101
|
+
expect(stubbed_file_contents).to match("// foo\nbar.gov\nbaz.net")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
RSpec.describe 'Gman domains' do
|
2
|
+
let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
|
3
|
+
let(:importer) { Gman::Importer.new({}) }
|
4
|
+
let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }
|
5
|
+
|
6
|
+
Gman.list.to_h.each do |group, domains|
|
7
|
+
next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
|
8
|
+
|
9
|
+
context "the #{group} group" do
|
10
|
+
it 'only contains valid domains' do
|
11
|
+
invalid_domains = []
|
12
|
+
|
13
|
+
Parallel.each(domains, in_threads: 4) do |domain|
|
14
|
+
next if importer.valid_domain?(domain, options)
|
15
|
+
invalid_domains.push domain
|
16
|
+
end
|
17
|
+
|
18
|
+
expect(invalid_domains).to be_empty
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|