gman 7.0.4 → 7.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/gman.gemspec CHANGED
@@ -18,17 +18,16 @@ Gem::Specification.new do |s|
18
18
  s.licenses = ['MIT']
19
19
 
20
20
  s.files = `git ls-files`.split("\n")
21
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
22
21
  s.executables = `git ls-files -- bin/*`.split("\n").map do |f|
23
22
  File.basename(f)
24
23
  end
25
24
 
26
25
  s.require_paths = ['lib']
27
- s.required_ruby_version = '~> 2.5'
26
+ s.required_ruby_version = '>= 2.5', '< 4.0'
28
27
 
29
28
  s.add_dependency('colored', '~> 1.2')
30
29
  s.add_dependency('iso_country_codes', '~> 0.6')
31
- s.add_dependency('naughty_or_nice', '= 2.1.1')
30
+ s.add_dependency('naughty_or_nice', '>= 2.1.1')
32
31
  s.add_dependency('public_suffix', '>= 3.0')
33
32
 
34
33
  s.add_development_dependency('addressable', '~> 2.3')
@@ -39,6 +38,8 @@ Gem::Specification.new do |s|
39
38
  s.add_development_dependency('rubocop', '~> 1.0')
40
39
  s.add_development_dependency('rubocop-performance', '~> 1.5')
41
40
  s.add_development_dependency('rubocop-rspec', '~> 2.0')
42
- s.add_development_dependency('ruby-prof', '~> 0.15')
41
+ s.add_development_dependency('ruby-prof', '~> 1.4')
42
+ s.add_development_dependency('ruby-progressbar', '~> 1.10')
43
43
  s.add_development_dependency('swot', '~> 1.0')
44
+ s.metadata['rubygems_mfa_required'] = 'true'
44
45
  end
@@ -26,7 +26,7 @@ class Gman
26
26
  def_hash_delegator :dotgov_listing, :Agency
27
27
  def_hash_delegator :dotgov_listing, :Organization
28
28
  def_hash_delegator :dotgov_listing, :City
29
- def_hash_delegator :dotgov_listing, :"Domain Type"
29
+ def_hash_delegator :dotgov_listing, :'Domain Type'
30
30
  private :domain_type
31
31
 
32
32
  def type
@@ -60,7 +60,7 @@ class Gman
60
60
  def federal?
61
61
  return false unless dotgov_listing
62
62
 
63
- domain_type =~ /^Federal Agency/i
63
+ domain_type =~ /^Federal/i
64
64
  end
65
65
 
66
66
  def city?
@@ -87,7 +87,7 @@ class Gman
87
87
  if matches
88
88
  matches[1] == 'state'
89
89
  elsif dotgov_listing
90
- domain_type == 'State/Local Govt'
90
+ domain_type == 'State/Local Govt' || domain_type == 'State'
91
91
  else
92
92
  false
93
93
  end
@@ -108,14 +108,14 @@ class Gman
108
108
  private
109
109
 
110
110
  def list_category
111
- @list_category ||= begin
112
- match = Gman.list.public_suffix_list.find(domain.to_s)
113
- return unless match
111
+ return @list_category if defined?(@list_category)
114
112
 
115
- regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
116
- matches = Gman.list.contents.match(regex)
117
- matches[1] if matches
118
- end
113
+ match = Gman.list.public_suffix_list.find(domain.to_s)
114
+ return @list_category = nil unless match
115
+
116
+ regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
117
+ matches = Gman.list.contents.match(regex)
118
+ @list_category = matches ? matches[1] : nil
119
119
  end
120
120
 
121
121
  def matches
data/lib/gman/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Gman
4
- VERSION = '7.0.4'
4
+ VERSION = '7.0.6'
5
5
  end
data/lib/gman.rb CHANGED
@@ -43,12 +43,11 @@ class Gman
43
43
  #
44
44
  # Returns boolean true if a government domain
45
45
  def valid?
46
- @valid ||= begin
47
- return false unless valid_domain?
48
- return false if academic?
46
+ return @valid if defined?(@valid)
49
47
 
50
- locality? || public_suffix_valid?
51
- end
48
+ @valid = false unless valid_domain?
49
+ @valid = false if academic?
50
+ @valid ||= locality? || public_suffix_valid?
52
51
  end
53
52
 
54
53
  def locality?
data/script/profile CHANGED
@@ -8,7 +8,7 @@ require './lib/gman'
8
8
  # without pre-loading the Gman list for an accurate benchmark
9
9
  count = (ARGV[0] || 100).to_i
10
10
  domains = File.readlines('./config/domains.txt')
11
- domains = domains.select { |l| l =~ /^[a-z0-9]/i }
11
+ domains = domains.grep(/^[a-z0-9]/i)
12
12
  domains = domains.sample(count)
13
13
 
14
14
  RubyProf.start
data/script/prune CHANGED
@@ -10,7 +10,7 @@ require_relative '../lib/gman/domain_list'
10
10
  domains = ARGV
11
11
  domains = domains.clone.map { |d| d.delete ',' }
12
12
 
13
- list = File.open('./config/domains.txt').read
13
+ list = File.read('./config/domains.txt')
14
14
  puts "Starting list: #{Gman::DomainList.current.count} domains"
15
15
 
16
16
  domains.each do |domain|
data/script/reconcile-us CHANGED
@@ -26,7 +26,7 @@ data.each do |row|
26
26
  group = row
27
27
  domains[group] = []
28
28
  else
29
- domains[group].push row.sub("\.\t", '').strip
29
+ domains[group].push row.sub(".\t", '').strip
30
30
  end
31
31
  end
32
32
 
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # ! /usr/bin/env ruby
4
+ # frozen_string_literal: true
5
+
6
+ #
7
+ # Add one or more domains to a given group, running the standard import checks
8
+ #
9
+ # Usage: script/add [GROUP] [DOMAIN(S)]
10
+
11
+ require './lib/gman/importer'
12
+ require 'parallel'
13
+
14
+ importer = Gman::Importer.new({})
15
+ options = { skip_dupe: true, skip_resolve: false }
16
+ list_path = File.expand_path '../config/domains.txt', __dir__
17
+
18
+ importer.logger.info "Starting list: #{Gman::DomainList.current.count} domains"
19
+
20
+ Gman.list.to_h.values.shuffle.each do |domains|
21
+ # next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
22
+
23
+ Parallel.each(domains, progress: "Validating") do |domain|
24
+ next if domain.start_with?("!")
25
+ next if importer.valid_domain?(domain, options)
26
+
27
+ importer.logger.warn "#{domain} is not valid, removing from list"
28
+ list = File.read(list_path)
29
+ list.gsub!(/^#{Regexp.escape(domain)}$\n/, '')
30
+ File.write list_path, list
31
+ end
32
+ end
33
+
34
+ importer.logger.info "Ending list: #{Gman::DomainList.current.count} domains"
data/script/vendor CHANGED
@@ -10,4 +10,4 @@ for file in script/vendor-*; do
10
10
  fi
11
11
  done
12
12
 
13
- script/alphabetize
13
+ bundle exec script/alphabetize
@@ -3,15 +3,6 @@
3
3
  # Vendors the full list of US .gov domains from https://github.com/GSA/data
4
4
  # Usage: script/vendor-gov-list
5
5
 
6
- # Set up
7
- mkdir tmp
8
- rm -Rf tmp/gsa-data
9
-
10
6
  # Vendor the last file in the dotgov-domains folder that ends in `-full.csv`
11
- git clone https://github.com/GSA/data tmp/gsa-data
12
- pattern="tmp/gsa-data/dotgov-domains/*-full.csv"
13
- files=( $pattern )
14
- cp -f "${files[@]:(-1)}" config/vendor/dotgovs.csv
7
+ wget https://raw.githubusercontent.com/cisagov/dotgov-data/main/current-full.csv -O ./config/vendor/dotgovs.csv
15
8
 
16
- # Clean up
17
- rm -Rf tmp/gsa-data
data/script/vendor-us CHANGED
@@ -14,25 +14,29 @@
14
14
 
15
15
  require './lib/gman'
16
16
  require 'open-uri'
17
+ require 'csv'
17
18
 
19
+ path = File.expand_path('./vendor-us-tmp.csv')
18
20
  blacklist = %w[usagovQUASI usagovFEDgov]
19
- source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'
21
+ source = 'https://raw.githubusercontent.com/GSA/govt-urls/main/1_govt_urls_full.csv'
22
+ domains = {}
20
23
 
21
- data = URI.open(source).read
22
- data = data.split('_' * 74)
23
- data = data.last.strip
24
- data = data.split(/\r?\n/).reject(&:empty?)
24
+ begin
25
+ raw = URI.open(source).read
26
+ File.write(path, raw)
27
+ data = CSV.table(path)
25
28
 
26
- domains = {}
27
- group = ''
28
- data.each do |row|
29
- if /^\w/.match?(row)
30
- group = row
31
- domains[group] = []
32
- else
33
- domains[group].push row.sub("\.\t", '').strip
29
+ data.each do |domain|
30
+ next if domain[:type_of_government] == 'Quasigovernmental'
31
+
32
+ group = "US #{domain[:type_of_government]}"
33
+ group += " (#{domain[:state]})" if domain[:type_of_government] != 'Federal' && domain[:state]
34
+ domains[group] ||= []
35
+ domains[group] << domain[:domain_name]
34
36
  end
35
- end
36
37
 
37
- domains.reject! { |g, _| blacklist.include?(g) }
38
- Gman::Importer.new(domains).import
38
+ domains.reject! { |g, _| blacklist.include?(g) }
39
+ Gman::Importer.new(domains).import
40
+ ensure
41
+ File.delete(path)
42
+ end
@@ -69,7 +69,7 @@ RSpec.describe Gman::DomainList do
69
69
  end
70
70
 
71
71
  it 'outputs public suffix format' do
72
- expect(subject.to_s).to match("// Canada federal\ncanada\.ca\n")
72
+ expect(subject.to_s).to match("// Canada federal\ncanada.ca\n")
73
73
  end
74
74
 
75
75
  it "finds a domain's parent" do
@@ -80,7 +80,7 @@ RSpec.describe Gman::DomainList do
80
80
  let(:stubbed_file_contents) { File.read(stubbed_list_path) }
81
81
 
82
82
  before do
83
- subject.instance_variable_set('@path', stubbed_list_path)
83
+ subject.instance_variable_set(:@path, stubbed_list_path)
84
84
  end
85
85
 
86
86
  context 'with list data stubbed' do
@@ -137,7 +137,7 @@ RSpec.describe 'Gman identifier' do
137
137
  end
138
138
 
139
139
  context 'a county .gov' do
140
- let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }
140
+ let(:domain) { '211DUPAGE.GOV' }
141
141
 
142
142
  it "knows it's a county" do
143
143
  expect(subject).to be_a_county
@@ -161,11 +161,11 @@ RSpec.describe 'Gman identifier' do
161
161
  end
162
162
 
163
163
  it 'knows the state' do
164
- expect(subject.state).to eql('PA')
164
+ expect(subject.state).to eql('IL')
165
165
  end
166
166
 
167
167
  it 'knows the city' do
168
- expect(subject.city).to eql('Pittsburgh')
168
+ expect(subject.city).to eql('Wheaton')
169
169
  end
170
170
  end
171
171
 
@@ -203,8 +203,8 @@ RSpec.describe 'Gman identifier' do
203
203
  context "determining a domain's type" do
204
204
  {
205
205
  unknown: 'cityofperu.org',
206
- "Canada municipal": 'acme.ca',
207
- "Canada federal": 'canada.ca'
206
+ 'Canada municipal': 'acme.ca',
207
+ 'Canada federal': 'canada.ca'
208
208
  }.each do |expected, domain|
209
209
  context "Given the #{domain} domain" do
210
210
  let(:domain) { domain }
@@ -9,7 +9,7 @@ RSpec.describe Gman::Importer do
9
9
  let(:domain_list) { subject.domain_list }
10
10
 
11
11
  before do
12
- subject.instance_variable_set '@logger', logger
12
+ subject.instance_variable_set :@logger, logger
13
13
  end
14
14
 
15
15
  it 'inits the domain list' do
@@ -68,7 +68,7 @@ RSpec.describe Gman::Importer do
68
68
  let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
69
69
  let(:stubbed_file_contents) { File.read(stubbed_list_path) }
70
70
 
71
- before { subject.instance_variable_set '@current', stubbed_list }
71
+ before { subject.instance_variable_set :@current, stubbed_list }
72
72
 
73
73
  context 'writing' do
74
74
  before { @current = subject.current.to_s }
data/spec/gman_spec.rb CHANGED
@@ -18,7 +18,7 @@ RSpec.describe Gman do
18
18
 
19
19
  context 'invalid domains' do
20
20
  ['foo.bar.com', 'bar@foo.biz', 'http://www.foo.biz',
21
- 'foo.uk', 'gov', 'foo@k12.champaign.il.us', 'foo@kii.gov.by',
21
+ 'foo.uk', 'gov', 'foo@k12.champaign.il.us', # 'foo@kii.gov.by',
22
22
  'foo', '', nil, ' ', 'foo.city.il.us', 'foo.ci.il.us',
23
23
  'foo.zx.us', 'foo@mail.gov.ua', 'foo@gwu.edu'].each do |domain|
24
24
  subject { described_class.new(domain) }
data/spec/spec_helper.rb CHANGED
@@ -24,7 +24,7 @@ def stubbed_list_path
24
24
  end
25
25
 
26
26
  def with_env(key, value)
27
- old_env = ENV[key]
27
+ old_env = ENV.fetch(key, nil)
28
28
  ENV[key] = value
29
29
  yield
30
30
  ENV[key] = old_env
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gman
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.0.4
4
+ version: 7.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-13 00:00:00.000000000 Z
11
+ date: 2022-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colored
@@ -42,14 +42,14 @@ dependencies:
42
42
  name: naughty_or_nice
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: 2.1.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.1.1
55
55
  - !ruby/object:Gem::Dependency
@@ -184,14 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: '0.15'
187
+ version: '1.4'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: '1.4'
195
+ - !ruby/object:Gem::Dependency
196
+ name: ruby-progressbar
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: '1.10'
188
202
  type: :development
189
203
  prerelease: false
190
204
  version_requirements: !ruby/object:Gem::Requirement
191
205
  requirements:
192
206
  - - "~>"
193
207
  - !ruby/object:Gem::Version
194
- version: '0.15'
208
+ version: '1.10'
195
209
  - !ruby/object:Gem::Dependency
196
210
  name: swot
197
211
  requirement: !ruby/object:Gem::Requirement
@@ -220,17 +234,21 @@ files:
220
234
  - ".github/ISSUE_TEMPLATE/bug_report.md"
221
235
  - ".github/ISSUE_TEMPLATE/feature_request.md"
222
236
  - ".github/config.yml"
237
+ - ".github/dependabot.yml"
223
238
  - ".github/funding.yml"
224
239
  - ".github/no-response.yml"
225
240
  - ".github/release-drafter.yml"
226
241
  - ".github/settings.yml"
227
242
  - ".github/stale.yml"
243
+ - ".github/workflows/ci.yml"
244
+ - ".github/workflows/clean.yml"
245
+ - ".github/workflows/codeql-analysis.yml"
246
+ - ".github/workflows/validate.yml"
247
+ - ".github/workflows/vendor.yml"
228
248
  - ".gitignore"
229
249
  - ".rspec"
230
250
  - ".rubocop.yml"
231
251
  - ".rubocop_todo.yml"
232
- - ".ruby-version"
233
- - ".travis.yml"
234
252
  - Gemfile
235
253
  - LICENSE
236
254
  - bin/gman
@@ -261,13 +279,11 @@ files:
261
279
  - script/prune
262
280
  - script/reconcile-us
263
281
  - script/release
282
+ - script/validate-domains
264
283
  - script/vendor
265
284
  - script/vendor-federal-de
266
285
  - script/vendor-gov-list
267
- - script/vendor-municipal-de
268
- - script/vendor-nl
269
286
  - script/vendor-public-suffix
270
- - script/vendor-se
271
287
  - script/vendor-swot
272
288
  - script/vendor-us
273
289
  - spec/fixtures/domains.txt
@@ -284,35 +300,28 @@ files:
284
300
  homepage: https://github.com/benbalter/gman
285
301
  licenses:
286
302
  - MIT
287
- metadata: {}
303
+ metadata:
304
+ rubygems_mfa_required: 'true'
288
305
  post_install_message:
289
306
  rdoc_options: []
290
307
  require_paths:
291
308
  - lib
292
309
  required_ruby_version: !ruby/object:Gem::Requirement
293
310
  requirements:
294
- - - "~>"
311
+ - - ">="
295
312
  - !ruby/object:Gem::Version
296
313
  version: '2.5'
314
+ - - "<"
315
+ - !ruby/object:Gem::Version
316
+ version: '4.0'
297
317
  required_rubygems_version: !ruby/object:Gem::Requirement
298
318
  requirements:
299
319
  - - ">="
300
320
  - !ruby/object:Gem::Version
301
321
  version: '0'
302
322
  requirements: []
303
- rubygems_version: 3.0.3
323
+ rubygems_version: 3.2.33
304
324
  signing_key:
305
325
  specification_version: 4
306
326
  summary: Check if a given domain or email address belong to a governemnt entity
307
- test_files:
308
- - spec/fixtures/domains.txt
309
- - spec/fixtures/obama.txt
310
- - spec/gman/bin_spec.rb
311
- - spec/gman/country_code_spec.rb
312
- - spec/gman/domain_list_spec.rb
313
- - spec/gman/domains_spec.rb
314
- - spec/gman/identifier_spec.rb
315
- - spec/gman/importer_spec.rb
316
- - spec/gman/locality_spec.rb
317
- - spec/gman_spec.rb
318
- - spec/spec_helper.rb
327
+ test_files: []
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.6.6
data/.travis.yml DELETED
@@ -1,4 +0,0 @@
1
- langauage: ruby
2
- script: "script/cibuild"
3
- sudo: false
4
- cache: bundler
@@ -1,23 +0,0 @@
1
- #! /usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'csv'
5
- require 'open-uri'
6
- require './lib/gman'
7
-
8
- url = 'http://www.mik.nrw.de/nc/themen-aufgaben/kommunales/kommunale-adressen.html?tx_szkommunaldb_pi1%5Bexport%5D=csv'
9
-
10
- csv = URI.open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
11
-
12
- # For some reason, the header row is actually the last row
13
- # Pop the last line off the file and prepend it at the begining
14
- # So that when we pass it to CSV it detects the headers properly
15
- lines = csv.split("\n")
16
- lines.unshift lines.pop
17
- csv = lines.join("\n")
18
-
19
- # Load municipal domains
20
- data = CSV.parse(csv, headers: true, col_sep: ';')
21
- domains = data.map { |row| row['Internet'] }
22
-
23
- Gman::Importer.new('German Municipalities' => domains).import
data/script/vendor-nl DELETED
@@ -1,21 +0,0 @@
1
- #! /usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- # See https://github.com/github/government.github.com/pull/367#issuecomment-102108763
5
-
6
- require 'fileutils'
7
- require './lib/gman'
8
-
9
- FileUtils.rm_rf('almanak.overheid.nl')
10
- commands = [
11
- "wget -q -r -nc -np https://almanak.overheid.nl/
12
- grep @ -rI almanak.overheid.nl/",
13
- 'cut -f 2 -d @',
14
- "cut -f 1 -d '\"'",
15
- 'grep \\.nl$',
16
- 'sort',
17
- 'uniq'
18
- ]
19
- domains = system commands.join('|')
20
-
21
- Gman::Importer.new('Netherlands' => domains.split("\n")).import
data/script/vendor-se DELETED
@@ -1,21 +0,0 @@
1
- #! /usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require 'mechanize'
5
- require 'csv'
6
- require './lib/gman'
7
-
8
- url = 'http://www.myndighetsregistret.scb.se/Myndighet.aspx'
9
- agent = Mechanize.new
10
- page = agent.get(url)
11
- form = page.forms.first
12
- form.radiobuttons.find { |r| r.value = 'Textfil' }.check
13
- submit_button = form.buttons.find { |b| b.type == 'submit' }
14
- response = agent.submit(form, submit_button)
15
-
16
- rows = CSV.parse(response.content, headers: true, col_sep: "\t")
17
- domains = rows.map do |row|
18
- row['Webbadress'] unless /UNIVERSITET/.match?(row['Namn'])
19
- end
20
-
21
- Gman::Importer.new('Swedish Administrative Authorities' => domains).import