gman 7.0.4 → 7.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/ci.yml +23 -0
- data/.github/workflows/clean.yml +31 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/validate.yml +30 -0
- data/.github/workflows/vendor.yml +29 -0
- data/.rubocop.yml +1 -5
- data/config/domains.txt +507 -10
- data/config/vendor/academic.txt +0 -1
- data/config/vendor/dotgovs.csv +8570 -5786
- data/gman.gemspec +5 -4
- data/lib/gman/identifier.rb +10 -10
- data/lib/gman/version.rb +1 -1
- data/lib/gman.rb +4 -5
- data/script/profile +1 -1
- data/script/prune +1 -1
- data/script/reconcile-us +1 -1
- data/script/validate-domains +34 -0
- data/script/vendor +1 -1
- data/script/vendor-gov-list +1 -10
- data/script/vendor-us +20 -16
- data/spec/gman/domain_list_spec.rb +2 -2
- data/spec/gman/identifier_spec.rb +5 -5
- data/spec/gman/importer_spec.rb +2 -2
- data/spec/gman_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +35 -26
- data/.ruby-version +0 -1
- data/.travis.yml +0 -4
- data/script/vendor-municipal-de +0 -23
- data/script/vendor-nl +0 -21
- data/script/vendor-se +0 -21
data/gman.gemspec
CHANGED
@@ -18,17 +18,16 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.licenses = ['MIT']
|
19
19
|
|
20
20
|
s.files = `git ls-files`.split("\n")
|
21
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
22
21
|
s.executables = `git ls-files -- bin/*`.split("\n").map do |f|
|
23
22
|
File.basename(f)
|
24
23
|
end
|
25
24
|
|
26
25
|
s.require_paths = ['lib']
|
27
|
-
s.required_ruby_version = '
|
26
|
+
s.required_ruby_version = '>= 2.5', '< 4.0'
|
28
27
|
|
29
28
|
s.add_dependency('colored', '~> 1.2')
|
30
29
|
s.add_dependency('iso_country_codes', '~> 0.6')
|
31
|
-
s.add_dependency('naughty_or_nice', '
|
30
|
+
s.add_dependency('naughty_or_nice', '>= 2.1.1')
|
32
31
|
s.add_dependency('public_suffix', '>= 3.0')
|
33
32
|
|
34
33
|
s.add_development_dependency('addressable', '~> 2.3')
|
@@ -39,6 +38,8 @@ Gem::Specification.new do |s|
|
|
39
38
|
s.add_development_dependency('rubocop', '~> 1.0')
|
40
39
|
s.add_development_dependency('rubocop-performance', '~> 1.5')
|
41
40
|
s.add_development_dependency('rubocop-rspec', '~> 2.0')
|
42
|
-
s.add_development_dependency('ruby-prof', '~>
|
41
|
+
s.add_development_dependency('ruby-prof', '~> 1.4')
|
42
|
+
s.add_development_dependency('ruby-progressbar', '~> 1.10')
|
43
43
|
s.add_development_dependency('swot', '~> 1.0')
|
44
|
+
s.metadata['rubygems_mfa_required'] = 'true'
|
44
45
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -26,7 +26,7 @@ class Gman
|
|
26
26
|
def_hash_delegator :dotgov_listing, :Agency
|
27
27
|
def_hash_delegator :dotgov_listing, :Organization
|
28
28
|
def_hash_delegator :dotgov_listing, :City
|
29
|
-
def_hash_delegator :dotgov_listing, :
|
29
|
+
def_hash_delegator :dotgov_listing, :'Domain Type'
|
30
30
|
private :domain_type
|
31
31
|
|
32
32
|
def type
|
@@ -60,7 +60,7 @@ class Gman
|
|
60
60
|
def federal?
|
61
61
|
return false unless dotgov_listing
|
62
62
|
|
63
|
-
domain_type =~ /^Federal
|
63
|
+
domain_type =~ /^Federal/i
|
64
64
|
end
|
65
65
|
|
66
66
|
def city?
|
@@ -87,7 +87,7 @@ class Gman
|
|
87
87
|
if matches
|
88
88
|
matches[1] == 'state'
|
89
89
|
elsif dotgov_listing
|
90
|
-
domain_type == 'State/Local Govt'
|
90
|
+
domain_type == 'State/Local Govt' || domain_type == 'State'
|
91
91
|
else
|
92
92
|
false
|
93
93
|
end
|
@@ -108,14 +108,14 @@ class Gman
|
|
108
108
|
private
|
109
109
|
|
110
110
|
def list_category
|
111
|
-
@list_category
|
112
|
-
match = Gman.list.public_suffix_list.find(domain.to_s)
|
113
|
-
return unless match
|
111
|
+
return @list_category if defined?(@list_category)
|
114
112
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
113
|
+
match = Gman.list.public_suffix_list.find(domain.to_s)
|
114
|
+
return @list_category = nil unless match
|
115
|
+
|
116
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
|
117
|
+
matches = Gman.list.contents.match(regex)
|
118
|
+
@list_category = matches ? matches[1] : nil
|
119
119
|
end
|
120
120
|
|
121
121
|
def matches
|
data/lib/gman/version.rb
CHANGED
data/lib/gman.rb
CHANGED
@@ -43,12 +43,11 @@ class Gman
|
|
43
43
|
#
|
44
44
|
# Returns boolean true if a government domain
|
45
45
|
def valid?
|
46
|
-
@valid
|
47
|
-
return false unless valid_domain?
|
48
|
-
return false if academic?
|
46
|
+
return @valid if defined?(@valid)
|
49
47
|
|
50
|
-
|
51
|
-
|
48
|
+
@valid = false unless valid_domain?
|
49
|
+
@valid = false if academic?
|
50
|
+
@valid ||= locality? || public_suffix_valid?
|
52
51
|
end
|
53
52
|
|
54
53
|
def locality?
|
data/script/profile
CHANGED
@@ -8,7 +8,7 @@ require './lib/gman'
|
|
8
8
|
# without pre-loading the Gman list for an accurate benchmark
|
9
9
|
count = (ARGV[0] || 100).to_i
|
10
10
|
domains = File.readlines('./config/domains.txt')
|
11
|
-
domains = domains.
|
11
|
+
domains = domains.grep(/^[a-z0-9]/i)
|
12
12
|
domains = domains.sample(count)
|
13
13
|
|
14
14
|
RubyProf.start
|
data/script/prune
CHANGED
@@ -10,7 +10,7 @@ require_relative '../lib/gman/domain_list'
|
|
10
10
|
domains = ARGV
|
11
11
|
domains = domains.clone.map { |d| d.delete ',' }
|
12
12
|
|
13
|
-
list = File.
|
13
|
+
list = File.read('./config/domains.txt')
|
14
14
|
puts "Starting list: #{Gman::DomainList.current.count} domains"
|
15
15
|
|
16
16
|
domains.each do |domain|
|
data/script/reconcile-us
CHANGED
data/script/validate-domains
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ! /usr/bin/env ruby
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
|
+
#
|
7
|
+
# Add one or more domains to a given group, running the standard import checks
|
8
|
+
#
|
9
|
+
# Usage: script/add [GROUP] [DOMAIN(S)]
|
10
|
+
|
11
|
+
require './lib/gman/importer'
|
12
|
+
require 'parallel'
|
13
|
+
|
14
|
+
importer = Gman::Importer.new({})
|
15
|
+
options = { skip_dupe: true, skip_resolve: false }
|
16
|
+
list_path = File.expand_path '../config/domains.txt', __dir__
|
17
|
+
|
18
|
+
importer.logger.info "Starting list: #{Gman::DomainList.current.count} domains"
|
19
|
+
|
20
|
+
Gman.list.to_h.values.shuffle.each do |domains|
|
21
|
+
# next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
|
22
|
+
|
23
|
+
Parallel.each(domains, progress: "Validating") do |domain|
|
24
|
+
next if domain.start_with?("!")
|
25
|
+
next if importer.valid_domain?(domain, options)
|
26
|
+
|
27
|
+
importer.logger.warn "#{domain} is not valid, removing from list"
|
28
|
+
list = File.read(list_path)
|
29
|
+
list.gsub!(/^#{Regexp.escape(domain)}$\n/, '')
|
30
|
+
File.write list_path, list
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
importer.logger.info "Ending list: #{Gman::DomainList.current.count} domains"
|
data/script/vendor
CHANGED
data/script/vendor-gov-list
CHANGED
@@ -3,15 +3,6 @@
|
|
3
3
|
# Vendors the full list of US .gov domains from https://github.com/GSA/data
|
4
4
|
# Usage: script/vendor-gov-list
|
5
5
|
|
6
|
-
# Set up
|
7
|
-
mkdir tmp
|
8
|
-
rm -Rf tmp/gsa-data
|
9
|
-
|
10
6
|
# Vendor the last file in the dotgov-domains folder that ends in `-full.csv`
|
11
|
-
|
12
|
-
pattern="tmp/gsa-data/dotgov-domains/*-full.csv"
|
13
|
-
files=( $pattern )
|
14
|
-
cp -f "${files[@]:(-1)}" config/vendor/dotgovs.csv
|
7
|
+
wget https://raw.githubusercontent.com/cisagov/dotgov-data/main/current-full.csv -O ./config/vendor/dotgovs.csv
|
15
8
|
|
16
|
-
# Clean up
|
17
|
-
rm -Rf tmp/gsa-data
|
data/script/vendor-us
CHANGED
@@ -14,25 +14,29 @@
|
|
14
14
|
|
15
15
|
require './lib/gman'
|
16
16
|
require 'open-uri'
|
17
|
+
require 'csv'
|
17
18
|
|
19
|
+
path = File.expand_path('./vendor-us-tmp.csv')
|
18
20
|
blacklist = %w[usagovQUASI usagovFEDgov]
|
19
|
-
source = 'https://raw.githubusercontent.com/GSA/govt-urls/
|
21
|
+
source = 'https://raw.githubusercontent.com/GSA/govt-urls/main/1_govt_urls_full.csv'
|
22
|
+
domains = {}
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
data =
|
24
|
+
begin
|
25
|
+
raw = URI.open(source).read
|
26
|
+
File.write(path, raw)
|
27
|
+
data = CSV.table(path)
|
25
28
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
group
|
31
|
-
domains[group]
|
32
|
-
|
33
|
-
domains[group].push row.sub("\.\t", '').strip
|
29
|
+
data.each do |domain|
|
30
|
+
next if domain[:type_of_government] == 'Quasigovernmental'
|
31
|
+
|
32
|
+
group = "US #{domain[:type_of_government]}"
|
33
|
+
group += " (#{domain[:state]})" if domain[:type_of_government] != 'Federal' && domain[:state]
|
34
|
+
domains[group] ||= []
|
35
|
+
domains[group] << domain[:domain_name]
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
domains.reject! { |g, _| blacklist.include?(g) }
|
38
|
-
Gman::Importer.new(domains).import
|
38
|
+
domains.reject! { |g, _| blacklist.include?(g) }
|
39
|
+
Gman::Importer.new(domains).import
|
40
|
+
ensure
|
41
|
+
File.delete(path)
|
42
|
+
end
|
data/spec/gman/domain_list_spec.rb
CHANGED
@@ -69,7 +69,7 @@ RSpec.describe Gman::DomainList do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'outputs public suffix format' do
|
72
|
-
expect(subject.to_s).to match("// Canada federal\ncanada
|
72
|
+
expect(subject.to_s).to match("// Canada federal\ncanada.ca\n")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "finds a domain's parent" do
|
@@ -80,7 +80,7 @@ RSpec.describe Gman::DomainList do
|
|
80
80
|
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
81
81
|
|
82
82
|
before do
|
83
|
-
subject.instance_variable_set(
|
83
|
+
subject.instance_variable_set(:@path, stubbed_list_path)
|
84
84
|
end
|
85
85
|
|
86
86
|
context 'with list data stubbed' do
|
data/spec/gman/identifier_spec.rb
CHANGED
@@ -137,7 +137,7 @@ RSpec.describe 'Gman identifier' do
|
|
137
137
|
end
|
138
138
|
|
139
139
|
context 'a county .gov' do
|
140
|
-
let(:domain) { '
|
140
|
+
let(:domain) { '211DUPAGE.GOV' }
|
141
141
|
|
142
142
|
it "knows it's a county" do
|
143
143
|
expect(subject).to be_a_county
|
@@ -161,11 +161,11 @@ RSpec.describe 'Gman identifier' do
|
|
161
161
|
end
|
162
162
|
|
163
163
|
it 'knows the state' do
|
164
|
-
expect(subject.state).to eql('
|
164
|
+
expect(subject.state).to eql('IL')
|
165
165
|
end
|
166
166
|
|
167
167
|
it 'knows the city' do
|
168
|
-
expect(subject.city).to eql('
|
168
|
+
expect(subject.city).to eql('Wheaton')
|
169
169
|
end
|
170
170
|
end
|
171
171
|
|
@@ -203,8 +203,8 @@ RSpec.describe 'Gman identifier' do
|
|
203
203
|
context "determining a domain's type" do
|
204
204
|
{
|
205
205
|
unknown: 'cityofperu.org',
|
206
|
-
|
207
|
-
|
206
|
+
'Canada municipal': 'acme.ca',
|
207
|
+
'Canada federal': 'canada.ca'
|
208
208
|
}.each do |expected, domain|
|
209
209
|
context "Given the #{domain} domain" do
|
210
210
|
let(:domain) { domain }
|
data/spec/gman/importer_spec.rb
CHANGED
@@ -9,7 +9,7 @@ RSpec.describe Gman::Importer do
|
|
9
9
|
let(:domain_list) { subject.domain_list }
|
10
10
|
|
11
11
|
before do
|
12
|
-
subject.instance_variable_set
|
12
|
+
subject.instance_variable_set :@logger, logger
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'inits the domain list' do
|
@@ -68,7 +68,7 @@ RSpec.describe Gman::Importer do
|
|
68
68
|
let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
|
69
69
|
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
70
70
|
|
71
|
-
before { subject.instance_variable_set
|
71
|
+
before { subject.instance_variable_set :@current, stubbed_list }
|
72
72
|
|
73
73
|
context 'writing' do
|
74
74
|
before { @current = subject.current.to_s }
|
data/spec/gman_spec.rb
CHANGED
@@ -18,7 +18,7 @@ RSpec.describe Gman do
|
|
18
18
|
|
19
19
|
context 'invalid domains' do
|
20
20
|
['foo.bar.com', 'bar@foo.biz', 'http://www.foo.biz',
|
21
|
-
'foo.uk', 'gov', 'foo@k12.champaign.il.us', 'foo@kii.gov.by',
|
21
|
+
'foo.uk', 'gov', 'foo@k12.champaign.il.us', # 'foo@kii.gov.by',
|
22
22
|
'foo', '', nil, ' ', 'foo.city.il.us', 'foo.ci.il.us',
|
23
23
|
'foo.zx.us', 'foo@mail.gov.ua', 'foo@gwu.edu'].each do |domain|
|
24
24
|
subject { described_class.new(domain) }
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 7.0.
|
4
|
+
version: 7.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colored
|
@@ -42,14 +42,14 @@ dependencies:
|
|
42
42
|
name: naughty_or_nice
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: 2.1.1
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 2.1.1
|
55
55
|
- !ruby/object:Gem::Dependency
|
@@ -184,14 +184,28 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: '
|
187
|
+
version: '1.4'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.4'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: ruby-progressbar
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '1.10'
|
188
202
|
type: :development
|
189
203
|
prerelease: false
|
190
204
|
version_requirements: !ruby/object:Gem::Requirement
|
191
205
|
requirements:
|
192
206
|
- - "~>"
|
193
207
|
- !ruby/object:Gem::Version
|
194
|
-
version: '
|
208
|
+
version: '1.10'
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
210
|
name: swot
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,17 +234,21 @@ files:
|
|
220
234
|
- ".github/ISSUE_TEMPLATE/bug_report.md"
|
221
235
|
- ".github/ISSUE_TEMPLATE/feature_request.md"
|
222
236
|
- ".github/config.yml"
|
237
|
+
- ".github/dependabot.yml"
|
223
238
|
- ".github/funding.yml"
|
224
239
|
- ".github/no-response.yml"
|
225
240
|
- ".github/release-drafter.yml"
|
226
241
|
- ".github/settings.yml"
|
227
242
|
- ".github/stale.yml"
|
243
|
+
- ".github/workflows/ci.yml"
|
244
|
+
- ".github/workflows/clean.yml"
|
245
|
+
- ".github/workflows/codeql-analysis.yml"
|
246
|
+
- ".github/workflows/validate.yml"
|
247
|
+
- ".github/workflows/vendor.yml"
|
228
248
|
- ".gitignore"
|
229
249
|
- ".rspec"
|
230
250
|
- ".rubocop.yml"
|
231
251
|
- ".rubocop_todo.yml"
|
232
|
-
- ".ruby-version"
|
233
|
-
- ".travis.yml"
|
234
252
|
- Gemfile
|
235
253
|
- LICENSE
|
236
254
|
- bin/gman
|
@@ -261,13 +279,11 @@ files:
|
|
261
279
|
- script/prune
|
262
280
|
- script/reconcile-us
|
263
281
|
- script/release
|
282
|
+
- script/validate-domains
|
264
283
|
- script/vendor
|
265
284
|
- script/vendor-federal-de
|
266
285
|
- script/vendor-gov-list
|
267
|
-
- script/vendor-municipal-de
|
268
|
-
- script/vendor-nl
|
269
286
|
- script/vendor-public-suffix
|
270
|
-
- script/vendor-se
|
271
287
|
- script/vendor-swot
|
272
288
|
- script/vendor-us
|
273
289
|
- spec/fixtures/domains.txt
|
@@ -284,35 +300,28 @@ files:
|
|
284
300
|
homepage: https://github.com/benbalter/gman
|
285
301
|
licenses:
|
286
302
|
- MIT
|
287
|
-
metadata:
|
303
|
+
metadata:
|
304
|
+
rubygems_mfa_required: 'true'
|
288
305
|
post_install_message:
|
289
306
|
rdoc_options: []
|
290
307
|
require_paths:
|
291
308
|
- lib
|
292
309
|
required_ruby_version: !ruby/object:Gem::Requirement
|
293
310
|
requirements:
|
294
|
-
- - "
|
311
|
+
- - ">="
|
295
312
|
- !ruby/object:Gem::Version
|
296
313
|
version: '2.5'
|
314
|
+
- - "<"
|
315
|
+
- !ruby/object:Gem::Version
|
316
|
+
version: '4.0'
|
297
317
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
298
318
|
requirements:
|
299
319
|
- - ">="
|
300
320
|
- !ruby/object:Gem::Version
|
301
321
|
version: '0'
|
302
322
|
requirements: []
|
303
|
-
rubygems_version: 3.
|
323
|
+
rubygems_version: 3.2.33
|
304
324
|
signing_key:
|
305
325
|
specification_version: 4
|
306
326
|
summary: Check if a given domain or email address belong to a governemnt entity
|
307
|
-
test_files:
|
308
|
-
- spec/fixtures/domains.txt
|
309
|
-
- spec/fixtures/obama.txt
|
310
|
-
- spec/gman/bin_spec.rb
|
311
|
-
- spec/gman/country_code_spec.rb
|
312
|
-
- spec/gman/domain_list_spec.rb
|
313
|
-
- spec/gman/domains_spec.rb
|
314
|
-
- spec/gman/identifier_spec.rb
|
315
|
-
- spec/gman/importer_spec.rb
|
316
|
-
- spec/gman/locality_spec.rb
|
317
|
-
- spec/gman_spec.rb
|
318
|
-
- spec/spec_helper.rb
|
327
|
+
test_files: []
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.6.6
|
data/.travis.yml
DELETED
data/script/vendor-municipal-de
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'csv'
|
5
|
-
require 'open-uri'
|
6
|
-
require './lib/gman'
|
7
|
-
|
8
|
-
url = 'http://www.mik.nrw.de/nc/themen-aufgaben/kommunales/kommunale-adressen.html?tx_szkommunaldb_pi1%5Bexport%5D=csv'
|
9
|
-
|
10
|
-
csv = URI.open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
|
11
|
-
|
12
|
-
# For some reason, the header row is actually the last row
|
13
|
-
# Pop the last line off the file and prepend it at the begining
|
14
|
-
# So that when we pass it to CSV it detects the headers properly
|
15
|
-
lines = csv.split("\n")
|
16
|
-
lines.unshift lines.pop
|
17
|
-
csv = lines.join("\n")
|
18
|
-
|
19
|
-
# Load municipal domains
|
20
|
-
data = CSV.parse(csv, headers: true, col_sep: ';')
|
21
|
-
domains = data.map { |row| row['Internet'] }
|
22
|
-
|
23
|
-
Gman::Importer.new('German Municipalities' => domains).import
|
data/script/vendor-nl
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
# See https://github.com/github/government.github.com/pull/367#issuecomment-102108763
|
5
|
-
|
6
|
-
require 'fileutils'
|
7
|
-
require './lib/gman'
|
8
|
-
|
9
|
-
FileUtils.rm_rf('almanak.overheid.nl')
|
10
|
-
commands = [
|
11
|
-
"wget -q -r -nc -np https://almanak.overheid.nl/
|
12
|
-
grep @ -rI almanak.overheid.nl/",
|
13
|
-
'cut -f 2 -d @',
|
14
|
-
"cut -f 1 -d '\"'",
|
15
|
-
'grep \\.nl$',
|
16
|
-
'sort',
|
17
|
-
'uniq'
|
18
|
-
]
|
19
|
-
domains = system commands.join('|')
|
20
|
-
|
21
|
-
Gman::Importer.new('Netherlands' => domains.split("\n")).import
|
data/script/vendor-se
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'mechanize'
|
5
|
-
require 'csv'
|
6
|
-
require './lib/gman'
|
7
|
-
|
8
|
-
url = 'http://www.myndighetsregistret.scb.se/Myndighet.aspx'
|
9
|
-
agent = Mechanize.new
|
10
|
-
page = agent.get(url)
|
11
|
-
form = page.forms.first
|
12
|
-
form.radiobuttons.find { |r| r.value = 'Textfil' }.check
|
13
|
-
submit_button = form.buttons.find { |b| b.type == 'submit' }
|
14
|
-
response = agent.submit(form, submit_button)
|
15
|
-
|
16
|
-
rows = CSV.parse(response.content, headers: true, col_sep: "\t")
|
17
|
-
domains = rows.map do |row|
|
18
|
-
row['Webbadress'] unless /UNIVERSITET/.match?(row['Namn'])
|
19
|
-
end
|
20
|
-
|
21
|
-
Gman::Importer.new('Swedish Administrative Authorities' => domains).import
|