gman 7.0.5 → 7.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/ci.yml +23 -0
- data/.github/workflows/clean.yml +31 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/validate.yml +30 -0
- data/.github/workflows/vendor.yml +29 -0
- data/.rubocop.yml +1 -5
- data/config/domains.txt +507 -10
- data/config/vendor/academic.txt +0 -1
- data/config/vendor/dotgovs.csv +8570 -5786
- data/gman.gemspec +4 -3
- data/lib/gman/identifier.rb +10 -10
- data/lib/gman/version.rb +1 -1
- data/lib/gman.rb +4 -5
- data/script/profile +1 -1
- data/script/prune +1 -1
- data/script/reconcile-us +1 -1
- data/script/validate-domains +34 -0
- data/script/vendor +1 -1
- data/script/vendor-gov-list +1 -10
- data/script/vendor-us +20 -16
- data/spec/gman/domain_list_spec.rb +2 -2
- data/spec/gman/identifier_spec.rb +5 -5
- data/spec/gman/importer_spec.rb +2 -2
- data/spec/gman_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +33 -24
- data/.ruby-version +0 -1
- data/.travis.yml +0 -4
- data/script/vendor-municipal-de +0 -23
- data/script/vendor-nl +0 -21
- data/script/vendor-se +0 -21
data/gman.gemspec
CHANGED
@@ -18,13 +18,12 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.licenses = ['MIT']
|
19
19
|
|
20
20
|
s.files = `git ls-files`.split("\n")
|
21
|
-
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
22
21
|
s.executables = `git ls-files -- bin/*`.split("\n").map do |f|
|
23
22
|
File.basename(f)
|
24
23
|
end
|
25
24
|
|
26
25
|
s.require_paths = ['lib']
|
27
|
-
s.required_ruby_version = '
|
26
|
+
s.required_ruby_version = '>= 2.5', '< 4.0'
|
28
27
|
|
29
28
|
s.add_dependency('colored', '~> 1.2')
|
30
29
|
s.add_dependency('iso_country_codes', '~> 0.6')
|
@@ -39,6 +38,8 @@ Gem::Specification.new do |s|
|
|
39
38
|
s.add_development_dependency('rubocop', '~> 1.0')
|
40
39
|
s.add_development_dependency('rubocop-performance', '~> 1.5')
|
41
40
|
s.add_development_dependency('rubocop-rspec', '~> 2.0')
|
42
|
-
s.add_development_dependency('ruby-prof', '~>
|
41
|
+
s.add_development_dependency('ruby-prof', '~> 1.4')
|
42
|
+
s.add_development_dependency('ruby-progressbar', '~> 1.10')
|
43
43
|
s.add_development_dependency('swot', '~> 1.0')
|
44
|
+
s.metadata['rubygems_mfa_required'] = 'true'
|
44
45
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -26,7 +26,7 @@ class Gman
|
|
26
26
|
def_hash_delegator :dotgov_listing, :Agency
|
27
27
|
def_hash_delegator :dotgov_listing, :Organization
|
28
28
|
def_hash_delegator :dotgov_listing, :City
|
29
|
-
def_hash_delegator :dotgov_listing, :
|
29
|
+
def_hash_delegator :dotgov_listing, :'Domain Type'
|
30
30
|
private :domain_type
|
31
31
|
|
32
32
|
def type
|
@@ -60,7 +60,7 @@ class Gman
|
|
60
60
|
def federal?
|
61
61
|
return false unless dotgov_listing
|
62
62
|
|
63
|
-
domain_type =~ /^Federal
|
63
|
+
domain_type =~ /^Federal/i
|
64
64
|
end
|
65
65
|
|
66
66
|
def city?
|
@@ -87,7 +87,7 @@ class Gman
|
|
87
87
|
if matches
|
88
88
|
matches[1] == 'state'
|
89
89
|
elsif dotgov_listing
|
90
|
-
domain_type == 'State/Local Govt'
|
90
|
+
domain_type == 'State/Local Govt' || domain_type == 'State'
|
91
91
|
else
|
92
92
|
false
|
93
93
|
end
|
@@ -108,14 +108,14 @@ class Gman
|
|
108
108
|
private
|
109
109
|
|
110
110
|
def list_category
|
111
|
-
@list_category
|
112
|
-
match = Gman.list.public_suffix_list.find(domain.to_s)
|
113
|
-
return unless match
|
111
|
+
return @list_category if defined?(@list_category)
|
114
112
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
113
|
+
match = Gman.list.public_suffix_list.find(domain.to_s)
|
114
|
+
return @list_category = nil unless match
|
115
|
+
|
116
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
|
117
|
+
matches = Gman.list.contents.match(regex)
|
118
|
+
@list_category = matches ? matches[1] : nil
|
119
119
|
end
|
120
120
|
|
121
121
|
def matches
|
data/lib/gman/version.rb
CHANGED
data/lib/gman.rb
CHANGED
@@ -43,12 +43,11 @@ class Gman
|
|
43
43
|
#
|
44
44
|
# Returns boolean true if a government domain
|
45
45
|
def valid?
|
46
|
-
@valid
|
47
|
-
return false unless valid_domain?
|
48
|
-
return false if academic?
|
46
|
+
return @valid if defined?(@valid)
|
49
47
|
|
50
|
-
|
51
|
-
|
48
|
+
@valid = false unless valid_domain?
|
49
|
+
@valid = false if academic?
|
50
|
+
@valid ||= locality? || public_suffix_valid?
|
52
51
|
end
|
53
52
|
|
54
53
|
def locality?
|
data/script/profile
CHANGED
@@ -8,7 +8,7 @@ require './lib/gman'
|
|
8
8
|
# without pre-loading the Gman list for an accurate benchmark
|
9
9
|
count = (ARGV[0] || 100).to_i
|
10
10
|
domains = File.readlines('./config/domains.txt')
|
11
|
-
domains = domains.
|
11
|
+
domains = domains.grep(/^[a-z0-9]/i)
|
12
12
|
domains = domains.sample(count)
|
13
13
|
|
14
14
|
RubyProf.start
|
data/script/prune
CHANGED
@@ -10,7 +10,7 @@ require_relative '../lib/gman/domain_list'
|
|
10
10
|
domains = ARGV
|
11
11
|
domains = domains.clone.map { |d| d.delete ',' }
|
12
12
|
|
13
|
-
list = File.
|
13
|
+
list = File.read('./config/domains.txt')
|
14
14
|
puts "Starting list: #{Gman::DomainList.current.count} domains"
|
15
15
|
|
16
16
|
domains.each do |domain|
|
data/script/reconcile-us
CHANGED
data/script/validate-domains
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# ! /usr/bin/env ruby
|
4
|
+
# frozen_string_literal: true
|
5
|
+
|
6
|
+
#
|
7
|
+
# Add one or more domains to a given group, running the standard import checks
|
8
|
+
#
|
9
|
+
# Usage: script/add [GROUP] [DOMAIN(S)]
|
10
|
+
|
11
|
+
require './lib/gman/importer'
|
12
|
+
require 'parallel'
|
13
|
+
|
14
|
+
importer = Gman::Importer.new({})
|
15
|
+
options = { skip_dupe: true, skip_resolve: false }
|
16
|
+
list_path = File.expand_path '../config/domains.txt', __dir__
|
17
|
+
|
18
|
+
importer.logger.info "Starting list: #{Gman::DomainList.current.count} domains"
|
19
|
+
|
20
|
+
Gman.list.to_h.values.shuffle.each do |domains|
|
21
|
+
# next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
|
22
|
+
|
23
|
+
Parallel.each(domains, progress: "Validating") do |domain|
|
24
|
+
next if domain.start_with?("!")
|
25
|
+
next if importer.valid_domain?(domain, options)
|
26
|
+
|
27
|
+
importer.logger.warn "#{domain} is not valid, removing from list"
|
28
|
+
list = File.read(list_path)
|
29
|
+
list.gsub!(/^#{Regexp.escape(domain)}$\n/, '')
|
30
|
+
File.write list_path, list
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
importer.logger.info "Ending list: #{Gman::DomainList.current.count} domains"
|
data/script/vendor
CHANGED
data/script/vendor-gov-list
CHANGED
@@ -3,15 +3,6 @@
|
|
3
3
|
# Vendors the full list of US .gov domains from https://github.com/GSA/data
|
4
4
|
# Usage: script/vendor-gov-list
|
5
5
|
|
6
|
-
# Set up
|
7
|
-
mkdir tmp
|
8
|
-
rm -Rf tmp/gsa-data
|
9
|
-
|
10
6
|
# Vendor the last file in the dotgov-domains folder that ends in `-full.csv`
|
11
|
-
|
12
|
-
pattern="tmp/gsa-data/dotgov-domains/*-full.csv"
|
13
|
-
files=( $pattern )
|
14
|
-
cp -f "${files[@]:(-1)}" config/vendor/dotgovs.csv
|
7
|
+
wget https://raw.githubusercontent.com/cisagov/dotgov-data/main/current-full.csv -O ./config/vendor/dotgovs.csv
|
15
8
|
|
16
|
-
# Clean up
|
17
|
-
rm -Rf tmp/gsa-data
|
data/script/vendor-us
CHANGED
@@ -14,25 +14,29 @@
|
|
14
14
|
|
15
15
|
require './lib/gman'
|
16
16
|
require 'open-uri'
|
17
|
+
require 'csv'
|
17
18
|
|
19
|
+
path = File.expand_path('./vendor-us-tmp.csv')
|
18
20
|
blacklist = %w[usagovQUASI usagovFEDgov]
|
19
|
-
source = 'https://raw.githubusercontent.com/GSA/govt-urls/
|
21
|
+
source = 'https://raw.githubusercontent.com/GSA/govt-urls/main/1_govt_urls_full.csv'
|
22
|
+
domains = {}
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
data =
|
24
|
+
begin
|
25
|
+
raw = URI.open(source).read
|
26
|
+
File.write(path, raw)
|
27
|
+
data = CSV.table(path)
|
25
28
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
group
|
31
|
-
domains[group]
|
32
|
-
|
33
|
-
domains[group].push row.sub("\.\t", '').strip
|
29
|
+
data.each do |domain|
|
30
|
+
next if domain[:type_of_government] == 'Quasigovernmental'
|
31
|
+
|
32
|
+
group = "US #{domain[:type_of_government]}"
|
33
|
+
group += " (#{domain[:state]})" if domain[:type_of_government] != 'Federal' && domain[:state]
|
34
|
+
domains[group] ||= []
|
35
|
+
domains[group] << domain[:domain_name]
|
34
36
|
end
|
35
|
-
end
|
36
37
|
|
37
|
-
domains.reject! { |g, _| blacklist.include?(g) }
|
38
|
-
Gman::Importer.new(domains).import
|
38
|
+
domains.reject! { |g, _| blacklist.include?(g) }
|
39
|
+
Gman::Importer.new(domains).import
|
40
|
+
ensure
|
41
|
+
File.delete(path)
|
42
|
+
end
|
data/spec/gman/domain_list_spec.rb
CHANGED
@@ -69,7 +69,7 @@ RSpec.describe Gman::DomainList do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'outputs public suffix format' do
|
72
|
-
expect(subject.to_s).to match("// Canada federal\ncanada
|
72
|
+
expect(subject.to_s).to match("// Canada federal\ncanada.ca\n")
|
73
73
|
end
|
74
74
|
|
75
75
|
it "finds a domain's parent" do
|
@@ -80,7 +80,7 @@ RSpec.describe Gman::DomainList do
|
|
80
80
|
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
81
81
|
|
82
82
|
before do
|
83
|
-
subject.instance_variable_set(
|
83
|
+
subject.instance_variable_set(:@path, stubbed_list_path)
|
84
84
|
end
|
85
85
|
|
86
86
|
context 'with list data stubbed' do
|
data/spec/gman/identifier_spec.rb
CHANGED
@@ -137,7 +137,7 @@ RSpec.describe 'Gman identifier' do
|
|
137
137
|
end
|
138
138
|
|
139
139
|
context 'a county .gov' do
|
140
|
-
let(:domain) { '
|
140
|
+
let(:domain) { '211DUPAGE.GOV' }
|
141
141
|
|
142
142
|
it "knows it's a county" do
|
143
143
|
expect(subject).to be_a_county
|
@@ -161,11 +161,11 @@ RSpec.describe 'Gman identifier' do
|
|
161
161
|
end
|
162
162
|
|
163
163
|
it 'knows the state' do
|
164
|
-
expect(subject.state).to eql('
|
164
|
+
expect(subject.state).to eql('IL')
|
165
165
|
end
|
166
166
|
|
167
167
|
it 'knows the city' do
|
168
|
-
expect(subject.city).to eql('
|
168
|
+
expect(subject.city).to eql('Wheaton')
|
169
169
|
end
|
170
170
|
end
|
171
171
|
|
@@ -203,8 +203,8 @@ RSpec.describe 'Gman identifier' do
|
|
203
203
|
context "determining a domain's type" do
|
204
204
|
{
|
205
205
|
unknown: 'cityofperu.org',
|
206
|
-
|
207
|
-
|
206
|
+
'Canada municipal': 'acme.ca',
|
207
|
+
'Canada federal': 'canada.ca'
|
208
208
|
}.each do |expected, domain|
|
209
209
|
context "Given the #{domain} domain" do
|
210
210
|
let(:domain) { domain }
|
data/spec/gman/importer_spec.rb
CHANGED
@@ -9,7 +9,7 @@ RSpec.describe Gman::Importer do
|
|
9
9
|
let(:domain_list) { subject.domain_list }
|
10
10
|
|
11
11
|
before do
|
12
|
-
subject.instance_variable_set
|
12
|
+
subject.instance_variable_set :@logger, logger
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'inits the domain list' do
|
@@ -68,7 +68,7 @@ RSpec.describe Gman::Importer do
|
|
68
68
|
let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
|
69
69
|
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
70
70
|
|
71
|
-
before { subject.instance_variable_set
|
71
|
+
before { subject.instance_variable_set :@current, stubbed_list }
|
72
72
|
|
73
73
|
context 'writing' do
|
74
74
|
before { @current = subject.current.to_s }
|
data/spec/gman_spec.rb
CHANGED
@@ -18,7 +18,7 @@ RSpec.describe Gman do
|
|
18
18
|
|
19
19
|
context 'invalid domains' do
|
20
20
|
['foo.bar.com', 'bar@foo.biz', 'http://www.foo.biz',
|
21
|
-
'foo.uk', 'gov', 'foo@k12.champaign.il.us', 'foo@kii.gov.by',
|
21
|
+
'foo.uk', 'gov', 'foo@k12.champaign.il.us', # 'foo@kii.gov.by',
|
22
22
|
'foo', '', nil, ' ', 'foo.city.il.us', 'foo.ci.il.us',
|
23
23
|
'foo.zx.us', 'foo@mail.gov.ua', 'foo@gwu.edu'].each do |domain|
|
24
24
|
subject { described_class.new(domain) }
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 7.0.
|
4
|
+
version: 7.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colored
|
@@ -184,14 +184,28 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: '
|
187
|
+
version: '1.4'
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: '
|
194
|
+
version: '1.4'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: ruby-progressbar
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '1.10'
|
202
|
+
type: :development
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '1.10'
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
210
|
name: swot
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,17 +234,21 @@ files:
|
|
220
234
|
- ".github/ISSUE_TEMPLATE/bug_report.md"
|
221
235
|
- ".github/ISSUE_TEMPLATE/feature_request.md"
|
222
236
|
- ".github/config.yml"
|
237
|
+
- ".github/dependabot.yml"
|
223
238
|
- ".github/funding.yml"
|
224
239
|
- ".github/no-response.yml"
|
225
240
|
- ".github/release-drafter.yml"
|
226
241
|
- ".github/settings.yml"
|
227
242
|
- ".github/stale.yml"
|
243
|
+
- ".github/workflows/ci.yml"
|
244
|
+
- ".github/workflows/clean.yml"
|
245
|
+
- ".github/workflows/codeql-analysis.yml"
|
246
|
+
- ".github/workflows/validate.yml"
|
247
|
+
- ".github/workflows/vendor.yml"
|
228
248
|
- ".gitignore"
|
229
249
|
- ".rspec"
|
230
250
|
- ".rubocop.yml"
|
231
251
|
- ".rubocop_todo.yml"
|
232
|
-
- ".ruby-version"
|
233
|
-
- ".travis.yml"
|
234
252
|
- Gemfile
|
235
253
|
- LICENSE
|
236
254
|
- bin/gman
|
@@ -261,13 +279,11 @@ files:
|
|
261
279
|
- script/prune
|
262
280
|
- script/reconcile-us
|
263
281
|
- script/release
|
282
|
+
- script/validate-domains
|
264
283
|
- script/vendor
|
265
284
|
- script/vendor-federal-de
|
266
285
|
- script/vendor-gov-list
|
267
|
-
- script/vendor-municipal-de
|
268
|
-
- script/vendor-nl
|
269
286
|
- script/vendor-public-suffix
|
270
|
-
- script/vendor-se
|
271
287
|
- script/vendor-swot
|
272
288
|
- script/vendor-us
|
273
289
|
- spec/fixtures/domains.txt
|
@@ -284,35 +300,28 @@ files:
|
|
284
300
|
homepage: https://github.com/benbalter/gman
|
285
301
|
licenses:
|
286
302
|
- MIT
|
287
|
-
metadata:
|
303
|
+
metadata:
|
304
|
+
rubygems_mfa_required: 'true'
|
288
305
|
post_install_message:
|
289
306
|
rdoc_options: []
|
290
307
|
require_paths:
|
291
308
|
- lib
|
292
309
|
required_ruby_version: !ruby/object:Gem::Requirement
|
293
310
|
requirements:
|
294
|
-
- - "
|
311
|
+
- - ">="
|
295
312
|
- !ruby/object:Gem::Version
|
296
313
|
version: '2.5'
|
314
|
+
- - "<"
|
315
|
+
- !ruby/object:Gem::Version
|
316
|
+
version: '4.0'
|
297
317
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
298
318
|
requirements:
|
299
319
|
- - ">="
|
300
320
|
- !ruby/object:Gem::Version
|
301
321
|
version: '0'
|
302
322
|
requirements: []
|
303
|
-
rubygems_version: 3.
|
323
|
+
rubygems_version: 3.2.33
|
304
324
|
signing_key:
|
305
325
|
specification_version: 4
|
306
326
|
summary: Check if a given domain or email address belong to a governemnt entity
|
307
|
-
test_files:
|
308
|
-
- spec/fixtures/domains.txt
|
309
|
-
- spec/fixtures/obama.txt
|
310
|
-
- spec/gman/bin_spec.rb
|
311
|
-
- spec/gman/country_code_spec.rb
|
312
|
-
- spec/gman/domain_list_spec.rb
|
313
|
-
- spec/gman/domains_spec.rb
|
314
|
-
- spec/gman/identifier_spec.rb
|
315
|
-
- spec/gman/importer_spec.rb
|
316
|
-
- spec/gman/locality_spec.rb
|
317
|
-
- spec/gman_spec.rb
|
318
|
-
- spec/spec_helper.rb
|
327
|
+
test_files: []
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.6.6
|
data/.travis.yml
DELETED
data/script/vendor-municipal-de
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'csv'
|
5
|
-
require 'open-uri'
|
6
|
-
require './lib/gman'
|
7
|
-
|
8
|
-
url = 'http://www.mik.nrw.de/nc/themen-aufgaben/kommunales/kommunale-adressen.html?tx_szkommunaldb_pi1%5Bexport%5D=csv'
|
9
|
-
|
10
|
-
csv = URI.open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
|
11
|
-
|
12
|
-
# For some reason, the header row is actually the last row
|
13
|
-
# Pop the last line off the file and prepend it at the begining
|
14
|
-
# So that when we pass it to CSV it detects the headers properly
|
15
|
-
lines = csv.split("\n")
|
16
|
-
lines.unshift lines.pop
|
17
|
-
csv = lines.join("\n")
|
18
|
-
|
19
|
-
# Load municipal domains
|
20
|
-
data = CSV.parse(csv, headers: true, col_sep: ';')
|
21
|
-
domains = data.map { |row| row['Internet'] }
|
22
|
-
|
23
|
-
Gman::Importer.new('German Municipalities' => domains).import
|
data/script/vendor-nl
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
# See https://github.com/github/government.github.com/pull/367#issuecomment-102108763
|
5
|
-
|
6
|
-
require 'fileutils'
|
7
|
-
require './lib/gman'
|
8
|
-
|
9
|
-
FileUtils.rm_rf('almanak.overheid.nl')
|
10
|
-
commands = [
|
11
|
-
"wget -q -r -nc -np https://almanak.overheid.nl/
|
12
|
-
grep @ -rI almanak.overheid.nl/",
|
13
|
-
'cut -f 2 -d @',
|
14
|
-
"cut -f 1 -d '\"'",
|
15
|
-
'grep \\.nl$',
|
16
|
-
'sort',
|
17
|
-
'uniq'
|
18
|
-
]
|
19
|
-
domains = system commands.join('|')
|
20
|
-
|
21
|
-
Gman::Importer.new('Netherlands' => domains.split("\n")).import
|
data/script/vendor-se
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
#! /usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'mechanize'
|
5
|
-
require 'csv'
|
6
|
-
require './lib/gman'
|
7
|
-
|
8
|
-
url = 'http://www.myndighetsregistret.scb.se/Myndighet.aspx'
|
9
|
-
agent = Mechanize.new
|
10
|
-
page = agent.get(url)
|
11
|
-
form = page.forms.first
|
12
|
-
form.radiobuttons.find { |r| r.value = 'Textfil' }.check
|
13
|
-
submit_button = form.buttons.find { |b| b.type == 'submit' }
|
14
|
-
response = agent.submit(form, submit_button)
|
15
|
-
|
16
|
-
rows = CSV.parse(response.content, headers: true, col_sep: "\t")
|
17
|
-
domains = rows.map do |row|
|
18
|
-
row['Webbadress'] unless /UNIVERSITET/.match?(row['Namn'])
|
19
|
-
end
|
20
|
-
|
21
|
-
Gman::Importer.new('Swedish Administrative Authorities' => domains).import
|