gman 6.0.1 → 7.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
@@ -1,22 +0,0 @@
1
- # Contributing to Gman
2
-
3
- ## How to contribute
4
-
5
- 1. Fork the project
6
- 2. Create a descriptive branch
7
- 3. Make your change
8
- 4. Submit a pull request
9
-
10
- ## Code
11
-
12
- Open an issue, or submit a pull request
13
-
14
- ## Domains
15
-
16
- Domains live in `./config/domains.txt` as a list of TLDs and SLD+TLDs.
17
-
18
- Right now, the only valid government top level domains (TLDs), represent the US government and are `.gov`, and `.mil`.
19
-
20
- Secondary domains (e.g., `gov.uk`, or `mil.au`) detect non-US government entities.
21
-
22
- To add or remove a domain from the list of known government domains, simply edit the `domains.txt` file.
data/Rakefile DELETED
@@ -1,22 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
10
- require 'rake'
11
-
12
- require 'rake/testtask'
13
- Rake::TestTask.new(:test) do |test|
14
- test.libs << 'lib' << 'test'
15
- test.pattern = 'test/**/test_gman*.rb'
16
- test.verbose = true
17
- end
18
-
19
- desc 'Open console with gman loaded'
20
- task :console do
21
- exec 'irb -r ./lib/gman.rb'
22
- end
@@ -1,2 +0,0 @@
1
- // test
2
- gov
@@ -1,40 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- require 'minitest/autorun'
4
- require 'parallel'
5
- require 'open3'
6
-
7
- begin
8
- Bundler.setup(:default, :development)
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts 'Run `bundle install` to install missing gems'
12
- exit e.status_code
13
- end
14
-
15
- require 'shoulda'
16
-
17
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
18
- $LOAD_PATH.unshift(File.dirname(__FILE__))
19
- require_relative '../lib/gman'
20
- require_relative '../lib/gman/domain_list'
21
- require_relative '../lib/gman/importer'
22
-
23
- def bin_path(cmd = 'gman')
24
- File.expand_path "../bin/#{cmd}", File.dirname(__FILE__)
25
- end
26
-
27
- def test_bin(*args)
28
- Open3.capture2e('bundle', 'exec', bin_path, *args)
29
- end
30
-
31
- def fixture_path(fixture)
32
- File.expand_path "./fixtures/#{fixture}", File.dirname(__FILE__)
33
- end
34
-
35
- def with_env(key, value)
36
- old_env = ENV[key]
37
- ENV[key] = value
38
- yield
39
- ENV[key] = old_env
40
- end
@@ -1,62 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- VALID = ['foo.gov',
4
- 'http://foo.mil',
5
- 'foo@bar.gc.ca',
6
- 'foo.gov.au',
7
- 'https://www.foo.gouv.fr',
8
- 'foo@ci.champaign.il.us',
9
- 'foo.bar.baz.gov.au',
10
- 'foo@bar.gov.uk',
11
- 'foo.gov',
12
- 'foo.fed.us',
13
- 'foo.state.il.us',
14
- 'state.il.us',
15
- 'foo@af.mil',
16
- 'foo.gov.in'
17
- ].freeze
18
-
19
- INVALID = ['foo.bar.com',
20
- 'bar@foo.biz',
21
- 'http://www.foo.biz',
22
- 'foo.uk',
23
- 'gov',
24
- 'foo@k12.champaign.il.us',
25
- 'foo@kii.gov.by',
26
- 'foo',
27
- '',
28
- nil,
29
- ' ',
30
- 'foo.city.il.us',
31
- 'foo.ci.il.us',
32
- 'foo.zx.us',
33
- 'foo@mail.gov.ua'
34
- ].freeze
35
-
36
- class TestGman < Minitest::Test
37
- VALID.each do |domain|
38
- should "recognize #{domain} as a government domain" do
39
- assert Gman.valid?(domain)
40
- end
41
- end
42
-
43
- INVALID.each do |domain|
44
- should "recognize #{domain} as a non-government domain" do
45
- refute Gman.valid?(domain)
46
- end
47
- end
48
-
49
- should 'not allow educational domains' do
50
- assert_equal false, Gman.valid?('foo@gwu.edu')
51
- end
52
-
53
- should 'returns the path to domains.txt' do
54
- assert_equal true, File.exist?(Gman.list_path)
55
- end
56
-
57
- should 'stub domains when asked' do
58
- with_env 'GMAN_STUB_DOMAINS', 'true' do
59
- assert_equal fixture_path('domains.txt'), Gman.list_path
60
- end
61
- end
62
- end
@@ -1,75 +0,0 @@
1
- require_relative 'helper'
2
-
3
- class TestGmanBin < Minitest::Test
4
- def setup
5
- @output, @status = test_bin('whitehouse.gov')
6
- end
7
-
8
- should 'parse the domain' do
9
- output, = test_bin('bar.gov')
10
- assert_match(/Domain : bar.gov/, output)
11
-
12
- output, = test_bin('foo@bar.gov')
13
- assert_match(/Domain : bar.gov/, output)
14
-
15
- output, = test_bin('http://bar.gov/foo')
16
- assert_match(/Domain : bar.gov/, output)
17
- end
18
-
19
- should 'err on invalid domains' do
20
- output, status = test_bin('foo.invalid')
21
- assert_equal 1, status.exitstatus
22
- assert_match(/Invalid domain/, output)
23
- end
24
-
25
- should 'err on non-government domains' do
26
- output, status = test_bin('github.com')
27
- assert_equal 1, status.exitstatus
28
- assert_match(/Not a government domain/, output)
29
- end
30
-
31
- should 'know the type' do
32
- assert_match(/federal/, @output)
33
- assert_equal 0, @status.exitstatus
34
- end
35
-
36
- should 'know the agency' do
37
- assert_match(/Executive Office of the President/, @output)
38
- assert_equal 0, @status.exitstatus
39
- end
40
-
41
- should 'know the country' do
42
- assert_match(/United States/, @output)
43
- assert_equal 0, @status.exitstatus
44
- end
45
-
46
- should 'know the city' do
47
- assert_match(/Washington/, @output)
48
- assert_equal 0, @status.exitstatus
49
- end
50
-
51
- should 'know the state' do
52
- assert_match(/DC/, @output)
53
- assert_equal 0, @status.exitstatus
54
- end
55
-
56
- should 'allow you to disable colorization' do
57
- output, = test_bin('whitehouse.gov', '--no-color')
58
- refute_match(/\e\[32m/, output)
59
- end
60
-
61
- should 'color by default' do
62
- assert_match(/\e\[32m/, @output)
63
- end
64
-
65
- should 'show help text' do
66
- output, = test_bin
67
- assert_match(/Usage/i, output)
68
-
69
- output, = test_bin('')
70
- assert_match(/Usage/i, output)
71
-
72
- output, = test_bin('--no-color')
73
- assert_match(/Usage/i, output)
74
- end
75
- end
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
@@ -1,33 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- list = Gman::DomainList.current.list
24
- Parallel.each(list, in_threads: 2) do |group, domains|
25
- next if WHITELIST.include?(group)
26
- invalid.push domains.reject { |domain|
27
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
28
- importer.valid_domain?(domain, options)
29
- }
30
- end
31
- assert_equal [], invalid.flatten.reject(&:empty?)
32
- end
33
- end
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end
@@ -1,250 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGManImporter < Minitest::Test
4
- def setup
5
- @importer = Gman::Importer.new 'test' => ['example.com']
6
- @stdout = StringIO.new
7
- @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
-
9
- with_env 'GMAN_STUB_DOMAINS', 'true' do
10
- @original_domain_list = File.open(Gman.list_path).read
11
- end
12
- end
13
-
14
- def teardown
15
- with_env 'GMAN_STUB_DOMAINS', 'true' do
16
- File.write Gman.list_path, @original_domain_list
17
- end
18
- end
19
-
20
- should 'init the domain list' do
21
- assert_equal Gman::DomainList, @importer.domains.class
22
- assert_equal 1, @importer.domains.domains.count
23
- assert_equal 'example.com', @importer.domains.domains.first
24
- end
25
-
26
- should 'init the logger' do
27
- assert_equal Logger, @importer.logger.class
28
- end
29
-
30
- should 'return the current domain list' do
31
- assert_equal Gman::DomainList, @importer.current.class
32
- end
33
-
34
- should 'return the resolver' do
35
- assert_equal Resolv::DNS, @importer.resolver.class
36
- end
37
-
38
- context 'domain rejection' do
39
- should 'return false for a rejected domain' do
40
- refute @importer.reject 'example.com', 'reasons'
41
- end
42
-
43
- should 'return the reason when asked' do
44
- with_env 'RECONCILING', 'true' do
45
- assert_equal 'reasons', @importer.reject('example.com', 'reasons')
46
- end
47
- end
48
- end
49
-
50
- context 'manipulating the domain list' do
51
- should 'normalize domains within the domain list' do
52
- importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
53
- importer.send :normalize_domains!
54
- assert_equal 'example.com', importer.domains.domains.first
55
- end
56
-
57
- should 'remove invalid domains from the domain list' do
58
- importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
59
- importer.instance_variable_set '@logger', Logger.new(@stdout)
60
-
61
- assert_equal 2, importer.domains.domains.count
62
- importer.send :ensure_validity!
63
- assert_equal 1, importer.domains.domains.count
64
- end
65
-
66
- context 'writing the domain list' do
67
- should 'add domains to the current domain list' do
68
- with_env 'GMAN_STUB_DOMAINS', 'true' do
69
- domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
70
- importer = Gman::Importer.new domains
71
- importer.send :add_to_current
72
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
73
- assert_equal expected, File.open(Gman.list_path).read
74
- end
75
- end
76
-
77
- should 'import' do
78
- with_env 'GMAN_STUB_DOMAINS', 'true' do
79
- domains = {
80
- 'test' => ['www.example.com', 'goo.github.io'],
81
- 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
82
- }
83
-
84
- importer = Gman::Importer.new domains
85
- importer.instance_variable_set '@logger', Logger.new(@stdout)
86
- importer.import(skip_resolve: true)
87
-
88
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
89
- assert_equal expected, File.open(Gman.list_path).read
90
- end
91
- end
92
- end
93
- end
94
-
95
- context 'domain validation' do
96
- should 'allow valid domains' do
97
- assert @importer.send :ensure_valid, 'whitehouse.gov'
98
- end
99
-
100
- should 'reject empty domains' do
101
- refute @importer.send :ensure_valid, ''
102
- end
103
-
104
- should 'reject blacklisted domains' do
105
- refute @importer.send :ensure_valid, 'egovlink.com'
106
- end
107
-
108
- should 'reject invalid domains' do
109
- refute @importer.send :ensure_valid, 'foo.invalid'
110
- end
111
-
112
- should 'reject academic domains' do
113
- refute @importer.send :ensure_valid, 'harvard.edu'
114
- end
115
-
116
- should "reject regex'd domains" do
117
- refute @importer.send :ensure_valid, 'foo.github.io'
118
- end
119
- end
120
-
121
- context 'duplicate domains' do
122
- should 'know a unique domain is not a dupe' do
123
- refute @importer.send :dupe?, 'gman.com'
124
- end
125
-
126
- should "know when a domain's a dupe" do
127
- assert @importer.send :dupe?, 'gov'
128
- end
129
-
130
- should "know when a domain's a subdomain of an existing domain" do
131
- assert @importer.send :dupe?, 'whitehouse.gov'
132
- end
133
-
134
- should 'allow unique domains' do
135
- assert @importer.send :ensure_not_dupe, 'gman.com'
136
- end
137
-
138
- should 'reject duplicate domains' do
139
- refute @importer.send :ensure_not_dupe, 'gov'
140
- end
141
-
142
- should 'reject subdomains' do
143
- refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
144
- end
145
- end
146
-
147
- context 'domain resolution' do
148
- should 'know if a domain resolves' do
149
- assert @importer.domain_resolves?('github.com')
150
- assert @importer.send :ensure_resolves, 'github.com'
151
- end
152
-
153
- should "know if a domain doesn't resolve" do
154
- refute @importer.domain_resolves?('foo.invalid')
155
- refute @importer.send :ensure_resolves, 'foo.invalid'
156
- end
157
-
158
- should 'know if a domain has an IP' do
159
- end
160
-
161
- should 'know if a domain returns a given record' do
162
- end
163
- end
164
-
165
- context 'regex checks' do
166
- should 'pass valid domains' do
167
- assert @importer.send :ensure_regex, 'example.com'
168
- end
169
-
170
- should 'reject domains that begin with home.' do
171
- refute @importer.send :ensure_regex, 'home.example.com'
172
- end
173
-
174
- should 'reject domains that begin with user.' do
175
- refute @importer.send :ensure_regex, 'user.example.com'
176
- end
177
-
178
- should 'reject domains that begin with site.' do
179
- refute @importer.send :ensure_regex, 'user.example.com'
180
- end
181
-
182
- should 'reject weebly domains' do
183
- refute @importer.send :ensure_regex, 'foo.weebly.com'
184
- end
185
-
186
- should 'reject wordpress domains' do
187
- refute @importer.send :ensure_regex, 'foo.wordpress.com'
188
- end
189
-
190
- should 'reject govoffice domains' do
191
- refute @importer.send :ensure_regex, 'foo.govoffice.com'
192
- refute @importer.send :ensure_regex, 'foo.govoffice1.com'
193
- end
194
-
195
- should 'reject homestead domains' do
196
- refute @importer.send :ensure_regex, 'foo.homestead.com'
197
- end
198
-
199
- should 'reject wix domains' do
200
- refute @importer.send :ensure_regex, 'foo.wix.com'
201
- end
202
-
203
- should 'reject blogspot domains' do
204
- refute @importer.send :ensure_regex, 'foo.blogspot.com'
205
- end
206
-
207
- should 'reject tripod domains' do
208
- refute @importer.send :ensure_regex, 'foo.tripod.com'
209
- end
210
-
211
- should 'reject squarespace domains' do
212
- refute @importer.send :ensure_regex, 'foo.squarespace.com'
213
- end
214
-
215
- should 'reject github.io domains' do
216
- refute @importer.send :ensure_regex, 'foo.github.io'
217
- end
218
-
219
- should 'reject locality domains' do
220
- refute @importer.send :ensure_regex, 'ci.champaign.il.us'
221
- end
222
- end
223
-
224
- context 'normalizing domains' do
225
- should 'normalize URLs to domains' do
226
- expected = 'example.com'
227
- assert_equal expected, @importer.normalize_domain('http://example.com')
228
- end
229
-
230
- should 'strip WWW' do
231
- assert_equal 'example.com', @importer.normalize_domain('www.example.com')
232
- end
233
-
234
- should 'remove trailing slashes' do
235
- assert_equal 'example.com', @importer.normalize_domain('example.com/')
236
- end
237
-
238
- should 'remove paths' do
239
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
240
- end
241
-
242
- should 'remove paths with trailing slashes' do
243
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
244
- end
245
-
246
- should 'downcase' do
247
- assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
248
- end
249
- end
250
- end