gman 6.0.1 → 7.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
data/CONTRIBUTING.md DELETED
@@ -1,22 +0,0 @@
1
- # Contributing to Gman
2
-
3
- ## How to contribute
4
-
5
- 1. Fork the project
6
- 2. Create a descriptive branch
7
- 3. Make your change
8
- 4. Submit a pull request
9
-
10
- ## Code
11
-
12
- Open an issue, or submit a pull request
13
-
14
- ## Domains
15
-
16
- Domains live in `./config/domains.txt` as a list of TLDs and SLD+TLDs.
17
-
18
- Right now, the only valid government top level domains (TLDs), represent the US government and are `.gov`, and `.mil`.
19
-
20
- Secondary domains (e.g., `gov.uk`, or `mil.au`) detect non-US government entities.
21
-
22
- To add or remove a domain from the list of known government domains, simply edit the `domains.txt` file.
data/Rakefile DELETED
@@ -1,22 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts 'Run `bundle install` to install missing gems'
8
- exit e.status_code
9
- end
10
- require 'rake'
11
-
12
- require 'rake/testtask'
13
- Rake::TestTask.new(:test) do |test|
14
- test.libs << 'lib' << 'test'
15
- test.pattern = 'test/**/test_gman*.rb'
16
- test.verbose = true
17
- end
18
-
19
- desc 'Open console with gman loaded'
20
- task :console do
21
- exec 'irb -r ./lib/gman.rb'
22
- end
data/test/fixtures/domains.txt DELETED
@@ -1,2 +0,0 @@
1
- // test
2
- gov
data/test/helper.rb DELETED
@@ -1,40 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- require 'minitest/autorun'
4
- require 'parallel'
5
- require 'open3'
6
-
7
- begin
8
- Bundler.setup(:default, :development)
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts 'Run `bundle install` to install missing gems'
12
- exit e.status_code
13
- end
14
-
15
- require 'shoulda'
16
-
17
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
18
- $LOAD_PATH.unshift(File.dirname(__FILE__))
19
- require_relative '../lib/gman'
20
- require_relative '../lib/gman/domain_list'
21
- require_relative '../lib/gman/importer'
22
-
23
- def bin_path(cmd = 'gman')
24
- File.expand_path "../bin/#{cmd}", File.dirname(__FILE__)
25
- end
26
-
27
- def test_bin(*args)
28
- Open3.capture2e('bundle', 'exec', bin_path, *args)
29
- end
30
-
31
- def fixture_path(fixture)
32
- File.expand_path "./fixtures/#{fixture}", File.dirname(__FILE__)
33
- end
34
-
35
- def with_env(key, value)
36
- old_env = ENV[key]
37
- ENV[key] = value
38
- yield
39
- ENV[key] = old_env
40
- end
data/test/test_gman.rb DELETED
@@ -1,62 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- VALID = ['foo.gov',
4
- 'http://foo.mil',
5
- 'foo@bar.gc.ca',
6
- 'foo.gov.au',
7
- 'https://www.foo.gouv.fr',
8
- 'foo@ci.champaign.il.us',
9
- 'foo.bar.baz.gov.au',
10
- 'foo@bar.gov.uk',
11
- 'foo.gov',
12
- 'foo.fed.us',
13
- 'foo.state.il.us',
14
- 'state.il.us',
15
- 'foo@af.mil',
16
- 'foo.gov.in'
17
- ].freeze
18
-
19
- INVALID = ['foo.bar.com',
20
- 'bar@foo.biz',
21
- 'http://www.foo.biz',
22
- 'foo.uk',
23
- 'gov',
24
- 'foo@k12.champaign.il.us',
25
- 'foo@kii.gov.by',
26
- 'foo',
27
- '',
28
- nil,
29
- ' ',
30
- 'foo.city.il.us',
31
- 'foo.ci.il.us',
32
- 'foo.zx.us',
33
- 'foo@mail.gov.ua'
34
- ].freeze
35
-
36
- class TestGman < Minitest::Test
37
- VALID.each do |domain|
38
- should "recognize #{domain} as a government domain" do
39
- assert Gman.valid?(domain)
40
- end
41
- end
42
-
43
- INVALID.each do |domain|
44
- should "recognize #{domain} as a non-government domain" do
45
- refute Gman.valid?(domain)
46
- end
47
- end
48
-
49
- should 'not allow educational domains' do
50
- assert_equal false, Gman.valid?('foo@gwu.edu')
51
- end
52
-
53
- should 'returns the path to domains.txt' do
54
- assert_equal true, File.exist?(Gman.list_path)
55
- end
56
-
57
- should 'stub domains when asked' do
58
- with_env 'GMAN_STUB_DOMAINS', 'true' do
59
- assert_equal fixture_path('domains.txt'), Gman.list_path
60
- end
61
- end
62
- end
data/test/test_gman_bin.rb DELETED
@@ -1,75 +0,0 @@
1
- require_relative 'helper'
2
-
3
- class TestGmanBin < Minitest::Test
4
- def setup
5
- @output, @status = test_bin('whitehouse.gov')
6
- end
7
-
8
- should 'parse the domain' do
9
- output, = test_bin('bar.gov')
10
- assert_match(/Domain : bar.gov/, output)
11
-
12
- output, = test_bin('foo@bar.gov')
13
- assert_match(/Domain : bar.gov/, output)
14
-
15
- output, = test_bin('http://bar.gov/foo')
16
- assert_match(/Domain : bar.gov/, output)
17
- end
18
-
19
- should 'err on invalid domains' do
20
- output, status = test_bin('foo.invalid')
21
- assert_equal 1, status.exitstatus
22
- assert_match(/Invalid domain/, output)
23
- end
24
-
25
- should 'err on non-government domains' do
26
- output, status = test_bin('github.com')
27
- assert_equal 1, status.exitstatus
28
- assert_match(/Not a government domain/, output)
29
- end
30
-
31
- should 'know the type' do
32
- assert_match(/federal/, @output)
33
- assert_equal 0, @status.exitstatus
34
- end
35
-
36
- should 'know the agency' do
37
- assert_match(/Executive Office of the President/, @output)
38
- assert_equal 0, @status.exitstatus
39
- end
40
-
41
- should 'know the country' do
42
- assert_match(/United States/, @output)
43
- assert_equal 0, @status.exitstatus
44
- end
45
-
46
- should 'know the city' do
47
- assert_match(/Washington/, @output)
48
- assert_equal 0, @status.exitstatus
49
- end
50
-
51
- should 'know the state' do
52
- assert_match(/DC/, @output)
53
- assert_equal 0, @status.exitstatus
54
- end
55
-
56
- should 'allow you to disable colorization' do
57
- output, = test_bin('whitehouse.gov', '--no-color')
58
- refute_match(/\e\[32m/, output)
59
- end
60
-
61
- should 'color by default' do
62
- assert_match(/\e\[32m/, @output)
63
- end
64
-
65
- should 'show help text' do
66
- output, = test_bin
67
- assert_match(/Usage/i, output)
68
-
69
- output, = test_bin('')
70
- assert_match(/Usage/i, output)
71
-
72
- output, = test_bin('--no-color')
73
- assert_match(/Usage/i, output)
74
- end
75
- end
data/test/test_gman_country_codes.rb DELETED
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
data/test/test_gman_domains.rb DELETED
@@ -1,33 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- list = Gman::DomainList.current.list
24
- Parallel.each(list, in_threads: 2) do |group, domains|
25
- next if WHITELIST.include?(group)
26
- invalid.push domains.reject { |domain|
27
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
28
- importer.valid_domain?(domain, options)
29
- }
30
- end
31
- assert_equal [], invalid.flatten.reject(&:empty?)
32
- end
33
- end
data/test/test_gman_filter.rb DELETED
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
data/test/test_gman_identifier.rb DELETED
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end
data/test/test_gman_importer.rb DELETED
@@ -1,250 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGManImporter < Minitest::Test
4
- def setup
5
- @importer = Gman::Importer.new 'test' => ['example.com']
6
- @stdout = StringIO.new
7
- @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
-
9
- with_env 'GMAN_STUB_DOMAINS', 'true' do
10
- @original_domain_list = File.open(Gman.list_path).read
11
- end
12
- end
13
-
14
- def teardown
15
- with_env 'GMAN_STUB_DOMAINS', 'true' do
16
- File.write Gman.list_path, @original_domain_list
17
- end
18
- end
19
-
20
- should 'init the domain list' do
21
- assert_equal Gman::DomainList, @importer.domains.class
22
- assert_equal 1, @importer.domains.domains.count
23
- assert_equal 'example.com', @importer.domains.domains.first
24
- end
25
-
26
- should 'init the logger' do
27
- assert_equal Logger, @importer.logger.class
28
- end
29
-
30
- should 'return the current domain list' do
31
- assert_equal Gman::DomainList, @importer.current.class
32
- end
33
-
34
- should 'return the resolver' do
35
- assert_equal Resolv::DNS, @importer.resolver.class
36
- end
37
-
38
- context 'domain rejection' do
39
- should 'return false for a rejected domain' do
40
- refute @importer.reject 'example.com', 'reasons'
41
- end
42
-
43
- should 'return the reason when asked' do
44
- with_env 'RECONCILING', 'true' do
45
- assert_equal 'reasons', @importer.reject('example.com', 'reasons')
46
- end
47
- end
48
- end
49
-
50
- context 'manipulating the domain list' do
51
- should 'normalize domains within the domain list' do
52
- importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
53
- importer.send :normalize_domains!
54
- assert_equal 'example.com', importer.domains.domains.first
55
- end
56
-
57
- should 'remove invalid domains from the domain list' do
58
- importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
59
- importer.instance_variable_set '@logger', Logger.new(@stdout)
60
-
61
- assert_equal 2, importer.domains.domains.count
62
- importer.send :ensure_validity!
63
- assert_equal 1, importer.domains.domains.count
64
- end
65
-
66
- context 'writing the domain list' do
67
- should 'add domains to the current domain list' do
68
- with_env 'GMAN_STUB_DOMAINS', 'true' do
69
- domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
70
- importer = Gman::Importer.new domains
71
- importer.send :add_to_current
72
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
73
- assert_equal expected, File.open(Gman.list_path).read
74
- end
75
- end
76
-
77
- should 'import' do
78
- with_env 'GMAN_STUB_DOMAINS', 'true' do
79
- domains = {
80
- 'test' => ['www.example.com', 'goo.github.io'],
81
- 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
82
- }
83
-
84
- importer = Gman::Importer.new domains
85
- importer.instance_variable_set '@logger', Logger.new(@stdout)
86
- importer.import(skip_resolve: true)
87
-
88
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
89
- assert_equal expected, File.open(Gman.list_path).read
90
- end
91
- end
92
- end
93
- end
94
-
95
- context 'domain validation' do
96
- should 'allow valid domains' do
97
- assert @importer.send :ensure_valid, 'whitehouse.gov'
98
- end
99
-
100
- should 'reject empty domains' do
101
- refute @importer.send :ensure_valid, ''
102
- end
103
-
104
- should 'reject blacklisted domains' do
105
- refute @importer.send :ensure_valid, 'egovlink.com'
106
- end
107
-
108
- should 'reject invalid domains' do
109
- refute @importer.send :ensure_valid, 'foo.invalid'
110
- end
111
-
112
- should 'reject academic domains' do
113
- refute @importer.send :ensure_valid, 'harvard.edu'
114
- end
115
-
116
- should "reject regex'd domains" do
117
- refute @importer.send :ensure_valid, 'foo.github.io'
118
- end
119
- end
120
-
121
- context 'duplicate domains' do
122
- should 'know a unique domain is not a dupe' do
123
- refute @importer.send :dupe?, 'gman.com'
124
- end
125
-
126
- should "know when a domain's a dupe" do
127
- assert @importer.send :dupe?, 'gov'
128
- end
129
-
130
- should "know when a domain's a subdomain of an existing domain" do
131
- assert @importer.send :dupe?, 'whitehouse.gov'
132
- end
133
-
134
- should 'allow unique domains' do
135
- assert @importer.send :ensure_not_dupe, 'gman.com'
136
- end
137
-
138
- should 'reject duplicate domains' do
139
- refute @importer.send :ensure_not_dupe, 'gov'
140
- end
141
-
142
- should 'reject subdomains' do
143
- refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
144
- end
145
- end
146
-
147
- context 'domain resolution' do
148
- should 'know if a domain resolves' do
149
- assert @importer.domain_resolves?('github.com')
150
- assert @importer.send :ensure_resolves, 'github.com'
151
- end
152
-
153
- should "know if a domain doesn't resolve" do
154
- refute @importer.domain_resolves?('foo.invalid')
155
- refute @importer.send :ensure_resolves, 'foo.invalid'
156
- end
157
-
158
- should 'know if a domain has an IP' do
159
- end
160
-
161
- should 'know if a domain returns a given record' do
162
- end
163
- end
164
-
165
- context 'regex checks' do
166
- should 'pass valid domains' do
167
- assert @importer.send :ensure_regex, 'example.com'
168
- end
169
-
170
- should 'reject domains that begin with home.' do
171
- refute @importer.send :ensure_regex, 'home.example.com'
172
- end
173
-
174
- should 'reject domains that begin with user.' do
175
- refute @importer.send :ensure_regex, 'user.example.com'
176
- end
177
-
178
- should 'reject domains that begin with site.' do
179
- refute @importer.send :ensure_regex, 'user.example.com'
180
- end
181
-
182
- should 'reject weebly domains' do
183
- refute @importer.send :ensure_regex, 'foo.weebly.com'
184
- end
185
-
186
- should 'reject wordpress domains' do
187
- refute @importer.send :ensure_regex, 'foo.wordpress.com'
188
- end
189
-
190
- should 'reject govoffice domains' do
191
- refute @importer.send :ensure_regex, 'foo.govoffice.com'
192
- refute @importer.send :ensure_regex, 'foo.govoffice1.com'
193
- end
194
-
195
- should 'reject homestead domains' do
196
- refute @importer.send :ensure_regex, 'foo.homestead.com'
197
- end
198
-
199
- should 'reject wix domains' do
200
- refute @importer.send :ensure_regex, 'foo.wix.com'
201
- end
202
-
203
- should 'reject blogspot domains' do
204
- refute @importer.send :ensure_regex, 'foo.blogspot.com'
205
- end
206
-
207
- should 'reject tripod domains' do
208
- refute @importer.send :ensure_regex, 'foo.tripod.com'
209
- end
210
-
211
- should 'reject squarespace domains' do
212
- refute @importer.send :ensure_regex, 'foo.squarespace.com'
213
- end
214
-
215
- should 'reject github.io domains' do
216
- refute @importer.send :ensure_regex, 'foo.github.io'
217
- end
218
-
219
- should 'reject locality domains' do
220
- refute @importer.send :ensure_regex, 'ci.champaign.il.us'
221
- end
222
- end
223
-
224
- context 'normalizing domains' do
225
- should 'normalize URLs to domains' do
226
- expected = 'example.com'
227
- assert_equal expected, @importer.normalize_domain('http://example.com')
228
- end
229
-
230
- should 'strip WWW' do
231
- assert_equal 'example.com', @importer.normalize_domain('www.example.com')
232
- end
233
-
234
- should 'remove trailing slashes' do
235
- assert_equal 'example.com', @importer.normalize_domain('example.com/')
236
- end
237
-
238
- should 'remove paths' do
239
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
240
- end
241
-
242
- should 'remove paths with trailing slashes' do
243
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
244
- end
245
-
246
- should 'downcase' do
247
- assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
248
- end
249
- end
250
- end