gman 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (9)
  1. data/Gemfile +6 -6
  2. data/README.md +0 -10
  3. data/Rakefile +9 -1
  4. data/gman.gemspec +1 -2
  5. data/lib/domains.txt +10031 -9909
  6. data/lib/gman.rb +56 -18
  7. data/test/helper.rb +21 -20
  8. data/test/test_gman.rb +83 -122
  9. metadata +2 -18
data/lib/gman.rb CHANGED
@@ -2,10 +2,48 @@ require 'public_suffix'
2
2
  require 'yaml'
3
3
  require 'swot'
4
4
  require "addressable/uri"
5
- require "email_veracity"
6
5
 
7
6
  module Gman
8
7
 
8
+ # Source: http://bit.ly/1n2X9iv
9
+ EMAIL_REGEX = %r{
10
+ ^
11
+ (
12
+ [\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`\{\|\}\~]+
13
+ \.
14
+ )
15
+ *
16
+ [\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`\{\|\}\~]+
17
+ @
18
+ (
19
+ (
20
+ (
21
+ (
22
+ (
23
+ [a-z0-9]{1}
24
+ [a-z0-9\-]{0,62}
25
+ [a-z0-9]{1}
26
+ )
27
+ |
28
+ [a-z]
29
+ )
30
+ \.
31
+ )+
32
+ [a-z]{2,6}
33
+ )
34
+ |
35
+ (
36
+ \d{1,3}
37
+ \.
38
+ ){3}
39
+ \d{1,3}
40
+ (
41
+ \:\d{1,5}
42
+ )?
43
+ )
44
+ $
45
+ }xi
46
+
9
47
  class << self
10
48
 
11
49
  # Normalizes and checks if a given string represents a government domain
@@ -16,20 +54,12 @@ module Gman
16
54
  # "foo.gov.uk"
17
55
  # "http://foo.bar.gov"
18
56
  #
19
- # check_mx - if an email is passed, check the domain for an mx record
20
- #
21
57
  # Returns boolean true if a government domain
22
- def valid?(text, check_mx=false)
58
+ def valid?(text)
23
59
 
24
60
  domain = get_domain text
25
61
  return false unless PublicSuffix.valid?(domain)
26
62
 
27
- # validate mx record
28
- if check_mx && email?(text)
29
- EmailVeracity::Config[:skip_lookup] = false
30
- return false unless EmailVeracity::Address.new(text).valid?
31
- end
32
-
33
63
  # Ensure non-edu
34
64
  return false if Swot::is_academic?(domain)
35
65
 
@@ -44,7 +74,7 @@ module Gman
44
74
  # returns an instance of our custom public suffix list
45
75
  # list behaves like PublicSuffix::List but is limited to our whitelisted domains
46
76
  def list
47
- @list ||= PublicSuffix::List::parse( File.new(File.join(File.dirname(__FILE__), "domains.txt"), "r:utf-8"))
77
+ @list ||= PublicSuffix::List::parse(File.new(list_path, "r:utf-8"))
48
78
  end
49
79
 
50
80
  # Get the FQDN name from a URL or email address.
@@ -60,12 +90,16 @@ module Gman
60
90
  if uri.host # valid https?://* URI
61
91
  uri.host
62
92
  elsif email?(text)
63
- EmailVeracity::Address.new(text).domain.to_s
93
+ text.match(/@([\w\.\-]+)\Z/i)[1]
64
94
  else # url sans http://
65
- uri = Addressable::URI.parse("http://#{text}")
66
- # properly parse http://foo edge cases
67
- # see https://github.com/sporkmonger/addressable/issues/145
68
- uri.host if uri.host =~ /\./
95
+ begin
96
+ uri = Addressable::URI.parse("http://#{text}")
97
+ # properly parse http://foo edge cases
98
+ # see https://github.com/sporkmonger/addressable/issues/145
99
+ uri.host if uri.host =~ /\./
100
+ rescue Addressable::URI::InvalidURIError
101
+ nil
102
+ end
69
103
  end
70
104
  end
71
105
 
@@ -86,8 +120,12 @@ module Gman
86
120
  #
87
121
  # Returns true if email, otherwise false
88
122
  def email?(text)
89
- EmailVeracity::Config[:skip_lookup] = true
90
- EmailVeracity::Address.new(text).valid?
123
+ text =~ EMAIL_REGEX
124
+ end
125
+
126
+ # Returns the absolute path to the domain list
127
+ def list_path
128
+ @list_path ||= File.join(File.dirname(__FILE__), "domains.txt")
91
129
  end
92
130
  end
93
131
  end
data/test/helper.rb CHANGED
@@ -1,20 +1,21 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'test/unit'
11
- require 'shoulda'
12
-
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
- $LOAD_PATH.unshift(File.dirname(__FILE__))
15
- require 'gman'
16
- require 'net/dns'
17
- require 'net/dns/resolver'
18
-
19
- class Test::Unit::TestCase
20
- end
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'gman'
16
+ require 'net/dns'
17
+ require 'net/dns/resolver'
18
+ require './lib/gman/parser'
19
+
20
+ class Test::Unit::TestCase
21
+ end
data/test/test_gman.rb CHANGED
@@ -1,122 +1,83 @@
1
- require 'helper'
2
-
3
- VALID = [ "foo.gov",
4
- "http://foo.mil",
5
- "foo@bar.gc.ca",
6
- "foo.gov.au",
7
- "https://www.foo.gouv.fr",
8
- "foo@ci.champaign.il.us",
9
- "foo.bar.baz.gov.au",
10
- "foo@bar.gov.uk",
11
- ".gov",
12
- "foo.fed.us",
13
- ]
14
-
15
- INVALID = [ "foo.bar.com",
16
- "bar@foo.biz",
17
- "http://www.foo.biz",
18
- "foo.uk",
19
- "gov",
20
- "foo@k12.champaign.il.us",
21
- "foo@kii.gov.by",
22
- "foo",
23
- "",
24
- nil,
25
- " ",
26
- ]
27
-
28
- class TestGman < Test::Unit::TestCase
29
-
30
- def domain_resolves?(domain)
31
- res = Net::DNS::Resolver.new
32
- res.nameservers = ["8.8.8.8","8.8.4.4", "208.67.222.222", "208.67.220.220"]
33
- packet = res.search(domain, Net::DNS::NS)
34
- packet.header.anCount > 0
35
- end
36
-
37
- should "recognize government email addresses and domains" do
38
- VALID.each do |test|
39
- assert_equal true, Gman::valid?(test), "#{test} should be detected as a valid government domain"
40
- end
41
- end
42
-
43
- should "not recognize non-government email addresses and domains" do
44
- INVALID.each do |test|
45
- assert_equal false, Gman::valid?(test), "#{test} should be detected as an invalid government domain"
46
- end
47
- end
48
-
49
- should "not contain any educational domains" do
50
- Gman.list.each do |entry|
51
- assert_equal false, Swot::is_academic?(entry.name), "#{entry.name} is an academic domain"
52
- end
53
- end
54
-
55
- should "not contain any invalid domains" do
56
- Gman.list.each do |entry|
57
- assert_equal true, PublicSuffix.valid?("foo.#{entry.name}"), "#{entry.name} is not a valid domain"
58
- end
59
- end
60
-
61
- should "not allow educational domains" do
62
- assert_equal false, Gman::valid?("foo@gwu.edu")
63
- end
64
-
65
- should "properly parse domains from strings" do
66
- assert_equal "github.gov", Gman::get_domain("foo@github.gov")
67
- assert_equal "foo.github.gov", Gman::get_domain("foo.github.gov")
68
- assert_equal "github.gov", Gman::get_domain("http://github.gov")
69
- assert_equal "github.gov", Gman::get_domain("https://github.gov")
70
- assert_equal ".gov", Gman::get_domain(".gov")
71
- assert_equal nil, Gman.get_domain("foo")
72
- end
73
-
74
- should "validate mx records when asked" do
75
- assert_equal true, Gman.valid?("foo@nasa.gov", true)
76
- assert_equal false, Gman.valid?("foo@github.gov", true)
77
- assert_equal true, Gman.valid?("foo@github.gov", false)
78
- end
79
-
80
- should "pass any url on the list" do
81
- Gman.list.each do |entry|
82
- assert_equal true, Gman.valid?("http://foo.#{entry.name}/bar"), "http://foo.#{entry.name}/bar is not a valid"
83
- end
84
- end
85
-
86
- should "pass any email on the list" do
87
- Gman.list.each do |entry|
88
- assert_equal true, Gman.valid?("foo@bar.#{entry.name}"), "foo@bar.#{entry.name} is not a valid"
89
- end
90
- end
91
-
92
- should "pass any domain on the list" do
93
- Gman.list.each do |entry|
94
- assert_equal true, Gman.valid?("foo.#{entry.name}"), "foo.#{entry.name} is not a valid domain"
95
- end
96
- end
97
-
98
- should "only contain resolvable domains" do
99
- Gman.list.each do |entry|
100
- assert_equal true, domain_resolves? domain
101
- end
102
- end
103
-
104
-
105
- should "not err out on invalid domains" do
106
- assert_equal false, Gman.valid?("foo@act.gov.au")
107
- assert_equal "act.gov.au", Gman.get_domain("foo@act.gov.au")
108
- assert_equal nil, Gman.domain_parts("foo@act.gov.au")
109
- end
110
-
111
- should "return public suffix domain" do
112
- assert_equal PublicSuffix::Domain, Gman.domain_parts("whitehouse.gov").class
113
- assert_equal NilClass, Gman.domain_parts("foo.bar").class
114
- end
115
-
116
- should "parse domain parts" do
117
- assert_equal "gov", Gman.domain_parts("foo@bar.gov").tld
118
- assert_equal "bar", Gman.domain_parts("foo.bar.gov").sld
119
- assert_equal "bar", Gman.domain_parts("https://foo.bar.gov").sld
120
- assert_equal "bar.gov", Gman.domain_parts("foo@bar.gov").domain
121
- end
122
- end
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+
3
+ VALID = [ "foo.gov",
4
+ "http://foo.mil",
5
+ "foo@bar.gc.ca",
6
+ "foo.gov.au",
7
+ "https://www.foo.gouv.fr",
8
+ "foo@ci.champaign.il.us",
9
+ "foo.bar.baz.gov.au",
10
+ "foo@bar.gov.uk",
11
+ ".gov",
12
+ "foo.fed.us",
13
+ ]
14
+
15
+ INVALID = [ "foo.bar.com",
16
+ "bar@foo.biz",
17
+ "http://www.foo.biz",
18
+ "foo.uk",
19
+ "gov",
20
+ "foo@k12.champaign.il.us",
21
+ "foo@kii.gov.by",
22
+ "foo",
23
+ "",
24
+ nil,
25
+ " ",
26
+ ]
27
+
28
+ class TestGman < Test::Unit::TestCase
29
+
30
+
31
+ should "recognize government email addresses and domains" do
32
+ VALID.each do |test|
33
+ assert_equal true, Gman::valid?(test), "#{test} should be detected as a valid government domain"
34
+ end
35
+ end
36
+
37
+ should "not recognize non-government email addresses and domains" do
38
+ INVALID.each do |test|
39
+ assert_equal false, Gman::valid?(test), "#{test} should be detected as an invalid government domain"
40
+ end
41
+ end
42
+
43
+ should "not allow educational domains" do
44
+ assert_equal false, Gman::valid?("foo@gwu.edu")
45
+ end
46
+
47
+ should "properly parse domains from strings" do
48
+ assert_equal "github.gov", Gman::get_domain("foo@github.gov")
49
+ assert_equal "foo.github.gov", Gman::get_domain("foo.github.gov")
50
+ assert_equal "github.gov", Gman::get_domain("http://github.gov")
51
+ assert_equal "github.gov", Gman::get_domain("https://github.gov")
52
+ assert_equal ".gov", Gman::get_domain(".gov")
53
+ assert_equal nil, Gman.get_domain("foo")
54
+ end
55
+
56
+ should "not err out on invalid domains" do
57
+ assert_equal false, Gman.valid?("foo@act.gov.au")
58
+ assert_equal "act.gov.au", Gman.get_domain("foo@act.gov.au")
59
+ assert_equal nil, Gman.domain_parts("foo@act.gov.au")
60
+ end
61
+
62
+ should "return public suffix domain" do
63
+ assert_equal PublicSuffix::Domain, Gman.domain_parts("whitehouse.gov").class
64
+ assert_equal NilClass, Gman.domain_parts("foo.bar").class
65
+ end
66
+
67
+ should "parse domain parts" do
68
+ assert_equal "gov", Gman.domain_parts("foo@bar.gov").tld
69
+ assert_equal "bar", Gman.domain_parts("foo.bar.gov").sld
70
+ assert_equal "bar", Gman.domain_parts("https://foo.bar.gov").sld
71
+ assert_equal "bar.gov", Gman.domain_parts("foo@bar.gov").domain
72
+ end
73
+
74
+ should "not err out on invalid hosts" do
75
+ assert_nothing_raised do
76
+ assert_equal nil, Gman.get_domain("</@foo.com")
77
+ end
78
+ end
79
+
80
+ should "returns the path to domains.txt" do
81
+ assert_equal true, File.exists?(Gman.list_path)
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gman
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-23 00:00:00.000000000 Z
12
+ date: 2014-01-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: public_suffix
@@ -43,22 +43,6 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
- - !ruby/object:Gem::Dependency
47
- name: email_veracity
48
- requirement: !ruby/object:Gem::Requirement
49
- none: false
50
- requirements:
51
- - - ! '>='
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
46
  - !ruby/object:Gem::Dependency
63
47
  name: addressable
64
48
  requirement: !ruby/object:Gem::Requirement