gman 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (9) hide show
  1. data/Gemfile +6 -6
  2. data/README.md +0 -10
  3. data/Rakefile +9 -1
  4. data/gman.gemspec +1 -2
  5. data/lib/domains.txt +10031 -9909
  6. data/lib/gman.rb +56 -18
  7. data/test/helper.rb +21 -20
  8. data/test/test_gman.rb +83 -122
  9. metadata +2 -18
data/lib/gman.rb CHANGED
@@ -2,10 +2,48 @@ require 'public_suffix'
2
2
  require 'yaml'
3
3
  require 'swot'
4
4
  require "addressable/uri"
5
- require "email_veracity"
6
5
 
7
6
  module Gman
8
7
 
8
+ # Source: http://bit.ly/1n2X9iv
9
+ EMAIL_REGEX = %r{
10
+ ^
11
+ (
12
+ [\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`\{\|\}\~]+
13
+ \.
14
+ )
15
+ *
16
+ [\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`\{\|\}\~]+
17
+ @
18
+ (
19
+ (
20
+ (
21
+ (
22
+ (
23
+ [a-z0-9]{1}
24
+ [a-z0-9\-]{0,62}
25
+ [a-z0-9]{1}
26
+ )
27
+ |
28
+ [a-z]
29
+ )
30
+ \.
31
+ )+
32
+ [a-z]{2,6}
33
+ )
34
+ |
35
+ (
36
+ \d{1,3}
37
+ \.
38
+ ){3}
39
+ \d{1,3}
40
+ (
41
+ \:\d{1,5}
42
+ )?
43
+ )
44
+ $
45
+ }xi
46
+
9
47
  class << self
10
48
 
11
49
  # Normalizes and checks if a given string represents a government domain
@@ -16,20 +54,12 @@ module Gman
16
54
  # "foo.gov.uk"
17
55
  # "http://foo.bar.gov"
18
56
  #
19
- # check_mx - if an email is passed, check the domain for an mx record
20
- #
21
57
  # Returns boolean true if a government domain
22
- def valid?(text, check_mx=false)
58
+ def valid?(text)
23
59
 
24
60
  domain = get_domain text
25
61
  return false unless PublicSuffix.valid?(domain)
26
62
 
27
- # validate mx record
28
- if check_mx && email?(text)
29
- EmailVeracity::Config[:skip_lookup] = false
30
- return false unless EmailVeracity::Address.new(text).valid?
31
- end
32
-
33
63
  # Ensure non-edu
34
64
  return false if Swot::is_academic?(domain)
35
65
 
@@ -44,7 +74,7 @@ module Gman
44
74
  # returns an instance of our custom public suffix list
45
75
  # list behaves like PublicSuffix::List but is limited to our whitelisted domains
46
76
  def list
47
- @list ||= PublicSuffix::List::parse( File.new(File.join(File.dirname(__FILE__), "domains.txt"), "r:utf-8"))
77
+ @list ||= PublicSuffix::List::parse(File.new(list_path, "r:utf-8"))
48
78
  end
49
79
 
50
80
  # Get the FQDN name from a URL or email address.
@@ -60,12 +90,16 @@ module Gman
60
90
  if uri.host # valid https?://* URI
61
91
  uri.host
62
92
  elsif email?(text)
63
- EmailVeracity::Address.new(text).domain.to_s
93
+ text.match(/@([\w\.\-]+)\Z/i)[1]
64
94
  else # url sans http://
65
- uri = Addressable::URI.parse("http://#{text}")
66
- # properly parse http://foo edge cases
67
- # see https://github.com/sporkmonger/addressable/issues/145
68
- uri.host if uri.host =~ /\./
95
+ begin
96
+ uri = Addressable::URI.parse("http://#{text}")
97
+ # properly parse http://foo edge cases
98
+ # see https://github.com/sporkmonger/addressable/issues/145
99
+ uri.host if uri.host =~ /\./
100
+ rescue Addressable::URI::InvalidURIError
101
+ nil
102
+ end
69
103
  end
70
104
  end
71
105
 
@@ -86,8 +120,12 @@ module Gman
86
120
  #
87
121
  # Returns true if email, otherwise false
88
122
  def email?(text)
89
- EmailVeracity::Config[:skip_lookup] = true
90
- EmailVeracity::Address.new(text).valid?
123
+ text =~ EMAIL_REGEX
124
+ end
125
+
126
+ # Returns the absolute path to the domain list
127
+ def list_path
128
+ @list_path ||= File.join(File.dirname(__FILE__), "domains.txt")
91
129
  end
92
130
  end
93
131
  end
data/test/helper.rb CHANGED
@@ -1,20 +1,21 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'test/unit'
11
- require 'shoulda'
12
-
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
- $LOAD_PATH.unshift(File.dirname(__FILE__))
15
- require 'gman'
16
- require 'net/dns'
17
- require 'net/dns/resolver'
18
-
19
- class Test::Unit::TestCase
20
- end
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'gman'
16
+ require 'net/dns'
17
+ require 'net/dns/resolver'
18
+ require './lib/gman/parser'
19
+
20
+ class Test::Unit::TestCase
21
+ end
data/test/test_gman.rb CHANGED
@@ -1,122 +1,83 @@
1
- require 'helper'
2
-
3
- VALID = [ "foo.gov",
4
- "http://foo.mil",
5
- "foo@bar.gc.ca",
6
- "foo.gov.au",
7
- "https://www.foo.gouv.fr",
8
- "foo@ci.champaign.il.us",
9
- "foo.bar.baz.gov.au",
10
- "foo@bar.gov.uk",
11
- ".gov",
12
- "foo.fed.us",
13
- ]
14
-
15
- INVALID = [ "foo.bar.com",
16
- "bar@foo.biz",
17
- "http://www.foo.biz",
18
- "foo.uk",
19
- "gov",
20
- "foo@k12.champaign.il.us",
21
- "foo@kii.gov.by",
22
- "foo",
23
- "",
24
- nil,
25
- " ",
26
- ]
27
-
28
- class TestGman < Test::Unit::TestCase
29
-
30
- def domain_resolves?(domain)
31
- res = Net::DNS::Resolver.new
32
- res.nameservers = ["8.8.8.8","8.8.4.4", "208.67.222.222", "208.67.220.220"]
33
- packet = res.search(domain, Net::DNS::NS)
34
- packet.header.anCount > 0
35
- end
36
-
37
- should "recognize government email addresses and domains" do
38
- VALID.each do |test|
39
- assert_equal true, Gman::valid?(test), "#{test} should be detected as a valid government domain"
40
- end
41
- end
42
-
43
- should "not recognize non-government email addresses and domains" do
44
- INVALID.each do |test|
45
- assert_equal false, Gman::valid?(test), "#{test} should be detected as an invalid government domain"
46
- end
47
- end
48
-
49
- should "not contain any educational domains" do
50
- Gman.list.each do |entry|
51
- assert_equal false, Swot::is_academic?(entry.name), "#{entry.name} is an academic domain"
52
- end
53
- end
54
-
55
- should "not contain any invalid domains" do
56
- Gman.list.each do |entry|
57
- assert_equal true, PublicSuffix.valid?("foo.#{entry.name}"), "#{entry.name} is not a valid domain"
58
- end
59
- end
60
-
61
- should "not allow educational domains" do
62
- assert_equal false, Gman::valid?("foo@gwu.edu")
63
- end
64
-
65
- should "properly parse domains from strings" do
66
- assert_equal "github.gov", Gman::get_domain("foo@github.gov")
67
- assert_equal "foo.github.gov", Gman::get_domain("foo.github.gov")
68
- assert_equal "github.gov", Gman::get_domain("http://github.gov")
69
- assert_equal "github.gov", Gman::get_domain("https://github.gov")
70
- assert_equal ".gov", Gman::get_domain(".gov")
71
- assert_equal nil, Gman.get_domain("foo")
72
- end
73
-
74
- should "validate mx records when asked" do
75
- assert_equal true, Gman.valid?("foo@nasa.gov", true)
76
- assert_equal false, Gman.valid?("foo@github.gov", true)
77
- assert_equal true, Gman.valid?("foo@github.gov", false)
78
- end
79
-
80
- should "pass any url on the list" do
81
- Gman.list.each do |entry|
82
- assert_equal true, Gman.valid?("http://foo.#{entry.name}/bar"), "http://foo.#{entry.name}/bar is not a valid"
83
- end
84
- end
85
-
86
- should "pass any email on the list" do
87
- Gman.list.each do |entry|
88
- assert_equal true, Gman.valid?("foo@bar.#{entry.name}"), "foo@bar.#{entry.name} is not a valid"
89
- end
90
- end
91
-
92
- should "pass any domain on the list" do
93
- Gman.list.each do |entry|
94
- assert_equal true, Gman.valid?("foo.#{entry.name}"), "foo.#{entry.name} is not a valid domain"
95
- end
96
- end
97
-
98
- should "only contain resolvable domains" do
99
- Gman.list.each do |entry|
100
- assert_equal true, domain_resolves? domain
101
- end
102
- end
103
-
104
-
105
- should "not err out on invalid domains" do
106
- assert_equal false, Gman.valid?("foo@act.gov.au")
107
- assert_equal "act.gov.au", Gman.get_domain("foo@act.gov.au")
108
- assert_equal nil, Gman.domain_parts("foo@act.gov.au")
109
- end
110
-
111
- should "return public suffix domain" do
112
- assert_equal PublicSuffix::Domain, Gman.domain_parts("whitehouse.gov").class
113
- assert_equal NilClass, Gman.domain_parts("foo.bar").class
114
- end
115
-
116
- should "parse domain parts" do
117
- assert_equal "gov", Gman.domain_parts("foo@bar.gov").tld
118
- assert_equal "bar", Gman.domain_parts("foo.bar.gov").sld
119
- assert_equal "bar", Gman.domain_parts("https://foo.bar.gov").sld
120
- assert_equal "bar.gov", Gman.domain_parts("foo@bar.gov").domain
121
- end
122
- end
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+
3
+ VALID = [ "foo.gov",
4
+ "http://foo.mil",
5
+ "foo@bar.gc.ca",
6
+ "foo.gov.au",
7
+ "https://www.foo.gouv.fr",
8
+ "foo@ci.champaign.il.us",
9
+ "foo.bar.baz.gov.au",
10
+ "foo@bar.gov.uk",
11
+ ".gov",
12
+ "foo.fed.us",
13
+ ]
14
+
15
+ INVALID = [ "foo.bar.com",
16
+ "bar@foo.biz",
17
+ "http://www.foo.biz",
18
+ "foo.uk",
19
+ "gov",
20
+ "foo@k12.champaign.il.us",
21
+ "foo@kii.gov.by",
22
+ "foo",
23
+ "",
24
+ nil,
25
+ " ",
26
+ ]
27
+
28
+ class TestGman < Test::Unit::TestCase
29
+
30
+
31
+ should "recognize government email addresses and domains" do
32
+ VALID.each do |test|
33
+ assert_equal true, Gman::valid?(test), "#{test} should be detected as a valid government domain"
34
+ end
35
+ end
36
+
37
+ should "not recognize non-government email addresses and domains" do
38
+ INVALID.each do |test|
39
+ assert_equal false, Gman::valid?(test), "#{test} should be detected as an invalid government domain"
40
+ end
41
+ end
42
+
43
+ should "not allow educational domains" do
44
+ assert_equal false, Gman::valid?("foo@gwu.edu")
45
+ end
46
+
47
+ should "properly parse domains from strings" do
48
+ assert_equal "github.gov", Gman::get_domain("foo@github.gov")
49
+ assert_equal "foo.github.gov", Gman::get_domain("foo.github.gov")
50
+ assert_equal "github.gov", Gman::get_domain("http://github.gov")
51
+ assert_equal "github.gov", Gman::get_domain("https://github.gov")
52
+ assert_equal ".gov", Gman::get_domain(".gov")
53
+ assert_equal nil, Gman.get_domain("foo")
54
+ end
55
+
56
+ should "not err out on invalid domains" do
57
+ assert_equal false, Gman.valid?("foo@act.gov.au")
58
+ assert_equal "act.gov.au", Gman.get_domain("foo@act.gov.au")
59
+ assert_equal nil, Gman.domain_parts("foo@act.gov.au")
60
+ end
61
+
62
+ should "return public suffix domain" do
63
+ assert_equal PublicSuffix::Domain, Gman.domain_parts("whitehouse.gov").class
64
+ assert_equal NilClass, Gman.domain_parts("foo.bar").class
65
+ end
66
+
67
+ should "parse domain parts" do
68
+ assert_equal "gov", Gman.domain_parts("foo@bar.gov").tld
69
+ assert_equal "bar", Gman.domain_parts("foo.bar.gov").sld
70
+ assert_equal "bar", Gman.domain_parts("https://foo.bar.gov").sld
71
+ assert_equal "bar.gov", Gman.domain_parts("foo@bar.gov").domain
72
+ end
73
+
74
+ should "not err out on invalid hosts" do
75
+ assert_nothing_raised do
76
+ assert_equal nil, Gman.get_domain("</@foo.com")
77
+ end
78
+ end
79
+
80
+ should "returns the path to domains.txt" do
81
+ assert_equal true, File.exists?(Gman.list_path)
82
+ end
83
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gman
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-23 00:00:00.000000000 Z
12
+ date: 2014-01-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: public_suffix
@@ -43,22 +43,6 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
- - !ruby/object:Gem::Dependency
47
- name: email_veracity
48
- requirement: !ruby/object:Gem::Requirement
49
- none: false
50
- requirements:
51
- - - ! '>='
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
46
  - !ruby/object:Gem::Dependency
63
47
  name: addressable
64
48
  requirement: !ruby/object:Gem::Requirement