gman 2.1.3 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -11
- data/gman.gemspec +1 -0
- data/lib/gman.rb +112 -45
- data/lib/gman/version.rb +2 -2
- data/test/test_gman.rb +29 -15
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec17e3756367f6413456872d727b1f2e3511d1e6
|
4
|
+
data.tar.gz: e97c45cf1d7e50f4c53e339693190d2f70057e8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eda6f27e0de5c78c9eb8b1500d07f5e871583a442db0fab7d1c7cfd9b9bf006fbd8787a1b03f038d848ddd11d974c17da22bdd52abf0e8de234cf8557bf6a275
|
7
|
+
data.tar.gz: 880d2a6aff067d5abd3ebde56d84fac0e560d3d2f2a78aa855796ebc564d7e7eb18fbc12d05aa1336e50910a4077af247bfcfc1cf1b53dd5979f68bb10893303
|
data/README.md
CHANGED
@@ -22,29 +22,33 @@ Or add this to your `Gemfile` before doing a `bundle install`:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
+
### In general
|
26
|
+
|
25
27
|
### Verify email addresses
|
26
28
|
|
27
29
|
```ruby
|
28
|
-
Gman.valid? "foo@bar.gov"
|
29
|
-
Gman.valid? "foo@bar.com"
|
30
|
+
Gman.valid? "foo@bar.gov" #=> true
|
31
|
+
Gman.valid? "foo@bar.com" #=> false
|
30
32
|
```
|
31
33
|
|
32
34
|
### Verify domain
|
33
35
|
|
34
36
|
```ruby
|
35
|
-
Gman.valid? "http://foo.bar.gov"
|
36
|
-
Gman.valid? "foo.bar.gov"
|
37
|
-
Gman.valid? "foo.gov"
|
38
|
-
Gman.valid? "foo.biz"
|
37
|
+
Gman.valid? "http://foo.bar.gov" #=> true
|
38
|
+
Gman.valid? "foo.bar.gov" #=> true
|
39
|
+
Gman.valid? "foo.gov" #=> true
|
40
|
+
Gman.valid? "foo.biz" #=> false
|
39
41
|
```
|
40
42
|
|
41
|
-
### Get
|
43
|
+
### Get the ISO Country Code information represented by a government domain
|
42
44
|
|
43
45
|
```ruby
|
44
|
-
Gman.
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
domain = Gman.new "whitehouse.gov" #=> #<Gman domain="whitehouse.gov" valid=true>
|
47
|
+
domain.country.name #=> "United States"
|
48
|
+
domain.country.alpha2 #=> "US"
|
49
|
+
domain.country.alpha3 #=> "USA"
|
50
|
+
domain.country.currency #=> "USD"
|
51
|
+
domain.country.calling_code #=> "+1"
|
48
52
|
```
|
49
53
|
|
50
54
|
## Contributing
|
data/gman.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.add_dependency( "public_suffix", '~> 1.4')
|
31
31
|
s.add_dependency( "swot", '~> 0.3.1' )
|
32
32
|
s.add_dependency( "addressable", '~> 2.3' )
|
33
|
+
s.add_dependency( "iso_country_codes", "~> 0.4" )
|
33
34
|
|
34
35
|
s.add_development_dependency( "rake" )
|
35
36
|
s.add_development_dependency( "shoulda" )
|
data/lib/gman.rb
CHANGED
@@ -2,9 +2,10 @@ require 'public_suffix'
|
|
2
2
|
require 'yaml'
|
3
3
|
require 'swot'
|
4
4
|
require "addressable/uri"
|
5
|
-
require
|
5
|
+
require 'iso_country_codes'
|
6
|
+
require_relative "gman/version"
|
6
7
|
|
7
|
-
|
8
|
+
class Gman
|
8
9
|
|
9
10
|
# Source: http://bit.ly/1n2X9iv
|
10
11
|
EMAIL_REGEX = %r{
|
@@ -45,8 +46,27 @@ module Gman
|
|
45
46
|
$
|
46
47
|
}xi
|
47
48
|
|
49
|
+
# Map last part of TLD to alpha2 country code
|
50
|
+
ALPHA2_MAP = {
|
51
|
+
:ac => 'sh',
|
52
|
+
:uk => 'gb',
|
53
|
+
:su => 'ru',
|
54
|
+
:tp => 'tl',
|
55
|
+
:yu => 'rs',
|
56
|
+
:gov => "us",
|
57
|
+
:mil => "us",
|
58
|
+
:org => "us",
|
59
|
+
:com => "us",
|
60
|
+
:net => "us",
|
61
|
+
:edu => "us",
|
62
|
+
:travel => "us",
|
63
|
+
:info => "us"
|
64
|
+
}
|
65
|
+
|
48
66
|
class << self
|
49
67
|
|
68
|
+
attr_writer :list
|
69
|
+
|
50
70
|
# Normalizes and checks if a given string represents a government domain
|
51
71
|
# Possible strings to test:
|
52
72
|
# ".gov"
|
@@ -57,19 +77,14 @@ module Gman
|
|
57
77
|
#
|
58
78
|
# Returns boolean true if a government domain
|
59
79
|
def valid?(text)
|
80
|
+
Gman.new(text).valid?
|
81
|
+
end
|
60
82
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# check using public suffix's standard logic
|
68
|
-
rule = list.find domain
|
69
|
-
return true if !rule.nil? && rule.allow?(domain)
|
70
|
-
|
71
|
-
# also allow for explicit matches to domain list
|
72
|
-
list.rules.any? { |rule| rule.value == domain }
|
83
|
+
# Is the given string in the form of a valid email address?
|
84
|
+
#
|
85
|
+
# Returns true if email, otherwise false
|
86
|
+
def email?(text)
|
87
|
+
Gman.new(text).email?
|
73
88
|
end
|
74
89
|
|
75
90
|
# returns an instance of our custom public suffix list
|
@@ -78,23 +93,37 @@ module Gman
|
|
78
93
|
@list ||= PublicSuffix::List::parse(File.new(list_path, "r:utf-8"))
|
79
94
|
end
|
80
95
|
|
81
|
-
#
|
82
|
-
|
83
|
-
|
84
|
-
|
96
|
+
# Returns the absolute path to the domain list
|
97
|
+
def list_path
|
98
|
+
File.join(File.dirname(__FILE__), "domains.txt")
|
99
|
+
end
|
100
|
+
end
|
85
101
|
|
86
|
-
|
102
|
+
# Creates a new Gman instance
|
103
|
+
#
|
104
|
+
# text - the input string to check for governmentiness
|
105
|
+
def initialize(text)
|
106
|
+
@text = text.to_s.downcase.strip
|
107
|
+
end
|
108
|
+
|
109
|
+
# Parse the domain from the input string
|
110
|
+
#
|
111
|
+
# Can handle urls, domains, or emails
|
112
|
+
#
|
113
|
+
# Returns the domain string
|
114
|
+
def domain
|
115
|
+
@domain ||= begin
|
116
|
+
return nil if @text.empty?
|
87
117
|
|
88
|
-
|
89
|
-
uri = Addressable::URI.parse(text)
|
118
|
+
uri = Addressable::URI.parse(@text)
|
90
119
|
|
91
120
|
if uri.host # valid https?://* URI
|
92
121
|
uri.host
|
93
|
-
elsif email?
|
94
|
-
text.match(/@([\w\.\-]+)\Z/i)[1]
|
122
|
+
elsif email?
|
123
|
+
@text.match(/@([\w\.\-]+)\Z/i)[1]
|
95
124
|
else # url sans http://
|
96
125
|
begin
|
97
|
-
uri = Addressable::URI.parse("http://#{text}")
|
126
|
+
uri = Addressable::URI.parse("http://#{@text}")
|
98
127
|
# properly parse http://foo edge cases
|
99
128
|
# see https://github.com/sporkmonger/addressable/issues/145
|
100
129
|
uri.host if uri.host =~ /\./
|
@@ -103,30 +132,68 @@ module Gman
|
|
103
132
|
end
|
104
133
|
end
|
105
134
|
end
|
135
|
+
end
|
136
|
+
alias_method :to_s, :domain
|
106
137
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
#
|
112
|
-
|
113
|
-
begin
|
114
|
-
PublicSuffix.parse get_domain(text)
|
115
|
-
rescue
|
116
|
-
nil
|
117
|
-
end
|
118
|
-
end
|
138
|
+
# Checks if the input string represents a government domain
|
139
|
+
#
|
140
|
+
# Returns boolean true if a government domain
|
141
|
+
def valid?
|
142
|
+
# Ensure it's a valid domain
|
143
|
+
return false unless PublicSuffix.valid?(domain)
|
119
144
|
|
120
|
-
#
|
121
|
-
|
122
|
-
# Returns true if email, otherwise false
|
123
|
-
def email?(text)
|
124
|
-
text =~ EMAIL_REGEX
|
125
|
-
end
|
145
|
+
# Ensure non-edu
|
146
|
+
return false if Swot::is_academic?(domain)
|
126
147
|
|
127
|
-
#
|
128
|
-
|
129
|
-
|
148
|
+
# check using public suffix's standard logic
|
149
|
+
rule = Gman.list.find domain
|
150
|
+
return true if !rule.nil? && rule.allow?(domain)
|
151
|
+
|
152
|
+
# also allow for explicit matches to domain list
|
153
|
+
Gman.list.rules.any? { |rule| rule.value == domain }
|
154
|
+
end
|
155
|
+
|
156
|
+
# Is the input text in the form of a valid email address?
|
157
|
+
#
|
158
|
+
# Returns true if email, otherwise false
|
159
|
+
def email?
|
160
|
+
!!(@text =~ EMAIL_REGEX)
|
161
|
+
end
|
162
|
+
|
163
|
+
# Helper function to return the public suffix domain object
|
164
|
+
#
|
165
|
+
# Supports all domain strings (URLs, emails)
|
166
|
+
#
|
167
|
+
# Returns the domain object or nil, but no errors, never an error
|
168
|
+
def domain_parts
|
169
|
+
PublicSuffix.parse domain
|
170
|
+
rescue PublicSuffix::DomainInvalid
|
171
|
+
nil
|
172
|
+
end
|
173
|
+
|
174
|
+
# Returns the two-character alpha country code represented by the domain
|
175
|
+
#
|
176
|
+
# e.g., United States = US, United Kingdom = GB
|
177
|
+
def alpha2
|
178
|
+
alpha2 = domain_parts.tld.split('.').last
|
179
|
+
if ALPHA2_MAP[alpha2.to_sym]
|
180
|
+
ALPHA2_MAP[alpha2.to_sym]
|
181
|
+
else
|
182
|
+
alpha2
|
130
183
|
end
|
131
184
|
end
|
185
|
+
|
186
|
+
# Returns the ISO Country represented by the domain
|
187
|
+
#
|
188
|
+
# Example Usage:
|
189
|
+
# Gman.new("foo.gov").country.name => "United States"
|
190
|
+
# Gman.new("foo.gov").country.currency => "USD"
|
191
|
+
def country
|
192
|
+
@country ||= IsoCountryCodes.find(alpha2)
|
193
|
+
end
|
194
|
+
|
195
|
+
# Console output
|
196
|
+
def inspect
|
197
|
+
"#<Gman domain=\"#{domain}\" valid=#{valid?}>"
|
198
|
+
end
|
132
199
|
end
|
data/lib/gman/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = '
|
1
|
+
class Gman
|
2
|
+
VERSION = '3.0.0'
|
3
3
|
end
|
data/test/test_gman.rb
CHANGED
@@ -44,37 +44,51 @@ class TestGman < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
should "properly parse domains from strings" do
|
47
|
-
assert_equal "github.gov", Gman
|
48
|
-
assert_equal "foo.github.gov", Gman::
|
49
|
-
assert_equal "github.gov", Gman::
|
50
|
-
assert_equal "github.gov", Gman::
|
51
|
-
assert_equal ".gov", Gman::
|
52
|
-
assert_equal nil, Gman.
|
47
|
+
assert_equal "github.gov", Gman.new("foo@github.gov").domain
|
48
|
+
assert_equal "foo.github.gov", Gman::new("foo.github.gov").domain
|
49
|
+
assert_equal "github.gov", Gman::new("http://github.gov").domain
|
50
|
+
assert_equal "github.gov", Gman::new("https://github.gov").domain
|
51
|
+
assert_equal ".gov", Gman::new(".gov").domain
|
52
|
+
assert_equal nil, Gman.new("foo").domain
|
53
53
|
end
|
54
54
|
|
55
55
|
should "not err out on invalid domains" do
|
56
56
|
assert_equal false, Gman.valid?("foo@gov.invalid")
|
57
|
-
assert_equal "gov.invalid", Gman.
|
58
|
-
assert_equal nil, Gman.
|
57
|
+
assert_equal "gov.invalid", Gman.new("foo@gov.invalid").domain
|
58
|
+
assert_equal nil, Gman.new("foo@gov.invalid").domain_parts
|
59
59
|
end
|
60
60
|
|
61
61
|
should "return public suffix domain" do
|
62
|
-
assert_equal PublicSuffix::Domain, Gman.
|
63
|
-
assert_equal NilClass, Gman.
|
62
|
+
assert_equal PublicSuffix::Domain, Gman.new("whitehouse.gov").domain_parts.class
|
63
|
+
assert_equal NilClass, Gman.new("foo.invalid").domain_parts.class
|
64
64
|
end
|
65
65
|
|
66
66
|
should "parse domain parts" do
|
67
|
-
assert_equal "gov", Gman.
|
68
|
-
assert_equal "bar", Gman.
|
69
|
-
assert_equal "bar", Gman.
|
70
|
-
assert_equal "bar.gov", Gman.
|
67
|
+
assert_equal "gov", Gman.new("foo@bar.gov").domain_parts.tld
|
68
|
+
assert_equal "bar", Gman.new("foo.bar.gov").domain_parts.sld
|
69
|
+
assert_equal "bar", Gman.new("https://foo.bar.gov").domain_parts.sld
|
70
|
+
assert_equal "bar.gov", Gman.new("foo@bar.gov").domain_parts.domain
|
71
71
|
end
|
72
72
|
|
73
73
|
should "not err out on invalid hosts" do
|
74
|
-
assert_equal nil, Gman.
|
74
|
+
assert_equal nil, Gman.new("</@foo.com").domain
|
75
75
|
end
|
76
76
|
|
77
77
|
should "returns the path to domains.txt" do
|
78
78
|
assert_equal true, File.exists?(Gman.list_path)
|
79
79
|
end
|
80
|
+
|
81
|
+
should "parse the alpha2" do
|
82
|
+
assert_equal "us", Gman.new("whitehouse.gov").alpha2
|
83
|
+
assert_equal "us", Gman.new("army.mil").alpha2
|
84
|
+
assert_equal "gb", Gman.new("foo.gov.uk").alpha2
|
85
|
+
assert_equal "ca", Gman.new("gov.ca").alpha2
|
86
|
+
end
|
87
|
+
|
88
|
+
should "determine a domain's country" do
|
89
|
+
assert_equal "United States", Gman.new("whitehouse.gov").country.name
|
90
|
+
assert_equal "United States", Gman.new("army.mil").country.name
|
91
|
+
assert_equal "United Kingdom", Gman.new("foo.gov.uk").country.name
|
92
|
+
assert_equal "Canada", Gman.new("foo.gc.ca").country.name
|
93
|
+
end
|
80
94
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: public_suffix
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: iso_country_codes
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.4'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|