gman 2.1.3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -11
- data/gman.gemspec +1 -0
- data/lib/gman.rb +112 -45
- data/lib/gman/version.rb +2 -2
- data/test/test_gman.rb +29 -15
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec17e3756367f6413456872d727b1f2e3511d1e6
|
4
|
+
data.tar.gz: e97c45cf1d7e50f4c53e339693190d2f70057e8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eda6f27e0de5c78c9eb8b1500d07f5e871583a442db0fab7d1c7cfd9b9bf006fbd8787a1b03f038d848ddd11d974c17da22bdd52abf0e8de234cf8557bf6a275
|
7
|
+
data.tar.gz: 880d2a6aff067d5abd3ebde56d84fac0e560d3d2f2a78aa855796ebc564d7e7eb18fbc12d05aa1336e50910a4077af247bfcfc1cf1b53dd5979f68bb10893303
|
data/README.md
CHANGED
@@ -22,29 +22,33 @@ Or add this to your `Gemfile` before doing a `bundle install`:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
+
### In general
|
26
|
+
|
25
27
|
### Verify email addresses
|
26
28
|
|
27
29
|
```ruby
|
28
|
-
Gman.valid? "foo@bar.gov"
|
29
|
-
Gman.valid? "foo@bar.com"
|
30
|
+
Gman.valid? "foo@bar.gov" #=> true
|
31
|
+
Gman.valid? "foo@bar.com" #=> false
|
30
32
|
```
|
31
33
|
|
32
34
|
### Verify domain
|
33
35
|
|
34
36
|
```ruby
|
35
|
-
Gman.valid? "http://foo.bar.gov"
|
36
|
-
Gman.valid? "foo.bar.gov"
|
37
|
-
Gman.valid? "foo.gov"
|
38
|
-
Gman.valid? "foo.biz"
|
37
|
+
Gman.valid? "http://foo.bar.gov" #=> true
|
38
|
+
Gman.valid? "foo.bar.gov" #=> true
|
39
|
+
Gman.valid? "foo.gov" #=> true
|
40
|
+
Gman.valid? "foo.biz" #=> false
|
39
41
|
```
|
40
42
|
|
41
|
-
### Get
|
43
|
+
### Get the ISO Country Code information represented by a government domain
|
42
44
|
|
43
45
|
```ruby
|
44
|
-
Gman.
|
45
|
-
|
46
|
-
|
47
|
-
|
46
|
+
domain = Gman.new "whitehouse.gov" #=> #<Gman domain="whitehouse.gov" valid=true>
|
47
|
+
domain.country.name #=> "United States"
|
48
|
+
domain.country.alpha2 #=> "US"
|
49
|
+
domain.country.alpha3 #=> "USA"
|
50
|
+
domain.country.currency #=> "USD"
|
51
|
+
domain.country.calling_code #=> "+1"
|
48
52
|
```
|
49
53
|
|
50
54
|
## Contributing
|
data/gman.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.add_dependency( "public_suffix", '~> 1.4')
|
31
31
|
s.add_dependency( "swot", '~> 0.3.1' )
|
32
32
|
s.add_dependency( "addressable", '~> 2.3' )
|
33
|
+
s.add_dependency( "iso_country_codes", "~> 0.4" )
|
33
34
|
|
34
35
|
s.add_development_dependency( "rake" )
|
35
36
|
s.add_development_dependency( "shoulda" )
|
data/lib/gman.rb
CHANGED
@@ -2,9 +2,10 @@ require 'public_suffix'
|
|
2
2
|
require 'yaml'
|
3
3
|
require 'swot'
|
4
4
|
require "addressable/uri"
|
5
|
-
require
|
5
|
+
require 'iso_country_codes'
|
6
|
+
require_relative "gman/version"
|
6
7
|
|
7
|
-
|
8
|
+
class Gman
|
8
9
|
|
9
10
|
# Source: http://bit.ly/1n2X9iv
|
10
11
|
EMAIL_REGEX = %r{
|
@@ -45,8 +46,27 @@ module Gman
|
|
45
46
|
$
|
46
47
|
}xi
|
47
48
|
|
49
|
+
# Map last part of TLD to alpha2 country code
|
50
|
+
ALPHA2_MAP = {
|
51
|
+
:ac => 'sh',
|
52
|
+
:uk => 'gb',
|
53
|
+
:su => 'ru',
|
54
|
+
:tp => 'tl',
|
55
|
+
:yu => 'rs',
|
56
|
+
:gov => "us",
|
57
|
+
:mil => "us",
|
58
|
+
:org => "us",
|
59
|
+
:com => "us",
|
60
|
+
:net => "us",
|
61
|
+
:edu => "us",
|
62
|
+
:travel => "us",
|
63
|
+
:info => "us"
|
64
|
+
}
|
65
|
+
|
48
66
|
class << self
|
49
67
|
|
68
|
+
attr_writer :list
|
69
|
+
|
50
70
|
# Normalizes and checks if a given string represents a government domain
|
51
71
|
# Possible strings to test:
|
52
72
|
# ".gov"
|
@@ -57,19 +77,14 @@ module Gman
|
|
57
77
|
#
|
58
78
|
# Returns boolean true if a government domain
|
59
79
|
def valid?(text)
|
80
|
+
Gman.new(text).valid?
|
81
|
+
end
|
60
82
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
# check using public suffix's standard logic
|
68
|
-
rule = list.find domain
|
69
|
-
return true if !rule.nil? && rule.allow?(domain)
|
70
|
-
|
71
|
-
# also allow for explicit matches to domain list
|
72
|
-
list.rules.any? { |rule| rule.value == domain }
|
83
|
+
# Is the given string in the form of a valid email address?
|
84
|
+
#
|
85
|
+
# Returns true if email, otherwise false
|
86
|
+
def email?(text)
|
87
|
+
Gman.new(text).email?
|
73
88
|
end
|
74
89
|
|
75
90
|
# returns an instance of our custom public suffix list
|
@@ -78,23 +93,37 @@ module Gman
|
|
78
93
|
@list ||= PublicSuffix::List::parse(File.new(list_path, "r:utf-8"))
|
79
94
|
end
|
80
95
|
|
81
|
-
#
|
82
|
-
|
83
|
-
|
84
|
-
|
96
|
+
# Returns the absolute path to the domain list
|
97
|
+
def list_path
|
98
|
+
File.join(File.dirname(__FILE__), "domains.txt")
|
99
|
+
end
|
100
|
+
end
|
85
101
|
|
86
|
-
|
102
|
+
# Creates a new Gman instance
|
103
|
+
#
|
104
|
+
# text - the input string to check for governmentiness
|
105
|
+
def initialize(text)
|
106
|
+
@text = text.to_s.downcase.strip
|
107
|
+
end
|
108
|
+
|
109
|
+
# Parse the domain from the input string
|
110
|
+
#
|
111
|
+
# Can handle urls, domains, or emails
|
112
|
+
#
|
113
|
+
# Returns the domain string
|
114
|
+
def domain
|
115
|
+
@domain ||= begin
|
116
|
+
return nil if @text.empty?
|
87
117
|
|
88
|
-
|
89
|
-
uri = Addressable::URI.parse(text)
|
118
|
+
uri = Addressable::URI.parse(@text)
|
90
119
|
|
91
120
|
if uri.host # valid https?://* URI
|
92
121
|
uri.host
|
93
|
-
elsif email?
|
94
|
-
text.match(/@([\w\.\-]+)\Z/i)[1]
|
122
|
+
elsif email?
|
123
|
+
@text.match(/@([\w\.\-]+)\Z/i)[1]
|
95
124
|
else # url sans http://
|
96
125
|
begin
|
97
|
-
uri = Addressable::URI.parse("http://#{text}")
|
126
|
+
uri = Addressable::URI.parse("http://#{@text}")
|
98
127
|
# properly parse http://foo edge cases
|
99
128
|
# see https://github.com/sporkmonger/addressable/issues/145
|
100
129
|
uri.host if uri.host =~ /\./
|
@@ -103,30 +132,68 @@ module Gman
|
|
103
132
|
end
|
104
133
|
end
|
105
134
|
end
|
135
|
+
end
|
136
|
+
alias_method :to_s, :domain
|
106
137
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
#
|
112
|
-
|
113
|
-
begin
|
114
|
-
PublicSuffix.parse get_domain(text)
|
115
|
-
rescue
|
116
|
-
nil
|
117
|
-
end
|
118
|
-
end
|
138
|
+
# Checks if the input string represents a government domain
|
139
|
+
#
|
140
|
+
# Returns boolean true if a government domain
|
141
|
+
def valid?
|
142
|
+
# Ensure it's a valid domain
|
143
|
+
return false unless PublicSuffix.valid?(domain)
|
119
144
|
|
120
|
-
#
|
121
|
-
|
122
|
-
# Returns true if email, otherwise false
|
123
|
-
def email?(text)
|
124
|
-
text =~ EMAIL_REGEX
|
125
|
-
end
|
145
|
+
# Ensure non-edu
|
146
|
+
return false if Swot::is_academic?(domain)
|
126
147
|
|
127
|
-
#
|
128
|
-
|
129
|
-
|
148
|
+
# check using public suffix's standard logic
|
149
|
+
rule = Gman.list.find domain
|
150
|
+
return true if !rule.nil? && rule.allow?(domain)
|
151
|
+
|
152
|
+
# also allow for explicit matches to domain list
|
153
|
+
Gman.list.rules.any? { |rule| rule.value == domain }
|
154
|
+
end
|
155
|
+
|
156
|
+
# Is the input text in the form of a valid email address?
|
157
|
+
#
|
158
|
+
# Returns true if email, otherwise false
|
159
|
+
def email?
|
160
|
+
!!(@text =~ EMAIL_REGEX)
|
161
|
+
end
|
162
|
+
|
163
|
+
# Helper function to return the public suffix domain object
|
164
|
+
#
|
165
|
+
# Supports all domain strings (URLs, emails)
|
166
|
+
#
|
167
|
+
# Returns the domain object or nil, but no errors, never an error
|
168
|
+
def domain_parts
|
169
|
+
PublicSuffix.parse domain
|
170
|
+
rescue PublicSuffix::DomainInvalid
|
171
|
+
nil
|
172
|
+
end
|
173
|
+
|
174
|
+
# Returns the two character alpha country code represented by the domain
|
175
|
+
#
|
176
|
+
# e.g., United States = US, United Kingdom = GB
|
177
|
+
def alpha2
|
178
|
+
alpha2 = domain_parts.tld.split('.').last
|
179
|
+
if ALPHA2_MAP[alpha2.to_sym]
|
180
|
+
ALPHA2_MAP[alpha2.to_sym]
|
181
|
+
else
|
182
|
+
alpha2
|
130
183
|
end
|
131
184
|
end
|
185
|
+
|
186
|
+
# Returns the ISO Country represented by the domain
|
187
|
+
#
|
188
|
+
# Example Usage:
|
189
|
+
# Gman.new("foo.gov").country.name => "United States"
|
190
|
+
# Gman.new("foo.gov").country.currency => "USD"
|
191
|
+
def country
|
192
|
+
@country ||= IsoCountryCodes.find(alpha2)
|
193
|
+
end
|
194
|
+
|
195
|
+
# Console output
|
196
|
+
def inspect
|
197
|
+
"#<Gman domain=\"#{domain}\" valid=#{valid?}>"
|
198
|
+
end
|
132
199
|
end
|
data/lib/gman/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = '
|
1
|
+
class Gman
|
2
|
+
VERSION = '3.0.0'
|
3
3
|
end
|
data/test/test_gman.rb
CHANGED
@@ -44,37 +44,51 @@ class TestGman < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
should "properly parse domains from strings" do
|
47
|
-
assert_equal "github.gov", Gman
|
48
|
-
assert_equal "foo.github.gov", Gman::
|
49
|
-
assert_equal "github.gov", Gman::
|
50
|
-
assert_equal "github.gov", Gman::
|
51
|
-
assert_equal ".gov", Gman::
|
52
|
-
assert_equal nil, Gman.
|
47
|
+
assert_equal "github.gov", Gman.new("foo@github.gov").domain
|
48
|
+
assert_equal "foo.github.gov", Gman::new("foo.github.gov").domain
|
49
|
+
assert_equal "github.gov", Gman::new("http://github.gov").domain
|
50
|
+
assert_equal "github.gov", Gman::new("https://github.gov").domain
|
51
|
+
assert_equal ".gov", Gman::new(".gov").domain
|
52
|
+
assert_equal nil, Gman.new("foo").domain
|
53
53
|
end
|
54
54
|
|
55
55
|
should "not err out on invalid domains" do
|
56
56
|
assert_equal false, Gman.valid?("foo@gov.invalid")
|
57
|
-
assert_equal "gov.invalid", Gman.
|
58
|
-
assert_equal nil, Gman.
|
57
|
+
assert_equal "gov.invalid", Gman.new("foo@gov.invalid").domain
|
58
|
+
assert_equal nil, Gman.new("foo@gov.invalid").domain_parts
|
59
59
|
end
|
60
60
|
|
61
61
|
should "return public suffix domain" do
|
62
|
-
assert_equal PublicSuffix::Domain, Gman.
|
63
|
-
assert_equal NilClass, Gman.
|
62
|
+
assert_equal PublicSuffix::Domain, Gman.new("whitehouse.gov").domain_parts.class
|
63
|
+
assert_equal NilClass, Gman.new("foo.invalid").domain_parts.class
|
64
64
|
end
|
65
65
|
|
66
66
|
should "parse domain parts" do
|
67
|
-
assert_equal "gov", Gman.
|
68
|
-
assert_equal "bar", Gman.
|
69
|
-
assert_equal "bar", Gman.
|
70
|
-
assert_equal "bar.gov", Gman.
|
67
|
+
assert_equal "gov", Gman.new("foo@bar.gov").domain_parts.tld
|
68
|
+
assert_equal "bar", Gman.new("foo.bar.gov").domain_parts.sld
|
69
|
+
assert_equal "bar", Gman.new("https://foo.bar.gov").domain_parts.sld
|
70
|
+
assert_equal "bar.gov", Gman.new("foo@bar.gov").domain_parts.domain
|
71
71
|
end
|
72
72
|
|
73
73
|
should "not err out on invalid hosts" do
|
74
|
-
assert_equal nil, Gman.
|
74
|
+
assert_equal nil, Gman.new("</@foo.com").domain
|
75
75
|
end
|
76
76
|
|
77
77
|
should "returns the path to domains.txt" do
|
78
78
|
assert_equal true, File.exists?(Gman.list_path)
|
79
79
|
end
|
80
|
+
|
81
|
+
should "parse the alpha2" do
|
82
|
+
assert_equal "us", Gman.new("whitehouse.gov").alpha2
|
83
|
+
assert_equal "us", Gman.new("army.mil").alpha2
|
84
|
+
assert_equal "gb", Gman.new("foo.gov.uk").alpha2
|
85
|
+
assert_equal "ca", Gman.new("gov.ca").alpha2
|
86
|
+
end
|
87
|
+
|
88
|
+
should "determine a domain's country" do
|
89
|
+
assert_equal "United States", Gman.new("whitehouse.gov").country.name
|
90
|
+
assert_equal "United States", Gman.new("army.mil").country.name
|
91
|
+
assert_equal "United Kingdom", Gman.new("foo.gov.uk").country.name
|
92
|
+
assert_equal "Canada", Gman.new("foo.gc.ca").country.name
|
93
|
+
end
|
80
94
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: public_suffix
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: iso_country_codes
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.4'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|