gman 4.1.4 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/vendor/dotgovs.csv +5321 -0
- data/lib/gman.rb +13 -2
- data/lib/gman/identifier.rb +109 -0
- metadata +4 -2
data/lib/gman.rb
CHANGED
@@ -1,20 +1,31 @@
|
|
1
1
|
require 'naughty_or_nice'
|
2
2
|
require 'swot'
|
3
3
|
require 'iso_country_codes'
|
4
|
+
require 'csv'
|
4
5
|
require_relative 'gman/country_codes'
|
5
6
|
require_relative 'gman/locality'
|
7
|
+
require_relative 'gman/identifier'
|
6
8
|
|
7
9
|
class Gman < NaughtyOrNice
|
8
10
|
class << self
|
11
|
+
|
9
12
|
# returns an instance of our custom public suffix list
|
10
13
|
# list behaves like PublicSuffix::List but is limited to our whitelisted domains
|
11
14
|
def list
|
12
|
-
@list ||= PublicSuffix::List::parse(
|
15
|
+
@list ||= PublicSuffix::List::parse(list_contents)
|
16
|
+
end
|
17
|
+
|
18
|
+
def config_path
|
19
|
+
File.join(File.dirname(__FILE__), "../config")
|
13
20
|
end
|
14
21
|
|
15
22
|
# Returns the absolute path to the domain list
|
16
23
|
def list_path
|
17
|
-
File.join(
|
24
|
+
File.join(config_path,"domains.txt")
|
25
|
+
end
|
26
|
+
|
27
|
+
def list_contents
|
28
|
+
@list_contents ||= File.new(list_path, "r:utf-8").read
|
18
29
|
end
|
19
30
|
end
|
20
31
|
|
@@ -0,0 +1,109 @@
|
|
1
|
+
class Gman < NaughtyOrNice
|
2
|
+
|
3
|
+
def type
|
4
|
+
if state?
|
5
|
+
:state
|
6
|
+
elsif district?
|
7
|
+
:district
|
8
|
+
elsif cog?
|
9
|
+
:cog
|
10
|
+
elsif city?
|
11
|
+
:city
|
12
|
+
elsif federal?
|
13
|
+
:federal
|
14
|
+
elsif county?
|
15
|
+
:county
|
16
|
+
elsif list_category.include?("usagov")
|
17
|
+
:unknown
|
18
|
+
else
|
19
|
+
list_category.to_sym
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def state
|
24
|
+
if matches
|
25
|
+
matches[4].upcase
|
26
|
+
elsif dotgov_listing
|
27
|
+
dotgov_listing["State"]
|
28
|
+
elsif list_category
|
29
|
+
matches = list_category.match(/usagov([A-Z]{2})/)
|
30
|
+
matches[1] if matches
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def city
|
35
|
+
dotgov_listing["City"] if dotgov_listing
|
36
|
+
end
|
37
|
+
|
38
|
+
def agency
|
39
|
+
dotgov_listing["Agency"] if federal?
|
40
|
+
end
|
41
|
+
|
42
|
+
def dotgov?
|
43
|
+
domain_parts.tld == "gov"
|
44
|
+
end
|
45
|
+
|
46
|
+
def federal?
|
47
|
+
dotgov_listing && dotgov_listing["Domain Type"] == "Federal Agency"
|
48
|
+
end
|
49
|
+
|
50
|
+
def city?
|
51
|
+
if matches
|
52
|
+
%w[ci town vil].include?(matches[3])
|
53
|
+
elsif dotgov_listing
|
54
|
+
dotgov_listing["Domain Type"] == "City"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def county?
|
59
|
+
if matches
|
60
|
+
matches[3] == "co"
|
61
|
+
elsif dotgov_listing
|
62
|
+
dotgov_listing["Domain Type"] == "County"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def state?
|
67
|
+
if matches
|
68
|
+
matches[1] == "state"
|
69
|
+
elsif dotgov_listing
|
70
|
+
dotgov_listing["Domain Type"] == "State/Local Govt"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def district?
|
75
|
+
!!matches && matches[1] == "dst"
|
76
|
+
end
|
77
|
+
|
78
|
+
def cog?
|
79
|
+
!!matches && matches[1] == "cog"
|
80
|
+
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
def list_category
|
85
|
+
@list_category ||= begin
|
86
|
+
if match = Gman.list.find(domain)
|
87
|
+
regex = Regexp.new "\/\/ ([^\\n]+)\\n?[^\/\/]*\\n#{Regexp.escape(match.name)}\\n", "im"
|
88
|
+
matches = Gman.list_contents.match(regex)
|
89
|
+
matches[1] if matches
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def matches
|
95
|
+
@matches ||= domain.match(LOCALITY_REGEX)
|
96
|
+
end
|
97
|
+
|
98
|
+
def self.dotgov_list_path
|
99
|
+
File.join Gman.config_path, "vendor/dotgovs.csv"
|
100
|
+
end
|
101
|
+
|
102
|
+
def self.dotgov_list
|
103
|
+
@dotgov_list ||= CSV.read(dotgov_list_path, :headers => true)
|
104
|
+
end
|
105
|
+
|
106
|
+
def dotgov_listing
|
107
|
+
@dotgov_listing ||= Gman.dotgov_list.find { |d| d["Domain Name"].downcase == "#{domain_parts.sld}.gov" } if dotgov?
|
108
|
+
end
|
109
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gman
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.4
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: swot
|
@@ -147,8 +147,10 @@ files:
|
|
147
147
|
- LICENSE
|
148
148
|
- bin/gman_filter
|
149
149
|
- config/domains.txt
|
150
|
+
- config/vendor/dotgovs.csv
|
150
151
|
- lib/gman.rb
|
151
152
|
- lib/gman/country_codes.rb
|
153
|
+
- lib/gman/identifier.rb
|
152
154
|
- lib/gman/locality.rb
|
153
155
|
homepage: https://github.com/benbalter/gman
|
154
156
|
licenses:
|