classyfier 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/classyfier.rb +2 -1
- data/lib/classyfier/us_regions.rb +127 -0
- data/lib/classyfier/version.rb +1 -1
- metadata +3 -2
data/Gemfile.lock
CHANGED
data/lib/classyfier.rb
CHANGED
@@ -0,0 +1,127 @@
|
|
1
|
+
module Classyfier
  module Classifiers
    # Assigns a location to one of four coarse US regions, either from a
    # latitude/longitude pair (rough bounding boxes) or from free text that
    # mentions a state by full name or two-letter postal code.
    class USRegions
      SOUTH = "south"
      WEST = "west"
      MIDWEST = "midwest"
      NORTHEAST = "northeast"

      # Bounding boxes are tested in this order, so where two boxes overlap
      # the region listed first wins.
      REGIONS = [SOUTH, WEST, MIDWEST, NORTHEAST].freeze

      # Builds the lookup tables used by #classify.
      def initialize
        # Postal code => region. A nil value means "known code, no region".
        # NOTE(review): "HI" has no region, and OK/DC/DE/MD do not follow the
        # Census Bureau region definitions -- confirm this is intentional.
        @codes = {
          "AK" => WEST,
          "AL" => SOUTH,
          "AR" => SOUTH,
          "AZ" => WEST,
          "CA" => WEST,
          "CO" => WEST,
          "CT" => NORTHEAST,
          "DC" => NORTHEAST,
          "DE" => NORTHEAST,
          "FL" => SOUTH,
          "GA" => SOUTH,
          "HI" => nil,
          "IA" => MIDWEST,
          "ID" => WEST,
          "IL" => MIDWEST,
          "IN" => MIDWEST,
          "KS" => MIDWEST,
          "KY" => SOUTH,
          "LA" => SOUTH,
          "MA" => NORTHEAST,
          "MD" => NORTHEAST,
          "ME" => NORTHEAST,
          "MI" => MIDWEST,
          "MN" => MIDWEST,
          "MO" => MIDWEST,
          "MS" => SOUTH,
          "MT" => WEST,
          "NC" => SOUTH,
          "ND" => MIDWEST,
          "NE" => MIDWEST,
          "NH" => NORTHEAST,
          "NJ" => NORTHEAST,
          "NM" => WEST,
          "NV" => WEST,
          "NY" => NORTHEAST,
          "OH" => MIDWEST,
          "OK" => MIDWEST,
          "OR" => WEST,
          "PA" => NORTHEAST,
          "RI" => NORTHEAST,
          "SC" => SOUTH,
          "SD" => MIDWEST,
          "TN" => SOUTH,
          "TX" => SOUTH,
          "UT" => WEST,
          "VA" => SOUTH,
          "VT" => NORTHEAST,
          "WA" => WEST,
          "WI" => MIDWEST,
          "WV" => SOUTH,
          "WY" => WEST
        }

        # Lower-case state/territory name => postal code. Territories appear
        # here but have no entry in @codes, so they never yield a region.
        @names = {
          "alaska" => "AK",
          "alabama" => "AL",
          "arkansas" => "AR",
          "american samoa" => "AS",
          "arizona" => "AZ",
          "california" => "CA",
          "colorado" => "CO",
          "connecticut" => "CT",
          "district of columbia" => "DC",
          "delaware" => "DE",
          "florida" => "FL",
          "georgia" => "GA",
          "guam" => "GU",
          "hawaii" => "HI",
          "iowa" => "IA",
          "idaho" => "ID",
          "illinois" => "IL",
          "indiana" => "IN",
          "kansas" => "KS",
          "kentucky" => "KY",
          "louisiana" => "LA",
          "massachusetts" => "MA",
          "maryland" => "MD",
          "maine" => "ME",
          "michigan" => "MI",
          "minnesota" => "MN",
          "missouri" => "MO",
          "northern mariana islands" => "MP",
          "mississippi" => "MS",
          "montana" => "MT",
          "north carolina" => "NC",
          "north dakota" => "ND",
          "nebraska" => "NE",
          "new hampshire" => "NH",
          "new jersey" => "NJ",
          "new mexico" => "NM",
          "nevada" => "NV",
          "new york" => "NY",
          "ohio" => "OH",
          "oklahoma" => "OK",
          "oregon" => "OR",
          "pennsylvania" => "PA",
          "puerto rico" => "PR",
          "rhode island" => "RI",
          "south carolina" => "SC",
          "south dakota" => "SD",
          "tennessee" => "TN",
          "texas" => "TX",
          "united states minor outlying islands" => "UM",
          "utah" => "UT",
          "virginia" => "VA",
          "virgin islands" => "VI",
          "vermont" => "VT",
          "washington" => "WA",
          "wisconsin" => "WI",
          "west virginia" => "WV",
          "wyoming" => "WY"
        }

        # Longest name, in words; bounds the phrase window used for text.
        @max_name_words = @names.keys.map { |name| name.split(" ").length }.max

        # Rough bounding boxes. :top/:bottom are degrees of latitude north;
        # :right/:left are degrees of longitude WEST (so :left > :right).
        @regional_coords = {
          SOUTH => {
            :top => 37,
            :bottom => 30,
            :right => 75,
            :left => 100
          },
          WEST => {
            :top => 48,
            :bottom => 32,
            :right => 103,
            :left => 125
          },
          MIDWEST => {
            :top => 48,
            :bottom => 30,
            :right => 80,
            # Was 35, which is below :right and made the box empty so it
            # could never match. 103 abuts the WEST box.
            # NOTE(review): confirm the intended western edge.
            :left => 103
          },
          NORTHEAST => {
            :top => 48,
            :bottom => 37,
            :right => 65,
            :left => 80
          }
        }
      end

      # Classifies a location into a US region.
      #
      # When both :lat and :long are given, only the coordinates are used and
      # :text is ignored, even if the point falls outside every box.
      #
      # @param opts [Hash] options
      #   :lat  [Numeric, nil] latitude in degrees north
      #   :long [Numeric, nil] longitude in degrees; the sign is ignored, so
      #                        both 87.6 and -87.6 mean 87.6 degrees west
      #   :text [String, nil]  free text that may mention a state
      # @return [String, nil] one of REGIONS, or nil when nothing matches
      def classify(opts = {:lat => nil, :long => nil, :text => nil})
        if opts[:lat] && opts[:long]
          return region_for_coordinates(opts[:lat], opts[:long])
        end
        return nil unless opts[:text]

        region_for_text(opts[:text])
      end

      protected

      # Lower-cases the word and drops every character outside a-z.
      def clean_word(word)
        word.downcase.strip.gsub(/[^a-z]/, '')
      end

      private

      # Returns the first region (in REGIONS order) whose box contains the
      # point, or nil when no box does.
      def region_for_coordinates(lat, long)
        # Boxes are stored as positive degrees west; accept signed input too.
        long = -long if long < 0

        REGIONS.find do |region|
          box = @regional_coords[region]
          long >= box[:right] && long <= box[:left] &&
            lat >= box[:bottom] && lat <= box[:top]
        end
      end

      # Scans the text left to right and returns the region of the first
      # unambiguous state mention: a full state name, or a postal code written
      # in upper case. Postal codes written in lower case collide with common
      # English words ("in", "or", "me", "ok"), so they are only used when the
      # text contains no unambiguous mention. Mentions without a region
      # (Hawaii, territories) are skipped rather than ending the scan.
      def region_for_text(text)
        tokens = text.split(" ")
        words = tokens.map { |token| clean_word(token) }
        fallback = nil

        words.each_index do |index|
          named = region_for_name_at(words, index)
          return named if named

          code = words[index].upcase
          region = @codes[code]
          next unless region

          # Letters exactly as typed; equal to the code only when upper-case.
          return region if tokens[index].gsub(/[^A-Za-z]/, '') == code

          fallback ||= region
        end

        fallback
      end

      # Looks for a state name starting at words[index], trying the longest
      # phrase first so "west virginia" is not read as "virginia".
      def region_for_name_at(words, index)
        longest = [@max_name_words, words.length - index].min

        longest.downto(1) do |count|
          code = @names[words[index, count].join(" ")]
          region = code && @codes[code]
          return region if region
        end

        nil
      end
    end
  end
end
|
data/lib/classyfier/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: classyfier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-16 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Simple Ruby Classifier Utilities
|
15
15
|
email:
|
@@ -28,6 +28,7 @@ files:
|
|
28
28
|
- lib/classyfier/english.rb
|
29
29
|
- lib/classyfier/gender.rb
|
30
30
|
- lib/classyfier/naive_bayes.rb
|
31
|
+
- lib/classyfier/us_regions.rb
|
31
32
|
- lib/classyfier/version.rb
|
32
33
|
- lib/data/census_female_1990.dat
|
33
34
|
- lib/data/census_male_1990.dat
|