classyfier 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/classyfier.rb +2 -1
- data/lib/classyfier/us_regions.rb +127 -0
- data/lib/classyfier/version.rb +1 -1
- metadata +3 -2
data/Gemfile.lock
CHANGED
data/lib/classyfier.rb
CHANGED
@@ -0,0 +1,127 @@
|
|
1
|
+
module Classyfier
  module Classifiers
    # Maps a location to one of four coarse US regions ("south", "west",
    # "midwest", "northeast"), either from a latitude/longitude pair or from
    # free text that mentions a state code or state name.
    class USRegions
      SOUTH = "south"
      WEST = "west"
      MIDWEST = "midwest"
      NORTHEAST = "northeast"

      # Order matters: coordinate lookup returns the first region whose
      # bounding box contains the point, and the boxes overlap.
      REGIONS = [SOUTH, WEST, MIDWEST, NORTHEAST]

      # Two-letter postal code => region. "HI" is deliberately mapped to nil,
      # so neither the token "hi" nor the name "hawaii" yields a region
      # (presumably to keep the greeting "hi" from matching -- confirm).
      STATE_REGIONS = {
        "AK" => WEST,
        "AL" => SOUTH,
        "AR" => SOUTH,
        "AZ" => WEST,
        "CA" => WEST,
        "CO" => WEST,
        "CT" => NORTHEAST,
        "DC" => NORTHEAST,
        "DE" => NORTHEAST,
        "FL" => SOUTH,
        "GA" => SOUTH,
        "HI" => nil,
        "IA" => MIDWEST,
        "ID" => WEST,
        "IL" => MIDWEST,
        "IN" => MIDWEST,
        "KS" => MIDWEST,
        "KY" => SOUTH,
        "LA" => SOUTH,
        "MA" => NORTHEAST,
        "MD" => NORTHEAST,
        "ME" => NORTHEAST,
        "MI" => MIDWEST,
        "MN" => MIDWEST,
        "MO" => MIDWEST,
        "MS" => SOUTH,
        "MT" => WEST,
        "NC" => SOUTH,
        "ND" => MIDWEST,
        "NE" => MIDWEST,
        "NH" => NORTHEAST,
        "NJ" => NORTHEAST,
        "NM" => WEST,
        "NV" => WEST,
        "NY" => NORTHEAST,
        "OH" => MIDWEST,
        "OK" => MIDWEST,
        "OR" => WEST,
        "PA" => NORTHEAST,
        "RI" => NORTHEAST,
        "SC" => SOUTH,
        "SD" => MIDWEST,
        "TN" => SOUTH,
        "TX" => SOUTH,
        "UT" => WEST,
        "VA" => SOUTH,
        "VT" => NORTHEAST,
        "WA" => WEST,
        "WI" => MIDWEST,
        "WV" => SOUTH,
        "WY" => WEST
      }.freeze

      # Lower-case state/territory name => postal code. Territories (AS, GU,
      # MP, PR, UM, VI) have no entry in STATE_REGIONS and so never produce a
      # region.
      STATE_NAMES = {
        "alaska" => "AK", "alabama" => "AL", "arkansas" => "AR",
        "american samoa" => "AS", "arizona" => "AZ", "california" => "CA",
        "colorado" => "CO", "connecticut" => "CT",
        "district of columbia" => "DC", "delaware" => "DE",
        "florida" => "FL", "georgia" => "GA", "guam" => "GU",
        "hawaii" => "HI", "iowa" => "IA", "idaho" => "ID",
        "illinois" => "IL", "indiana" => "IN", "kansas" => "KS",
        "kentucky" => "KY", "louisiana" => "LA", "massachusetts" => "MA",
        "maryland" => "MD", "maine" => "ME", "michigan" => "MI",
        "minnesota" => "MN", "missouri" => "MO",
        "northern mariana islands" => "MP", "mississippi" => "MS",
        "montana" => "MT", "north carolina" => "NC", "north dakota" => "ND",
        "nebraska" => "NE", "new hampshire" => "NH", "new jersey" => "NJ",
        "new mexico" => "NM", "nevada" => "NV", "new york" => "NY",
        "ohio" => "OH", "oklahoma" => "OK", "oregon" => "OR",
        "pennsylvania" => "PA", "puerto rico" => "PR",
        "rhode island" => "RI", "south carolina" => "SC",
        "south dakota" => "SD", "tennessee" => "TN", "texas" => "TX",
        "united states minor outlying islands" => "UM", "utah" => "UT",
        "virginia" => "VA", "virgin islands" => "VI", "vermont" => "VT",
        "washington" => "WA", "wisconsin" => "WI", "west virginia" => "WV",
        "wyoming" => "WY"
      }.freeze

      # Longest state name, in words; bounds the phrase lookup in #classify.
      MAX_NAME_WORDS = STATE_NAMES.keys.map { |name| name.split(" ").size }.max

      # Inclusive bounding box per region. Longitudes are positive numbers
      # with :left being the larger value, so callers must pass longitude as
      # a positive magnitude (e.g. 87.6, not -87.6) for any box to match.
      REGION_BOXES = {
        SOUTH => {
          :top => 37,
          :bottom => 30,
          :right => 75,
          :left => 100
        },
        WEST => {
          :top => 48,
          :bottom => 32,
          :right => 103,
          :left => 125
        },
        MIDWEST => {
          :top => 48,
          :bottom => 30,
          :right => 80,
          # Was 35, which is smaller than :right and made this box
          # unreachable. 103 abuts WEST's eastern edge.
          # NOTE(review): confirm the intended western boundary.
          :left => 103
        },
        NORTHEAST => {
          :top => 48,
          :bottom => 37,
          :right => 65,
          :left => 80
        }
      }.freeze

      # Exposes the lookup tables through the instance variables used by the
      # rest of the class; the tables themselves are shared, frozen constants.
      def initialize
        @codes = STATE_REGIONS
        @names = STATE_NAMES
        @regional_coords = REGION_BOXES
      end

      # Classifies a location into a region.
      #
      # opts - Hash with optional keys:
      #        :lat, :long - numeric coordinates; when both are present they
      #                      take precedence and :text is ignored, even if no
      #                      bounding box matches.
      #        :text       - String scanned left to right for a state code or
      #                      (possibly multi-word) state name.
      #
      # Returns one of the region strings in REGIONS, or nil when nothing
      # matches.
      #
      # Caveat: text is lower-cased before matching, so ordinary words that
      # spell a postal code ("in", "or", "me", ...) match that state.
      def classify(opts = {:lat => nil, :long => nil, :text => nil})
        if opts[:lat] && opts[:long]
          return region_for_coords(opts[:lat], opts[:long])
        end

        return nil unless opts[:text]

        region_for_text(opts[:text])
      end

      protected

      # Lower-cases a token and drops every character outside a-z.
      def clean_word(word)
        word.downcase.strip.gsub(/[^a-z]/, '')
      end

      private

      # Returns the first region (in REGIONS order) whose box contains the
      # point, or nil.
      def region_for_coords(lat, long)
        REGIONS.find do |region|
          box = @regional_coords[region]
          long >= box[:right] && long <= box[:left] &&
            lat >= box[:bottom] && lat <= box[:top]
        end
      end

      # Scans the text and returns the region of the first state mention that
      # has one; mentions without a region (territories, Hawaii) are skipped
      # rather than ending the scan.
      def region_for_text(text)
        words = text.split(" ").map { |raw| clean_word(raw) }
        words = words.reject { |token| token.empty? }

        words.each_index do |start|
          code = state_code_at(words, start)
          region = code && @codes[code]
          return region if region
        end

        # Explicit nil: each_index would otherwise hand back the word array.
        nil
      end

      # Returns the postal code for the state mentioned at words[start], or
      # nil. Multi-word names are tried longest first, then the single word
      # as a postal code, then the single word as a name.
      def state_code_at(words, start)
        longest = [MAX_NAME_WORDS, words.size - start].min
        longest.downto(2) do |length|
          code = @names[words[start, length].join(" ")]
          return code if code
        end

        word = words[start]
        code = word.upcase
        return code if @codes[code]

        @names[word]
      end
    end
  end
end
|
data/lib/classyfier/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: classyfier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-16 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Simple Ruby Classifier Utilities
|
15
15
|
email:
|
@@ -28,6 +28,7 @@ files:
|
|
28
28
|
- lib/classyfier/english.rb
|
29
29
|
- lib/classyfier/gender.rb
|
30
30
|
- lib/classyfier/naive_bayes.rb
|
31
|
+
- lib/classyfier/us_regions.rb
|
31
32
|
- lib/classyfier/version.rb
|
32
33
|
- lib/data/census_female_1990.dat
|
33
34
|
- lib/data/census_male_1990.dat
|