geomash 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,145 @@
1
+ module Geomash
2
+ class Geonames
3
+
4
# GeoNames API account name used for every request made by this class.
#
# Returns the value stored under Geomash.config[:geonames_username], or the
# literal placeholder '<username>' when that entry is nil or false.
def self.geonames_username
  configured = Geomash.config[:geonames_username]
  configured ? configured : '<username>'
end
7
+
8
# Fetches the GeoNames "hierarchy" document for a place and summarises it.
#
# geoname_id - GeoNames identifier. It is interpolated into the request URL,
#              so both a String and an Integer are accepted.
#
# The request is attempted up to 3 times, sleeping 60 seconds between
# attempts, for as long as the service answers HTTP 500.
#
# Returns a Hash. After a non-500 response it holds:
#   :coords   - :latitude, :longitude, :combined ("lat,lng") and :box, all read
#               from the LAST geoname element of the response. :box holds
#               :west/:north/:east/:south, or is {} when any of them cannot be read.
#   :hier_geo - { downcased feature code (Symbol) => toponym name } for every
#               geoname element, or nil when there are none.
# Returns {} when the last attempt still answered HTTP 500.
def self.get_geonames_data(geoname_id)
  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  hier_geo = {}
  coords = {}
  geonames_data = {}

  geonames_response = nil
  loop do
    sleep(sleep_time) if retry_count > 0
    retry_count += 1

    # Interpolation (rather than String#+) keeps Integer ids from raising TypeError.
    geonames_response = Typhoeus::Request.get("http://api.geonames.org/hierarchy?username=#{self.geonames_username}&lang=en&style=FULL&geonameId=#{geoname_id}")

    break if geonames_response.code != 500 || retry_count == max_retry
  end

  return geonames_data if geonames_response.code == 500

  parsed_xml = Nokogiri::Slop(geonames_response.body)
  geonames = parsed_xml.geonames.geoname

  geonames.each do |geoname|
    hier_geo[geoname.fcode.text.downcase.to_sym] = geoname.toponymName.text
  end

  #FIXME: Code4Lib lazy implementation... will get last result
  geoname = geonames.last
  coords[:latitude] = geoname.lat.text
  coords[:longitude] = geoname.lng.text
  coords[:combined] = coords[:latitude] + ',' + coords[:longitude]

  # Slop raises when an element is absent, so a missing or incomplete bbox
  # deliberately leaves an empty box instead of failing the lookup.
  begin
    bbox = geoname.bbox
    coords[:box] = {
      :west  => bbox.west.text,
      :north => bbox.north.text,
      :east  => bbox.east.text,
      :south => bbox.south.text
    }
  rescue StandardError
    coords[:box] = {}
  end

  geonames_data[:coords] = coords
  geonames_data[:hier_geo] = hier_geo.present? ? hier_geo : nil

  geonames_data
end
56
+
57
+
58
# Looks up the GeoNames id that best matches an already-parsed place.
#
# geo_hash - Hash that may hold :neighborhood_part, :city_part, :state_part and
#            :country_part. It is cloned, so the caller's hash is not modified.
#            It is also handed to Geomash::Standardizer.parsed_and_original_check.
#
# The search uses the most specific of neighborhood/city (never both) plus
# state and country. When GeoNames reports zero results, the most specific
# part is dropped and the lookup is repeated recursively.
#
# Returns a Hash with :original_string_differs and, when a geoname element was
# found, :id and :rdf. Returns nil when no GeoNames username is configured,
# when geo_hash has no usable part, or when nothing matched.
# Raises RuntimeError when GeoNames still answers HTTP 500 after 3 attempts
# (60 seconds apart).
def self.geonames_id_from_geo_hash(geo_hash)
  return nil if Geomash::Geonames.geonames_username == '<username>'
  geo_hash = geo_hash.clone

  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  return_hash = {}

  #Don't do both neighborhood and city!
  geonames_search_array = []
  if geo_hash[:neighborhood_part].present?
    geonames_search_array << geo_hash[:neighborhood_part]
  elsif geo_hash[:city_part].present?
    geonames_search_array << geo_hash[:city_part]
  end
  geonames_search_array << geo_hash[:state_part] if geo_hash[:state_part].present?
  geonames_search_array << geo_hash[:country_part] if geo_hash[:country_part].present?

  # Nothing to search for (previously crashed with nil.strip).
  return nil if geonames_search_array.empty?

  geonames_search_string = geonames_search_array.join(', ')
  # GeoNames must match the most specific name exactly.
  exact_name_term = geonames_search_array.first.strip

  # Only restrict by country when it resolves (previously crashed with nil.alpha2).
  country = geo_hash[:country_part].present? ? Country.find_country_by_name(geo_hash[:country_part]) : nil
  country_param = country ? "&country=#{country.alpha2}" : ''
  search_url = "http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}#{country_param}"

  geonames_response = nil
  loop do
    sleep(sleep_time) if retry_count > 0
    retry_count += 1

    geonames_response = Typhoeus::Request.get(search_url)

    break if geonames_response.code != 500 || retry_count == max_retry
  end

  if geonames_response.code == 500
    raise 'Geonames Server appears to not be responding for Geographic query: ' + geonames_search_string
  end

  parsed_xml = Nokogiri::Slop(geonames_response.body)

  # NOTE: effectively a no-op today: both the raise and Slop's error for a
  # missing <status> element are swallowed by the rescue below.
  begin
    raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.geonames.status
  rescue
    #Do nothing but FIXME to not use slop
  end

  #This is ugly and needs to be redone to achieve better recursive...
  if parsed_xml.geonames.totalResultsCount.text == '0'
    dropped_part = [:neighborhood_part, :city_part].find { |part| geo_hash[part].present? }
    return nil unless dropped_part

    broader_hash = geo_hash.clone
    broader_hash[dropped_part] = nil
    broader_match = geonames_id_from_geo_hash(broader_hash)
    return broader_match.present? ? broader_match : nil
  end

  #Exact Match ... FIXME to not use Slop
  # Slop yields a single Element for one result and a NodeSet for several.
  geoname = parsed_xml.geonames.geoname
  if geoname.class == Nokogiri::XML::Element
    return_hash[:id] = geoname.geonameId.text
    return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
  elsif geoname.class == Nokogiri::XML::NodeSet
    return_hash[:id] = geoname.first.geonameId.text
    return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
  end
  return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)

  return_hash
end
144
+ end
145
+ end
@@ -0,0 +1,220 @@
1
+ module Geomash
2
+ class Parser
3
+
4
# MapQuest API key used by parse_mapquest_api.
#
# Returns Geomash.config[:mapquest_key], or the literal placeholder
# '<mapquest_key>' when that entry is nil or false.
def self.mapquest_key
  configured = Geomash.config[:mapquest_key]
  configured ? configured : '<mapquest_key>'
end
7
+
8
# Bing Maps API key used by parse_bing_api.
#
# Returns Geomash.config[:bing_key], or the literal placeholder '<bing_key>'
# when that entry is nil or false.
def self.bing_key
  configured = Geomash.config[:bing_key]
  configured ? configured : '<bing_key>'
end
11
+
12
# Timeout value handed to Geocoder.configure by the parse_*_api methods.
#
# Returns whatever is stored under Geomash.config[:timeout]; no default is
# applied here.
def self.timeout
  settings = Geomash.config
  settings[:timeout]
end
15
+
16
+ #Note: Limited to only looking at United States places...
17
# Geocodes a place string with the Bing lookup of the Geocoder gem.
# Only United States results are ever returned.
#
# term            - String to geocode.
# parse_term_flag - when true, term is first passed through
#                   Geomash::Standardizer.parse_for_geographic_term.
#
# Returns a Hash with :original_term, :standardized_term, :country_part,
# :state_part (looked up in Geomash::Constants::STATE_ABBR) and :city_part.
# When Bing reports an address line it also holds :street_part, :coords
# (:latitude, :longitude, :combined) and :term_differs_from_tgn => true.
#
# Returns {} when no Bing key is configured, the standardized term is blank or
# contains parentheses, the term has 3+ comma parts but does not look like a
# street address, the lookup keeps failing with SocketError (3 attempts), the
# top result is not in the United States, or it is a 'Neighborhood' entity.
#
# Note: Geocoder.configure changes process-wide Geocoder settings.
def self.parse_bing_api(term, parse_term_flag=false)
  return_hash = {}
  attempts_left = 3

  #Skip if no bing_key... possibly move this elsewhere?
  return return_hash if self.bing_key == '<bing_key>'

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  return {} if term.blank?

  return_hash[:standardized_term] = term

  #Bing API does badly with parentheses...
  return {} if term.match(/[\(\)]+/)

  #Sometimes with building, city, state, bing is dumb and will only return state. Example: Boston Harbor, Boston, Mass.
  #So if not a street address, pass to have google handle it for better results...
  #Example of another bad record: South Street bridge, West Bridgewater, Mass. would give a place in Holyoke
  looks_like_street = term.downcase =~ /\d|(?:ave|avenue|street|st|road|rd)\.*,/
  return {} if term.split(',').length >= 3 && !looks_like_street

  Geocoder.configure(:lookup => :bing, :api_key => self.bing_key, :timeout => self.timeout, :always_raise => :all)

  # Only the network call is retried. A method-level `retry` would re-run the
  # whole body, resetting the counter (endless loop) and re-standardizing term.
  begin
    bing_api_result = Geocoder.search(term)
  rescue SocketError
    attempts_left -= 1
    retry unless attempts_left.zero?
    return {}
  end

  return {} unless bing_api_result.present?

  best_match = bing_api_result.first.data
  address = best_match["address"]

  #Use only for United States results... international results are inaccurate.
  return {} unless address["countryRegion"] == 'United States'

  #Doesn't return a city... Google handles this better.
  return {} if best_match["entityType"] == 'Neighborhood'

  if address["addressLine"].present?
    coordinates = best_match["geocodePoints"].first["coordinates"]
    latitude = coordinates.first.to_s
    longitude = coordinates.last.to_s

    return_hash[:term_differs_from_tgn] = true
    return_hash[:street_part] = address["addressLine"]
    return_hash[:coords] = {:latitude => latitude,
                            :longitude => longitude,
                            :combined => latitude + ',' + longitude}
  end

  return_hash[:country_part] = address["countryRegion"]
  return_hash[:state_part] = Geomash::Constants::STATE_ABBR[address["adminDistrict"]]
  return_hash[:city_part] = address["locality"]

  return_hash
end
85
+
86
+ #Mapquest allows unlimited requests - start here?
87
# Geocodes a place string with the MapQuest lookup of the Geocoder gem.
# Only United States results are ever returned.
#
# term            - String to geocode.
# parse_term_flag - when true, term is first passed through
#                   Geomash::Standardizer.parse_for_geographic_term.
#
# Returns a Hash with :original_term, :standardized_term, :country_part,
# :state_part and :city_part (with any ' City' removed). When MapQuest reports
# a street it also holds :street_part and :coords (:latitude, :longitude,
# :combined).
#
# Returns {} when no MapQuest key is configured, the standardized term is
# blank, the term mentions 'Manchester' or 'Atlanta, MI', the term has 3+ comma
# parts but does not look like a street address, the lookup keeps failing with
# SocketError (3 attempts), there is no United States result, or the term has
# 2+ comma parts and no city came back.
#
# Note: Geocoder.configure changes process-wide Geocoder settings.
def self.parse_mapquest_api(term, parse_term_flag=false)
  return_hash = {}
  attempts_left = 3

  #Skip if no mapquest_key... possibly move this elsewhere?
  return return_hash if self.mapquest_key == '<mapquest_key>'

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  return {} if term.blank?

  return_hash[:standardized_term] = term

  #Mapquest returns bad data for: Manchester, Mass.
  return {} if term.include?('Manchester') || term.include?('Atlanta, MI')

  #Messed up with just neighborhoods. Example: Hyde Park (Boston, Mass.)
  #So if not a street address, pass to have google handle it for better results...
  looks_like_street = term.downcase =~ /\d|(?:ave|avenue|street|st|road|rd)\.*,/
  return {} if term.split(',').length >= 3 && !looks_like_street

  Geocoder.configure(:lookup => :mapquest, :api_key => self.mapquest_key, :timeout => self.timeout, :always_raise => :all)

  # Only the network call is retried. A method-level `retry` would re-run the
  # whole body, resetting the counter (endless loop) and re-standardizing term.
  begin
    mapquest_api_result = Geocoder.search(term)
  rescue SocketError
    attempts_left -= 1
    retry unless attempts_left.zero?
    return {}
  end

  return {} unless mapquest_api_result.present?

  best_match = mapquest_api_result.first.data

  if best_match["street"].present?
    latitude = best_match['latLng']['lat'].to_s
    longitude = best_match['latLng']['lng'].to_s

    #return_hash[:term_differs_from_tgn] = true
    return_hash[:street_part] = best_match["street"]
    return_hash[:coords] = {:latitude => latitude,
                            :longitude => longitude,
                            :combined => latitude + ',' + longitude}
  end

  # Guard against a country code the countries gem cannot resolve.
  country = Country.new(best_match["adminArea1"])
  return_hash[:country_part] = country ? country.name : nil

  #Only return if USA for now. Google is better with stuff like: 'Long Binh, Vietnam'
  return {} unless return_hash[:country_part] == 'United States'

  return_hash[:state_part] = Geomash::Constants::STATE_ABBR[best_match["adminArea3"]] || best_match["adminArea4"]

  #Return "New York City" as "New York"...
  city = best_match["adminArea5"]
  return_hash[:city_part] = city ? city.gsub(' City', '') : nil

  #Also only return if there is a city if there were more than two terms passed in. Fixes: Roxbury, MA
  return {} if term.split(',').length >= 2 && return_hash[:city_part].blank?

  return_hash
end
155
+
156
+ #Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
157
+ #Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
158
+ #Seems like it sets a partial_match=>true in the data section...
159
# Geocodes a place string with the Google lookup of the Geocoder gem.
#
# term            - String to geocode.
# parse_term_flag - when true, term is first passed through
#                   Geomash::Standardizer.parse_for_geographic_term.
#
# When Google flags the first result as a partial match, the term has more
# than one comma part and it contains no street-like word, the first part is
# dropped and the remainder is searched again.
#
# Returns a Hash with :original_term and :standardized_term plus, depending on
# the address components of the first result: :country_part, :state_part,
# :city_part, :neighborhood_part, :coords (:latitude, :longitude, :combined;
# only for street/establishment/station/non-political neighborhood components)
# and :term_differs_from_tgn (set from Google's partial_match flag).
#
# Returns {} when the standardized term is blank or is 'Soviet Union', or when
# the lookup keeps failing with SocketError (3 attempts).
#
# Note: Geocoder.configure changes process-wide Geocoder settings.
def self.parse_google_api(term, parse_term_flag=false)
  return_hash = {}
  attempts_left = 3

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  #Soviet Union returns back a place in Kazakhstan
  return {} if term.blank? || term == 'Soviet Union'

  return_hash[:standardized_term] = term

  ::Geocoder.configure(:lookup => :google, :api_key => nil, :timeout => self.timeout, :always_raise => :all)

  # Only the network calls are retried. A method-level `retry` would re-run the
  # whole body, resetting the counter (endless loop) and re-standardizing term.
  begin
    google_api_result = ::Geocoder.search(term)

    #Check if only a partial match. To avoid errors, strip out the first part and try again...
    #Need better way to check for street endings. See: http://pe.usps.gov/text/pub28/28apc_002.htm
    if google_api_result.present? && google_api_result.first.data['partial_match'] && term.split(',').length > 1
      lowered_term = term.downcase
      street_words = ['street', 'st.', 'avenue', 'ave.', 'court', 'dr.']
      unless street_words.any? { |word| lowered_term.include?(word) }
        # Kept in its own variable so a retry starts again from the full term.
        remainder = term.split(',')[1..-1].join(',').strip
        google_api_result = ::Geocoder.search(remainder)
      end
    end
  rescue SocketError
    attempts_left -= 1
    retry unless attempts_left.zero?
    return {}
  end

  if google_api_result.present?
    best_match = google_api_result.first.data
    location = best_match['geometry']['location']

    #Types: street_number, route, neighborhood, establishment, transit_station, bus_station
    best_match["address_components"].each do |result|
      types = result['types']

      if (types & ['street_number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (types.include?('neighborhood') && !types.include?('political'))
        #return_hash[:term_differs_from_tgn] = true
        #TODO: Not implemented for Google results right now.
        #return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
        return_hash[:coords] = {:latitude => location['lat'].to_s,
                                :longitude => location['lng'].to_s,
                                :combined => location['lat'].to_s + ',' + location['lng'].to_s}
      elsif (types & ['country']).present?
        return_hash[:country_part] = result['long_name']
      elsif (types & ['administrative_area_level_1']).present?
        return_hash[:state_part] = result['long_name'].to_ascii
      elsif (types & ['locality']).present?
        return_hash[:city_part] = result['long_name']
      elsif (types & ['sublocality', 'political']).length == 2 || types.include?('neighborhood')
        return_hash[:neighborhood_part] = result['long_name']
      end
    end

    return_hash[:term_differs_from_tgn] ||= best_match['partial_match'] unless best_match['partial_match'].blank?
  end

  return_hash
end
219
+ end
220
+ end
@@ -0,0 +1,250 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Geomash
3
+ class Standardizer
4
+
5
+ #Take a subject string and look for potential geographic terms.
6
# Extracts the part of a subject string that looks geographic.
#
# term - subject String. Anything longer than 125 characters is treated as
#        junk and yields ''.
#
# A piece "looks geographic" when it contains a US state name, a country name
# (both from the countries gem) or a state abbreviation ('Mass', or a space
# followed by a US state code).
#   * 'A--B--C' terms: for a piece with a state/country name, that piece and
#     everything after it is returned in reverse order joined by ','; for a
#     piece with only an abbreviation, the piece itself. The last matching
#     piece wins.
#   * 'A - B' terms: the last matching piece.
#   * anything else: the whole term when it matches.
#
# Returns the geographic String, or '' when nothing matched.
def self.parse_for_geographic_term(term)
  #Likely too long to be an address... some fields have junk with an address string...
  return '' if term.length > 125

  #Countries gem of https://github.com/hexorx/countries
  us_states = Country.new('US').states
  state_abbr_list = ['Mass'] + us_states.map { |abbr, _details| ' ' + abbr }
  state_name_list = us_states.map { |_abbr, details| details["name"] }
  country_name_list = Country.all.map { |name_abbr_pair| name_abbr_pair.first }

  mentions_state = lambda { |text| state_name_list.any? { |state| text.include? state } }
  mentions_abbr = lambda { |text| state_abbr_list.any? { |abbr| text.include? abbr } }
  mentions_country = lambda { |text| country_name_list.any? { |country| text.include? country } }

  geo_term = ''

  if term.include?('--')
    #Parsing a subject geographic term.
    pieces = term.split('--')
    pieces.each_with_index do |piece, position|
      if mentions_state.call(piece) || mentions_country.call(piece)
        geo_term = pieces[position..-1].reverse.join(',')
      elsif mentions_abbr.call(piece)
        geo_term = piece
      end
    end
  elsif term.include?(' - ')
    #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
    term.split(' - ').each do |piece|
      geo_term = piece if mentions_state.call(piece) || mentions_abbr.call(piece) || mentions_country.call(piece)
    end
  elsif mentions_state.call(term) || mentions_abbr.call(term) || mentions_country.call(term)
    geo_term = term
  end

  geo_term
end
54
+
55
+ #Make a string in a standard format.
56
# Normalises a geographic string before it is sent to a geocoder.
#
# geo_term - String to clean. It is never modified in place, so frozen
#            strings are accepted.
#
# Steps: remove every Geomash::Constants::JUNK_TERMS entry, strip leading
# periods/commas and surrounding whitespace, collapse '----' to '--', turn ';'
# into ',', and rewrite "Name (Qualifier)" as "Name, Qualifier".
#
# Returns the cleaned String, or nil when geo_term is nil or when it contains
# parentheses that are not in the " (...)" form.
def self.standardize_geographic_term(geo_term)
  return nil if geo_term.nil?

  #Remove common junk terms (non-mutating, so the caller's string is untouched)
  cleaned = Geomash::Constants::JUNK_TERMS.inject(geo_term) { |text, junk| text.gsub(junk, '') }

  #Strip any leading periods or commas from junk terms
  cleaned = cleaned.gsub(/^[\.,]+/, '').strip

  #Replace any four TGN dashes from removing a junk term
  cleaned = cleaned.gsub('----', '--')

  #Replace any semicolons with commas... possible strip them?
  cleaned = cleaned.gsub(';', ',')

  #Terms in parentheses will cause some geographic parsers to freak out. Switch to commas instead.
  return cleaned unless cleaned =~ /[\(\)]+/

  #Skip anything that is not like "word (word)": data returned likely will be unreliable for now... FIXME when use case occurs.
  return nil unless cleaned =~ / \(+.*\)+/

  #Make this replacement better?
  cleaned.gsub(/ *\((?=[\S ]+\))/, ', ').gsub(')', '')
end
88
+
89
+ #Attempt to dedup a list of geographic areas.
90
+ #FIXME: Horrendous first pass.
91
+ #Aggressive flag removes less specific matches, i.e. ['Hanoi, Vietnam', 'Vietnam'] would return just ['Hanoi, Vietnam']
92
# Removes entries of geo_list that repeat the words of another entry.
#
# geo_list   - Array of place Strings. It is cloned; the caller's array is not
#              modified.
# aggressive - when truthy, a LATER entry whose words are all contained in an
#              earlier, longer entry is dropped as well.
#
# Entries are compared on their words after removing ( ) . , ; and every
# Geomash::Constants::JUNK_TERMS entry. Each entry is only compared with the
# entries after it. When two entries have the same number of words and all
# match, the one with more comma-separated parts is kept (or the one that is
# more than one character longer, provided it has no '(').
#
# Returns a new Array with the surviving entries in their original order.
def self.dedup_geo(geo_list, aggressive=false)
  geo_list = geo_list.clone

  # Word lists, index-aligned with geo_list.
  word_lists = geo_list.map do |geo_term|
    bare_term = geo_term.delete('().,;')
    #Remove common junk terms
    Geomash::Constants::JUNK_TERMS.each { |junk| bare_term.gsub!(junk, '') }
    bare_term.squish.split(' ')
  end

  indexes_to_remove = []
  last_index = word_lists.size - 1

  word_lists.each_with_index do |words, index|
    # matched_words_count[i] = how many words of this entry also occur in entry i (i > index).
    matched_words_count = []
    current_best_term = geo_list[index]
    current_best_term_index = index

    words.each do |word|
      ((index + 1)..last_index).each do |inner_index|
        next unless word_lists[inner_index].include?(word)
        matched_words_count[inner_index] = (matched_words_count[inner_index] || 0) + 1
      end
    end

    matched_words_count.each_with_index do |matched_count, matched_index|
      candidate_word_count = word_lists[matched_index].size

      # Every word of the candidate must have been matched...
      next unless (matched_count || 0) == candidate_word_count
      # ...and it must be as long as this entry, or shorter in aggressive mode.
      shorter_and_aggressive = candidate_word_count < words.size && aggressive
      next unless shorter_and_aggressive || candidate_word_count == words.size

      candidate = geo_list[matched_index]
      more_parts = current_best_term.split(',').size < candidate.split(',').size
      clearly_longer = current_best_term.size + 1 < candidate.size && !candidate.include?('(')

      if more_parts || clearly_longer
        indexes_to_remove << current_best_term_index
        current_best_term = candidate
        current_best_term_index = matched_index
      else
        indexes_to_remove << matched_index
      end
    end
  end

  indexes_to_remove.each { |removal_index| geo_list[removal_index] = nil }

  geo_list.compact
end
146
+
147
# Decides whether the parsed place loses information compared with the string
# the user entered, i.e. whether the original string should be kept.
#
# geo_hash - Hash with :standardized_term (String) and optionally :street_part,
#            :coords, :neighborhood_part, :city_part, :state_part, :country_part.
#
# Returns true when
#   * a street or coordinates were parsed, or
#   * the term has 3+ comma parts and no neighborhood, 2+ parts and no city,
#     4+ parts, or contains a digit, or
#   * the country is not 'United States', a city or state was parsed, and
#     neither of them occurs (ASCII-folded, case-insensitive) in the term.
# Returns false otherwise.
def self.parsed_and_original_check(geo_hash)
  term = geo_hash[:standardized_term]

  return true if geo_hash[:street_part].present? || geo_hash[:coords].present?

  #Keep original string if three parts at least or if there is a number in the term.
  #TODO: Make this better!
  part_count = term.split(',').length
  return true if part_count >= 3 && geo_hash[:neighborhood_part].blank?
  return true if part_count >= 2 && geo_hash[:city_part].blank?
  return true if part_count >= 4 || term.match(/\d/).present?

  return false if geo_hash[:country_part] == 'United States'

  city = geo_hash[:city_part]
  state = geo_hash[:state_part]

  #Nothing to compare against... currently do nothing
  return false if city.blank? && state.blank?

  plain_term = term.to_ascii.downcase
  city_in_term = city.present? && plain_term.include?(city.to_ascii.downcase)
  state_in_term = state.present? && plain_term.include?(state.to_ascii.downcase)

  !(city_in_term || state_in_term)
end
171
+
172
+
173
+
174
+ #Take LCSH subjects and make them standard.
175
# Normalises a Library of Congress Subject Heading style string.
#
# value - subject String.
#
# Steps: drop a trailing period unless it follows a capital letter (an initial)
# or ends 'etc.'; turn '- -', em/en dashes and ' - ' into '--'; remove
# whitespace around '--'; capitalise the first character; finally pass the
# result through strip_value.
#
# Returns the normalised value as returned by strip_value, or nil when value
# is nil or only whitespace. The caller's string is not modified.
def self.LCSHize(value)
  return nil if value.nil? || value.strip.empty?

  #Remove ending periods ... except when an initial or etc.
  # (end_with? and the length guard keep short values such as "No." from crashing.)
  if value.length >= 2 && value.end_with?('.') && value[-2] !~ /[A-Z]/ && !value.end_with?('etc.')
    value = value[0..-2]
  end

  #Fix when '- -' occurs
  value = value.gsub(/-\s-/,'--')

  #Fix for "em" dashes
  value = value.gsub('—','--')

  #Fix for "en" dashes
  value = value.gsub('–','--')

  #Fix for ' - ' combinations
  value = value.gsub(' - ','--')

  #Remove white space after and before '--'
  value = value.gsub(/\s+--/,'--')
  value = value.gsub(/--\s+/,'--')

  #Ensure first word is capitalized
  value = value[0].capitalize + value[1..-1] unless value.empty?

  #Strip any white space
  strip_value(value)
end
205
+
206
# Cleans a single metadata value.
#
# value - any object. Floats and Integers are first reduced to their integer
#         String form (3.0 => "3"); everything else is used as is.
#
# Returns nil when value is blank, otherwise the result of utf8Encode(value).
def self.strip_value(value)
  return nil if value.blank?

  # Integer replaces Fixnum, which was removed in Ruby 3.2; on older Rubies
  # Fixnum is a subclass of Integer, so this also works there.
  value = value.to_i.to_s if value.is_a?(Float) || value.is_a?(Integer)

  # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any carriage returns
  utf8Encode(value)
end
218
+
219
+ #TODO: Better name for this. Should be part of an overall helper gem.
220
# Turns a value into plain, single-line text.
#
# value - any object; it is converted with to_s.
#
# Tabs (with an optional preceding line break), line breaks and <br/> tags
# become single spaces; the text is then passed through ActionView's full
# sanitizer, HTML entities are decoded, and surrounding whitespace is stripped.
#
# Returns the cleaned String.
def self.utf8Encode(value)
  single_line = value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' ')
  without_markup = ActionView::Base.full_sanitizer.sanitize(single_line)
  HTMLEntities.new.decode(without_markup).strip
end
223
+
224
+
225
# Swaps the most specific parsed place name for the spelling the user entered.
#
# geo_hash - Hash with :standardized_term (String) and optionally
#            :neighborhood_part, :city_part, :state_part. It is cloned; the
#            caller's hash is not modified.
#
# Only the first present part (neighborhood, then city, then state) is
# considered. The standardized term is split into single words (commas count
# as spaces); when a word equals the part after downcasing and ASCII folding,
# the part is replaced by that word as entered. Because the comparison is word
# by word, multi-word names never match.
#
# Returns the cloned Hash (changed or not) when one of the three parts is
# present, nil otherwise.
def self.try_with_entered_names(geo_hash)
  geo_hash_local = geo_hash.clone

  [:neighborhood_part, :city_part, :state_part].each do |part|
    next unless geo_hash_local[part].present?

    wanted = geo_hash_local[part].downcase.to_ascii
    entered_words = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ')
    orig_string_check = entered_words.select { |word| word.downcase.to_ascii == wanted }

    geo_hash_local[part] = orig_string_check.first.strip if orig_string_check.present?
    return geo_hash_local
  end

  nil
end
248
+
249
+ end
250
+ end