geomash 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,145 @@
1
+ module Geomash
2
+ class Geonames
3
+
4
# Returns the GeoNames account name stored under :geonames_username in
# Geomash.config, or the literal placeholder '<username>' when that entry
# is nil/false.
def self.geonames_username
  configured_name = Geomash.config[:geonames_username]
  configured_name || '<username>'
end
7
+
8
# Fetches the GeoNames "hierarchy" document for one place and flattens it.
#
# geoname_id - the GeoNames identifier; anything that interpolates into the
#              URL (String or Integer).
#
# Returns a Hash. When the service answers with anything other than HTTP 500:
#   :coords   - :latitude, :longitude, :combined ("lat,lng") and :box
#               (:west/:north/:east/:south, or {} when the bbox cannot be
#               read), all taken from the LAST geoname in the response.
#   :hier_geo - feature code (downcased Symbol) => toponymName for every
#               geoname in the response, or nil when there were none.
# Returns {} when the service still answers HTTP 500 after max_retry tries.
def self.get_geonames_data(geoname_id)
  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  # Interpolate the id instead of concatenating it: String#+ raises
  # TypeError when an Integer id is passed.
  url = "http://api.geonames.org/hierarchy?username=#{self.geonames_username}&lang=en&style=FULL&geonameId=#{geoname_id}"

  geonames_response = nil
  loop do
    # Wait before every attempt except the first.
    sleep(sleep_time) if retry_count > 0
    retry_count += 1

    geonames_response = Typhoeus::Request.get(url)
    break if geonames_response.code != 500 || retry_count == max_retry
  end

  geonames_data = {}
  return geonames_data if geonames_response.code == 500

  parsed_xml = Nokogiri::Slop(geonames_response.body)

  hier_geo = {}
  parsed_xml.geonames.geoname.each do |geoname|
    hier_geo[geoname.fcode.text.downcase.to_sym] = geoname.toponymName.text
  end

  #FIXME: Code4Lib lazy implementation... will get last result
  geoname = parsed_xml.geonames.geoname.last
  coords = {}
  coords[:latitude] = geoname.lat.text
  coords[:longitude] = geoname.lng.text
  coords[:combined] = coords[:latitude] + ',' + coords[:longitude]

  # Best effort: the bounding box is optional, so any failure while reading
  # it leaves an empty box instead of aborting the lookup.
  begin
    coords[:box] = {
      :west  => geoname.bbox.west.text,
      :north => geoname.bbox.north.text,
      :east  => geoname.bbox.east.text,
      :south => geoname.bbox.south.text
    }
  rescue StandardError
    coords[:box] = {}
  end

  geonames_data[:coords] = coords
  geonames_data[:hier_geo] = hier_geo.empty? ? nil : hier_geo

  geonames_data
end
56
+
57
+
58
# Looks up a GeoNames id for a parsed place hash via the GeoNames search API.
#
# geo_hash - Hash that may contain :neighborhood_part, :city_part,
#            :state_part and :country_part. It is not modified.
#
# The most specific part (neighborhood, otherwise city) is searched together
# with state and country. When GeoNames reports zero results, that most
# specific part is dropped and the search is repeated recursively.
#
# Returns a Hash with :id and :rdf (when a geoname element was found) plus
# :original_string_differs, or nil when no username is configured, when the
# hash contains nothing to search for, or when nothing matched.
# Raises RuntimeError when the service still answers HTTP 500 after the
# retries, or when the response carries a GeoNames <status> error element.
def self.geonames_id_from_geo_hash(geo_hash)
  return nil if self.geonames_username == '<username>'

  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  geonames_search_array = []

  #Don't do both neighborhood and city!
  if geo_hash[:neighborhood_part].present?
    geonames_search_array << geo_hash[:neighborhood_part]
  elsif geo_hash[:city_part].present?
    geonames_search_array << geo_hash[:city_part]
  end
  geonames_search_array << geo_hash[:state_part] if geo_hash[:state_part].present?
  geonames_search_array << geo_hash[:country_part] if geo_hash[:country_part].present?

  # Nothing to search for (previously crashed calling strip on nil).
  return nil if geonames_search_array.empty?

  geonames_search_string = geonames_search_array.join(', ')
  # The name that must match exactly is always the most specific part sent.
  exact_name_term = geonames_search_array.first.strip

  url = "http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL" \
        "&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}"

  # Only restrict by country when the name resolves; an unknown or missing
  # country previously crashed calling alpha2 on nil.
  country = geo_hash[:country_part].present? ? Country.find_country_by_name(geo_hash[:country_part]) : nil
  url += "&country=#{country.alpha2}" if country

  geonames_response = nil
  loop do
    # Wait before every attempt except the first.
    sleep(sleep_time) if retry_count > 0
    retry_count += 1

    geonames_response = Typhoeus::Request.get(url)
    break if geonames_response.code != 500 || retry_count == max_retry
  end

  if geonames_response.code == 500
    raise 'Geonames Server appears to not be responding for Geographic query: ' + geonames_search_string
  end

  parsed_xml = Nokogiri::Slop(geonames_response.body)

  # Look the status element up with XPath so its absence is not an error.
  # NOTE(review): the original raised here and immediately rescued itself;
  # presumably an error response then failed on totalResultsCount - confirm.
  if parsed_xml.at_xpath('//geonames/status')
    raise "geonames status error message of: #{parsed_xml.to_s}"
  end

  #This is ugly and needs to be redone to achieve better recursive...
  if parsed_xml.geonames.totalResultsCount.text == '0'
    # Broaden the search by dropping the most specific part and retrying.
    dropped_part = [:neighborhood_part, :city_part].find { |part| geo_hash[part].present? }
    return nil unless dropped_part

    broader_match = geonames_id_from_geo_hash(geo_hash.merge(dropped_part => nil))
    return broader_match.present? ? broader_match : nil
  end

  #Exact Match ... FIXME to not use Slop
  # The original distinguishes a single Element from a NodeSet here.
  geoname = parsed_xml.geonames.geoname
  first_match = if geoname.class == Nokogiri::XML::Element
                  geoname
                elsif geoname.class == Nokogiri::XML::NodeSet
                  geoname.first
                end

  return_hash = {}
  if first_match
    return_hash[:id] = first_match.geonameId.text
    return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
  end
  return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)

  return_hash
end
144
+ end
145
+ end
@@ -0,0 +1,220 @@
1
+ module Geomash
2
+ class Parser
3
+
4
# Returns the :mapquest_key entry of Geomash.config, or the literal
# placeholder '<mapquest_key>' when that entry is nil/false.
def self.mapquest_key
  configured_key = Geomash.config[:mapquest_key]
  configured_key || '<mapquest_key>'
end
7
+
8
# Returns the :bing_key entry of Geomash.config, or the literal placeholder
# '<bing_key>' when that entry is nil/false.
def self.bing_key
  configured_key = Geomash.config[:bing_key]
  configured_key || '<bing_key>'
end
11
+
12
# Returns the :timeout entry of Geomash.config unchanged (nil when unset).
# It is handed to Geocoder.configure by the parse_*_api methods.
def self.timeout
  settings = Geomash.config
  settings[:timeout]
end
15
+
16
+ #Note: Limited to only looking at United States places...
17
# Geocodes a term through Bing (via the Geocoder gem) and splits the first
# hit into place parts. Only United States results are used.
#
# term            - the text to geocode.
# parse_term_flag - when true, the term is first run through
#                   Standardizer.parse_for_geographic_term.
#
# Returns a Hash with :original_term, :standardized_term, :country_part,
# :state_part, :city_part and, when Bing reports an address line,
# :street_part, :coords and :term_differs_from_tgn.
# Returns {} when no Bing key is configured, the standardized term is blank
# or contains parentheses, the term looks like "place, city, state" rather
# than a street address, the lookup keeps failing with SocketError, there is
# no hit, the hit is outside the United States, or the hit is a Neighborhood.
def self.parse_bing_api(term, parse_term_flag=false)
  return_hash = {}

  #Skip if no bing_key... possibly move this elsewhere?
  return return_hash if self.bing_key == '<bing_key>'

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  return {} if term.blank?

  return_hash[:standardized_term] = term

  #Bing API does badly with parentheses...
  return {} if term.match(/[\(\)]+/)

  #Sometimes with building, city, state, bing is dumb and will only return state. Example: Boston Harbor, Boston, Mass.
  #So if not a street address, pass to have google handle it for better results...
  #Example of another bad record: South Street bridge, West Bridgewater, Mass. would give a place in Holyoke
  # A term counts as a street address when it has a digit or one of the
  # listed suffixes directly before a comma.
  if term.split(',').length >= 3 && term !~ /\d/ && term.downcase !~ /(ave|avenue|street|st|road|rd)\.*,/
    return {}
  end

  Geocoder.configure(:lookup => :bing,:api_key => self.bing_key,:timeout => self.timeout, :always_raise => :all)

  # Retry only the network call. The counter lives outside the begin block:
  # a method-level rescue/retry re-ran the whole body, reset the counter
  # every time and therefore never stopped retrying.
  attempts_left = 3
  begin
    bing_api_result = Geocoder.search(term)
  rescue SocketError
    attempts_left -= 1
    retry if attempts_left > 0
    return {}
  end

  return {} unless bing_api_result.present?

  top_hit = bing_api_result.first.data
  address = top_hit["address"]

  #Use only for United States results... international results are inaccurate.
  return {} unless address["countryRegion"] == 'United States'

  #Doesn't return a city... Google handles this better.
  return {} if top_hit["entityType"] == 'Neighborhood'

  if address["addressLine"].present?
    coordinates = top_hit["geocodePoints"].first["coordinates"]
    latitude = coordinates.first.to_s
    longitude = coordinates.last.to_s

    return_hash[:term_differs_from_tgn] = true
    return_hash[:street_part] = address["addressLine"]
    return_hash[:coords] = {:latitude => latitude, :longitude => longitude, :combined => latitude + ',' + longitude}
  end

  return_hash[:country_part] = address["countryRegion"]
  return_hash[:state_part] = Geomash::Constants::STATE_ABBR[address["adminDistrict"]]
  return_hash[:city_part] = address["locality"]

  return_hash
end
85
+
86
+ #Mapquest allows unlimited requests - start here?
87
# Geocodes a term through MapQuest (via the Geocoder gem) and splits the
# first hit into place parts. Only United States results are returned.
#
# term            - the text to geocode.
# parse_term_flag - when true, the term is first run through
#                   Standardizer.parse_for_geographic_term.
#
# Returns a Hash with :original_term, :standardized_term, :country_part,
# :state_part, :city_part and, when MapQuest reports a street,
# :street_part and :coords.
# Returns {} when no MapQuest key is configured, the standardized term is
# blank, the term is one of the known-bad inputs, the term looks like
# "place, city, state" rather than a street address, the lookup keeps
# failing with SocketError, there is no hit, the hit is outside the United
# States, or a multi-part term produced no city.
def self.parse_mapquest_api(term, parse_term_flag=false)
  return_hash = {}

  #Skip if no mapquest_key... possibly move this elsewhere?
  return return_hash if self.mapquest_key == '<mapquest_key>'

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  return {} if term.blank?

  return_hash[:standardized_term] = term

  #Mapquest returns bad data for: Manchester, Mass.
  return {} if term.include?('Manchester') || term.include?('Atlanta, MI')

  #Messed up with just neighborhoods. Example: Hyde Park (Boston, Mass.)
  #So if not a street address, pass to have google handle it for better results...
  if term.split(',').length >= 3 && term !~ /\d/ && term.downcase !~ /(ave|avenue|street|st|road|rd)\.*,/
    return {}
  end

  Geocoder.configure(:lookup => :mapquest,:api_key => self.mapquest_key,:timeout => self.timeout, :always_raise => :all)

  # Retry only the network call. The counter lives outside the begin block:
  # a method-level rescue/retry re-ran the whole body, reset the counter
  # every time and therefore never stopped retrying.
  attempts_left = 3
  begin
    mapquest_api_result = Geocoder.search(term)
  rescue SocketError
    attempts_left -= 1
    retry if attempts_left > 0
    return {}
  end

  #If this call returned a result...
  return {} unless mapquest_api_result.present?

  top_hit = mapquest_api_result.first.data

  #Only return if USA for now. Google is better with stuff like: 'Long Binh, Vietnam'
  # Checked before anything else is read: non-US hits were discarded anyway,
  # and their state handling crashed when adminArea3 was nil.
  # NOTE(review): guarding against a nil Country in case the gem returns nil
  # for unknown codes - confirm against the countries gem version in use.
  country = Country.new(top_hit["adminArea1"])
  country_name = country ? country.name : nil
  return {} unless country_name == 'United States'

  if top_hit["street"].present?
    latitude = top_hit['latLng']['lat'].to_s
    longitude = top_hit['latLng']['lng'].to_s

    #return_hash[:term_differs_from_tgn] = true
    return_hash[:street_part] = top_hit["street"]
    return_hash[:coords] = {:latitude => latitude, :longitude => longitude, :combined => latitude + ',' + longitude}
  end

  return_hash[:country_part] = country_name
  return_hash[:state_part] = Geomash::Constants::STATE_ABBR[top_hit["adminArea3"]] || top_hit["adminArea4"]

  # Drop a ' City' suffix, e.g. "New York City" becomes "New York".
  # A missing city stays nil instead of raising NoMethodError.
  city = top_hit["adminArea5"]
  return_hash[:city_part] = city.nil? ? nil : city.gsub(' City', '')

  #Also only return if there is a city if there were more than two terms passed in. Fixes: Roxbury, MA
  return {} if term.split(',').length >= 2 && return_hash[:city_part].blank?

  return_hash
end
155
+
156
+ #Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
157
+ #Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
158
+ #Seems like it sets a partial_match=>true in the data section...
159
# Geocodes a term through Google (via the Geocoder gem) and splits the first
# hit's address components into place parts.
#
# term            - the text to geocode.
# parse_term_flag - when true, the term is first run through
#                   Standardizer.parse_for_geographic_term.
#
# Returns a Hash with :original_term and :standardized_term plus whichever
# of :country_part, :state_part, :city_part, :neighborhood_part, :coords and
# :term_differs_from_tgn the first hit provides.
# Returns {} when the standardized term is blank or is 'Soviet Union', or
# when the lookup keeps failing with SocketError.
def self.parse_google_api(term, parse_term_flag=false)
  return_hash = {}

  return_hash[:original_term] = term

  term = Geomash::Standardizer.parse_for_geographic_term(term) if parse_term_flag
  term = Geomash::Standardizer.standardize_geographic_term(term)

  #Soviet Union returns back a place in Kazakhstan
  return {} if term.blank? || term == 'Soviet Union'

  return_hash[:standardized_term] = term

  ::Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => self.timeout, :always_raise => :all)

  # Retry only the network call. The counter lives outside the begin block:
  # a method-level rescue/retry re-ran the whole body, reset the counter
  # every time and therefore never stopped retrying.
  attempts_left = 3
  begin
    google_api_result = ::Geocoder.search(term)
  rescue SocketError
    attempts_left -= 1
    retry if attempts_left > 0
    return {}
  end

  #Check if only a partial match. To avoid errors, strip out the first part and try again...
  #Need better way to check for street endings. See: http://pe.usps.gov/text/pub28/28apc_002.htm
  street_markers = ['street', 'st.', 'avenue', 'ave.', 'court', 'dr.']
  lowered_term = term.downcase
  if google_api_result.present? &&
     google_api_result.first.data['partial_match'] &&
     term.split(',').length > 1 &&
     street_markers.none? { |marker| lowered_term.include?(marker) }
    term = term.split(',')[1..-1].join(',').strip
    google_api_result = ::Geocoder.search(term)
  end

  if google_api_result.present?
    top_hit = google_api_result.first.data

    # Component types that mark the hit as more specific than a city.
    # 'street_number' is the spelling Google uses; the original's
    # 'street number' (with a space) could never match.
    point_types = ['street_number', 'route', 'establishment', 'transit_station', 'bus_station']

    top_hit["address_components"].each do |result|
      types = result['types']

      if (types & point_types).present? || (types.include?('neighborhood') && !types.include?('political'))
        #return_hash[:term_differs_from_tgn] = true
        #TODO: Not implemented for Google results right now.
        #return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
        latitude = top_hit['geometry']['location']['lat'].to_s
        longitude = top_hit['geometry']['location']['lng'].to_s
        return_hash[:coords] = {:latitude => latitude, :longitude => longitude, :combined => latitude + ',' + longitude}
      elsif types.include?('country')
        return_hash[:country_part] = result['long_name']
      elsif types.include?('administrative_area_level_1')
        return_hash[:state_part] = result['long_name'].to_ascii
      elsif types.include?('locality')
        return_hash[:city_part] = result['long_name']
      elsif (types & ['sublocality', 'political']).length == 2 || types.include?('neighborhood')
        return_hash[:neighborhood_part] = result['long_name']
      end
    end

    partial_match = top_hit['partial_match']
    return_hash[:term_differs_from_tgn] ||= partial_match unless partial_match.blank?
  end

  return_hash
end
219
+ end
220
+ end
@@ -0,0 +1,250 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Geomash
3
+ class Standardizer
4
+
5
+ #Take a subject string and look for potential geographic terms.
6
# Picks the geographic portion out of a subject string.
#
# term - the subject text to inspect.
#
# Returns '' when the term is longer than 125 characters or nothing in it
# matches a US state name, a state abbreviation ('Mass' or a space followed
# by a two-letter code) or a country name. Otherwise:
#   * for a '--' separated term, the segments from the last state/country
#     match onward, reversed and joined with ',' (or the last segment that
#     only matched an abbreviation);
#   * for a ' - ' separated term, the last matching segment;
#   * else the whole term.
def self.parse_for_geographic_term(term)
  #Likely too long to be an address... some fields have junk with an address string...
  return '' if term.length > 125

  #Countries gem of https://github.com/hexorx/countries
  abbreviations = ['Mass']
  state_names = []
  Country.new('US').states.each do |abbr, details|
    abbreviations.push(' ' + abbr)
    state_names.push(details["name"])
  end
  country_names = Country.all.map { |name_abbr_pair| name_abbr_pair.first }

  names_state   = lambda { |text| state_names.any? { |name| text.include?(name) } }
  abbreviates   = lambda { |text| abbreviations.any? { |abbr| text.include?(abbr) } }
  names_country = lambda { |text| country_names.any? { |name| text.include?(name) } }

  found = ''

  if term.include?('--')
    #Parsing a subject geographic term.
    segments = term.split('--')
    segments.each_with_index do |segment, position|
      if names_state.call(segment) || names_country.call(segment)
        # Keep this segment and everything after it, most specific first.
        found = segments[position..-1].reverse.join(',')
      elsif abbreviates.call(segment)
        found = segment
      end
    end
  elsif term.include?(' - ')
    #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
    term.split(' - ').each do |segment|
      next unless names_state.call(segment) || abbreviates.call(segment) || names_country.call(segment)
      found = segment
    end
  elsif names_state.call(term) || abbreviates.call(term) || names_country.call(term)
    found = term
  end

  found
end
54
+
55
+ #Make a string in a standard format.
56
# Puts a geographic string into a standard format for the geocoders.
#
# geo_term - the text to clean up; it is never modified (frozen strings are
#            accepted). nil is passed through as nil.
#
# Steps: remove every Geomash::Constants::JUNK_TERMS entry, strip leading
# periods/commas and surrounding whitespace, collapse '----' to '--', turn
# ';' into ',', and rewrite "Name (Qualifier)" as "Name, Qualifier".
#
# Returns the cleaned String, or nil when the term contains parentheses that
# are not in the " (...)" form (results for those are considered unreliable).
def self.standardize_geographic_term(geo_term)
  return nil if geo_term.nil?

  #Remove common junk terms
  # Non-mutating gsub: the original clone + gsub! raised FrozenError for
  # frozen input because clone preserves the frozen flag.
  geo_term = Geomash::Constants::JUNK_TERMS.inject(geo_term) { |cleaned, junk| cleaned.gsub(junk, '') }

  #Strip any leading periods or commas from junk terms
  geo_term = geo_term.gsub(/^[\.,]+/, '').strip

  #Replace any four TGN dashes from removing a junk term
  geo_term = geo_term.gsub('----', '--')

  #Replace any semicolons with commas... possible strip them?
  geo_term = geo_term.gsub(';', ',')

  #Terms in parentheses will cause some geographic parsers to freak out. Switch to commas instead.
  if geo_term.match(/[\(\)]+/)
    #Else skip this as data returned likely will be unreliable for now... FIXME when use case occurs.
    return nil unless geo_term.match(/ \(+.*\)+/)

    #Attempt to fix address if something like (word)
    #Make this replacement better?
    geo_term = geo_term.gsub(/ *\((?=[\S ]+\))/,', ')
    geo_term = geo_term.gsub(')', '')
  end

  geo_term
end
88
+
89
+ #Attempt to dedup a list of geographic areas.
90
+ #FIXME: Horrendous first pass.
91
+ #Aggresive flag removes less specific matches. IE. ['Hanoi, Vietnam' and 'Vietnam'] would return just ['Hanoi, Vietnam']
92
# Removes entries of geo_list that duplicate another entry once
# punctuation ( ( ) . , ; ) and Geomash::Constants::JUNK_TERMS are removed.
#
# geo_list   - Array of place strings; the array itself is not modified.
# aggressive - when true, a later entry whose words are all contained in an
#              earlier, longer entry is also treated as a duplicate
#              (['Hanoi, Vietnam', 'Vietnam'] keeps only 'Hanoi, Vietnam').
#
# For each entry, every later entry with the same word count (or fewer
# words, when aggressive) whose words all occur in it is compared to the
# current best spelling: the one with more comma-separated parts, or the
# one longer by more than one character and without '(', is kept.
#
# Returns a new Array with the losing entries removed.
def self.dedup_geo(geo_list, aggressive=false)
  survivors = geo_list.clone

  # Word lists of each entry with punctuation and junk terms removed.
  word_lists = survivors.map do |entry|
    bare = entry.gsub(/[().,;]/, '')
    #Remove common junk terms
    Geomash::Constants::JUNK_TERMS.each { |junk| bare.gsub!(junk, '') }
    bare.squish.split(' ')
  end

  doomed = []

  word_lists.each_with_index do |words, index|
    # hits[i] = how many of this entry's words occur in later entry i.
    hits = []
    best_term = survivors[index]
    best_index = index

    words.each do |word|
      ((index + 1)...word_lists.size).each do |later|
        next unless word_lists[later].include?(word)
        hits[later] = (hits[later] || 0) + 1
      end
    end

    hits.each_with_index do |hit_count, candidate|
      candidate_size = word_lists[candidate].size
      next unless (hit_count || 0) == candidate_size
      next unless candidate_size == words.size || (aggressive && candidate_size < words.size)

      candidate_term = survivors[candidate]
      more_parts = best_term.split(',').size < candidate_term.split(',').size
      clearly_longer = best_term.size + 1 < candidate_term.size && !candidate_term.include?('(')

      if more_parts || clearly_longer
        # The candidate is the better spelling; drop the previous best.
        doomed << best_index
        best_term = candidate_term
        best_index = candidate
      else
        doomed << candidate
      end
    end
  end

  doomed.each { |index| survivors[index] = nil }

  survivors.compact
end
146
+
147
# Decides whether the standardized term says more than the parsed parts do.
#
# geo_hash - Hash with :standardized_term and, optionally, :street_part,
#            :coords, :neighborhood_part, :city_part, :state_part and
#            :country_part.
#
# Returns true when
#   * a street part or coordinates are present, or
#   * the term has 3+ comma parts and no neighborhood, 2+ comma parts and
#     no city, 4+ comma parts, or contains a digit, or
#   * the country is not 'United States' and, of the city/state that were
#     parsed, neither appears (ASCII-folded, case-insensitive) in the term.
# Returns false otherwise.
def self.parsed_and_original_check(geo_hash)
  term = geo_hash[:standardized_term]

  return true if geo_hash[:street_part].present? || geo_hash[:coords].present?

  #Keep original string if three parts at least or if there is a number in the term.
  #TODO: Make this better!
  part_count = term.split(',').length
  return true if part_count >= 3 && geo_hash[:neighborhood_part].blank?
  return true if part_count >= 2 && geo_hash[:city_part].blank?
  return true if part_count >= 4
  return true if term.match(/\d/).present?

  return false if geo_hash[:country_part] == 'United States'

  city = geo_hash[:city_part]
  state = geo_hash[:state_part]
  #Currently do nothing when neither city nor state was parsed
  return false if city.blank? && state.blank?

  folded_term = term.to_ascii.downcase
  city_in_term = city.present? && folded_term.include?(city.to_ascii.downcase)
  state_in_term = state.present? && folded_term.include?(state.to_ascii.downcase)

  !(city_in_term || state_in_term)
end
171
+
172
+
173
+
174
+ #Take LCSH subjects and make them standard.
175
# Normalizes a subject heading to LCSH punctuation conventions.
#
# value - the heading text. nil and '' return nil.
#
# Removes a trailing period unless it follows a capital letter (an initial)
# or ends "etc."; turns '- -', em/en dashes and ' - ' into '--'; removes
# whitespace around '--'; capitalizes the first character; and finally
# passes the result through strip_value.
#
# Returns whatever strip_value returns for the normalized heading.
def self.LCSHize(value)
  return nil if value.nil? || value.empty?

  #Remove ending periods ... except when an initial or etc.
  # The length guard keeps one-character values from indexing off the front
  # of the string; the original raised NoMethodError on strings shorter
  # than four characters.
  if value.length > 1 && value.end_with?('.') && value[-2] =~ /[^A-Z]/ && !value.end_with?('etc.')
    value = value[0..-2]
  end

  #Fix when '- -' occurs
  value = value.gsub(/-\s-/,'--')

  #Fix for em dashes and en dashes
  value = value.gsub(/[—–]/,'--')

  #Fix for ' - ' combinations
  value = value.gsub(' - ','--')

  #Remove white space after and before '--'
  value = value.gsub(/\s+--/,'--')
  value = value.gsub(/--\s+/,'--')

  #Ensure first word is capitalized
  value = value[0].capitalize[0] + value[1..-1] unless value.empty?

  #Strip any white space
  strip_value(value)
end
205
+
206
# Cleans a single value for storage.
#
# value - any object. Blank values return nil.
#
# Floats and Integers are reduced to their integer part and turned into a
# String (3.0 becomes "3"); the value is then passed to utf8Encode.
#
# Returns the result of utf8Encode, or nil for a blank value.
def self.strip_value(value)
  return nil if value.blank?

  # Integer replaces the original Fixnum check: Fixnum no longer exists in
  # Ruby 3.2+, so referencing it raised NameError for every non-Float value.
  value = value.to_i.to_s if value.is_a?(Float) || value.is_a?(Integer)

  # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any carriage returns
  utf8Encode(value)
end
218
+
219
+ #TODO: Better name for this. Should be part of an overall helped gem.
220
# Flattens a value to a single line of plain text.
#
# value - any object; it is converted with to_s.
#
# Tabs (with an optional preceding CR/LF), line breaks and self-closing
# <br/> tags each become one space; the text is then run through
# ActionView's full sanitizer, HTML entities are decoded, and surrounding
# whitespace is stripped.
#
# Returns the cleaned String.
def self.utf8Encode(value)
  single_line = value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' ')
  without_markup = ActionView::Base.full_sanitizer.sanitize(single_line)
  HTMLEntities.new.decode(without_markup).strip
end
223
+
224
+
225
# Swaps the most specific parsed place part for the spelling that was
# actually entered in the standardized term.
#
# geo_hash - Hash with :standardized_term and any of :neighborhood_part,
#            :city_part, :state_part. It is not modified.
#
# Only the first part that is present (neighborhood, then city, then state)
# is considered. The term is split on commas and whitespace, and the first
# word equal to that part (ASCII-folded, case-insensitive) replaces it;
# when no word matches, the part is left as it was.
#
# Returns a copy of geo_hash, or nil when none of the three parts is present.
def self.try_with_entered_names(geo_hash)
  adjusted = geo_hash.clone

  [:neighborhood_part, :city_part, :state_part].each do |part|
    next unless adjusted[part].present?

    entered_words = adjusted[:standardized_term].gsub(',', ' ').squish.split(' ')
    same_name = entered_words.select do |word|
      word.downcase.to_ascii == adjusted[part].downcase.to_ascii
    end
    adjusted[part] = same_name.first.strip unless same_name.empty?

    # Only the most specific present part is ever adjusted.
    return adjusted
  end

  nil
end
248
+
249
+ end
250
+ end