geomash 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9de5bd2cdc879dc6103df0df9d7b31f195c02e73
4
- data.tar.gz: 6bc1355c2a93d2886c1d77a00e705894b8720418
3
+ metadata.gz: b181c5b1f79695f4b8ba54cf5e641c9cdaefe842
4
+ data.tar.gz: f6db8e134bf152b162765a86b688848f16d0753d
5
5
  SHA512:
6
- metadata.gz: 6622f04afaae5560a36930441afe000b3b45cb53221b6f7fddf449b5b892bdcc59e35cf1088a50eb11b811edff662ecf43234ef1b348f48f1f6cfac18d4d5486
7
- data.tar.gz: 22d728689b333961bcc8e37628188797af5c1773732a6d6a5fff2120284ddf1b18fb5eac9e8fd3bfadbf648d97aac5cb827021b796cf2efb8f600cd406ff7d83
6
+ metadata.gz: 0a5e74f73c036f36d0d030c2edc57fd2b026add354ee845e6f5bf6ab847f81d62dda8afe08add796b63ae0ee2b87446471a1ca083c60114d7401bc88e8476989
7
+ data.tar.gz: c0af8abaa745856c0e416f461e230574c869f5736f3c4aa8cb416dac102fe301eac669c1a308ff91d748d3e2be78511e3c0fb96fd8d0a7851e944573e0cd2506
@@ -6,6 +6,7 @@ development:
6
6
  #NOTE: Bing also fairly unreliable but mostly works...
7
7
  bing_key: <bing_key>
8
8
  timeout: 7
9
+ parser_cache_enabled: false #See Readme before enabling this
9
10
  test: &TEST_
10
11
  tgn_enabled: true
11
12
  geonames_username: <username>
@@ -14,6 +15,7 @@ test: &TEST_
14
15
  #NOTE: Bing also fairly unreliable but mostly works...
15
16
  bing_key: <bing_key>
16
17
  timeout: 7
18
+ parser_cache_enabled: false #See Readme before enabling this
17
19
  production:
18
20
  tgn_enabled: true
19
21
  geonames_username: <username>
@@ -22,3 +24,4 @@ production:
22
24
  #NOTE: Bing also fairly unreliable but mostly works...
23
25
  bing_key: <bing_key>
24
26
  timeout: 7
27
+ parser_cache_enabled: false #See Readme before enabling this
@@ -0,0 +1,55 @@
1
# Taken from: https://github.com/alexreisner/geocoder/blob/master/examples/autoexpire_cache_dalli.rb
require 'yaml'

module Geomash
  # Hash-like cache wrapper that stores YAML-serialised values in a
  # memcached-style client and passes a TTL on every write.
  #
  # The store only needs to respond to +get+, +add+, +replace+ and +delete+.
  # The list of written keys is kept in the store under a dedicated index key
  # so that +keys+ can enumerate them.
  class AutoexpireCacheDalli
    # @param store [#get, #add, #replace, #delete] the backing cache client
    # @param ttl [Integer] value handed to +store.add+ as the entry lifetime
    def initialize(store, ttl = 86400)
      @store = store
      @keys = 'GeocoderDalliClientKeys'
      @ttl = ttl
    end

    # Fetch and deserialise the value cached for +url+.
    #
    # @return [Object, nil] the deserialised value; whatever the store
    #   returned (nil, blank string, non-string) is handed back untouched
    #   when there is nothing to deserialise
    def [](url)
      raw = @store.get(url)
      # Plain-Ruby blank check so the class does not depend on ActiveSupport.
      return raw unless raw.is_a?(String) && !raw.strip.empty?

      # NOTE(review): YAML.load can instantiate arbitrary objects on older
      # Psych versions; only point this at a cache you trust.
      YAML.load(raw)
    end

    # Cache +value+ under +url+; assigning nil removes the entry.
    #
    # The write uses +store.add+, so an entry that already exists is left
    # untouched (and the key index is only updated when the add succeeds).
    #
    # @return [Object] +value+
    def []=(url, value)
      if value.nil?
        del(url)
      elsif @store.add(url, YAML.dump(value), @ttl)
        key_cache_add(url)
      end
      value
    end

    # @return [Array] the keys recorded in the key index
    def keys
      key_cache
    end

    # Remove +url+ from the store and, when the store reports a successful
    # delete, from the key index as well.
    def del(url)
      key_cache_delete(url) if @store.delete(url)
    end

    private

    # Read the key index, creating an empty one in the store on first use.
    def key_cache
      serialized = @store.get(@keys)
      return YAML.load(serialized) unless serialized.nil?

      @store.add(@keys, YAML.dump([]))
      []
    end

    # Append +key+ to the key index.
    # NOTE(review): read-modify-write without locking; concurrent writers
    # may presumably overwrite each other's index updates.
    def key_cache_add(key)
      @store.replace(@keys, YAML.dump(key_cache << key))
    end

    # Drop +key+ from the key index.
    def key_cache_delete(key)
      remaining = key_cache
      remaining.delete(key)
      @store.replace(@keys, YAML.dump(remaining))
    end
  end
end
@@ -0,0 +1,26 @@
1
# Taken from: https://github.com/alexreisner/geocoder/blob/master/examples/autoexpire_cache_redis.rb
module Geomash
  # Hash-like cache wrapper that delegates to a Redis client and puts a TTL
  # on every entry it writes.
  #
  # The store needs to respond to +get+, +set+, +expire+, +keys+ and +del+.
  # GET/SET are called directly rather than through the client's +[]+ /
  # +[]=+ aliases, which newer redis-rb releases no longer provide.
  class AutoexpireCacheRedis
    # @param store [#get, #set, #expire, #keys, #del] the Redis client
    # @param ttl [Integer] lifetime passed to +store.expire+ for each entry
    def initialize(store, ttl = 86400)
      @store = store
      @ttl = ttl
    end

    # @return [Object, nil] whatever the store holds for +url+
    def [](url)
      @store.get(url)
    end

    # Store +value+ under +url+ and set its expiry.
    #
    # @return [Object] +value+
    def []=(url, value)
      @store.set(url, value)
      @store.expire(url, @ttl)
      value
    end

    # @return [Array] the keys reported by the store
    def keys
      @store.keys
    end

    # Remove +url+ from the store.
    def del(url)
      @store.del(url)
    end
  end
end
@@ -72,6 +72,7 @@ module Geomash
72
72
  }
73
73
 
74
74
  #Terms that drive geographic parsers mad...
75
+ #Possibly check using QA against LCSH Subject topics that aren't geographical...
75
76
  JUNK_TERMS = [
76
77
  'Cranberries',
77
78
  'History',
@@ -80,7 +81,11 @@ module Geomash
80
81
  'Pictorial works.',
81
82
  /[nN]ation/,
82
83
  'Asia',
83
- '(Republic)'
84
+ '(Republic)',
85
+ 'Directories',
86
+ 'Biography',
87
+ #Some date removal
88
+ /[\d]+th [cC]entury,/
84
89
  ]
85
90
 
86
91
 
@@ -89,22 +89,27 @@ module Geomash
89
89
  end
90
90
  retry_count = retry_count + 1
91
91
 
92
- geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{Country.find_country_by_name(geo_hash[:country_part]).alpha2}")
92
+ country_code = nil
93
+ if geo_hash[:country_part] == 'South Korea'
94
+ country_code = 'KR'
95
+ elsif geo_hash[:country_part] == 'North Korea'
96
+ country_code = 'KP'
97
+ else
98
+ country_code = Country.find_country_by_name(geo_hash[:country_part]).alpha2
99
+ end
100
+ geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{country_code}")
93
101
 
94
102
  end until (geonames_response.code != 500 || retry_count == max_retry)
95
103
 
96
104
  unless geonames_response.code == 500
97
105
 
98
- parsed_xml = Nokogiri::Slop(geonames_response.body)
106
+ parsed_xml = Nokogiri::XML(geonames_response.body)
99
107
 
100
- begin
101
- raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.geonames.status
102
- rescue
103
- #Do nothing but FIXME to not use slop
104
- end
108
+ raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.xpath("//status").present?
105
109
 
106
110
  #This is ugly and needs to be redone to achieve better recursive...
107
- if parsed_xml.geonames.totalResultsCount.text == '0'
111
+ current_count = parsed_xml.xpath("//totalResultsCount")
112
+ if current_count.blank? || current_count.first.text == '0'
108
113
  if geo_hash[:neighborhood_part].present?
109
114
  geo_hash_temp = geo_hash.clone
110
115
  geo_hash_temp[:neighborhood_part] = nil
@@ -121,14 +126,14 @@ module Geomash
121
126
  end
122
127
 
123
128
  #Exact Match ... FIXME to not use Slop
124
- if parsed_xml.geonames.geoname.class == Nokogiri::XML::Element
125
- return_hash[:id] = parsed_xml.geonames.geoname.geonameId.text
126
- return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
127
- elsif parsed_xml.geonames.geoname.class ==Nokogiri::XML::NodeSet
128
- return_hash[:id] = parsed_xml.geonames.geoname.first.geonameId.text
129
+ if parsed_xml.xpath("//geonames/geoname/geonameId").present?
130
+ return_hash[:id] = parsed_xml.xpath("//geonames/geoname/geonameId").first.text
129
131
  return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
132
+ return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)
133
+ return return_hash
134
+ else
135
+ return nil
130
136
  end
131
- return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)
132
137
 
133
138
  end
134
139
 
@@ -1,6 +1,11 @@
1
1
  module Geomash
2
2
  class Parser
3
3
 
4
# Whether geocoder result caching is switched on for the parser.
# Reads :parser_cache_enabled from Geomash.config; an unset (nil) value
# counts as disabled.
def self.cache_enabled
  configured = Geomash.config[:parser_cache_enabled]
  configured.nil? ? false : configured
end
8
+
4
9
  def self.mapquest_key
5
10
  Geomash.config[:mapquest_key] || '<mapquest_key>'
6
11
  end
@@ -44,12 +49,21 @@ module Geomash
44
49
  return {}
45
50
  end
46
51
 
47
- Geocoder.configure(:lookup => :bing,:api_key => self.bing_key,:timeout => self.timeout, :always_raise => :all)
48
- bing_api_result = Geocoder.search(term)
52
+ ::Geocoder.configure(:lookup => :bing,:api_key => self.bing_key,:timeout => self.timeout, :always_raise => :all)
53
+
54
+ begin
55
+ bing_api_result = Geocoder.search(term)
56
+ rescue SocketError => e
57
+ Geocoder::Lookup.get(:bing).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
58
+ retry unless (retry_count -= 1).zero?
59
+ rescue Geocoder::OverQueryLimitError => e
60
+ Geocoder::Lookup.get(:bing).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
61
+ raise e
62
+ rescue Exception => e
63
+ Geocoder::Lookup.get(:bing).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
64
+ raise e
65
+ end
49
66
 
50
- rescue SocketError => e
51
- retry unless (retry_count -= 1).zero?
52
- else
53
67
 
54
68
  #Use only for United States results... international results are inaccurate.
55
69
  if bing_api_result.present? && bing_api_result.first.data["address"]["countryRegion"] == 'United States'
@@ -113,13 +127,20 @@ module Geomash
113
127
  return {}
114
128
  end
115
129
 
116
- Geocoder.configure(:lookup => :mapquest,:api_key => self.mapquest_key,:timeout => self.timeout, :always_raise => :all)
117
-
118
- mapquest_api_result = Geocoder.search(term)
119
- rescue SocketError => e
120
- retry unless (retry_count -= 1).zero?
121
- else
122
-
130
+ ::Geocoder.configure(:lookup => :mapquest,:api_key => self.mapquest_key,:timeout => self.timeout, :always_raise => :all)
131
+
132
+ begin
133
+ mapquest_api_result = Geocoder.search(term)
134
+ rescue SocketError => e
135
+ Geocoder::Lookup.get(:mapquest).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
136
+ retry unless (retry_count -= 1).zero?
137
+ rescue Geocoder::OverQueryLimitError => e
138
+ Geocoder::Lookup.get(:mapquest).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
139
+ raise e
140
+ rescue Exception => e
141
+ Geocoder::Lookup.get(:mapquest).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
142
+ raise e
143
+ end
123
144
 
124
145
  #If this call returned a result...
125
146
  if mapquest_api_result.present?
@@ -173,11 +194,18 @@ module Geomash
173
194
  return_hash[:standardized_term] = term
174
195
 
175
196
  ::Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => self.timeout, :always_raise => :all)
176
-
177
- google_api_result = ::Geocoder.search(term)
178
- rescue SocketError => e
179
- retry unless (retry_count -= 1).zero?
180
- else
197
+ begin
198
+ google_api_result = ::Geocoder.search(term)
199
+ rescue SocketError => e
200
+ Geocoder::Lookup.get(:google).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
201
+ retry unless (retry_count -= 1).zero?
202
+ rescue Geocoder::OverQueryLimitError => e
203
+ Geocoder::Lookup.get(:google).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
204
+ raise e
205
+ rescue Exception => e
206
+ Geocoder::Lookup.get(:google).cache.expire(Geocoder::Query.new(term).url) if self.cache_enabled #Expire this url
207
+ raise e
208
+ end
181
209
 
182
210
 
183
211
  #Check if only a partial match. To avoid errors, strip out the first part and try again...
@@ -202,7 +230,7 @@ module Geomash
202
230
  elsif (result['types'] & ['country']).present?
203
231
  return_hash[:country_part] = result['long_name']
204
232
  elsif (result['types'] & ['administrative_area_level_1']).present?
205
- return_hash[:state_part] = result['long_name'].to_ascii
233
+ return_hash[:state_part] = result['long_name'].to_ascii.gsub('-city', '')
206
234
  elsif (result['types'] & ['locality']).present?
207
235
  return_hash[:city_part] = result['long_name']
208
236
  elsif (result['types'] & ['sublocality', 'political']).length == 2 || result['types'].include?('neighborhood')
@@ -213,6 +241,8 @@ module Geomash
213
241
  return_hash[:term_differs_from_tgn] ||= google_api_result.first.data['partial_match'] unless google_api_result.first.data['partial_match'].blank?
214
242
  end
215
243
 
244
+ #FIXME: Google free API rate limit is 5 requests / 1 second now (used to be 10). Need a better way to handle this.
245
+ sleep(0.1)
216
246
 
217
247
  return return_hash
218
248
  end
@@ -11,6 +11,10 @@ module Geomash
11
11
  return ''
12
12
  end
13
13
 
14
+ term_split_list = term.split(/[,\-\(\(]/).reject{ |e| e.empty? }
15
+ term_split_list.each{ |e| e.gsub!(/[^\w\s]/, "") } #Remove punctuation
16
+ term_split_list.each{ |e| e.strip! } #Remove any extra remaining whitespace
17
+ term_split_list.reject{ |e| e.empty? }
14
18
  state_abbr_list = ['Mass']
15
19
  state_name_list = []
16
20
  country_name_list = []
@@ -24,29 +28,51 @@ module Geomash
24
28
  Country.all.each do |country_name_abbr_pair|
25
29
  country_name_list << country_name_abbr_pair.first
26
30
  end
31
+ country_name_list.append('South Korea') #Listed as Korea, Republic of in the gem
32
+ country_name_list.append('North Korea') #Listed as Korea, Democratic People's Republic of in the gem
27
33
 
28
34
  #Parsing a subject geographic term.
29
- if term.include?('--')
30
- term.split('--').each_with_index do |split_term, index|
31
- if state_name_list.any? { |state| split_term.include? state } || country_name_list.any? { |country| split_term.include? country }
32
- geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
33
- elsif state_abbr_list.any? { |abbr| split_term.include? abbr }
34
- geo_term = split_term
35
+ if (state_name_list & term_split_list).present? || (state_abbr_list & term_split_list).present? || (country_name_list & term_split_list).present?
36
+ if term.include?('--')
37
+ term.split('--').each_with_index do |split_term, index|
38
+ if state_name_list.any? { |state| split_term.include? state } || country_name_list.any? { |country| split_term.include? country }
39
+ #Cases like Naroden Etnografski Muzeĭ (Sofia, Bulgaria)--Catalogs
40
+ if split_term.match(/\([^\)]+\)/)
41
+ geo_term = split_term.gsub('(', ',').gsub(' ,', ', ')
42
+ geo_term = geo_term.gsub(')', '')
43
+
44
+ =begin
45
+ if split_term.match(/\([^\)]+,[^\)]+\)/)
46
+ geo_term = split_term.match(/\([^\)]+\)/).to_s
47
+ geo_term = geo_term[1..geo_term.length-2]
48
+ #Abbeville (France)--History--20th century.
49
+ elsif split_term.match(/\([^\)]+\)/)
50
+ geo_term = split_term
51
+ =end
52
+ else
53
+ geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
54
+ end
55
+
56
+ elsif state_abbr_list.any? { |abbr| split_term.include? abbr }
57
+ geo_term = split_term
58
+ end
35
59
  end
36
- end
37
- #Other than a '--' field
38
- #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
39
- elsif term.include?(' - ')
40
- term.split(' - ').each do |split_term|
41
- if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr } || country_name_list.any? { |country| split_term.include? country }
42
- geo_term = split_term
60
+ #Other than a '--' field
61
+ #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
62
+ elsif term.include?(' - ')
63
+ term.split(' - ').each do |split_term|
64
+ if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr } || country_name_list.any? { |country| split_term.include? country }
65
+ geo_term = split_term
66
+ end
67
+
43
68
  end
69
+ else
70
+ #if term_split_list.length > 1
71
+ geo_term = term.gsub('(', ',').gsub(' ,', ', ')
72
+ geo_term = geo_term.gsub(')', '')
73
+ #end
44
74
 
45
75
  end
46
- else
47
- if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr } || country_name_list.any? { |country| term.include? country }
48
- geo_term = term
49
- end
50
76
  end
51
77
 
52
78
  return geo_term
@@ -83,6 +109,8 @@ module Geomash
83
109
  end
84
110
  end
85
111
 
112
+ geo_term = geo_term.squeeze(',')
113
+
86
114
  return geo_term
87
115
  end
88
116
 
@@ -224,6 +252,7 @@ module Geomash
224
252
 
225
253
  def self.try_with_entered_names(geo_hash)
226
254
  geo_hash_local = geo_hash.clone
255
+ geo_hash_local[:tgn] = nil
227
256
  if geo_hash_local[:neighborhood_part].present?
228
257
  orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:neighborhood_part].downcase.to_ascii}
229
258
  geo_hash_local[:neighborhood_part] = orig_string_check.first.strip if orig_string_check.present? && orig_string_check != geo_hash_local[:neighborhood_part]