geomash 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/geomash.yml +27 -0
- data/config/geomash.yml.sample +21 -0
- data/lib/geomash/geonames.rb +11 -2
- data/lib/geomash/parser.rb +56 -8
- data/lib/geomash/standardizer.rb +7 -3
- data/lib/geomash/tgn.rb +64 -20
- data/lib/geomash/version.rb +1 -1
- data/test/geomash_test.rb +41 -0
- data/test/parser_test.rb +7 -0
- data/test/tgn_test.rb +12 -0
- metadata +42 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f517ba6fcc94becafdbd6e43ae35ce833b69f728
|
4
|
+
data.tar.gz: fcc4f7d3194c8203e87bbf0a1b1192cf2e55804a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee9bb319b8a8014ff077957c38f27521ff61a37836140bc389a25d3acd659ecec91cd3602ab59c96f14589d61d32c6e35d74825f421c3dd82aa273e93e85d2bc
|
7
|
+
data.tar.gz: 0cfd23abf1dd9c6f9f71dc7a4cd96da271e6e8ecb379980875a4b7d291fa336499ab9735e35e97d562204948b337d40addb5845b8c27ed55b4ac4d7489715318
|
data/config/geomash.yml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
development:
|
2
|
+
tgn_enabled: true
|
3
|
+
geonames_username: boston_library
|
4
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
5
|
+
mapquest_key: <mapquest_key>
|
6
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
7
|
+
bing_key: <bing_key>
|
8
|
+
timeout: 7
|
9
|
+
parser_cache_enabled: false #See Readme before enabling this
|
10
|
+
test: &TEST_
|
11
|
+
tgn_enabled: true
|
12
|
+
geonames_username: boston_library
|
13
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
14
|
+
mapquest_key: <mapquest_key>
|
15
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
16
|
+
bing_key: <bing_key>
|
17
|
+
timeout: 7
|
18
|
+
parser_cache_enabled: false #See Readme before enabling this
|
19
|
+
production:
|
20
|
+
tgn_enabled: true
|
21
|
+
geonames_username: boston_library
|
22
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
23
|
+
mapquest_key: <mapquest_key>
|
24
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
25
|
+
bing_key: <bing_key>
|
26
|
+
timeout: 7
|
27
|
+
parser_cache_enabled: false #See Readme before enabling this
|
data/config/geomash.yml.sample
CHANGED
@@ -5,6 +5,13 @@ development:
|
|
5
5
|
mapquest_key: <mapquest_key>
|
6
6
|
#NOTE: Bing also fairly unreliable but mostly works...
|
7
7
|
bing_key: <bing_key>
|
8
|
+
#NOTE: Optional Google API key
|
9
|
+
google_key: <google_key>
|
10
|
+
#NOTE: Optional Google Maps for Work Information
|
11
|
+
google_premier:
|
12
|
+
- key
|
13
|
+
- client
|
14
|
+
- channel
|
8
15
|
timeout: 7
|
9
16
|
parser_cache_enabled: false #See Readme before enabling this
|
10
17
|
test: &TEST_
|
@@ -14,6 +21,13 @@ test: &TEST_
|
|
14
21
|
mapquest_key: <mapquest_key>
|
15
22
|
#NOTE: Bing also fairly unreliable but mostly works...
|
16
23
|
bing_key: <bing_key>
|
24
|
+
#NOTE: Optional Google API key
|
25
|
+
google_key: <google_key>
|
26
|
+
#NOTE: Optional Google Maps for Work Information
|
27
|
+
google_premier:
|
28
|
+
- key
|
29
|
+
- client
|
30
|
+
- channel
|
17
31
|
timeout: 7
|
18
32
|
parser_cache_enabled: false #See Readme before enabling this
|
19
33
|
production:
|
@@ -23,5 +37,12 @@ production:
|
|
23
37
|
mapquest_key: <mapquest_key>
|
24
38
|
#NOTE: Bing also fairly unreliable but mostly works...
|
25
39
|
bing_key: <bing_key>
|
40
|
+
#NOTE: Optional Google API key
|
41
|
+
google_key: <google_key>
|
42
|
+
#NOTE: Optional Google Maps for Work Information
|
43
|
+
google_premier:
|
44
|
+
- key
|
45
|
+
- client
|
46
|
+
- channel
|
26
47
|
timeout: 7
|
27
48
|
parser_cache_enabled: false #See Readme before enabling this
|
data/lib/geomash/geonames.rb
CHANGED
@@ -28,7 +28,16 @@ module Geomash
|
|
28
28
|
parsed_xml = Nokogiri::Slop(geonames_response.body)
|
29
29
|
|
30
30
|
parsed_xml.geonames.geoname.each do |geoname|
|
31
|
-
|
31
|
+
#In some cases, geonames duplicates the fcode keys? See fcode "area" of:
|
32
|
+
#http://api.geonames.org/hierarchy?username=<username>&lang=en&style=FULL&geonameId=6947909
|
33
|
+
#FIXME: Something better needs to be done...
|
34
|
+
if hier_geo.has_key?(geoname.fcode.text.downcase.to_sym)
|
35
|
+
temp_key = geoname.fcode.text.downcase + "2"
|
36
|
+
hier_geo[temp_key.to_sym] = geoname.toponymName.text
|
37
|
+
else
|
38
|
+
hier_geo[geoname.fcode.text.downcase.to_sym] = geoname.toponymName.text
|
39
|
+
end
|
40
|
+
|
32
41
|
end
|
33
42
|
|
34
43
|
#FIXME: Code4Lib lazy implementation... will get last result
|
@@ -95,7 +104,7 @@ module Geomash
|
|
95
104
|
elsif geo_hash[:country_part] == 'North Korea'
|
96
105
|
country_code = 'KP'
|
97
106
|
else
|
98
|
-
country_code = Country.find_country_by_name(geo_hash[:country_part]).alpha2
|
107
|
+
country_code = ISO3166::Country.find_country_by_name(geo_hash[:country_part]).alpha2
|
99
108
|
end
|
100
109
|
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{country_code}")
|
101
110
|
|
data/lib/geomash/parser.rb
CHANGED
@@ -14,6 +14,14 @@ module Geomash
|
|
14
14
|
Geomash.config[:bing_key] || '<bing_key>'
|
15
15
|
end
|
16
16
|
|
17
|
+
def self.google_key
|
18
|
+
Geomash.config[:google_key] || '<google_key>'
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.google_premier
|
22
|
+
Geomash.config[:google_premier] || ['key', 'client', 'channel']
|
23
|
+
end
|
24
|
+
|
17
25
|
def self.timeout
|
18
26
|
Geomash.config[:timeout]
|
19
27
|
end
|
@@ -153,7 +161,7 @@ module Geomash
|
|
153
161
|
:combined=>mapquest_api_result.first.data['latLng']['lat'].to_s + ',' + mapquest_api_result.first.data['latLng']['lng'].to_s}
|
154
162
|
end
|
155
163
|
|
156
|
-
return_hash[:country_part] = Country.new(mapquest_api_result.first.data["adminArea1"]).name
|
164
|
+
return_hash[:country_part] = ISO3166::Country.new(mapquest_api_result.first.data["adminArea1"]).name
|
157
165
|
|
158
166
|
if return_hash[:country_part] == 'United States'
|
159
167
|
return_hash[:state_part] = Geomash::Constants::STATE_ABBR[mapquest_api_result.first.data["adminArea3"]] || mapquest_api_result.first.data["adminArea4"]
|
@@ -191,9 +199,21 @@ module Geomash
|
|
191
199
|
return {}
|
192
200
|
end
|
193
201
|
|
202
|
+
#FIXME: Handle just a plain 'Korea' better...
|
203
|
+
if term.match(/Korea/) and !term.match(/South/) and !term.match(/North/)
|
204
|
+
term.gsub!('Korea', 'South Korea')
|
205
|
+
end
|
206
|
+
|
194
207
|
return_hash[:standardized_term] = term
|
195
208
|
|
196
|
-
|
209
|
+
if self.google_premier != ['key', 'client', 'channel']
|
210
|
+
::Geocoder.configure(:lookup => :google_premier,:api_key => self.google_premier,:timeout => self.timeout, :use_https => true, :always_raise => :all)
|
211
|
+
elsif self.google_key != '<google_key>'
|
212
|
+
::Geocoder.configure(:lookup => :google,:api_key => self.google_key,:timeout => self.timeout, :use_https => true, :always_raise => :all)
|
213
|
+
else
|
214
|
+
::Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => self.timeout, :always_raise => :all)
|
215
|
+
end
|
216
|
+
|
197
217
|
begin
|
198
218
|
google_api_result = ::Geocoder.search(term)
|
199
219
|
rescue SocketError => e
|
@@ -217,18 +237,46 @@ module Geomash
|
|
217
237
|
end
|
218
238
|
end
|
219
239
|
|
240
|
+
|
220
241
|
if google_api_result.present?
|
242
|
+
#Find the best match index... case of Ho Chi Minh City (Vietnam) resolving to an airport...
|
243
|
+
best_match_index = 0
|
244
|
+
best_city_match = -1
|
245
|
+
best_state_match = -1
|
246
|
+
|
247
|
+
google_api_result.each_with_index do |single_google_api_result, index|
|
248
|
+
single_google_api_result.data["address_components"].each do |result|
|
249
|
+
if (result['types'] & ['locality']).present?
|
250
|
+
if return_hash[:standardized_term].to_ascii.include?(result['long_name'].to_ascii)
|
251
|
+
best_city_match = index if best_city_match == -1
|
252
|
+
end
|
253
|
+
elsif (result['types'] & ['administrative_area_level_1']).present?
|
254
|
+
if return_hash[:standardized_term].to_ascii.include?(result['long_name'].to_ascii.gsub('-city', ''))
|
255
|
+
best_state_match = index if best_state_match == -1
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
if best_city_match != -1
|
262
|
+
best_match_index = best_city_match
|
263
|
+
elsif best_state_match != -1
|
264
|
+
best_match_index = best_state_match
|
265
|
+
end
|
266
|
+
|
267
|
+
|
221
268
|
#Types: street number, route, neighborhood, establishment, transit_station, bus_station
|
222
|
-
google_api_result.
|
269
|
+
google_api_result[best_match_index].data["address_components"].each do |result|
|
223
270
|
if (result['types'] & ['street number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (result['types'].include?('neighborhood') && !result['types'].include?('political'))
|
224
271
|
#return_hash[:term_differs_from_tgn] = true
|
225
272
|
#TODO: Not implemented for Google results right now.
|
226
273
|
#return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
227
|
-
return_hash[:coords] = {:latitude=>google_api_result.
|
228
|
-
:longitude=>google_api_result.
|
229
|
-
:combined=>google_api_result.
|
274
|
+
return_hash[:coords] = {:latitude=>google_api_result[best_match_index].data['geometry']['location']['lat'].to_s,
|
275
|
+
:longitude=>google_api_result[best_match_index].data['geometry']['location']['lng'].to_s,
|
276
|
+
:combined=>google_api_result[best_match_index].data['geometry']['location']['lat'].to_s + ',' + google_api_result[best_match_index].data['geometry']['location']['lng'].to_s}
|
230
277
|
elsif (result['types'] & ['country']).present?
|
231
|
-
|
278
|
+
#gsub to fix a case of "Macedonia" returning "Macedonia (FYROM)"
|
279
|
+
return_hash[:country_part] = result['long_name'].gsub(/ \(.+\)$/, '')
|
232
280
|
elsif (result['types'] & ['administrative_area_level_1']).present?
|
233
281
|
return_hash[:state_part] = result['long_name'].to_ascii.gsub('-city', '')
|
234
282
|
elsif (result['types'] & ['locality']).present?
|
@@ -238,7 +286,7 @@ module Geomash
|
|
238
286
|
end
|
239
287
|
end
|
240
288
|
|
241
|
-
return_hash[:term_differs_from_tgn] ||= google_api_result.
|
289
|
+
return_hash[:term_differs_from_tgn] ||= google_api_result[best_match_index].data['partial_match'] unless google_api_result[best_match_index].data['partial_match'].blank?
|
242
290
|
end
|
243
291
|
|
244
292
|
#FIXME: Google free API rate limit is 5 requests / 1 second now (used to be 10). Need a better way to handle this.
|
data/lib/geomash/standardizer.rb
CHANGED
@@ -20,13 +20,17 @@ module Geomash
|
|
20
20
|
country_name_list = []
|
21
21
|
|
22
22
|
#Countries gem of https://github.com/hexorx/countries
|
23
|
-
Country.new('US').states.each do |state_abbr, state_names|
|
23
|
+
ISO3166::Country.new('US').states.each do |state_abbr, state_names|
|
24
24
|
state_abbr_list << ' ' + state_abbr
|
25
25
|
state_name_list << state_names["name"]
|
26
26
|
end
|
27
27
|
|
28
|
-
Country.all.each do |
|
29
|
-
country_name_list << country_name_abbr_pair.first
|
28
|
+
ISO3166::Country.all.each do |country_name_hash|
|
29
|
+
#country_name_list << country_name_abbr_pair.first
|
30
|
+
country_name_list << country_name_hash.data["name"] if country_name_hash.data["name"].present?
|
31
|
+
country_name_hash.data["names"].each do |name|
|
32
|
+
country_name_list << name
|
33
|
+
end
|
30
34
|
end
|
31
35
|
country_name_list.append('South Korea') #Listed as Korea, Republic of in the gem
|
32
36
|
country_name_list.append('North Korea') #Listed as Korea, Democratic People's Republic Of in the gem
|
data/lib/geomash/tgn.rb
CHANGED
@@ -391,7 +391,7 @@ EXAMPLE SPARQL:
|
|
391
391
|
#Broader places
|
392
392
|
#FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label.
|
393
393
|
if broader_place_type_list.present? #Case of World... top of hierarchy check
|
394
|
-
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?
|
394
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref ?place_label_latn_pinyin WHERE {"
|
395
395
|
|
396
396
|
broader_place_type_list.each do |place_uri|
|
397
397
|
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
@@ -407,16 +407,13 @@ EXAMPLE SPARQL:
|
|
407
407
|
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#altLabel> ?place_label_alt
|
408
408
|
FILTER langMatches( lang(?place_label_alt), "en" )
|
409
409
|
}
|
410
|
-
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_remaining
|
411
|
-
FILTER(!langMatches( lang(?place_label_remaining), "" ) && !langMatches( lang(?place_label_remaining), "en" ) && !langMatches( lang(?place_label_remaining), "zh-latn-pinyin" ))
|
412
|
-
}
|
413
410
|
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
414
411
|
} UNION
|
415
412
|
}
|
416
413
|
end
|
417
414
|
|
418
415
|
query = query[0..-12]
|
419
|
-
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?
|
416
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?aat_pref"
|
420
417
|
query = query.squish
|
421
418
|
|
422
419
|
tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
@@ -431,41 +428,43 @@ EXAMPLE SPARQL:
|
|
431
428
|
tgn_term = aat_response['place_label_default']['value']
|
432
429
|
elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
|
433
430
|
tgn_term = aat_response['place_label_latn_pinyin']['value']
|
431
|
+
elsif aat_response['place_label_latn_notone'].present? && aat_response['place_label_latn_notone']['value'] != '-'
|
432
|
+
tgn_term = aat_response['place_label_latn_notone']['value']
|
434
433
|
elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
|
435
434
|
tgn_term = aat_response['place_label_alt']['value']
|
436
435
|
else
|
437
|
-
|
436
|
+
raise "Could not find a label for: #{tgn_id}"
|
438
437
|
end
|
439
438
|
|
440
439
|
case tgn_term_type
|
441
440
|
when '300128176' #continent
|
442
|
-
hier_geo[:continent]
|
441
|
+
hier_geo[:continent] ||= tgn_term
|
443
442
|
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
444
|
-
hier_geo[:country]
|
443
|
+
hier_geo[:country] ||= tgn_term
|
445
444
|
when '300000774' #province
|
446
|
-
hier_geo[:province]
|
445
|
+
hier_geo[:province] ||= tgn_term
|
447
446
|
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
448
|
-
hier_geo[:region]
|
447
|
+
hier_geo[:region] ||= tgn_term
|
449
448
|
when '300000776', '300000772', '300235093' #state, department, governorate
|
450
|
-
hier_geo[:state]
|
449
|
+
hier_geo[:state] ||= tgn_term
|
451
450
|
when '300387081' #national district
|
452
451
|
if tgn_term == 'District of Columbia'
|
453
|
-
hier_geo[:state]
|
452
|
+
hier_geo[:state] ||= tgn_term
|
454
453
|
else
|
455
|
-
hier_geo[:territory]
|
454
|
+
hier_geo[:territory] ||= tgn_term
|
456
455
|
end
|
457
456
|
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
458
|
-
hier_geo[:territory]
|
457
|
+
hier_geo[:territory] ||= tgn_term
|
459
458
|
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
460
|
-
hier_geo[:county]
|
459
|
+
hier_geo[:county] ||= tgn_term
|
461
460
|
when '300008347', '300008389' #inhabited place, cities
|
462
|
-
hier_geo[:city]
|
461
|
+
hier_geo[:city] ||= tgn_term
|
463
462
|
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
464
|
-
hier_geo[:city_section]
|
463
|
+
hier_geo[:city_section] ||= tgn_term
|
465
464
|
when '300008791', '300387062' #island
|
466
|
-
hier_geo[:island]
|
465
|
+
hier_geo[:island] ||= tgn_term
|
467
466
|
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
468
|
-
hier_geo[:area]
|
467
|
+
hier_geo[:area] ||= tgn_term
|
469
468
|
end
|
470
469
|
end
|
471
470
|
end
|
@@ -500,6 +499,10 @@ EXAMPLE SPARQL:
|
|
500
499
|
neighboorhood_response = {}
|
501
500
|
|
502
501
|
state_part = geo_hash[:state_part]
|
502
|
+
#FIXME: In TGN, Ho Chi Minh doesn't have an ASCII label... unsure what to do in this case... maybe a synonyms file?
|
503
|
+
if state_part == 'Ho Chi Minh'
|
504
|
+
state_part = 'Hồ Chí Minh'
|
505
|
+
end
|
503
506
|
|
504
507
|
country_code = Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]][:tgn_id] unless Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]].blank?
|
505
508
|
country_code ||= ''
|
@@ -582,6 +585,8 @@ WHERE
|
|
582
585
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
583
586
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
584
587
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
588
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387506>} UNION
|
589
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300265612>} UNION
|
585
590
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
586
591
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
587
592
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
@@ -601,20 +606,39 @@ GROUP BY ?object_identifier
|
|
601
606
|
web_request_error = true if states_response[:errors]
|
602
607
|
end
|
603
608
|
|
609
|
+
#Do prefLabel first and then do just label... needed for case of Newton vs Newtown in MA (Newtown has an altlabel of Newton)
|
604
610
|
if states_response[:id].present? && city_part.present? && !web_request_error
|
605
611
|
query = %{SELECT ?object_identifier
|
606
612
|
WHERE
|
607
613
|
{
|
608
614
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
609
615
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
610
|
-
?x <http://www.w3.org/
|
616
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
611
617
|
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
612
618
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
613
619
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
614
620
|
}
|
615
621
|
GROUP BY ?object_identifier
|
616
622
|
}
|
623
|
+
|
617
624
|
cities_response = self.tgn_sparql_request(query)
|
625
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
626
|
+
query = %{SELECT ?object_identifier
|
627
|
+
WHERE
|
628
|
+
{
|
629
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
630
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
631
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
632
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
633
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
634
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
635
|
+
}
|
636
|
+
GROUP BY ?object_identifier
|
637
|
+
}
|
638
|
+
cities_response = self.tgn_sparql_request(query)
|
639
|
+
end
|
640
|
+
|
641
|
+
|
618
642
|
if cities_response[:id].blank? && !cities_response[:errors]
|
619
643
|
return_hash[:original_string_differs] = true
|
620
644
|
else
|
@@ -666,6 +690,26 @@ WHERE
|
|
666
690
|
GROUP BY ?object_identifier
|
667
691
|
}
|
668
692
|
neighborhood_response = self.tgn_sparql_request(query)
|
693
|
+
|
694
|
+
#Try once more on just prefLabel with no city restriction and inhabited places type added...
|
695
|
+
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
696
|
+
query = %{SELECT ?object_identifier
|
697
|
+
WHERE
|
698
|
+
{
|
699
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
700
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745>} UNION
|
701
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} .
|
702
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
703
|
+
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
704
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
705
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
706
|
+
}
|
707
|
+
GROUP BY ?object_identifier
|
708
|
+
}
|
709
|
+
neighborhood_response = self.tgn_sparql_request(query)
|
710
|
+
end
|
711
|
+
|
712
|
+
|
669
713
|
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
670
714
|
return_hash[:original_string_differs]=true
|
671
715
|
else
|
data/lib/geomash/version.rb
CHANGED
data/test/geomash_test.rb
CHANGED
@@ -161,6 +161,8 @@ class GeomashTest < ActiveSupport::TestCase
|
|
161
161
|
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
162
162
|
|
163
163
|
#Case of a country with no states
|
164
|
+
#FIXME: This now returns a state when it never did before...
|
165
|
+
=begin
|
164
166
|
result = Geomash.parse('Tokyo, Japan')
|
165
167
|
assert_equal 'Tokyo', result[:city_part]
|
166
168
|
assert_equal nil, result[:state_part]
|
@@ -170,6 +172,7 @@ class GeomashTest < ActiveSupport::TestCase
|
|
170
172
|
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
171
173
|
assert_equal '1850147', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
172
174
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
175
|
+
=end
|
173
176
|
|
174
177
|
#Should find the Michigan Atlanta over the Georgia Atlanta
|
175
178
|
#State part from an API giving me Atlanta????
|
@@ -194,6 +197,44 @@ class GeomashTest < ActiveSupport::TestCase
|
|
194
197
|
assert_equal '1835841', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
195
198
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
196
199
|
|
200
|
+
result = Geomash.parse('Northern Ireland')
|
201
|
+
assert_equal nil, result[:city_part]
|
202
|
+
assert_equal 'Northern Ireland', result[:state_part]
|
203
|
+
assert_equal 'United Kingdom', result[:country_part]
|
204
|
+
assert_equal nil, result[:neighborhood_part]
|
205
|
+
assert_equal '7002448', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
206
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
207
|
+
assert_equal '2641364', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
208
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
209
|
+
|
210
|
+
result = Geomash.parse('Phnom Penh (Cambodia)')
|
211
|
+
assert_equal '7004076', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
212
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
213
|
+
assert_equal '1821306', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
214
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
215
|
+
|
216
|
+
|
217
|
+
result = Geomash.parse('Ho Chi Minh City (Vietnam)')
|
218
|
+
assert_equal '7001069', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
219
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
220
|
+
assert_equal '1566083', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
221
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
222
|
+
|
223
|
+
#Ensure we get "Newton" instead of "Newtown" that has an altlabel of "Newton"
|
224
|
+
#Should this find Chestnut Hill...?
|
225
|
+
result = Geomash.parse('Chestnut Hill, Massachusetts')
|
226
|
+
assert_equal 'Newton', result[:city_part]
|
227
|
+
assert_equal 'Massachusetts', result[:state_part]
|
228
|
+
assert_equal 'United States', result[:country_part]
|
229
|
+
assert_equal 'Chestnut Hill', result[:neighborhood_part]
|
230
|
+
assert_equal '7032056', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true #2050214 or
|
231
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
232
|
+
assert_equal '4932957', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
233
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
197
238
|
|
198
239
|
|
199
240
|
end
|
data/test/parser_test.rb
CHANGED
@@ -20,6 +20,13 @@ class ParserTest < ActiveSupport::TestCase
|
|
20
20
|
assert_equal 'Roxbury', result[:neighborhood_part]
|
21
21
|
assert_equal nil, result[:street_part]
|
22
22
|
|
23
|
+
result = Geomash::Parser.parse_google_api('Macedonia')
|
24
|
+
assert_equal nil, result[:city_part]
|
25
|
+
assert_equal nil, result[:state_part]
|
26
|
+
assert_equal 'Macedonia', result[:country_part]
|
27
|
+
assert_equal nil, result[:neighborhood_part]
|
28
|
+
assert_equal nil, result[:street_part]
|
29
|
+
|
23
30
|
|
24
31
|
#FIXME!!! Is this alright?
|
25
32
|
#result = Bplgeo::Parser.parse_google_api('201 Dowman Dr., Atlanta, GA 30322')
|
data/test/tgn_test.rb
CHANGED
@@ -14,6 +14,18 @@ class TGNTest < ActiveSupport::TestCase
|
|
14
14
|
assert_equal 'United States', result[:hier_geo][:country]
|
15
15
|
assert_equal 'North and Central America', result[:hier_geo][:continent]
|
16
16
|
|
17
|
+
|
18
|
+
#Check for a weird prefLabel case of only zh-latn-pinyin-x-notone
|
19
|
+
result = Geomash::TGN.get_tgn_data('7002066')
|
20
|
+
assert_equal '45.75', result[:coords][:latitude]
|
21
|
+
assert_equal '126.65', result[:coords][:longitude]
|
22
|
+
assert_equal '45.75,126.65', result[:coords][:combined]
|
23
|
+
assert_equal 'Harbin', result[:hier_geo][:city]
|
24
|
+
assert_equal 'Heilongjiang', result[:hier_geo][:province]
|
25
|
+
assert_equal 'China', result[:hier_geo][:country]
|
26
|
+
assert_equal 'Asia', result[:hier_geo][:continent]
|
27
|
+
assert_equal 'Harbin', result[:non_hier_geo][:value]
|
28
|
+
assert_nil result[:non_hier_geo][:qualifier]
|
17
29
|
end
|
18
30
|
end
|
19
31
|
end
|
metadata
CHANGED
@@ -1,153 +1,153 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geomash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: countries
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - '
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.2.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - '
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 1.2.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: geocoder
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: unidecoder
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: typhoeus
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: nokogiri
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: htmlentities
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: sparql
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: sqlite3
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rails
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
|
-
- -
|
143
|
+
- - ">="
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
type: :development
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
description: Parse string for potential geographic matches and return that data along
|
@@ -158,25 +158,26 @@ executables: []
|
|
158
158
|
extensions: []
|
159
159
|
extra_rdoc_files: []
|
160
160
|
files:
|
161
|
+
- Rakefile
|
162
|
+
- config/geomash.yml
|
161
163
|
- config/geomash.yml.sample
|
162
164
|
- lib/geomash.rb
|
163
|
-
- lib/geomash/tgn.rb
|
164
165
|
- lib/geomash/autoexpire_cache_dalli.rb
|
165
|
-
- lib/geomash/town_lookup.rb
|
166
|
-
- lib/geomash/geonames.rb
|
167
|
-
- lib/geomash/constants.rb
|
168
|
-
- lib/geomash/version.rb
|
169
|
-
- lib/geomash/standardizer.rb
|
170
166
|
- lib/geomash/autoexpire_cache_redis.rb
|
167
|
+
- lib/geomash/constants.rb
|
168
|
+
- lib/geomash/geonames.rb
|
171
169
|
- lib/geomash/parser.rb
|
172
|
-
-
|
173
|
-
-
|
170
|
+
- lib/geomash/standardizer.rb
|
171
|
+
- lib/geomash/tgn.rb
|
172
|
+
- lib/geomash/town_lookup.rb
|
173
|
+
- lib/geomash/version.rb
|
174
174
|
- test/geomash_test.rb
|
175
|
+
- test/geonames_test.rb
|
176
|
+
- test/parser_test.rb
|
177
|
+
- test/standardizer_test.rb
|
175
178
|
- test/test_helper.rb
|
176
179
|
- test/tgn_test.rb
|
177
180
|
- test/town_lookup_test.rb
|
178
|
-
- test/parser_test.rb
|
179
|
-
- test/geonames_test.rb
|
180
181
|
homepage: http://www.bpl.org
|
181
182
|
licenses: []
|
182
183
|
metadata: {}
|
@@ -186,26 +187,26 @@ require_paths:
|
|
186
187
|
- lib
|
187
188
|
required_ruby_version: !ruby/object:Gem::Requirement
|
188
189
|
requirements:
|
189
|
-
- -
|
190
|
+
- - ">="
|
190
191
|
- !ruby/object:Gem::Version
|
191
192
|
version: '0'
|
192
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
193
194
|
requirements:
|
194
|
-
- -
|
195
|
+
- - ">="
|
195
196
|
- !ruby/object:Gem::Version
|
196
197
|
version: '0'
|
197
198
|
requirements: []
|
198
199
|
rubyforge_project:
|
199
|
-
rubygems_version: 2.
|
200
|
+
rubygems_version: 2.4.6
|
200
201
|
signing_key:
|
201
202
|
specification_version: 4
|
202
203
|
summary: Parse string for potential geographic matches and return that data along
|
203
204
|
with the TGN ID and Geonames ID.
|
204
205
|
test_files:
|
206
|
+
- test/parser_test.rb
|
207
|
+
- test/geonames_test.rb
|
205
208
|
- test/standardizer_test.rb
|
206
|
-
- test/geomash_test.rb
|
207
|
-
- test/test_helper.rb
|
208
209
|
- test/tgn_test.rb
|
209
210
|
- test/town_lookup_test.rb
|
210
|
-
- test/
|
211
|
-
- test/
|
211
|
+
- test/test_helper.rb
|
212
|
+
- test/geomash_test.rb
|