geomash 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/geomash.yml +27 -0
- data/config/geomash.yml.sample +21 -0
- data/lib/geomash/geonames.rb +11 -2
- data/lib/geomash/parser.rb +56 -8
- data/lib/geomash/standardizer.rb +7 -3
- data/lib/geomash/tgn.rb +64 -20
- data/lib/geomash/version.rb +1 -1
- data/test/geomash_test.rb +41 -0
- data/test/parser_test.rb +7 -0
- data/test/tgn_test.rb +12 -0
- metadata +42 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f517ba6fcc94becafdbd6e43ae35ce833b69f728
|
4
|
+
data.tar.gz: fcc4f7d3194c8203e87bbf0a1b1192cf2e55804a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee9bb319b8a8014ff077957c38f27521ff61a37836140bc389a25d3acd659ecec91cd3602ab59c96f14589d61d32c6e35d74825f421c3dd82aa273e93e85d2bc
|
7
|
+
data.tar.gz: 0cfd23abf1dd9c6f9f71dc7a4cd96da271e6e8ecb379980875a4b7d291fa336499ab9735e35e97d562204948b337d40addb5845b8c27ed55b4ac4d7489715318
|
data/config/geomash.yml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
development:
|
2
|
+
tgn_enabled: true
|
3
|
+
geonames_username: boston_library
|
4
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
5
|
+
mapquest_key: <mapquest_key>
|
6
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
7
|
+
bing_key: <bing_key>
|
8
|
+
timeout: 7
|
9
|
+
parser_cache_enabled: false #See Readme before enabling this
|
10
|
+
test: &TEST_
|
11
|
+
tgn_enabled: true
|
12
|
+
geonames_username: boston_library
|
13
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
14
|
+
mapquest_key: <mapquest_key>
|
15
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
16
|
+
bing_key: <bing_key>
|
17
|
+
timeout: 7
|
18
|
+
parser_cache_enabled: false #See Readme before enabling this
|
19
|
+
production:
|
20
|
+
tgn_enabled: true
|
21
|
+
geonames_username: boston_library
|
22
|
+
#NOTE: Mapquest is super unreliable on its matches...
|
23
|
+
mapquest_key: <mapquest_key>
|
24
|
+
#NOTE: Bing also fairly unreliable but mostly works...
|
25
|
+
bing_key: <bing_key>
|
26
|
+
timeout: 7
|
27
|
+
parser_cache_enabled: false #See Readme before enabling this
|
data/config/geomash.yml.sample
CHANGED
@@ -5,6 +5,13 @@ development:
|
|
5
5
|
mapquest_key: <mapquest_key>
|
6
6
|
#NOTE: Bing also fairly unreliable but mostly works...
|
7
7
|
bing_key: <bing_key>
|
8
|
+
#NOTE: Optional Google API key
|
9
|
+
google_key: <google_key>
|
10
|
+
#NOTE: Optional Google Maps for Work Information
|
11
|
+
google_premier:
|
12
|
+
- key
|
13
|
+
- client
|
14
|
+
- channel
|
8
15
|
timeout: 7
|
9
16
|
parser_cache_enabled: false #See Readme before enabling this
|
10
17
|
test: &TEST_
|
@@ -14,6 +21,13 @@ test: &TEST_
|
|
14
21
|
mapquest_key: <mapquest_key>
|
15
22
|
#NOTE: Bing also fairly unreliable but mostly works...
|
16
23
|
bing_key: <bing_key>
|
24
|
+
#NOTE: Optional Google API key
|
25
|
+
google_key: <google_key>
|
26
|
+
#NOTE: Optional Google Maps for Work Information
|
27
|
+
google_premier:
|
28
|
+
- key
|
29
|
+
- client
|
30
|
+
- channel
|
17
31
|
timeout: 7
|
18
32
|
parser_cache_enabled: false #See Readme before enabling this
|
19
33
|
production:
|
@@ -23,5 +37,12 @@ production:
|
|
23
37
|
mapquest_key: <mapquest_key>
|
24
38
|
#NOTE: Bing also fairly unreliable but mostly works...
|
25
39
|
bing_key: <bing_key>
|
40
|
+
#NOTE: Optional Google API key
|
41
|
+
google_key: <google_key>
|
42
|
+
#NOTE: Optional Google Maps for Work Information
|
43
|
+
google_premier:
|
44
|
+
- key
|
45
|
+
- client
|
46
|
+
- channel
|
26
47
|
timeout: 7
|
27
48
|
parser_cache_enabled: false #See Readme before enabling this
|
data/lib/geomash/geonames.rb
CHANGED
@@ -28,7 +28,16 @@ module Geomash
|
|
28
28
|
parsed_xml = Nokogiri::Slop(geonames_response.body)
|
29
29
|
|
30
30
|
parsed_xml.geonames.geoname.each do |geoname|
|
31
|
-
|
31
|
+
#In some cases, geonames duplicates the fcode keys? See fcode "area" of:
|
32
|
+
#http://api.geonames.org/hierarchy?username=<username>&lang=en&style=FULL&geonameId=6947909
|
33
|
+
#FIXME: Something better needs to be done...
|
34
|
+
if hier_geo.has_key?(geoname.fcode.text.downcase.to_sym)
|
35
|
+
temp_key = geoname.fcode.text.downcase + "2"
|
36
|
+
hier_geo[temp_key.to_sym] = geoname.toponymName.text
|
37
|
+
else
|
38
|
+
hier_geo[geoname.fcode.text.downcase.to_sym] = geoname.toponymName.text
|
39
|
+
end
|
40
|
+
|
32
41
|
end
|
33
42
|
|
34
43
|
#FIXME: Code4Lib lazy implementation... will get last result
|
@@ -95,7 +104,7 @@ module Geomash
|
|
95
104
|
elsif geo_hash[:country_part] == 'North Korea'
|
96
105
|
country_code = 'KP'
|
97
106
|
else
|
98
|
-
country_code = Country.find_country_by_name(geo_hash[:country_part]).alpha2
|
107
|
+
country_code = ISO3166::Country.find_country_by_name(geo_hash[:country_part]).alpha2
|
99
108
|
end
|
100
109
|
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{country_code}")
|
101
110
|
|
data/lib/geomash/parser.rb
CHANGED
@@ -14,6 +14,14 @@ module Geomash
|
|
14
14
|
Geomash.config[:bing_key] || '<bing_key>'
|
15
15
|
end
|
16
16
|
|
17
|
+
def self.google_key
|
18
|
+
Geomash.config[:google_key] || '<google_key>'
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.google_premier
|
22
|
+
Geomash.config[:google_premier] || ['key', 'client', 'channel']
|
23
|
+
end
|
24
|
+
|
17
25
|
def self.timeout
|
18
26
|
Geomash.config[:timeout]
|
19
27
|
end
|
@@ -153,7 +161,7 @@ module Geomash
|
|
153
161
|
:combined=>mapquest_api_result.first.data['latLng']['lat'].to_s + ',' + mapquest_api_result.first.data['latLng']['lng'].to_s}
|
154
162
|
end
|
155
163
|
|
156
|
-
return_hash[:country_part] = Country.new(mapquest_api_result.first.data["adminArea1"]).name
|
164
|
+
return_hash[:country_part] = ISO3166::Country.new(mapquest_api_result.first.data["adminArea1"]).name
|
157
165
|
|
158
166
|
if return_hash[:country_part] == 'United States'
|
159
167
|
return_hash[:state_part] = Geomash::Constants::STATE_ABBR[mapquest_api_result.first.data["adminArea3"]] || mapquest_api_result.first.data["adminArea4"]
|
@@ -191,9 +199,21 @@ module Geomash
|
|
191
199
|
return {}
|
192
200
|
end
|
193
201
|
|
202
|
+
#FIXME: Handle just a plain 'Korea' better...
|
203
|
+
if term.match(/Korea/) and !term.match(/South/) and !term.match(/North/)
|
204
|
+
term.gsub!('Korea', 'South Korea')
|
205
|
+
end
|
206
|
+
|
194
207
|
return_hash[:standardized_term] = term
|
195
208
|
|
196
|
-
|
209
|
+
if self.google_premier != ['key', 'client', 'channel']
|
210
|
+
::Geocoder.configure(:lookup => :google_premier,:api_key => self.google_premier,:timeout => self.timeout, :use_https => true, :always_raise => :all)
|
211
|
+
elsif self.google_key != '<google_key>'
|
212
|
+
::Geocoder.configure(:lookup => :google,:api_key => self.google_key,:timeout => self.timeout, :use_https => true, :always_raise => :all)
|
213
|
+
else
|
214
|
+
::Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => self.timeout, :always_raise => :all)
|
215
|
+
end
|
216
|
+
|
197
217
|
begin
|
198
218
|
google_api_result = ::Geocoder.search(term)
|
199
219
|
rescue SocketError => e
|
@@ -217,18 +237,46 @@ module Geomash
|
|
217
237
|
end
|
218
238
|
end
|
219
239
|
|
240
|
+
|
220
241
|
if google_api_result.present?
|
242
|
+
#Find the best match index... case of Ho Chi Minh City (Vietnam) resolving to an airport...
|
243
|
+
best_match_index = 0
|
244
|
+
best_city_match = -1
|
245
|
+
best_state_match = -1
|
246
|
+
|
247
|
+
google_api_result.each_with_index do |single_google_api_result, index|
|
248
|
+
single_google_api_result.data["address_components"].each do |result|
|
249
|
+
if (result['types'] & ['locality']).present?
|
250
|
+
if return_hash[:standardized_term].to_ascii.include?(result['long_name'].to_ascii)
|
251
|
+
best_city_match = index if best_city_match == -1
|
252
|
+
end
|
253
|
+
elsif (result['types'] & ['administrative_area_level_1']).present?
|
254
|
+
if return_hash[:standardized_term].to_ascii.include?(result['long_name'].to_ascii.gsub('-city', ''))
|
255
|
+
best_state_match = index if best_state_match == -1
|
256
|
+
end
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
260
|
+
|
261
|
+
if best_city_match != -1
|
262
|
+
best_match_index = best_city_match
|
263
|
+
elsif best_state_match != -1
|
264
|
+
best_match_index = best_state_match
|
265
|
+
end
|
266
|
+
|
267
|
+
|
221
268
|
#Types: street number, route, neighborhood, establishment, transit_station, bus_station
|
222
|
-
google_api_result.
|
269
|
+
google_api_result[best_match_index].data["address_components"].each do |result|
|
223
270
|
if (result['types'] & ['street number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (result['types'].include?('neighborhood') && !result['types'].include?('political'))
|
224
271
|
#return_hash[:term_differs_from_tgn] = true
|
225
272
|
#TODO: Not implemented for Google results right now.
|
226
273
|
#return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
227
|
-
return_hash[:coords] = {:latitude=>google_api_result.
|
228
|
-
:longitude=>google_api_result.
|
229
|
-
:combined=>google_api_result.
|
274
|
+
return_hash[:coords] = {:latitude=>google_api_result[best_match_index].data['geometry']['location']['lat'].to_s,
|
275
|
+
:longitude=>google_api_result[best_match_index].data['geometry']['location']['lng'].to_s,
|
276
|
+
:combined=>google_api_result[best_match_index].data['geometry']['location']['lat'].to_s + ',' + google_api_result[best_match_index].data['geometry']['location']['lng'].to_s}
|
230
277
|
elsif (result['types'] & ['country']).present?
|
231
|
-
|
278
|
+
#gsub to fix a case of "Macedonia" returning "Macedonia (FYROM)"
|
279
|
+
return_hash[:country_part] = result['long_name'].gsub(/ \(.+\)$/, '')
|
232
280
|
elsif (result['types'] & ['administrative_area_level_1']).present?
|
233
281
|
return_hash[:state_part] = result['long_name'].to_ascii.gsub('-city', '')
|
234
282
|
elsif (result['types'] & ['locality']).present?
|
@@ -238,7 +286,7 @@ module Geomash
|
|
238
286
|
end
|
239
287
|
end
|
240
288
|
|
241
|
-
return_hash[:term_differs_from_tgn] ||= google_api_result.
|
289
|
+
return_hash[:term_differs_from_tgn] ||= google_api_result[best_match_index].data['partial_match'] unless google_api_result[best_match_index].data['partial_match'].blank?
|
242
290
|
end
|
243
291
|
|
244
292
|
#FIXME: Google free API rate limit is 5 requests / 1 second now (used to be 10). Need a better way to handle this.
|
data/lib/geomash/standardizer.rb
CHANGED
@@ -20,13 +20,17 @@ module Geomash
|
|
20
20
|
country_name_list = []
|
21
21
|
|
22
22
|
#Countries gem of https://github.com/hexorx/countries
|
23
|
-
Country.new('US').states.each do |state_abbr, state_names|
|
23
|
+
ISO3166::Country.new('US').states.each do |state_abbr, state_names|
|
24
24
|
state_abbr_list << ' ' + state_abbr
|
25
25
|
state_name_list << state_names["name"]
|
26
26
|
end
|
27
27
|
|
28
|
-
Country.all.each do |
|
29
|
-
country_name_list << country_name_abbr_pair.first
|
28
|
+
ISO3166::Country.all.each do |country_name_hash|
|
29
|
+
#country_name_list << country_name_abbr_pair.first
|
30
|
+
country_name_list << country_name_hash.data["name"] if country_name_hash.data["name"].present?
|
31
|
+
country_name_hash.data["names"].each do |name|
|
32
|
+
country_name_list << name
|
33
|
+
end
|
30
34
|
end
|
31
35
|
country_name_list.append('South Korea') #Listed as Korea, Republic of in the gem
|
32
36
|
country_name_list.append('North Korea') #Listed as Korea, Democratic People's Republic Of of in the gem
|
data/lib/geomash/tgn.rb
CHANGED
@@ -391,7 +391,7 @@ EXAMPLE SPARQL:
|
|
391
391
|
#Broader places
|
392
392
|
#FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label.
|
393
393
|
if broader_place_type_list.present? #Case of World... top of hierachy check
|
394
|
-
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?
|
394
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref ?place_label_latn_pinyin WHERE {"
|
395
395
|
|
396
396
|
broader_place_type_list.each do |place_uri|
|
397
397
|
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
@@ -407,16 +407,13 @@ EXAMPLE SPARQL:
|
|
407
407
|
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#altLabel> ?place_label_alt
|
408
408
|
FILTER langMatches( lang(?place_label_alt), "en" )
|
409
409
|
}
|
410
|
-
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_remaining
|
411
|
-
FILTER(!langMatches( lang(?place_label_remaining), "" ) && !langMatches( lang(?place_label_remaining), "en" ) && !langMatches( lang(?place_label_remaining), "zh-latn-pinyin" ))
|
412
|
-
}
|
413
410
|
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
414
411
|
} UNION
|
415
412
|
}
|
416
413
|
end
|
417
414
|
|
418
415
|
query = query[0..-12]
|
419
|
-
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?
|
416
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?aat_pref"
|
420
417
|
query = query.squish
|
421
418
|
|
422
419
|
tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
@@ -431,41 +428,43 @@ EXAMPLE SPARQL:
|
|
431
428
|
tgn_term = aat_response['place_label_default']['value']
|
432
429
|
elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
|
433
430
|
tgn_term = aat_response['place_label_latn_pinyin']['value']
|
431
|
+
elsif aat_response['place_label_latn_notone'].present? && aat_response['place_label_latn_notone']['value'] != '-'
|
432
|
+
tgn_term = aat_response['place_label_latn_notone']['value']
|
434
433
|
elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
|
435
434
|
tgn_term = aat_response['place_label_alt']['value']
|
436
435
|
else
|
437
|
-
|
436
|
+
raise "Could not find a label for: #{tgn_id}"
|
438
437
|
end
|
439
438
|
|
440
439
|
case tgn_term_type
|
441
440
|
when '300128176' #continent
|
442
|
-
hier_geo[:continent]
|
441
|
+
hier_geo[:continent] ||= tgn_term
|
443
442
|
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
444
|
-
hier_geo[:country]
|
443
|
+
hier_geo[:country] ||= tgn_term
|
445
444
|
when '300000774' #province
|
446
|
-
hier_geo[:province]
|
445
|
+
hier_geo[:province] ||= tgn_term
|
447
446
|
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
448
|
-
hier_geo[:region]
|
447
|
+
hier_geo[:region] ||= tgn_term
|
449
448
|
when '300000776', '300000772', '300235093' #state, department, governorate
|
450
|
-
hier_geo[:state]
|
449
|
+
hier_geo[:state] ||= tgn_term
|
451
450
|
when '300387081' #national district
|
452
451
|
if tgn_term == 'District of Columbia'
|
453
|
-
hier_geo[:state]
|
452
|
+
hier_geo[:state] ||= tgn_term
|
454
453
|
else
|
455
|
-
hier_geo[:territory]
|
454
|
+
hier_geo[:territory] ||= tgn_term
|
456
455
|
end
|
457
456
|
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
458
|
-
hier_geo[:territory]
|
457
|
+
hier_geo[:territory] ||= tgn_term
|
459
458
|
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
460
|
-
hier_geo[:county]
|
459
|
+
hier_geo[:county] ||= tgn_term
|
461
460
|
when '300008347', '300008389' #inhabited place, cities
|
462
|
-
hier_geo[:city]
|
461
|
+
hier_geo[:city] ||= tgn_term
|
463
462
|
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
464
|
-
hier_geo[:city_section]
|
463
|
+
hier_geo[:city_section] ||= tgn_term
|
465
464
|
when '300008791', '300387062' #island
|
466
|
-
hier_geo[:island]
|
465
|
+
hier_geo[:island] ||= tgn_term
|
467
466
|
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
468
|
-
hier_geo[:area]
|
467
|
+
hier_geo[:area] ||= tgn_term
|
469
468
|
end
|
470
469
|
end
|
471
470
|
end
|
@@ -500,6 +499,10 @@ EXAMPLE SPARQL:
|
|
500
499
|
neighboorhood_response = {}
|
501
500
|
|
502
501
|
state_part = geo_hash[:state_part]
|
502
|
+
#FIXME: In TGN, Ho Chi Minh doesn't have an ASCII label... unsure what to do in this case... maybe a synonyms file?
|
503
|
+
if state_part == 'Ho Chi Minh'
|
504
|
+
state_part = 'Hồ Chí Minh'
|
505
|
+
end
|
503
506
|
|
504
507
|
country_code = Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]][:tgn_id] unless Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]].blank?
|
505
508
|
country_code ||= ''
|
@@ -582,6 +585,8 @@ WHERE
|
|
582
585
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
583
586
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
584
587
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
588
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387506>} UNION
|
589
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300265612>} UNION
|
585
590
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
586
591
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
587
592
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
@@ -601,20 +606,39 @@ GROUP BY ?object_identifier
|
|
601
606
|
web_request_error = true if states_response[:errors]
|
602
607
|
end
|
603
608
|
|
609
|
+
#Do prefLabel first and then do just label... needed for case of Newton vs Newtown in MA (Newtown has an altlabel of Newton)
|
604
610
|
if states_response[:id].present? && city_part.present? && !web_request_error
|
605
611
|
query = %{SELECT ?object_identifier
|
606
612
|
WHERE
|
607
613
|
{
|
608
614
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
609
615
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
610
|
-
?x <http://www.w3.org/
|
616
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
611
617
|
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
612
618
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
613
619
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
614
620
|
}
|
615
621
|
GROUP BY ?object_identifier
|
616
622
|
}
|
623
|
+
|
617
624
|
cities_response = self.tgn_sparql_request(query)
|
625
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
626
|
+
query = %{SELECT ?object_identifier
|
627
|
+
WHERE
|
628
|
+
{
|
629
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
630
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
631
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
632
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
633
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
634
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
635
|
+
}
|
636
|
+
GROUP BY ?object_identifier
|
637
|
+
}
|
638
|
+
cities_response = self.tgn_sparql_request(query)
|
639
|
+
end
|
640
|
+
|
641
|
+
|
618
642
|
if cities_response[:id].blank? && !cities_response[:errors]
|
619
643
|
return_hash[:original_string_differs] = true
|
620
644
|
else
|
@@ -666,6 +690,26 @@ WHERE
|
|
666
690
|
GROUP BY ?object_identifier
|
667
691
|
}
|
668
692
|
neighborhood_response = self.tgn_sparql_request(query)
|
693
|
+
|
694
|
+
#Try once more on just prefLabel with no city restriction and inhabited places type added...
|
695
|
+
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
696
|
+
query = %{SELECT ?object_identifier
|
697
|
+
WHERE
|
698
|
+
{
|
699
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
700
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745>} UNION
|
701
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} .
|
702
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
703
|
+
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
704
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
705
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
706
|
+
}
|
707
|
+
GROUP BY ?object_identifier
|
708
|
+
}
|
709
|
+
neighborhood_response = self.tgn_sparql_request(query)
|
710
|
+
end
|
711
|
+
|
712
|
+
|
669
713
|
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
670
714
|
return_hash[:original_string_differs]=true
|
671
715
|
else
|
data/lib/geomash/version.rb
CHANGED
data/test/geomash_test.rb
CHANGED
@@ -161,6 +161,8 @@ class GeomashTest < ActiveSupport::TestCase
|
|
161
161
|
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
162
162
|
|
163
163
|
#Case of a country with no states
|
164
|
+
#FIXME: This now returns a state when it never did before...
|
165
|
+
=begin
|
164
166
|
result = Geomash.parse('Tokyo, Japan')
|
165
167
|
assert_equal 'Tokyo', result[:city_part]
|
166
168
|
assert_equal nil, result[:state_part]
|
@@ -170,6 +172,7 @@ class GeomashTest < ActiveSupport::TestCase
|
|
170
172
|
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
171
173
|
assert_equal '1850147', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
172
174
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
175
|
+
=end
|
173
176
|
|
174
177
|
#Should find the Michigan Atlanta over the Georgia Atlanta
|
175
178
|
#State part from an API giving me Atlanta????
|
@@ -194,6 +197,44 @@ class GeomashTest < ActiveSupport::TestCase
|
|
194
197
|
assert_equal '1835841', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
195
198
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
196
199
|
|
200
|
+
result = Geomash.parse('Northern Ireland')
|
201
|
+
assert_equal nil, result[:city_part]
|
202
|
+
assert_equal 'Northern Ireland', result[:state_part]
|
203
|
+
assert_equal 'United Kingdom', result[:country_part]
|
204
|
+
assert_equal nil, result[:neighborhood_part]
|
205
|
+
assert_equal '7002448', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
206
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
207
|
+
assert_equal '2641364', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
208
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
209
|
+
|
210
|
+
result = Geomash.parse('Phnom Penh (Cambodia)')
|
211
|
+
assert_equal '7004076', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
212
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
213
|
+
assert_equal '1821306', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
214
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
215
|
+
|
216
|
+
|
217
|
+
result = Geomash.parse('Ho Chi Minh City (Vietnam)')
|
218
|
+
assert_equal '7001069', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
219
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
220
|
+
assert_equal '1566083', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
221
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
222
|
+
|
223
|
+
#Ensure we get "Newton" instead of "Newtown" that has an altlabel of "Newton"
|
224
|
+
#Should this find Chestnut hill...?
|
225
|
+
result = Geomash.parse('Chestnut Hill, Massachusetts')
|
226
|
+
assert_equal 'Newton', result[:city_part]
|
227
|
+
assert_equal 'Massachusetts', result[:state_part]
|
228
|
+
assert_equal 'United States', result[:country_part]
|
229
|
+
assert_equal 'Chestnut Hill', result[:neighborhood_part]
|
230
|
+
assert_equal '7032056', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true #2050214 or
|
231
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
232
|
+
assert_equal '4932957', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
233
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
197
238
|
|
198
239
|
|
199
240
|
end
|
data/test/parser_test.rb
CHANGED
@@ -20,6 +20,13 @@ class ParserTest < ActiveSupport::TestCase
|
|
20
20
|
assert_equal 'Roxbury', result[:neighborhood_part]
|
21
21
|
assert_equal nil, result[:street_part]
|
22
22
|
|
23
|
+
result = Geomash::Parser.parse_google_api('Macedonia')
|
24
|
+
assert_equal nil, result[:city_part]
|
25
|
+
assert_equal nil, result[:state_part]
|
26
|
+
assert_equal 'Macedonia', result[:country_part]
|
27
|
+
assert_equal nil, result[:neighborhood_part]
|
28
|
+
assert_equal nil, result[:street_part]
|
29
|
+
|
23
30
|
|
24
31
|
#FIXME!!! Is this alright?
|
25
32
|
#result = Bplgeo::Parser.parse_google_api('201 Dowman Dr., Atlanta, GA 30322')
|
data/test/tgn_test.rb
CHANGED
@@ -14,6 +14,18 @@ class TGNTest < ActiveSupport::TestCase
|
|
14
14
|
assert_equal 'United States', result[:hier_geo][:country]
|
15
15
|
assert_equal 'North and Central America', result[:hier_geo][:continent]
|
16
16
|
|
17
|
+
|
18
|
+
#Check for a weird prefLabel case of only zh-latn-pinyin-x-notone
|
19
|
+
result = Geomash::TGN.get_tgn_data('7002066')
|
20
|
+
assert_equal '45.75', result[:coords][:latitude]
|
21
|
+
assert_equal '126.65', result[:coords][:longitude]
|
22
|
+
assert_equal '45.75,126.65', result[:coords][:combined]
|
23
|
+
assert_equal 'Harbin', result[:hier_geo][:city]
|
24
|
+
assert_equal 'Heilongjiang', result[:hier_geo][:province]
|
25
|
+
assert_equal 'China', result[:hier_geo][:country]
|
26
|
+
assert_equal 'Asia', result[:hier_geo][:continent]
|
27
|
+
assert_equal 'Harbin', result[:non_hier_geo][:value]
|
28
|
+
assert_nil result[:non_hier_geo][:qualifier]
|
17
29
|
end
|
18
30
|
end
|
19
31
|
end
|
metadata
CHANGED
@@ -1,153 +1,153 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geomash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: countries
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - '
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 1.2.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - '
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 1.2.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: geocoder
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: unidecoder
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: typhoeus
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: nokogiri
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: htmlentities
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: sparql
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: sqlite3
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: rails
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
|
-
- -
|
143
|
+
- - ">="
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
type: :development
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
|
-
- -
|
150
|
+
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
description: Parse string for potential geographic matches and return that data along
|
@@ -158,25 +158,26 @@ executables: []
|
|
158
158
|
extensions: []
|
159
159
|
extra_rdoc_files: []
|
160
160
|
files:
|
161
|
+
- Rakefile
|
162
|
+
- config/geomash.yml
|
161
163
|
- config/geomash.yml.sample
|
162
164
|
- lib/geomash.rb
|
163
|
-
- lib/geomash/tgn.rb
|
164
165
|
- lib/geomash/autoexpire_cache_dalli.rb
|
165
|
-
- lib/geomash/town_lookup.rb
|
166
|
-
- lib/geomash/geonames.rb
|
167
|
-
- lib/geomash/constants.rb
|
168
|
-
- lib/geomash/version.rb
|
169
|
-
- lib/geomash/standardizer.rb
|
170
166
|
- lib/geomash/autoexpire_cache_redis.rb
|
167
|
+
- lib/geomash/constants.rb
|
168
|
+
- lib/geomash/geonames.rb
|
171
169
|
- lib/geomash/parser.rb
|
172
|
-
-
|
173
|
-
-
|
170
|
+
- lib/geomash/standardizer.rb
|
171
|
+
- lib/geomash/tgn.rb
|
172
|
+
- lib/geomash/town_lookup.rb
|
173
|
+
- lib/geomash/version.rb
|
174
174
|
- test/geomash_test.rb
|
175
|
+
- test/geonames_test.rb
|
176
|
+
- test/parser_test.rb
|
177
|
+
- test/standardizer_test.rb
|
175
178
|
- test/test_helper.rb
|
176
179
|
- test/tgn_test.rb
|
177
180
|
- test/town_lookup_test.rb
|
178
|
-
- test/parser_test.rb
|
179
|
-
- test/geonames_test.rb
|
180
181
|
homepage: http://www.bpl.org
|
181
182
|
licenses: []
|
182
183
|
metadata: {}
|
@@ -186,26 +187,26 @@ require_paths:
|
|
186
187
|
- lib
|
187
188
|
required_ruby_version: !ruby/object:Gem::Requirement
|
188
189
|
requirements:
|
189
|
-
- -
|
190
|
+
- - ">="
|
190
191
|
- !ruby/object:Gem::Version
|
191
192
|
version: '0'
|
192
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
193
194
|
requirements:
|
194
|
-
- -
|
195
|
+
- - ">="
|
195
196
|
- !ruby/object:Gem::Version
|
196
197
|
version: '0'
|
197
198
|
requirements: []
|
198
199
|
rubyforge_project:
|
199
|
-
rubygems_version: 2.
|
200
|
+
rubygems_version: 2.4.6
|
200
201
|
signing_key:
|
201
202
|
specification_version: 4
|
202
203
|
summary: Parse string for potential geographic matches and return that data along
|
203
204
|
with the TGN ID and Geonames ID.
|
204
205
|
test_files:
|
206
|
+
- test/parser_test.rb
|
207
|
+
- test/geonames_test.rb
|
205
208
|
- test/standardizer_test.rb
|
206
|
-
- test/geomash_test.rb
|
207
|
-
- test/test_helper.rb
|
208
209
|
- test/tgn_test.rb
|
209
210
|
- test/town_lookup_test.rb
|
210
|
-
- test/
|
211
|
-
- test/
|
211
|
+
- test/test_helper.rb
|
212
|
+
- test/geomash_test.rb
|