geomash 0.2.1 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/geomash.yml.sample +3 -0
- data/lib/geomash/autoexpire_cache_dalli.rb +55 -0
- data/lib/geomash/autoexpire_cache_redis.rb +26 -0
- data/lib/geomash/constants.rb +6 -1
- data/lib/geomash/geonames.rb +19 -14
- data/lib/geomash/parser.rb +48 -18
- data/lib/geomash/standardizer.rb +46 -17
- data/lib/geomash/tgn.rb +274 -217
- data/lib/geomash/version.rb +1 -1
- data/lib/geomash.rb +8 -18
- data/test/geomash_test.rb +58 -4
- data/test/geonames_test.rb +1 -1
- data/test/standardizer_test.rb +37 -0
- metadata +15 -14
data/lib/geomash/tgn.rb
CHANGED
@@ -3,7 +3,8 @@ module Geomash
|
|
3
3
|
class TGN
|
4
4
|
|
5
5
|
def self.tgn_enabled
|
6
|
-
Geomash.config[:tgn_enabled]
|
6
|
+
return Geomash.config[:tgn_enabled] unless Geomash.config[:tgn_enabled].nil?
|
7
|
+
return true
|
7
8
|
end
|
8
9
|
|
9
10
|
=begin
|
@@ -189,7 +190,7 @@ EXAMPLE SPARQL:
|
|
189
190
|
tgn_main_term_info = {}
|
190
191
|
broader_place_type_list = []
|
191
192
|
|
192
|
-
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
193
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"}, :timeout=>500)
|
193
194
|
|
194
195
|
return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
|
195
196
|
|
@@ -243,36 +244,32 @@ EXAMPLE SPARQL:
|
|
243
244
|
|
244
245
|
query = query.squish
|
245
246
|
|
246
|
-
primary_tgn_response = Typhoeus::Request.
|
247
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
247
248
|
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
248
249
|
end
|
249
250
|
|
250
|
-
#FIXME: Temporary hack to determine more cases of non-blank/english place name conflicts that require resolution.
|
251
|
-
label_remaining_check = false
|
252
|
-
|
253
251
|
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
254
252
|
case ntriple['Predicate']['value']
|
255
253
|
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
256
254
|
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
257
|
-
tgn_main_term_info[:label_en]
|
255
|
+
tgn_main_term_info[:label_en] ||= ntriple['Object']['value']
|
258
256
|
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
|
259
|
-
tgn_main_term_info[:label_other]
|
257
|
+
tgn_main_term_info[:label_other] ||= ntriple['Object']['value']
|
260
258
|
elsif ntriple['Object']['xml:lang'].blank?
|
261
|
-
tgn_main_term_info[:label_default]
|
259
|
+
tgn_main_term_info[:label_default] ||= ntriple['Object']['value']
|
262
260
|
else
|
263
|
-
|
264
|
-
tgn_main_term_info[:label_remaining] = ntriple['Object']['value']
|
261
|
+
tgn_main_term_info[:label_remaining] ||= ntriple['Object']['value']
|
265
262
|
end
|
266
263
|
when 'http://www.w3.org/2004/02/skos/core#altLabel'
|
267
264
|
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
268
|
-
tgn_main_term_info[:label_alt]
|
265
|
+
tgn_main_term_info[:label_alt] ||= ntriple['Object']['value']
|
269
266
|
end
|
270
267
|
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
271
|
-
tgn_main_term_info[:aat_place]
|
268
|
+
tgn_main_term_info[:aat_place] ||= ntriple['Object']['value']
|
272
269
|
when 'http://schema.org/latitude'
|
273
|
-
tgn_main_term_info[:latitude]
|
270
|
+
tgn_main_term_info[:latitude] ||= ntriple['Object']['value']
|
274
271
|
when 'http://schema.org/longitude'
|
275
|
-
tgn_main_term_info[:longitude]
|
272
|
+
tgn_main_term_info[:longitude] ||= ntriple['Object']['value']
|
276
273
|
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
277
274
|
broader_place_type_list << ntriple['Object']['value']
|
278
275
|
end
|
@@ -289,18 +286,14 @@ EXAMPLE SPARQL:
|
|
289
286
|
end
|
290
287
|
|
291
288
|
hier_geo = {}
|
289
|
+
non_hier_geo = {}
|
290
|
+
|
292
291
|
#Default term to best label language...
|
293
292
|
tgn_term = tgn_main_term_info[:label_en]
|
294
293
|
tgn_term ||= tgn_main_term_info[:label_default]
|
295
294
|
tgn_term ||= tgn_main_term_info[:label_other]
|
296
295
|
tgn_term ||= tgn_main_term_info[:label_alt]
|
297
|
-
|
298
|
-
if label_remaining_check
|
299
|
-
raise "Could not determine a single label for TGN: " + tgn_id
|
300
|
-
else
|
301
|
-
tgn_term = tgn_main_term_info[:label_remaining]
|
302
|
-
end
|
303
|
-
end
|
296
|
+
tgn_term ||= tgn_main_term_info[:label_remaining]
|
304
297
|
|
305
298
|
tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
|
306
299
|
|
@@ -309,11 +302,11 @@ EXAMPLE SPARQL:
|
|
309
302
|
case tgn_term_type
|
310
303
|
when '300128176' #continent
|
311
304
|
hier_geo[:continent] = tgn_term
|
312
|
-
when '300128207' #
|
305
|
+
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
313
306
|
hier_geo[:country] = tgn_term
|
314
307
|
when '300000774' #province
|
315
308
|
hier_geo[:province] = tgn_term
|
316
|
-
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
309
|
+
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
317
310
|
hier_geo[:region] = tgn_term
|
318
311
|
when '300000776', '300000772', '300235093' #state, department, governorate
|
319
312
|
hier_geo[:state] = tgn_term
|
@@ -325,27 +318,83 @@ EXAMPLE SPARQL:
|
|
325
318
|
end
|
326
319
|
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
327
320
|
hier_geo[:territory] = tgn_term
|
328
|
-
when '300000771' #county
|
321
|
+
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
329
322
|
hier_geo[:county] = tgn_term
|
330
|
-
when '300008347' #inhabited place
|
323
|
+
when '300008347', '300008389' #inhabited place, cities
|
331
324
|
hier_geo[:city] = tgn_term
|
332
|
-
when '300000745' #neighborhood
|
325
|
+
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
333
326
|
hier_geo[:city_section] = tgn_term
|
334
327
|
when '300008791', '300387062' #island
|
335
328
|
hier_geo[:island] = tgn_term
|
336
|
-
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
329
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
337
330
|
hier_geo[:area] = tgn_term
|
331
|
+
when '300386699' #Top level element of World
|
332
|
+
non_hier_geo[:value] = 'World'
|
333
|
+
non_hier_geo[:qualifier] = nil
|
338
334
|
else
|
339
|
-
|
335
|
+
aat_main_term_info = {}
|
336
|
+
label_remaining_check = false
|
337
|
+
|
338
|
+
aat_type_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/aat/#{tgn_term_type}.json"}, :timeout=>500)
|
339
|
+
JSON.parse(aat_type_response.body)['results']['bindings'].each do |ntriple|
|
340
|
+
case ntriple['Predicate']['value']
|
341
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
342
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
343
|
+
aat_main_term_info[:label_en] ||= ntriple['Object']['value']
|
344
|
+
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en-us'
|
345
|
+
aat_main_term_info[:label_en] ||= ntriple['Object']['value']
|
346
|
+
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
|
347
|
+
aat_main_term_info[:label_other] ||= ntriple['Object']['value']
|
348
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
349
|
+
aat_main_term_info[:label_default] ||= ntriple['Object']['value']
|
350
|
+
else
|
351
|
+
label_remaining_check = true if aat_main_term_info[:label_remaining].present?
|
352
|
+
aat_main_term_info[:label_remaining] ||= ntriple['Object']['value']
|
353
|
+
end
|
354
|
+
when 'http://www.w3.org/2004/02/skos/core#altLabel'
|
355
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
356
|
+
aat_main_term_info[:label_alt] ||= ntriple['Object']['value']
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
end
|
361
|
+
#Default term to best label language...
|
362
|
+
aat_term = aat_main_term_info[:label_en]
|
363
|
+
aat_term ||= aat_main_term_info[:label_default]
|
364
|
+
aat_term ||= aat_main_term_info[:label_other]
|
365
|
+
aat_term ||= aat_main_term_info[:label_alt]
|
366
|
+
|
367
|
+
if aat_term.blank?
|
368
|
+
if label_remaining_check
|
369
|
+
raise "Could not determine a single aat non_hier_geo label for TGN: " + tgn_id
|
370
|
+
else
|
371
|
+
aat_term = aat_main_term_info[:label_remaining]
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
#Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water)
|
376
|
+
aat_term = aat_term.gsub(/ \(.+\)$/, '')
|
377
|
+
|
378
|
+
if (aat_term =~ /ies$/).present?
|
379
|
+
aat_term = aat_term.gsub(/ies$/, 'y')
|
380
|
+
elsif (aat_term =~ /es$/).present?
|
381
|
+
aat_term = aat_term.gsub(/es$/, '')
|
382
|
+
elsif (aat_term =~ /s$/).present?
|
383
|
+
aat_term = aat_term.gsub(/s$/, '')
|
384
|
+
end
|
385
|
+
|
386
|
+
#Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful
|
387
|
+
non_hier_geo[:value] = tgn_term
|
388
|
+
non_hier_geo[:qualifier] = tgn_term.downcase.include?(aat_term.downcase) ? nil : aat_term
|
340
389
|
end
|
341
390
|
|
342
391
|
#Broader places
|
343
392
|
#FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label.
|
393
|
+
if broader_place_type_list.present? #Case of World... top of hierachy check
|
394
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?place_label_remaining ?aat_pref WHERE {"
|
344
395
|
|
345
|
-
|
346
|
-
|
347
|
-
broader_place_type_list.each do |place_uri|
|
348
|
-
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
396
|
+
broader_place_type_list.each do |place_uri|
|
397
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
349
398
|
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
350
399
|
FILTER langMatches( lang(?place_label_en), "en" )
|
351
400
|
}
|
@@ -364,66 +413,67 @@ EXAMPLE SPARQL:
|
|
364
413
|
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
365
414
|
} UNION
|
366
415
|
}
|
367
|
-
|
416
|
+
end
|
368
417
|
|
369
|
-
|
370
|
-
|
371
|
-
|
418
|
+
query = query[0..-12]
|
419
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?place_label_remaining ?aat_pref"
|
420
|
+
query = query.squish
|
372
421
|
|
373
|
-
|
374
|
-
|
422
|
+
tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
423
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
375
424
|
|
376
|
-
|
377
|
-
|
425
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
426
|
+
tgn_term_type = aat_response['aat_pref']['value'].split('/').last
|
378
427
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
428
|
+
if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
|
429
|
+
tgn_term = aat_response['place_label_en']['value']
|
430
|
+
elsif aat_response['place_label_default'].present? && aat_response['place_label_default']['value'] != '-'
|
431
|
+
tgn_term = aat_response['place_label_default']['value']
|
432
|
+
elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
|
433
|
+
tgn_term = aat_response['place_label_latn_pinyin']['value']
|
434
|
+
elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
|
435
|
+
tgn_term = aat_response['place_label_alt']['value']
|
436
|
+
else
|
437
|
+
tgn_term = aat_response['place_label_remaining']['value']
|
438
|
+
end
|
390
439
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
hier_geo[:state] = tgn_term
|
402
|
-
when '300387081' #national district
|
403
|
-
if tgn_term == 'District of Columbia'
|
440
|
+
case tgn_term_type
|
441
|
+
when '300128176' #continent
|
442
|
+
hier_geo[:continent] = tgn_term
|
443
|
+
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
444
|
+
hier_geo[:country] = tgn_term
|
445
|
+
when '300000774' #province
|
446
|
+
hier_geo[:province] = tgn_term
|
447
|
+
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
448
|
+
hier_geo[:region] = tgn_term
|
449
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
404
450
|
hier_geo[:state] = tgn_term
|
405
|
-
|
451
|
+
when '300387081' #national district
|
452
|
+
if tgn_term == 'District of Columbia'
|
453
|
+
hier_geo[:state] = tgn_term
|
454
|
+
else
|
455
|
+
hier_geo[:territory] = tgn_term
|
456
|
+
end
|
457
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
406
458
|
hier_geo[:territory] = tgn_term
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
419
|
-
hier_geo[:area] = tgn_term
|
459
|
+
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
460
|
+
hier_geo[:county] = tgn_term
|
461
|
+
when '300008347', '300008389' #inhabited place, cities
|
462
|
+
hier_geo[:city] = tgn_term
|
463
|
+
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
464
|
+
hier_geo[:city_section] = tgn_term
|
465
|
+
when '300008791', '300387062' #island
|
466
|
+
hier_geo[:island] = tgn_term
|
467
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
468
|
+
hier_geo[:area] = tgn_term
|
469
|
+
end
|
420
470
|
end
|
421
471
|
end
|
422
472
|
|
423
473
|
tgn_data = {}
|
424
474
|
tgn_data[:coords] = coords
|
425
475
|
tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
|
426
|
-
tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
|
476
|
+
tgn_data[:non_hier_geo] = non_hier_geo.present? ? non_hier_geo : nil
|
427
477
|
|
428
478
|
else
|
429
479
|
|
@@ -435,17 +485,19 @@ EXAMPLE SPARQL:
|
|
435
485
|
|
436
486
|
end
|
437
487
|
|
438
|
-
|
439
488
|
def self.tgn_id_from_geo_hash(geo_hash)
|
440
489
|
return nil if Geomash::TGN.tgn_enabled != true
|
441
490
|
|
442
491
|
geo_hash = geo_hash.clone
|
443
|
-
|
444
492
|
max_retry = 3
|
445
493
|
sleep_time = 60 # In seconds
|
446
494
|
retry_count = 0
|
447
495
|
|
448
496
|
return_hash = {}
|
497
|
+
country_response = {}
|
498
|
+
states_response = {}
|
499
|
+
cities_response = {}
|
500
|
+
neighboorhood_response = {}
|
449
501
|
|
450
502
|
state_part = geo_hash[:state_part]
|
451
503
|
|
@@ -454,41 +506,71 @@ EXAMPLE SPARQL:
|
|
454
506
|
|
455
507
|
|
456
508
|
country_part = Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]][:tgn_country_name] unless Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]].blank?
|
457
|
-
country_part
|
509
|
+
country_part = geo_hash[:country_part] if country_part.blank?
|
458
510
|
country_part ||= ''
|
459
511
|
|
460
512
|
city_part = geo_hash[:city_part]
|
461
513
|
|
462
514
|
neighborhood_part = geo_hash[:neighborhood_part]
|
463
515
|
|
516
|
+
web_request_error = false
|
517
|
+
begin
|
518
|
+
if retry_count > 0
|
519
|
+
sleep(sleep_time)
|
520
|
+
end
|
521
|
+
retry_count = retry_count + 1
|
464
522
|
|
523
|
+
#First we get county!!
|
465
524
|
|
466
|
-
if city_part.blank? && state_part.blank?
|
467
|
-
# Limit to nations
|
468
525
|
query = %{SELECT ?object_identifier
|
469
526
|
WHERE
|
470
527
|
{
|
471
528
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
472
529
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
473
|
-
?x <http://www.w3.org/
|
530
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
474
531
|
FILTER regex(?object_label, "^#{country_part}$", "i" )
|
475
|
-
}
|
476
|
-
|
477
|
-
|
478
|
-
|
532
|
+
}
|
533
|
+
GROUP BY ?object_identifier
|
534
|
+
}
|
535
|
+
country_response = self.tgn_sparql_request(query)
|
536
|
+
return nil if country_response[:id].blank? && !country_response[:errors]
|
537
|
+
return_hash[:id] = country_response[:id]
|
538
|
+
return_hash[:rdf] = country_response[:rdf]
|
539
|
+
return_hash[:parse_depth] = 1
|
540
|
+
web_request_error = true if country_response[:errors]
|
541
|
+
|
542
|
+
#United State state query
|
543
|
+
if state_part.present? && country_code == 7012149 && !web_request_error
|
544
|
+
query = %{SELECT ?object_identifier
|
479
545
|
WHERE
|
480
546
|
{
|
481
547
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
482
|
-
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>
|
548
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
549
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
483
550
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
484
551
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
485
552
|
|
486
553
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
|
487
|
-
}
|
488
|
-
|
489
|
-
|
554
|
+
}
|
555
|
+
GROUP BY ?object_identifier
|
556
|
+
}
|
490
557
|
|
491
|
-
|
558
|
+
states_response = self.tgn_sparql_request(query)
|
559
|
+
if states_response[:id].blank? && !states_response[:errors]
|
560
|
+
return_hash[:original_string_differs] = true
|
561
|
+
else
|
562
|
+
return_hash[:id] = states_response[:id]
|
563
|
+
return_hash[:rdf] = states_response[:rdf]
|
564
|
+
return_hash[:parse_depth] = 2
|
565
|
+
end
|
566
|
+
web_request_error = true if states_response[:errors]
|
567
|
+
end
|
568
|
+
|
569
|
+
#Non United States state query
|
570
|
+
#Note: Had to remove {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION as it returned two results
|
571
|
+
#for "15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions". Correct or not?
|
572
|
+
if state_part.present? && country_code != 7012149 && !web_request_error
|
573
|
+
query = %{SELECT ?object_identifier
|
492
574
|
WHERE
|
493
575
|
{
|
494
576
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
@@ -500,75 +582,76 @@ WHERE
|
|
500
582
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
501
583
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
502
584
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
503
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
504
585
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
505
586
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
506
587
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
507
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
508
|
-
{
|
509
|
-
SELECT ?parent_country ?identifier_country
|
510
|
-
WHERE {
|
511
|
-
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
512
|
-
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
513
|
-
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
514
|
-
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
515
|
-
}
|
516
|
-
|
517
|
-
}
|
588
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
518
589
|
}
|
519
590
|
GROUP BY ?object_identifier
|
520
591
|
}
|
521
592
|
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
593
|
+
states_response = self.tgn_sparql_request(query)
|
594
|
+
if states_response[:id].blank? && !states_response[:errors]
|
595
|
+
return_hash[:original_string_differs] = true
|
596
|
+
else
|
597
|
+
return_hash[:id] = states_response[:id]
|
598
|
+
return_hash[:rdf] = states_response[:rdf]
|
599
|
+
return_hash[:parse_depth] = 2
|
600
|
+
end
|
601
|
+
web_request_error = true if states_response[:errors]
|
602
|
+
end
|
603
|
+
|
604
|
+
if states_response[:id].present? && city_part.present? && !web_request_error
|
605
|
+
query = %{SELECT ?object_identifier
|
527
606
|
WHERE
|
528
607
|
{
|
529
608
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
530
609
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
531
610
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
532
611
|
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
533
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
534
|
-
{
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
WHERE {
|
548
|
-
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
549
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
550
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
551
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
552
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
553
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
554
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
555
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
556
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
557
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
558
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
559
|
-
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
560
|
-
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
561
|
-
}
|
612
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
613
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
614
|
+
}
|
615
|
+
GROUP BY ?object_identifier
|
616
|
+
}
|
617
|
+
cities_response = self.tgn_sparql_request(query)
|
618
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
619
|
+
return_hash[:original_string_differs] = true
|
620
|
+
else
|
621
|
+
return_hash[:id] = cities_response[:id]
|
622
|
+
return_hash[:rdf] = cities_response[:rdf]
|
623
|
+
return_hash[:parse_depth] = 3
|
624
|
+
end
|
625
|
+
web_request_error = true if cities_response[:errors]
|
562
626
|
|
563
|
-
|
627
|
+
end
|
564
628
|
|
629
|
+
#Case of Countries without a state breakdown... ie. Tokyo, Japan
|
630
|
+
if state_part.blank? && country_response[:id].present? && city_part.present? && !web_request_error
|
631
|
+
query = %{SELECT ?object_identifier
|
632
|
+
WHERE
|
633
|
+
{
|
634
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
635
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
636
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
637
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
638
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
565
639
|
}
|
566
640
|
GROUP BY ?object_identifier
|
567
641
|
}
|
642
|
+
cities_response = self.tgn_sparql_request(query)
|
643
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
644
|
+
return_hash[:original_string_differs] = true
|
645
|
+
else
|
646
|
+
return_hash[:id] = cities_response[:id]
|
647
|
+
return_hash[:rdf] = cities_response[:rdf]
|
648
|
+
return_hash[:parse_depth] = 3
|
649
|
+
end
|
650
|
+
web_request_error = true if cities_response[:errors]
|
568
651
|
|
652
|
+
end
|
569
653
|
|
570
|
-
|
571
|
-
#Limited to only to neighborhoods currently...
|
654
|
+
if cities_response[:id].present? && neighborhood_part.present? && !web_request_error
|
572
655
|
query = %{SELECT ?object_identifier
|
573
656
|
WHERE
|
574
657
|
{
|
@@ -576,97 +659,71 @@ WHERE
|
|
576
659
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
|
577
660
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
578
661
|
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
579
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
580
|
-
{
|
581
|
-
|
582
|
-
WHERE {
|
583
|
-
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
584
|
-
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
585
|
-
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
586
|
-
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
587
|
-
}
|
588
|
-
|
589
|
-
}
|
590
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
591
|
-
{
|
592
|
-
SELECT ?parent_state ?identifier_state
|
593
|
-
WHERE {
|
594
|
-
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
595
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
596
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
597
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
598
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
599
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
600
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
601
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
602
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
603
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
604
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
605
|
-
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
606
|
-
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
607
|
-
}
|
608
|
-
|
609
|
-
}
|
610
|
-
|
611
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
|
612
|
-
{
|
613
|
-
SELECT ?parent_city ?identifier_city
|
614
|
-
WHERE {
|
615
|
-
?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
|
616
|
-
?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
617
|
-
?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
|
618
|
-
FILTER regex(?city_label, "^#{city_part}$", "i" )
|
619
|
-
}
|
620
|
-
|
621
|
-
}
|
622
|
-
|
662
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
663
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
664
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{cities_response[:id]}> .
|
623
665
|
}
|
624
666
|
GROUP BY ?object_identifier
|
625
667
|
}
|
668
|
+
neighborhood_response = self.tgn_sparql_request(query)
|
669
|
+
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
670
|
+
return_hash[:original_string_differs]=true
|
671
|
+
else
|
672
|
+
return_hash[:id] = neighborhood_response[:id]
|
673
|
+
return_hash[:rdf] = neighborhood_response[:rdf]
|
674
|
+
return_hash[:parse_depth] = 4
|
675
|
+
end
|
676
|
+
web_request_error = true if neighborhood_response[:errors]
|
677
|
+
end
|
626
678
|
|
679
|
+
end until (!web_request_error || retry_count == max_retry)
|
627
680
|
|
681
|
+
if return_hash.present? && !web_request_error
|
682
|
+
return_hash[:original_string_differs] ||= Geomash::Standardizer.parsed_and_original_check(geo_hash)
|
683
|
+
return return_hash
|
628
684
|
else
|
629
685
|
return nil
|
630
686
|
end
|
631
687
|
|
632
|
-
|
688
|
+
end
|
633
689
|
|
634
|
-
|
635
|
-
|
690
|
+
def self.tgn_sparql_request(query,method="GET")
|
691
|
+
response = {}
|
692
|
+
query = query.squish
|
693
|
+
if(method=="GET")
|
694
|
+
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
|
695
|
+
else
|
696
|
+
tgn_response = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
|
636
697
|
end
|
637
|
-
retry_count = retry_count + 1
|
638
|
-
|
639
|
-
query = query.squish
|
640
|
-
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
641
|
-
|
642
|
-
end until (tgn_response.code != 500 || retry_count == max_retry)
|
643
|
-
|
644
698
|
|
699
|
+
if tgn_response.success? && tgn_response.code == 200
|
700
|
+
begin
|
701
|
+
as_json = JSON.parse(tgn_response.body)
|
702
|
+
response[:json] = as_json
|
703
|
+
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
704
|
+
response[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
705
|
+
response[:rdf] = "http://vocab.getty.edu/tgn/#{response[:id]}.rdf"
|
706
|
+
end
|
707
|
+
response[:errors] = false
|
708
|
+
rescue JSON::ParserError
|
709
|
+
response[:json] = nil
|
710
|
+
response[:errors] = true
|
711
|
+
if tgn_response.cached? && Typhoeus::Config.cache.present?
|
712
|
+
cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
|
713
|
+
Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
|
714
|
+
end
|
645
715
|
|
646
|
-
|
647
|
-
unless tgn_response.code == 500
|
648
|
-
as_json = JSON.parse(tgn_response.body)
|
649
|
-
|
650
|
-
#This is ugly and needs to be redone to achieve better recursive...
|
651
|
-
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
652
|
-
return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
653
|
-
return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
|
716
|
+
end
|
654
717
|
else
|
655
|
-
|
718
|
+
if tgn_response.cached? && Typhoeus::Config.cache.present?
|
719
|
+
cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
|
720
|
+
Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
|
721
|
+
end
|
656
722
|
end
|
657
|
-
end
|
658
723
|
|
659
|
-
|
660
|
-
raise 'TGN Server appears to not be responding for Geographic query: ' + query
|
661
|
-
end
|
724
|
+
return response
|
662
725
|
|
663
|
-
if return_hash.present?
|
664
|
-
return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)
|
665
|
-
return return_hash
|
666
|
-
else
|
667
|
-
return nil
|
668
726
|
end
|
669
|
-
end
|
670
727
|
|
671
728
|
|
672
729
|
end
|