geomash 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/geomash.yml.sample +3 -0
- data/lib/geomash/autoexpire_cache_dalli.rb +55 -0
- data/lib/geomash/autoexpire_cache_redis.rb +26 -0
- data/lib/geomash/constants.rb +6 -1
- data/lib/geomash/geonames.rb +19 -14
- data/lib/geomash/parser.rb +48 -18
- data/lib/geomash/standardizer.rb +46 -17
- data/lib/geomash/tgn.rb +274 -217
- data/lib/geomash/version.rb +1 -1
- data/lib/geomash.rb +8 -18
- data/test/geomash_test.rb +58 -4
- data/test/geonames_test.rb +1 -1
- data/test/standardizer_test.rb +37 -0
- metadata +15 -14
data/lib/geomash/tgn.rb
CHANGED
@@ -3,7 +3,8 @@ module Geomash
|
|
3
3
|
class TGN
|
4
4
|
|
5
5
|
def self.tgn_enabled
|
6
|
-
Geomash.config[:tgn_enabled]
|
6
|
+
return Geomash.config[:tgn_enabled] unless Geomash.config[:tgn_enabled].nil?
|
7
|
+
return true
|
7
8
|
end
|
8
9
|
|
9
10
|
=begin
|
@@ -189,7 +190,7 @@ EXAMPLE SPARQL:
|
|
189
190
|
tgn_main_term_info = {}
|
190
191
|
broader_place_type_list = []
|
191
192
|
|
192
|
-
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
193
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"}, :timeout=>500)
|
193
194
|
|
194
195
|
return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
|
195
196
|
|
@@ -243,36 +244,32 @@ EXAMPLE SPARQL:
|
|
243
244
|
|
244
245
|
query = query.squish
|
245
246
|
|
246
|
-
primary_tgn_response = Typhoeus::Request.
|
247
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
247
248
|
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
248
249
|
end
|
249
250
|
|
250
|
-
#FIXME: Temporary hack to determine more cases of non-blank/english place name conflicts that require resolution.
|
251
|
-
label_remaining_check = false
|
252
|
-
|
253
251
|
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
254
252
|
case ntriple['Predicate']['value']
|
255
253
|
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
256
254
|
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
257
|
-
tgn_main_term_info[:label_en]
|
255
|
+
tgn_main_term_info[:label_en] ||= ntriple['Object']['value']
|
258
256
|
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
|
259
|
-
tgn_main_term_info[:label_other]
|
257
|
+
tgn_main_term_info[:label_other] ||= ntriple['Object']['value']
|
260
258
|
elsif ntriple['Object']['xml:lang'].blank?
|
261
|
-
tgn_main_term_info[:label_default]
|
259
|
+
tgn_main_term_info[:label_default] ||= ntriple['Object']['value']
|
262
260
|
else
|
263
|
-
|
264
|
-
tgn_main_term_info[:label_remaining] = ntriple['Object']['value']
|
261
|
+
tgn_main_term_info[:label_remaining] ||= ntriple['Object']['value']
|
265
262
|
end
|
266
263
|
when 'http://www.w3.org/2004/02/skos/core#altLabel'
|
267
264
|
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
268
|
-
tgn_main_term_info[:label_alt]
|
265
|
+
tgn_main_term_info[:label_alt] ||= ntriple['Object']['value']
|
269
266
|
end
|
270
267
|
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
271
|
-
tgn_main_term_info[:aat_place]
|
268
|
+
tgn_main_term_info[:aat_place] ||= ntriple['Object']['value']
|
272
269
|
when 'http://schema.org/latitude'
|
273
|
-
tgn_main_term_info[:latitude]
|
270
|
+
tgn_main_term_info[:latitude] ||= ntriple['Object']['value']
|
274
271
|
when 'http://schema.org/longitude'
|
275
|
-
tgn_main_term_info[:longitude]
|
272
|
+
tgn_main_term_info[:longitude] ||= ntriple['Object']['value']
|
276
273
|
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
277
274
|
broader_place_type_list << ntriple['Object']['value']
|
278
275
|
end
|
@@ -289,18 +286,14 @@ EXAMPLE SPARQL:
|
|
289
286
|
end
|
290
287
|
|
291
288
|
hier_geo = {}
|
289
|
+
non_hier_geo = {}
|
290
|
+
|
292
291
|
#Default term to best label language...
|
293
292
|
tgn_term = tgn_main_term_info[:label_en]
|
294
293
|
tgn_term ||= tgn_main_term_info[:label_default]
|
295
294
|
tgn_term ||= tgn_main_term_info[:label_other]
|
296
295
|
tgn_term ||= tgn_main_term_info[:label_alt]
|
297
|
-
|
298
|
-
if label_remaining_check
|
299
|
-
raise "Could not determine a single label for TGN: " + tgn_id
|
300
|
-
else
|
301
|
-
tgn_term = tgn_main_term_info[:label_remaining]
|
302
|
-
end
|
303
|
-
end
|
296
|
+
tgn_term ||= tgn_main_term_info[:label_remaining]
|
304
297
|
|
305
298
|
tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
|
306
299
|
|
@@ -309,11 +302,11 @@ EXAMPLE SPARQL:
|
|
309
302
|
case tgn_term_type
|
310
303
|
when '300128176' #continent
|
311
304
|
hier_geo[:continent] = tgn_term
|
312
|
-
when '300128207' #
|
305
|
+
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
313
306
|
hier_geo[:country] = tgn_term
|
314
307
|
when '300000774' #province
|
315
308
|
hier_geo[:province] = tgn_term
|
316
|
-
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
309
|
+
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
317
310
|
hier_geo[:region] = tgn_term
|
318
311
|
when '300000776', '300000772', '300235093' #state, department, governorate
|
319
312
|
hier_geo[:state] = tgn_term
|
@@ -325,27 +318,83 @@ EXAMPLE SPARQL:
|
|
325
318
|
end
|
326
319
|
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
327
320
|
hier_geo[:territory] = tgn_term
|
328
|
-
when '300000771' #county
|
321
|
+
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
329
322
|
hier_geo[:county] = tgn_term
|
330
|
-
when '300008347' #inhabited place
|
323
|
+
when '300008347', '300008389' #inhabited place, cities
|
331
324
|
hier_geo[:city] = tgn_term
|
332
|
-
when '300000745' #neighborhood
|
325
|
+
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
333
326
|
hier_geo[:city_section] = tgn_term
|
334
327
|
when '300008791', '300387062' #island
|
335
328
|
hier_geo[:island] = tgn_term
|
336
|
-
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
329
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
337
330
|
hier_geo[:area] = tgn_term
|
331
|
+
when '300386699' #Top level element of World
|
332
|
+
non_hier_geo[:value] = 'World'
|
333
|
+
non_hier_geo[:qualifier] = nil
|
338
334
|
else
|
339
|
-
|
335
|
+
aat_main_term_info = {}
|
336
|
+
label_remaining_check = false
|
337
|
+
|
338
|
+
aat_type_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/aat/#{tgn_term_type}.json"}, :timeout=>500)
|
339
|
+
JSON.parse(aat_type_response.body)['results']['bindings'].each do |ntriple|
|
340
|
+
case ntriple['Predicate']['value']
|
341
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
342
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
343
|
+
aat_main_term_info[:label_en] ||= ntriple['Object']['value']
|
344
|
+
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en-us'
|
345
|
+
aat_main_term_info[:label_en] ||= ntriple['Object']['value']
|
346
|
+
elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
|
347
|
+
aat_main_term_info[:label_other] ||= ntriple['Object']['value']
|
348
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
349
|
+
aat_main_term_info[:label_default] ||= ntriple['Object']['value']
|
350
|
+
else
|
351
|
+
label_remaining_check = true if aat_main_term_info[:label_remaining].present?
|
352
|
+
aat_main_term_info[:label_remaining] ||= ntriple['Object']['value']
|
353
|
+
end
|
354
|
+
when 'http://www.w3.org/2004/02/skos/core#altLabel'
|
355
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
356
|
+
aat_main_term_info[:label_alt] ||= ntriple['Object']['value']
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
end
|
361
|
+
#Default term to best label language...
|
362
|
+
aat_term = aat_main_term_info[:label_en]
|
363
|
+
aat_term ||= aat_main_term_info[:label_default]
|
364
|
+
aat_term ||= aat_main_term_info[:label_other]
|
365
|
+
aat_term ||= aat_main_term_info[:label_alt]
|
366
|
+
|
367
|
+
if aat_term.blank?
|
368
|
+
if label_remaining_check
|
369
|
+
raise "Could not determine a single aat non_hier_geo label for TGN: " + tgn_id
|
370
|
+
else
|
371
|
+
aat_term = aat_main_term_info[:label_remaining]
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
#Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water)
|
376
|
+
aat_term = aat_term.gsub(/ \(.+\)$/, '')
|
377
|
+
|
378
|
+
if (aat_term =~ /ies$/).present?
|
379
|
+
aat_term = aat_term.gsub(/ies$/, 'y')
|
380
|
+
elsif (aat_term =~ /es$/).present?
|
381
|
+
aat_term = aat_term.gsub(/es$/, '')
|
382
|
+
elsif (aat_term =~ /s$/).present?
|
383
|
+
aat_term = aat_term.gsub(/s$/, '')
|
384
|
+
end
|
385
|
+
|
386
|
+
#Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful
|
387
|
+
non_hier_geo[:value] = tgn_term
|
388
|
+
non_hier_geo[:qualifier] = tgn_term.downcase.include?(aat_term.downcase) ? nil : aat_term
|
340
389
|
end
|
341
390
|
|
342
391
|
#Broader places
|
343
392
|
#FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label.
|
393
|
+
if broader_place_type_list.present? #Case of World... top of hierarchy check
|
394
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?place_label_remaining ?aat_pref WHERE {"
|
344
395
|
|
345
|
-
|
346
|
-
|
347
|
-
broader_place_type_list.each do |place_uri|
|
348
|
-
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
396
|
+
broader_place_type_list.each do |place_uri|
|
397
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
349
398
|
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
350
399
|
FILTER langMatches( lang(?place_label_en), "en" )
|
351
400
|
}
|
@@ -364,66 +413,67 @@ EXAMPLE SPARQL:
|
|
364
413
|
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
365
414
|
} UNION
|
366
415
|
}
|
367
|
-
|
416
|
+
end
|
368
417
|
|
369
|
-
|
370
|
-
|
371
|
-
|
418
|
+
query = query[0..-12]
|
419
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?place_label_remaining ?aat_pref"
|
420
|
+
query = query.squish
|
372
421
|
|
373
|
-
|
374
|
-
|
422
|
+
tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
|
423
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
375
424
|
|
376
|
-
|
377
|
-
|
425
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
426
|
+
tgn_term_type = aat_response['aat_pref']['value'].split('/').last
|
378
427
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
428
|
+
if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
|
429
|
+
tgn_term = aat_response['place_label_en']['value']
|
430
|
+
elsif aat_response['place_label_default'].present? && aat_response['place_label_default']['value'] != '-'
|
431
|
+
tgn_term = aat_response['place_label_default']['value']
|
432
|
+
elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
|
433
|
+
tgn_term = aat_response['place_label_latn_pinyin']['value']
|
434
|
+
elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
|
435
|
+
tgn_term = aat_response['place_label_alt']['value']
|
436
|
+
else
|
437
|
+
tgn_term = aat_response['place_label_remaining']['value']
|
438
|
+
end
|
390
439
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
hier_geo[:state] = tgn_term
|
402
|
-
when '300387081' #national district
|
403
|
-
if tgn_term == 'District of Columbia'
|
440
|
+
case tgn_term_type
|
441
|
+
when '300128176' #continent
|
442
|
+
hier_geo[:continent] = tgn_term
|
443
|
+
when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
|
444
|
+
hier_geo[:country] = tgn_term
|
445
|
+
when '300000774' #province
|
446
|
+
hier_geo[:province] = tgn_term
|
447
|
+
when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
|
448
|
+
hier_geo[:region] = tgn_term
|
449
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
404
450
|
hier_geo[:state] = tgn_term
|
405
|
-
|
451
|
+
when '300387081' #national district
|
452
|
+
if tgn_term == 'District of Columbia'
|
453
|
+
hier_geo[:state] = tgn_term
|
454
|
+
else
|
455
|
+
hier_geo[:territory] = tgn_term
|
456
|
+
end
|
457
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
406
458
|
hier_geo[:territory] = tgn_term
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
419
|
-
hier_geo[:area] = tgn_term
|
459
|
+
when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
|
460
|
+
hier_geo[:county] = tgn_term
|
461
|
+
when '300008347', '300008389' #inhabited place, cities
|
462
|
+
hier_geo[:city] = tgn_term
|
463
|
+
when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
|
464
|
+
hier_geo[:city_section] = tgn_term
|
465
|
+
when '300008791', '300387062' #island
|
466
|
+
hier_geo[:island] = tgn_term
|
467
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
|
468
|
+
hier_geo[:area] = tgn_term
|
469
|
+
end
|
420
470
|
end
|
421
471
|
end
|
422
472
|
|
423
473
|
tgn_data = {}
|
424
474
|
tgn_data[:coords] = coords
|
425
475
|
tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
|
426
|
-
tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
|
476
|
+
tgn_data[:non_hier_geo] = non_hier_geo.present? ? non_hier_geo : nil
|
427
477
|
|
428
478
|
else
|
429
479
|
|
@@ -435,17 +485,19 @@ EXAMPLE SPARQL:
|
|
435
485
|
|
436
486
|
end
|
437
487
|
|
438
|
-
|
439
488
|
def self.tgn_id_from_geo_hash(geo_hash)
|
440
489
|
return nil if Geomash::TGN.tgn_enabled != true
|
441
490
|
|
442
491
|
geo_hash = geo_hash.clone
|
443
|
-
|
444
492
|
max_retry = 3
|
445
493
|
sleep_time = 60 # In seconds
|
446
494
|
retry_count = 0
|
447
495
|
|
448
496
|
return_hash = {}
|
497
|
+
country_response = {}
|
498
|
+
states_response = {}
|
499
|
+
cities_response = {}
|
500
|
+
neighboorhood_response = {}
|
449
501
|
|
450
502
|
state_part = geo_hash[:state_part]
|
451
503
|
|
@@ -454,41 +506,71 @@ EXAMPLE SPARQL:
|
|
454
506
|
|
455
507
|
|
456
508
|
country_part = Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]][:tgn_country_name] unless Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]].blank?
|
457
|
-
country_part
|
509
|
+
country_part = geo_hash[:country_part] if country_part.blank?
|
458
510
|
country_part ||= ''
|
459
511
|
|
460
512
|
city_part = geo_hash[:city_part]
|
461
513
|
|
462
514
|
neighborhood_part = geo_hash[:neighborhood_part]
|
463
515
|
|
516
|
+
web_request_error = false
|
517
|
+
begin
|
518
|
+
if retry_count > 0
|
519
|
+
sleep(sleep_time)
|
520
|
+
end
|
521
|
+
retry_count = retry_count + 1
|
464
522
|
|
523
|
+
#First we get country!!
|
465
524
|
|
466
|
-
if city_part.blank? && state_part.blank?
|
467
|
-
# Limit to nations
|
468
525
|
query = %{SELECT ?object_identifier
|
469
526
|
WHERE
|
470
527
|
{
|
471
528
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
472
529
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
473
|
-
?x <http://www.w3.org/
|
530
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
474
531
|
FILTER regex(?object_label, "^#{country_part}$", "i" )
|
475
|
-
}
|
476
|
-
|
477
|
-
|
478
|
-
|
532
|
+
}
|
533
|
+
GROUP BY ?object_identifier
|
534
|
+
}
|
535
|
+
country_response = self.tgn_sparql_request(query)
|
536
|
+
return nil if country_response[:id].blank? && !country_response[:errors]
|
537
|
+
return_hash[:id] = country_response[:id]
|
538
|
+
return_hash[:rdf] = country_response[:rdf]
|
539
|
+
return_hash[:parse_depth] = 1
|
540
|
+
web_request_error = true if country_response[:errors]
|
541
|
+
|
542
|
+
#United States state query
|
543
|
+
if state_part.present? && country_code == 7012149 && !web_request_error
|
544
|
+
query = %{SELECT ?object_identifier
|
479
545
|
WHERE
|
480
546
|
{
|
481
547
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
482
|
-
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>
|
548
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
549
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
483
550
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
484
551
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
485
552
|
|
486
553
|
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
|
487
|
-
}
|
488
|
-
|
489
|
-
|
554
|
+
}
|
555
|
+
GROUP BY ?object_identifier
|
556
|
+
}
|
490
557
|
|
491
|
-
|
558
|
+
states_response = self.tgn_sparql_request(query)
|
559
|
+
if states_response[:id].blank? && !states_response[:errors]
|
560
|
+
return_hash[:original_string_differs] = true
|
561
|
+
else
|
562
|
+
return_hash[:id] = states_response[:id]
|
563
|
+
return_hash[:rdf] = states_response[:rdf]
|
564
|
+
return_hash[:parse_depth] = 2
|
565
|
+
end
|
566
|
+
web_request_error = true if states_response[:errors]
|
567
|
+
end
|
568
|
+
|
569
|
+
#Non United States state query
|
570
|
+
#Note: Had to remove {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION as it returned two results
|
571
|
+
#for "15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions". Correct or not?
|
572
|
+
if state_part.present? && country_code != 7012149 && !web_request_error
|
573
|
+
query = %{SELECT ?object_identifier
|
492
574
|
WHERE
|
493
575
|
{
|
494
576
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
@@ -500,75 +582,76 @@ WHERE
|
|
500
582
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
501
583
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
502
584
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
503
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
504
585
|
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
505
586
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
506
587
|
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
507
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
508
|
-
{
|
509
|
-
SELECT ?parent_country ?identifier_country
|
510
|
-
WHERE {
|
511
|
-
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
512
|
-
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
513
|
-
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
514
|
-
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
515
|
-
}
|
516
|
-
|
517
|
-
}
|
588
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
518
589
|
}
|
519
590
|
GROUP BY ?object_identifier
|
520
591
|
}
|
521
592
|
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
593
|
+
states_response = self.tgn_sparql_request(query)
|
594
|
+
if states_response[:id].blank? && !states_response[:errors]
|
595
|
+
return_hash[:original_string_differs] = true
|
596
|
+
else
|
597
|
+
return_hash[:id] = states_response[:id]
|
598
|
+
return_hash[:rdf] = states_response[:rdf]
|
599
|
+
return_hash[:parse_depth] = 2
|
600
|
+
end
|
601
|
+
web_request_error = true if states_response[:errors]
|
602
|
+
end
|
603
|
+
|
604
|
+
if states_response[:id].present? && city_part.present? && !web_request_error
|
605
|
+
query = %{SELECT ?object_identifier
|
527
606
|
WHERE
|
528
607
|
{
|
529
608
|
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
530
609
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
531
610
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
532
611
|
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
533
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
534
|
-
{
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
WHERE {
|
548
|
-
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
549
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
550
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
551
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
552
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
553
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
554
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
555
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
556
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
557
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
558
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
559
|
-
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
560
|
-
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
561
|
-
}
|
612
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
613
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
614
|
+
}
|
615
|
+
GROUP BY ?object_identifier
|
616
|
+
}
|
617
|
+
cities_response = self.tgn_sparql_request(query)
|
618
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
619
|
+
return_hash[:original_string_differs] = true
|
620
|
+
else
|
621
|
+
return_hash[:id] = cities_response[:id]
|
622
|
+
return_hash[:rdf] = cities_response[:rdf]
|
623
|
+
return_hash[:parse_depth] = 3
|
624
|
+
end
|
625
|
+
web_request_error = true if cities_response[:errors]
|
562
626
|
|
563
|
-
|
627
|
+
end
|
564
628
|
|
629
|
+
#Case of Countries without a state breakdown... ie. Tokyo, Japan
|
630
|
+
if state_part.blank? && country_response[:id].present? && city_part.present? && !web_request_error
|
631
|
+
query = %{SELECT ?object_identifier
|
632
|
+
WHERE
|
633
|
+
{
|
634
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
635
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
636
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
637
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
638
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
565
639
|
}
|
566
640
|
GROUP BY ?object_identifier
|
567
641
|
}
|
642
|
+
cities_response = self.tgn_sparql_request(query)
|
643
|
+
if cities_response[:id].blank? && !cities_response[:errors]
|
644
|
+
return_hash[:original_string_differs] = true
|
645
|
+
else
|
646
|
+
return_hash[:id] = cities_response[:id]
|
647
|
+
return_hash[:rdf] = cities_response[:rdf]
|
648
|
+
return_hash[:parse_depth] = 3
|
649
|
+
end
|
650
|
+
web_request_error = true if cities_response[:errors]
|
568
651
|
|
652
|
+
end
|
569
653
|
|
570
|
-
|
571
|
-
#Limited to only to neighborhoods currently...
|
654
|
+
if cities_response[:id].present? && neighborhood_part.present? && !web_request_error
|
572
655
|
query = %{SELECT ?object_identifier
|
573
656
|
WHERE
|
574
657
|
{
|
@@ -576,97 +659,71 @@ WHERE
|
|
576
659
|
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
|
577
660
|
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
578
661
|
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
579
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended>
|
580
|
-
{
|
581
|
-
|
582
|
-
WHERE {
|
583
|
-
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
584
|
-
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
585
|
-
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
586
|
-
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
587
|
-
}
|
588
|
-
|
589
|
-
}
|
590
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
591
|
-
{
|
592
|
-
SELECT ?parent_state ?identifier_state
|
593
|
-
WHERE {
|
594
|
-
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
595
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
596
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
597
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
598
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
599
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
600
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
601
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
602
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
603
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
604
|
-
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
605
|
-
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
606
|
-
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
607
|
-
}
|
608
|
-
|
609
|
-
}
|
610
|
-
|
611
|
-
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
|
612
|
-
{
|
613
|
-
SELECT ?parent_city ?identifier_city
|
614
|
-
WHERE {
|
615
|
-
?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
|
616
|
-
?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
617
|
-
?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
|
618
|
-
FILTER regex(?city_label, "^#{city_part}$", "i" )
|
619
|
-
}
|
620
|
-
|
621
|
-
}
|
622
|
-
|
662
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
|
663
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
|
664
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{cities_response[:id]}> .
|
623
665
|
}
|
624
666
|
GROUP BY ?object_identifier
|
625
667
|
}
|
668
|
+
neighborhood_response = self.tgn_sparql_request(query)
|
669
|
+
if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
|
670
|
+
return_hash[:original_string_differs]=true
|
671
|
+
else
|
672
|
+
return_hash[:id] = neighborhood_response[:id]
|
673
|
+
return_hash[:rdf] = neighborhood_response[:rdf]
|
674
|
+
return_hash[:parse_depth] = 4
|
675
|
+
end
|
676
|
+
web_request_error = true if neighborhood_response[:errors]
|
677
|
+
end
|
626
678
|
|
679
|
+
end until (!web_request_error || retry_count == max_retry)
|
627
680
|
|
681
|
+
if return_hash.present? && !web_request_error
|
682
|
+
return_hash[:original_string_differs] ||= Geomash::Standardizer.parsed_and_original_check(geo_hash)
|
683
|
+
return return_hash
|
628
684
|
else
|
629
685
|
return nil
|
630
686
|
end
|
631
687
|
|
632
|
-
|
688
|
+
end
|
633
689
|
|
634
|
-
|
635
|
-
|
690
|
+
def self.tgn_sparql_request(query,method="GET")
|
691
|
+
response = {}
|
692
|
+
query = query.squish
|
693
|
+
if(method=="GET")
|
694
|
+
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
|
695
|
+
else
|
696
|
+
tgn_response = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
|
636
697
|
end
|
637
|
-
retry_count = retry_count + 1
|
638
|
-
|
639
|
-
query = query.squish
|
640
|
-
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
641
|
-
|
642
|
-
end until (tgn_response.code != 500 || retry_count == max_retry)
|
643
|
-
|
644
698
|
|
699
|
+
if tgn_response.success? && tgn_response.code == 200
|
700
|
+
begin
|
701
|
+
as_json = JSON.parse(tgn_response.body)
|
702
|
+
response[:json] = as_json
|
703
|
+
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
704
|
+
response[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
705
|
+
response[:rdf] = "http://vocab.getty.edu/tgn/#{response[:id]}.rdf"
|
706
|
+
end
|
707
|
+
response[:errors] = false
|
708
|
+
rescue JSON::ParserError
|
709
|
+
response[:json] = nil
|
710
|
+
response[:errors] = true
|
711
|
+
if tgn_response.cached? && Typhoeus::Config.cache.present?
|
712
|
+
cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
|
713
|
+
Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
|
714
|
+
end
|
645
715
|
|
646
|
-
|
647
|
-
unless tgn_response.code == 500
|
648
|
-
as_json = JSON.parse(tgn_response.body)
|
649
|
-
|
650
|
-
#This is ugly and needs to be redone to achieve better recursive...
|
651
|
-
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
652
|
-
return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
653
|
-
return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
|
716
|
+
end
|
654
717
|
else
|
655
|
-
|
718
|
+
if tgn_response.cached? && Typhoeus::Config.cache.present?
|
719
|
+
cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
|
720
|
+
Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
|
721
|
+
end
|
656
722
|
end
|
657
|
-
end
|
658
723
|
|
659
|
-
|
660
|
-
raise 'TGN Server appears to not be responding for Geographic query: ' + query
|
661
|
-
end
|
724
|
+
return response
|
662
725
|
|
663
|
-
if return_hash.present?
|
664
|
-
return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)
|
665
|
-
return return_hash
|
666
|
-
else
|
667
|
-
return nil
|
668
726
|
end
|
669
|
-
end
|
670
727
|
|
671
728
|
|
672
729
|
end
|