geomash 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/geomash/tgn.rb CHANGED
@@ -3,7 +3,8 @@ module Geomash
3
3
  class TGN
4
4
 
5
5
  def self.tgn_enabled
6
- Geomash.config[:tgn_enabled] || true
6
+ return Geomash.config[:tgn_enabled] unless Geomash.config[:tgn_enabled].nil?
7
+ return true
7
8
  end
8
9
 
9
10
  =begin
@@ -189,7 +190,7 @@ EXAMPLE SPARQL:
189
190
  tgn_main_term_info = {}
190
191
  broader_place_type_list = []
191
192
 
192
- primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
193
+ primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"}, :timeout=>500)
193
194
 
194
195
  return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
195
196
 
@@ -243,36 +244,32 @@ EXAMPLE SPARQL:
243
244
 
244
245
  query = query.squish
245
246
 
246
- primary_tgn_response = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query})
247
+ primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
247
248
  as_json_tgn_response = JSON.parse(primary_tgn_response.body)
248
249
  end
249
250
 
250
- #FIXME: Temporary hack to determine more cases of non-blank/english place name conflicts that require resolution.
251
- label_remaining_check = false
252
-
253
251
  as_json_tgn_response['results']['bindings'].each do |ntriple|
254
252
  case ntriple['Predicate']['value']
255
253
  when 'http://www.w3.org/2004/02/skos/core#prefLabel'
256
254
  if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
257
- tgn_main_term_info[:label_en] = ntriple['Object']['value']
255
+ tgn_main_term_info[:label_en] ||= ntriple['Object']['value']
258
256
  elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
259
- tgn_main_term_info[:label_other] = ntriple['Object']['value']
257
+ tgn_main_term_info[:label_other] ||= ntriple['Object']['value']
260
258
  elsif ntriple['Object']['xml:lang'].blank?
261
- tgn_main_term_info[:label_default] = ntriple['Object']['value']
259
+ tgn_main_term_info[:label_default] ||= ntriple['Object']['value']
262
260
  else
263
- label_remaining_check = true if tgn_main_term_info[:label_remaining].present?
264
- tgn_main_term_info[:label_remaining] = ntriple['Object']['value']
261
+ tgn_main_term_info[:label_remaining] ||= ntriple['Object']['value']
265
262
  end
266
263
  when 'http://www.w3.org/2004/02/skos/core#altLabel'
267
264
  if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
268
- tgn_main_term_info[:label_alt] = ntriple['Object']['value']
265
+ tgn_main_term_info[:label_alt] ||= ntriple['Object']['value']
269
266
  end
270
267
  when 'http://vocab.getty.edu/ontology#placeTypePreferred'
271
- tgn_main_term_info[:aat_place] = ntriple['Object']['value']
268
+ tgn_main_term_info[:aat_place] ||= ntriple['Object']['value']
272
269
  when 'http://schema.org/latitude'
273
- tgn_main_term_info[:latitude] = ntriple['Object']['value']
270
+ tgn_main_term_info[:latitude] ||= ntriple['Object']['value']
274
271
  when 'http://schema.org/longitude'
275
- tgn_main_term_info[:longitude] = ntriple['Object']['value']
272
+ tgn_main_term_info[:longitude] ||= ntriple['Object']['value']
276
273
  when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
277
274
  broader_place_type_list << ntriple['Object']['value']
278
275
  end
@@ -289,18 +286,14 @@ EXAMPLE SPARQL:
289
286
  end
290
287
 
291
288
  hier_geo = {}
289
+ non_hier_geo = {}
290
+
292
291
  #Default term to best label language...
293
292
  tgn_term = tgn_main_term_info[:label_en]
294
293
  tgn_term ||= tgn_main_term_info[:label_default]
295
294
  tgn_term ||= tgn_main_term_info[:label_other]
296
295
  tgn_term ||= tgn_main_term_info[:label_alt]
297
- if tgn_term.blank?
298
- if label_remaining_check
299
- raise "Could not determine a single label for TGN: " + tgn_id
300
- else
301
- tgn_term = tgn_main_term_info[:label_remaining]
302
- end
303
- end
296
+ tgn_term ||= tgn_main_term_info[:label_remaining]
304
297
 
305
298
  tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
306
299
 
@@ -309,11 +302,11 @@ EXAMPLE SPARQL:
309
302
  case tgn_term_type
310
303
  when '300128176' #continent
311
304
  hier_geo[:continent] = tgn_term
312
- when '300128207' #nations
305
+ when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
313
306
  hier_geo[:country] = tgn_term
314
307
  when '300000774' #province
315
308
  hier_geo[:province] = tgn_term
316
- when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
309
+ when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
317
310
  hier_geo[:region] = tgn_term
318
311
  when '300000776', '300000772', '300235093' #state, department, governorate
319
312
  hier_geo[:state] = tgn_term
@@ -325,27 +318,83 @@ EXAMPLE SPARQL:
325
318
  end
326
319
  when '300135982', '300387176', '300387122' #territory, dependent state, union territory
327
320
  hier_geo[:territory] = tgn_term
328
- when '300000771' #county
321
+ when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
329
322
  hier_geo[:county] = tgn_term
330
- when '300008347' #inhabited place
323
+ when '300008347', '300008389' #inhabited place, cities
331
324
  hier_geo[:city] = tgn_term
332
- when '300000745' #neighborhood
325
+ when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
333
326
  hier_geo[:city_section] = tgn_term
334
327
  when '300008791', '300387062' #island
335
328
  hier_geo[:island] = tgn_term
336
- when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
329
+ when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
337
330
  hier_geo[:area] = tgn_term
331
+ when '300386699' #Top level element of World
332
+ non_hier_geo[:value] = 'World'
333
+ non_hier_geo[:qualifier] = nil
338
334
  else
339
- non_hier_geo = tgn_term
335
+ aat_main_term_info = {}
336
+ label_remaining_check = false
337
+
338
+ aat_type_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/aat/#{tgn_term_type}.json"}, :timeout=>500)
339
+ JSON.parse(aat_type_response.body)['results']['bindings'].each do |ntriple|
340
+ case ntriple['Predicate']['value']
341
+ when 'http://www.w3.org/2004/02/skos/core#prefLabel'
342
+ if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
343
+ aat_main_term_info[:label_en] ||= ntriple['Object']['value']
344
+ elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en-us'
345
+ aat_main_term_info[:label_en] ||= ntriple['Object']['value']
346
+ elsif ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'zh-latn-pinyin'
347
+ aat_main_term_info[:label_other] ||= ntriple['Object']['value']
348
+ elsif ntriple['Object']['xml:lang'].blank?
349
+ aat_main_term_info[:label_default] ||= ntriple['Object']['value']
350
+ else
351
+ label_remaining_check = true if aat_main_term_info[:label_remaining].present?
352
+ aat_main_term_info[:label_remaining] ||= ntriple['Object']['value']
353
+ end
354
+ when 'http://www.w3.org/2004/02/skos/core#altLabel'
355
+ if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
356
+ aat_main_term_info[:label_alt] ||= ntriple['Object']['value']
357
+ end
358
+ end
359
+
360
+ end
361
+ #Default term to best label language...
362
+ aat_term = aat_main_term_info[:label_en]
363
+ aat_term ||= aat_main_term_info[:label_default]
364
+ aat_term ||= aat_main_term_info[:label_other]
365
+ aat_term ||= aat_main_term_info[:label_alt]
366
+
367
+ if aat_term.blank?
368
+ if label_remaining_check
369
+ raise "Could not determine a single aat non_hier_geo label for TGN: " + tgn_id
370
+ else
371
+ aat_term = aat_main_term_info[:label_remaining]
372
+ end
373
+ end
374
+
375
+ #Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water)
376
+ aat_term = aat_term.gsub(/ \(.+\)$/, '')
377
+
378
+ if (aat_term =~ /ies$/).present?
379
+ aat_term = aat_term.gsub(/ies$/, 'y')
380
+ elsif (aat_term =~ /es$/).present?
381
+ aat_term = aat_term.gsub(/es$/, '')
382
+ elsif (aat_term =~ /s$/).present?
383
+ aat_term = aat_term.gsub(/s$/, '')
384
+ end
385
+
386
+ #Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful
387
+ non_hier_geo[:value] = tgn_term
388
+ non_hier_geo[:qualifier] = tgn_term.downcase.include?(aat_term.downcase) ? nil : aat_term
340
389
  end
341
390
 
342
391
  #Broader places
343
392
  #FIXME: could parse xml:lang instead of the three optional clauses now... didn't expect places to lack a default preferred label.
393
+ if broader_place_type_list.present? #Case of World... top of hierachy check
394
+ query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?place_label_remaining ?aat_pref WHERE {"
344
395
 
345
- query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?place_label_remaining ?aat_pref WHERE {"
346
-
347
- broader_place_type_list.each do |place_uri|
348
- query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
396
+ broader_place_type_list.each do |place_uri|
397
+ query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
349
398
  OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
350
399
  FILTER langMatches( lang(?place_label_en), "en" )
351
400
  }
@@ -364,66 +413,67 @@ EXAMPLE SPARQL:
364
413
  <#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
365
414
  } UNION
366
415
  }
367
- end
416
+ end
368
417
 
369
- query = query[0..-12]
370
- query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?place_label_remaining ?aat_pref"
371
- query = query.squish
418
+ query = query[0..-12]
419
+ query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?place_label_latn_pinyin ?place_label_alt ?place_label_remaining ?aat_pref"
420
+ query = query.squish
372
421
 
373
- tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query})
374
- as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
422
+ tgn_response_for_aat = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :body=>{:query=>query}, :timeout=>500)
423
+ as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
375
424
 
376
- as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
377
- tgn_term_type = aat_response['aat_pref']['value'].split('/').last
425
+ as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
426
+ tgn_term_type = aat_response['aat_pref']['value'].split('/').last
378
427
 
379
- if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
380
- tgn_term = aat_response['place_label_en']['value']
381
- elsif aat_response['place_label_default'].present? && aat_response['place_label_default']['value'] != '-'
382
- tgn_term = aat_response['place_label_default']['value']
383
- elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
384
- tgn_term = aat_response['place_label_latn_pinyin']['value']
385
- elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
386
- tgn_term = aat_response['place_label_alt']['value']
387
- else
388
- tgn_term = aat_response['place_label_remaining']['value']
389
- end
428
+ if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
429
+ tgn_term = aat_response['place_label_en']['value']
430
+ elsif aat_response['place_label_default'].present? && aat_response['place_label_default']['value'] != '-'
431
+ tgn_term = aat_response['place_label_default']['value']
432
+ elsif aat_response['place_label_latn_pinyin'].present? && aat_response['place_label_latn_pinyin']['value'] != '-'
433
+ tgn_term = aat_response['place_label_latn_pinyin']['value']
434
+ elsif aat_response['place_label_alt'].present? && aat_response['place_label_alt']['value'] != '-'
435
+ tgn_term = aat_response['place_label_alt']['value']
436
+ else
437
+ tgn_term = aat_response['place_label_remaining']['value']
438
+ end
390
439
 
391
- case tgn_term_type
392
- when '300128176' #continent
393
- hier_geo[:continent] = tgn_term
394
- when '300128207' #nation
395
- hier_geo[:country] = tgn_term
396
- when '300000774' #province
397
- hier_geo[:province] = tgn_term
398
- when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
399
- hier_geo[:region] = tgn_term
400
- when '300000776', '300000772', '300235093' #state, department, governorate
401
- hier_geo[:state] = tgn_term
402
- when '300387081' #national district
403
- if tgn_term == 'District of Columbia'
440
+ case tgn_term_type
441
+ when '300128176' #continent
442
+ hier_geo[:continent] = tgn_term
443
+ when '300128207', '300387130', '300387506' #nation, autonomous areas, countries
444
+ hier_geo[:country] = tgn_term
445
+ when '300000774' #province
446
+ hier_geo[:province] = tgn_term
447
+ when '300236112', '300182722', '300387194', '300387052', '300387113', '300387107' #region, union, semi-independent political entity, autonomous communities, autonomous regions
448
+ hier_geo[:region] = tgn_term
449
+ when '300000776', '300000772', '300235093' #state, department, governorate
404
450
  hier_geo[:state] = tgn_term
405
- else
451
+ when '300387081' #national district
452
+ if tgn_term == 'District of Columbia'
453
+ hier_geo[:state] = tgn_term
454
+ else
455
+ hier_geo[:territory] = tgn_term
456
+ end
457
+ when '300135982', '300387176', '300387122' #territory, dependent state, union territory
406
458
  hier_geo[:territory] = tgn_term
407
- end
408
- when '300135982', '300387176', '300387122' #territory, dependent state, union territory
409
- hier_geo[:territory] = tgn_term
410
- when '300000771' #county
411
- hier_geo[:county] = tgn_term
412
- when '300008347' #inhabited place
413
- hier_geo[:city] = tgn_term
414
- when '300000745' #neighborhood
415
- hier_geo[:city_section] = tgn_term
416
- when '300008791', '300387062' #island
417
- hier_geo[:island] = tgn_term
418
- when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
419
- hier_geo[:area] = tgn_term
459
+ when '300000771', '300387092', '300387071' #county, parishes, unitary authorities
460
+ hier_geo[:county] = tgn_term
461
+ when '300008347', '300008389' #inhabited place, cities
462
+ hier_geo[:city] = tgn_term
463
+ when '300000745', '300000778', '300387331' #neighborhood, parishes, parts of inhabited places
464
+ hier_geo[:city_section] = tgn_term
465
+ when '300008791', '300387062' #island
466
+ hier_geo[:island] = tgn_term
467
+ when '300387575', '300387346', '300167671', '300387178', '300387082', '300387173', '300055621', '300386853', '300386831', '300386832', '300008178', '300008804', '300387131', '300132348', '300387085', '300387198', '300008761' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division', administrative divisions, area (measurement), island groups, mountain ranges, mountain systems, nature reserves, peninsulas, regional divisions, sand bars, senatorial districts (administrative districts), third level subdivisions (political entities), valleys (landforms)
468
+ hier_geo[:area] = tgn_term
469
+ end
420
470
  end
421
471
  end
422
472
 
423
473
  tgn_data = {}
424
474
  tgn_data[:coords] = coords
425
475
  tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
426
- tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
476
+ tgn_data[:non_hier_geo] = non_hier_geo.present? ? non_hier_geo : nil
427
477
 
428
478
  else
429
479
 
@@ -435,17 +485,19 @@ EXAMPLE SPARQL:
435
485
 
436
486
  end
437
487
 
438
-
439
488
  def self.tgn_id_from_geo_hash(geo_hash)
440
489
  return nil if Geomash::TGN.tgn_enabled != true
441
490
 
442
491
  geo_hash = geo_hash.clone
443
-
444
492
  max_retry = 3
445
493
  sleep_time = 60 # In seconds
446
494
  retry_count = 0
447
495
 
448
496
  return_hash = {}
497
+ country_response = {}
498
+ states_response = {}
499
+ cities_response = {}
500
+ neighboorhood_response = {}
449
501
 
450
502
  state_part = geo_hash[:state_part]
451
503
 
@@ -454,41 +506,71 @@ EXAMPLE SPARQL:
454
506
 
455
507
 
456
508
  country_part = Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]][:tgn_country_name] unless Geomash::Constants::COUNTRY_TGN_LOOKUP[geo_hash[:country_part]].blank?
457
- country_part ||= geo_hash[:country_part]
509
+ country_part = geo_hash[:country_part] if country_part.blank?
458
510
  country_part ||= ''
459
511
 
460
512
  city_part = geo_hash[:city_part]
461
513
 
462
514
  neighborhood_part = geo_hash[:neighborhood_part]
463
515
 
516
+ web_request_error = false
517
+ begin
518
+ if retry_count > 0
519
+ sleep(sleep_time)
520
+ end
521
+ retry_count = retry_count + 1
464
522
 
523
+ #First we get county!!
465
524
 
466
- if city_part.blank? && state_part.blank?
467
- # Limit to nations
468
525
  query = %{SELECT ?object_identifier
469
526
  WHERE
470
527
  {
471
528
  ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
472
529
  ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
473
- ?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
530
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
474
531
  FILTER regex(?object_label, "^#{country_part}$", "i" )
475
- }}
476
- elsif state_part.present? && city_part.blank? && country_code == 7012149
477
- #Limit to states
478
- query = %{SELECT ?object_identifier
532
+ }
533
+ GROUP BY ?object_identifier
534
+ }
535
+ country_response = self.tgn_sparql_request(query)
536
+ return nil if country_response[:id].blank? && !country_response[:errors]
537
+ return_hash[:id] = country_response[:id]
538
+ return_hash[:rdf] = country_response[:rdf]
539
+ return_hash[:parse_depth] = 1
540
+ web_request_error = true if country_response[:errors]
541
+
542
+ #United State state query
543
+ if state_part.present? && country_code == 7012149 && !web_request_error
544
+ query = %{SELECT ?object_identifier
479
545
  WHERE
480
546
  {
481
547
  ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
482
- ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
548
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
549
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
483
550
  ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
484
551
  FILTER regex(?object_label, "^#{state_part}$", "i" )
485
552
 
486
553
  ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
487
- }}
488
- elsif state_part.present? && city_part.blank?
489
- #Limit to regions
554
+ }
555
+ GROUP BY ?object_identifier
556
+ }
490
557
 
491
- query = %{SELECT ?object_identifier
558
+ states_response = self.tgn_sparql_request(query)
559
+ if states_response[:id].blank? && !states_response[:errors]
560
+ return_hash[:original_string_differs] = true
561
+ else
562
+ return_hash[:id] = states_response[:id]
563
+ return_hash[:rdf] = states_response[:rdf]
564
+ return_hash[:parse_depth] = 2
565
+ end
566
+ web_request_error = true if states_response[:errors]
567
+ end
568
+
569
+ #Non United States state query
570
+ #Note: Had to remove {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION as it returned two results
571
+ #for "15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions". Correct or not?
572
+ if state_part.present? && country_code != 7012149 && !web_request_error
573
+ query = %{SELECT ?object_identifier
492
574
  WHERE
493
575
  {
494
576
  ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
@@ -500,75 +582,76 @@ WHERE
500
582
  {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
501
583
  {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
502
584
  {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
503
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
504
585
  {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
505
586
  ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
506
587
  FILTER regex(?object_label, "^#{state_part}$", "i" )
507
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
508
- {
509
- SELECT ?parent_country ?identifier_country
510
- WHERE {
511
- ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
512
- ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
513
- ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
514
- FILTER regex(?country_label, "^#{country_part}$", "i" )
515
- }
516
-
517
- }
588
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
518
589
  }
519
590
  GROUP BY ?object_identifier
520
591
  }
521
592
 
522
- #FIXME Temporary: For Geomash.parse('Aknīste (Latvia)', true), seems to be a neighborhood placed in state
523
- # {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
524
- elsif state_part.present? && city_part.present? && neighborhood_part.blank?
525
- #Limited to only inhabited places at the moment...
526
- query = %{SELECT ?object_identifier
593
+ states_response = self.tgn_sparql_request(query)
594
+ if states_response[:id].blank? && !states_response[:errors]
595
+ return_hash[:original_string_differs] = true
596
+ else
597
+ return_hash[:id] = states_response[:id]
598
+ return_hash[:rdf] = states_response[:rdf]
599
+ return_hash[:parse_depth] = 2
600
+ end
601
+ web_request_error = true if states_response[:errors]
602
+ end
603
+
604
+ if states_response[:id].present? && city_part.present? && !web_request_error
605
+ query = %{SELECT ?object_identifier
527
606
  WHERE
528
607
  {
529
608
  ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
530
609
  ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
531
610
  ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
532
611
  FILTER regex(?object_label, "^#{city_part}$", "i" )
533
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
534
- {
535
- SELECT ?parent_country ?identifier_country
536
- WHERE {
537
- ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
538
- ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
539
- ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
540
- FILTER regex(?country_label, "^#{country_part}$", "i" )
541
- }
542
-
543
- }
544
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
545
- {
546
- SELECT ?parent_state ?identifier_state
547
- WHERE {
548
- ?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
549
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
550
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
551
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
552
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
553
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
554
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
555
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
556
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
557
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
558
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
559
- ?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
560
- FILTER regex(?state_label, "^#{state_part}$", "i" )
561
- }
612
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
613
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
614
+ }
615
+ GROUP BY ?object_identifier
616
+ }
617
+ cities_response = self.tgn_sparql_request(query)
618
+ if cities_response[:id].blank? && !cities_response[:errors]
619
+ return_hash[:original_string_differs] = true
620
+ else
621
+ return_hash[:id] = cities_response[:id]
622
+ return_hash[:rdf] = cities_response[:rdf]
623
+ return_hash[:parse_depth] = 3
624
+ end
625
+ web_request_error = true if cities_response[:errors]
562
626
 
563
- }
627
+ end
564
628
 
629
+ #Case of Countries without a state breakdown... ie. Tokyo, Japan
630
+ if state_part.blank? && country_response[:id].present? && city_part.present? && !web_request_error
631
+ query = %{SELECT ?object_identifier
632
+ WHERE
633
+ {
634
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
635
+ ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
636
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
637
+ FILTER regex(?object_label, "^#{city_part}$", "i" )
638
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
565
639
  }
566
640
  GROUP BY ?object_identifier
567
641
  }
642
+ cities_response = self.tgn_sparql_request(query)
643
+ if cities_response[:id].blank? && !cities_response[:errors]
644
+ return_hash[:original_string_differs] = true
645
+ else
646
+ return_hash[:id] = cities_response[:id]
647
+ return_hash[:rdf] = cities_response[:rdf]
648
+ return_hash[:parse_depth] = 3
649
+ end
650
+ web_request_error = true if cities_response[:errors]
568
651
 
652
+ end
569
653
 
570
- elsif state_part.present? && city_part.present? && neighborhood_part.present?
571
- #Limited to only to neighborhoods currently...
654
+ if cities_response[:id].present? && neighborhood_part.present? && !web_request_error
572
655
  query = %{SELECT ?object_identifier
573
656
  WHERE
574
657
  {
@@ -576,97 +659,71 @@ WHERE
576
659
  ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
577
660
  ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
578
661
  FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
579
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
580
- {
581
- SELECT ?parent_country ?identifier_country
582
- WHERE {
583
- ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
584
- ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
585
- ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
586
- FILTER regex(?country_label, "^#{country_part}$", "i" )
587
- }
588
-
589
- }
590
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
591
- {
592
- SELECT ?parent_state ?identifier_state
593
- WHERE {
594
- ?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
595
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
596
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
597
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
598
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
599
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
600
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
601
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
602
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
603
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
604
- {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
605
- ?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
606
- FILTER regex(?state_label, "^#{state_part}$", "i" )
607
- }
608
-
609
- }
610
-
611
- ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
612
- {
613
- SELECT ?parent_city ?identifier_city
614
- WHERE {
615
- ?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
616
- ?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
617
- ?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
618
- FILTER regex(?city_label, "^#{city_part}$", "i" )
619
- }
620
-
621
- }
622
-
662
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{country_response[:id]}> .
663
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{states_response[:id]}> .
664
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/#{cities_response[:id]}> .
623
665
  }
624
666
  GROUP BY ?object_identifier
625
667
  }
668
+ neighborhood_response = self.tgn_sparql_request(query)
669
+ if neighborhood_response[:id].blank? && !neighborhood_response[:errors]
670
+ return_hash[:original_string_differs]=true
671
+ else
672
+ return_hash[:id] = neighborhood_response[:id]
673
+ return_hash[:rdf] = neighborhood_response[:rdf]
674
+ return_hash[:parse_depth] = 4
675
+ end
676
+ web_request_error = true if neighborhood_response[:errors]
677
+ end
626
678
 
679
+ end until (!web_request_error || retry_count == max_retry)
627
680
 
681
+ if return_hash.present? && !web_request_error
682
+ return_hash[:original_string_differs] ||= Geomash::Standardizer.parsed_and_original_check(geo_hash)
683
+ return return_hash
628
684
  else
629
685
  return nil
630
686
  end
631
687
 
632
- begin
688
+ end
633
689
 
634
- if retry_count > 0
635
- sleep(sleep_time)
690
+ def self.tgn_sparql_request(query,method="GET")
691
+ response = {}
692
+ query = query.squish
693
+ if(method=="GET")
694
+ tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
695
+ else
696
+ tgn_response = Typhoeus::Request.post("http://vocab.getty.edu/sparql.json", :params=>{:query=>query}, :timeout=>500)
636
697
  end
637
- retry_count = retry_count + 1
638
-
639
- query = query.squish
640
- tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
641
-
642
- end until (tgn_response.code != 500 || retry_count == max_retry)
643
-
644
698
 
699
+ if tgn_response.success? && tgn_response.code == 200
700
+ begin
701
+ as_json = JSON.parse(tgn_response.body)
702
+ response[:json] = as_json
703
+ if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
704
+ response[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
705
+ response[:rdf] = "http://vocab.getty.edu/tgn/#{response[:id]}.rdf"
706
+ end
707
+ response[:errors] = false
708
+ rescue JSON::ParserError
709
+ response[:json] = nil
710
+ response[:errors] = true
711
+ if tgn_response.cached? && Typhoeus::Config.cache.present?
712
+ cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
713
+ Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
714
+ end
645
715
 
646
-
647
- unless tgn_response.code == 500
648
- as_json = JSON.parse(tgn_response.body)
649
-
650
- #This is ugly and needs to be redone to achieve better recursive...
651
- if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
652
- return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
653
- return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
716
+ end
654
717
  else
655
- return nil
718
+ if tgn_response.cached? && Typhoeus::Config.cache.present?
719
+ cache_key = Typhoeus::Request.new("http://vocab.getty.edu/sparql.json", params: {query: query}).cache_key
720
+ Typhoeus::Config.cache.delete(cache_key) #Need to define a delete method like: def delete(request) Rails.cache.delete(request) end
721
+ end
656
722
  end
657
- end
658
723
 
659
- if tgn_response.code == 500
660
- raise 'TGN Server appears to not be responding for Geographic query: ' + query
661
- end
724
+ return response
662
725
 
663
- if return_hash.present?
664
- return_hash[:original_string_differs] = Geomash::Standardizer.parsed_and_original_check(geo_hash)
665
- return return_hash
666
- else
667
- return nil
668
726
  end
669
- end
670
727
 
671
728
 
672
729
  end