bplgeo 0.0.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bplgeo.rb +26 -1
- data/lib/bplgeo/geonames.rb +31 -32
- data/lib/bplgeo/parser.rb +5 -5
- data/lib/bplgeo/standardizer.rb +37 -3
- data/lib/bplgeo/tgn.rb +461 -193
- data/lib/bplgeo/version.rb +1 -1
- data/test/bplgeo_test.rb +64 -21
- data/test/dummy/config/bplgeo.yml +12 -15
- data/test/dummy/config/bplgeo.yml.sample +6 -6
- data/test/dummy/log/development.log +23 -27
- data/test/dummy/log/test.log +608 -0
- data/test/tgn_test.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13b58c2286e536e0c23e08c3a8cfed3c586e18f2
|
4
|
+
data.tar.gz: 060706f023e34218322856b763c0a7e644b3ac2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 750cd801123b74916afb4626e0d948efb94940e9ee67fe5bfe7edc9609a9a279fad53b73a05ffc3ff7495416fb6e8de99a6ce835692746e45fa49f5f6c1ab40a
|
7
|
+
data.tar.gz: 6b55b9b1b838e2adedbde4f0c029d1e6c31af4b7685d4187344fdac87b420323d5c6831cf911fca60263dcc27893ab7a57406a30bc6da1dc75d02b1b83c4622c
|
data/lib/bplgeo.rb
CHANGED
@@ -25,8 +25,33 @@ module Bplgeo
|
|
25
25
|
return_hash = Bplgeo::Parser.parse_google_api(term, parse_term)
|
26
26
|
end
|
27
27
|
|
28
|
-
if return_hash.present?
|
28
|
+
if return_hash[:country_part].present?
|
29
|
+
#FIXME
|
29
30
|
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(return_hash)
|
31
|
+
|
32
|
+
if return_hash[:tgn].blank?
|
33
|
+
geo_hash_temp = Bplgeo::Standardizer.try_with_entered_names(return_hash)
|
34
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp) if geo_hash_temp.present?
|
35
|
+
|
36
|
+
if return_hash[:tgn].blank? && return_hash[:neighborhood_part].present?
|
37
|
+
|
38
|
+
geo_hash_temp = return_hash.clone
|
39
|
+
geo_hash_temp[:neighborhood_part] = nil
|
40
|
+
geo_hash_temp[:original_string_differs] = true
|
41
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
42
|
+
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
43
|
+
elsif return_hash[:city_part].present? && return_hash[:tgn].blank?
|
44
|
+
|
45
|
+
geo_hash_temp = return_hash.clone
|
46
|
+
geo_hash_temp[:city_part] = nil
|
47
|
+
geo_hash_temp[:original_string_differs] = true
|
48
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
49
|
+
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
30
55
|
return_hash[:geonames] = Bplgeo::Geonames.geonames_id_from_geo_hash(return_hash)
|
31
56
|
end
|
32
57
|
|
data/lib/bplgeo/geonames.rb
CHANGED
@@ -69,15 +69,19 @@ module Bplgeo
|
|
69
69
|
#Don't do both neighborhood and city!
|
70
70
|
if geo_hash[:neighborhood_part].present?
|
71
71
|
geonames_search_array << geo_hash[:neighborhood_part]
|
72
|
+
exact_name_term = geo_hash[:neighborhood_part]
|
72
73
|
elsif geo_hash[:city_part].present?
|
73
74
|
geonames_search_array << geo_hash[:city_part]
|
75
|
+
exact_name_term = geo_hash[:neighborhood_part]
|
74
76
|
end
|
75
77
|
|
76
78
|
geonames_search_array << geo_hash[:state_part] if geo_hash[:state_part].present?
|
79
|
+
exact_name_term ||= geo_hash[:neighborhood_part]
|
77
80
|
geonames_search_array << geo_hash[:country_part] if geo_hash[:country_part].present?
|
81
|
+
exact_name_term ||= geo_hash[:country_part]
|
78
82
|
geonames_search_string = geonames_search_array.join(', ')
|
79
83
|
|
80
|
-
|
84
|
+
exact_name_term = geonames_search_array.first.strip
|
81
85
|
|
82
86
|
begin
|
83
87
|
if retry_count > 0
|
@@ -85,7 +89,7 @@ module Bplgeo
|
|
85
89
|
end
|
86
90
|
retry_count = retry_count + 1
|
87
91
|
|
88
|
-
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q
|
92
|
+
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{Country.find_country_by_name(geo_hash[:country_part]).alpha2}")
|
89
93
|
|
90
94
|
end until (geonames_response.code != 500 || retry_count == max_retry)
|
91
95
|
|
@@ -93,43 +97,38 @@ module Bplgeo
|
|
93
97
|
|
94
98
|
parsed_xml = Nokogiri::Slop(geonames_response.body)
|
95
99
|
|
100
|
+
begin
|
101
|
+
raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.geonames.status
|
102
|
+
rescue
|
103
|
+
#Do nothing but FIXME to not use slop
|
104
|
+
end
|
105
|
+
|
96
106
|
#This is ugly and needs to be redone to achieve better recursive...
|
97
107
|
if parsed_xml.geonames.totalResultsCount.text == '0'
|
98
|
-
if neighborhood_part.present?
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
108
|
+
if geo_hash[:neighborhood_part].present?
|
109
|
+
geo_hash_temp = geo_hash.clone
|
110
|
+
geo_hash_temp[:neighborhood_part] = nil
|
111
|
+
return_hash = geonames_id_from_geo_hash(geo_hash_temp)
|
112
|
+
return return_hash if return_hash.present?
|
113
|
+
elsif geo_hash[:city_part].present?
|
114
|
+
geo_hash_temp = geo_hash.clone
|
115
|
+
geo_hash_temp[:city_part] = nil
|
116
|
+
return_hash = geonames_id_from_geo_hash(geo_hash_temp)
|
117
|
+
return return_hash if return_hash.present?
|
104
118
|
end
|
105
119
|
|
106
|
-
return
|
120
|
+
return nil
|
107
121
|
end
|
108
122
|
|
109
|
-
#Exact Match
|
110
|
-
parsed_xml.geonames.geoname.
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
117
|
-
break
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
if return_hash.blank?
|
122
|
-
#Starts With
|
123
|
-
parsed_xml.geonames.geoname.each do |geoname|
|
124
|
-
|
125
|
-
current_term = geoname.toponymName.text.to_ascii.downcase.strip
|
126
|
-
|
127
|
-
if current_term.starts_with?(match_term) && return_hash.blank?
|
128
|
-
return_hash[:id] = geoname.geonameId.text
|
129
|
-
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
130
|
-
end
|
131
|
-
end
|
123
|
+
#Exact Match ... FIXME to not use Slop
|
124
|
+
if parsed_xml.geonames.geoname.class == Nokogiri::XML::Element
|
125
|
+
return_hash[:id] = parsed_xml.geonames.geoname.geonameId.text
|
126
|
+
return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
|
127
|
+
elsif parsed_xml.geonames.geoname.class ==Nokogiri::XML::NodeSet
|
128
|
+
return_hash[:id] = parsed_xml.geonames.geoname.first.geonameId.text
|
129
|
+
return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
|
132
130
|
end
|
131
|
+
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
133
132
|
|
134
133
|
end
|
135
134
|
|
data/lib/bplgeo/parser.rb
CHANGED
@@ -96,7 +96,7 @@ module Bplgeo
|
|
96
96
|
retry_count = 3
|
97
97
|
|
98
98
|
#Skip if no bing_key... possibly move this elsewhere?
|
99
|
-
return return_hash if self.
|
99
|
+
return return_hash if self.mapquest_key == '<mapquest_key>'
|
100
100
|
|
101
101
|
return_hash[:original_term] = term
|
102
102
|
|
@@ -199,10 +199,10 @@ module Bplgeo
|
|
199
199
|
if google_api_result.present?
|
200
200
|
#Types: street number, route, neighborhood, establishment, transit_station, bus_station
|
201
201
|
google_api_result.first.data["address_components"].each do |result|
|
202
|
-
if (result['types'] & ['street number', 'route', '
|
202
|
+
if (result['types'] & ['street number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (result['types'].include?('neighborhood') && !result['types'].include?('political'))
|
203
203
|
#return_hash[:term_differs_from_tgn] = true
|
204
204
|
#TODO: Not implemented for Google results right now.
|
205
|
-
return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
205
|
+
#return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
206
206
|
return_hash[:coords] = {:latitude=>google_api_result.first.data['geometry']['location']['lat'].to_s,
|
207
207
|
:longitude=>google_api_result.first.data['geometry']['location']['lng'].to_s,
|
208
208
|
:combined=>google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s}
|
@@ -212,8 +212,8 @@ module Bplgeo
|
|
212
212
|
return_hash[:state_part] = result['long_name'].to_ascii
|
213
213
|
elsif (result['types'] & ['locality']).present?
|
214
214
|
return_hash[:city_part] = result['long_name']
|
215
|
-
elsif (result['types'] & ['sublocality', 'political']).length == 2
|
216
|
-
|
215
|
+
elsif (result['types'] & ['sublocality', 'political']).length == 2 || result['types'].include?('neighborhood')
|
216
|
+
return_hash[:neighborhood_part] = result['long_name']
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
data/lib/bplgeo/standardizer.rb
CHANGED
@@ -12,6 +12,7 @@ module Bplgeo
|
|
12
12
|
|
13
13
|
state_abbr_list = ['Mass']
|
14
14
|
state_name_list = []
|
15
|
+
country_name_list = []
|
15
16
|
|
16
17
|
#Countries gem of https://github.com/hexorx/countries
|
17
18
|
Country.new('US').states.each do |state_abbr, state_names|
|
@@ -19,10 +20,14 @@ module Bplgeo
|
|
19
20
|
state_name_list << state_names["name"]
|
20
21
|
end
|
21
22
|
|
23
|
+
Country.all.each do |country_name_abbr_pair|
|
24
|
+
country_name_list << country_name_abbr_pair.first
|
25
|
+
end
|
26
|
+
|
22
27
|
#Parsing a subject geographic term.
|
23
28
|
if term.include?('--')
|
24
29
|
term.split('--').each_with_index do |split_term, index|
|
25
|
-
if state_name_list.any? { |state| split_term.include? state }
|
30
|
+
if state_name_list.any? { |state| split_term.include? state } || country_name_list.any? { |country| split_term.include? country }
|
26
31
|
geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
|
27
32
|
elsif state_abbr_list.any? { |abbr| split_term.include? abbr }
|
28
33
|
geo_term = split_term
|
@@ -32,13 +37,13 @@ module Bplgeo
|
|
32
37
|
#Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
|
33
38
|
elsif term.include?(' - ')
|
34
39
|
term.split(' - ').each do |split_term|
|
35
|
-
if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr }
|
40
|
+
if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr } || country_name_list.any? { |country| split_term.include? country }
|
36
41
|
geo_term = split_term
|
37
42
|
end
|
38
43
|
|
39
44
|
end
|
40
45
|
else
|
41
|
-
if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr }
|
46
|
+
if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr } || country_name_list.any? { |country| term.include? country }
|
42
47
|
geo_term = term
|
43
48
|
end
|
44
49
|
end
|
@@ -57,6 +62,9 @@ module Bplgeo
|
|
57
62
|
#Strip any leading periods or commas from junk terms
|
58
63
|
geo_term = geo_term.gsub(/^[\.,]+/, '').strip
|
59
64
|
|
65
|
+
#Replace any four TGN dashes from removing a junk term
|
66
|
+
geo_term = geo_term.gsub('----', '--')
|
67
|
+
|
60
68
|
#Replace any semicolons with commas... possible strip them?
|
61
69
|
geo_term = geo_term.gsub(';', ',')
|
62
70
|
|
@@ -211,5 +219,31 @@ module Bplgeo
|
|
211
219
|
def self.utf8Encode(value)
|
212
220
|
return HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' '))).strip
|
213
221
|
end
|
222
|
+
|
223
|
+
|
224
|
+
def self.try_with_entered_names(geo_hash)
|
225
|
+
geo_hash_local = geo_hash.clone
|
226
|
+
if geo_hash_local[:neighborhood_part].present?
|
227
|
+
orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:neighborhood_part].downcase.to_ascii}
|
228
|
+
geo_hash_local[:neighborhood_part] = orig_string_check.first.strip if orig_string_check.present? && orig_string_check != geo_hash_local[:neighborhood_part]
|
229
|
+
return geo_hash_local
|
230
|
+
end
|
231
|
+
|
232
|
+
if geo_hash_local[:city_part].present?
|
233
|
+
orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:city_part].downcase.to_ascii}
|
234
|
+
geo_hash_local[:city_part] = orig_string_check.first.strip if orig_string_check.present?
|
235
|
+
return geo_hash_local
|
236
|
+
end
|
237
|
+
|
238
|
+
|
239
|
+
if geo_hash_local[:state_part].present?
|
240
|
+
orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:state_part].downcase.to_ascii}
|
241
|
+
geo_hash_local[:state_part] = orig_string_check.first.strip if orig_string_check.present?
|
242
|
+
return geo_hash_local
|
243
|
+
end
|
244
|
+
|
245
|
+
return nil
|
246
|
+
end
|
247
|
+
|
214
248
|
end
|
215
249
|
end
|
data/lib/bplgeo/tgn.rb
CHANGED
@@ -7,122 +7,326 @@ module Bplgeo
|
|
7
7
|
@bplgeo_config ||= YAML::load(ERB.new(IO.read(File.join(root, 'config', 'bplgeo.yml'))).result)[env].with_indifferent_access
|
8
8
|
end
|
9
9
|
|
10
|
-
def self.
|
11
|
-
bplgeo_config[:
|
10
|
+
def self.tgn_enabled
|
11
|
+
bplgeo_config[:tgn_enabled] || true
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
=begin
|
15
|
+
81010/nation
|
16
|
+
81175/state
|
17
|
+
81165/region
|
18
|
+
84251/neighborhood
|
19
|
+
83002/inhabited place
|
20
|
+
|
21
|
+
nations
|
22
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207>
|
23
|
+
|
24
|
+
States (political divisions):
|
25
|
+
<http://vocab.getty.edu/tgn/7007517> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
|
26
|
+
|
27
|
+
Counties: (Suffolk - http://vocab.getty.edu/aat/300000771)
|
28
|
+
<http://vocab.getty.edu/tgn/1002923> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000771> .
|
29
|
+
|
30
|
+
Neighborhood: (Boston)
|
31
|
+
<http://vocab.getty.edu/tgn/7013445> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
32
|
+
|
33
|
+
|
34
|
+
Provinces:
|
35
|
+
http://vocab.getty.edu/aat/300000774
|
36
|
+
|
37
|
+
Departments:
|
38
|
+
http://vocab.getty.edu/aat/300000772
|
39
|
+
|
40
|
+
Governates:
|
41
|
+
http://vocab.getty.edu/aat/300235093
|
42
|
+
|
43
|
+
Territories:
|
44
|
+
http://vocab.getty.edu/aat/300135982
|
45
|
+
|
46
|
+
+ http://vocab.getty.edu/resource/getty/search?q=territory&luceneIndex=Brief&indexDataset=AAT&_form=%2Fresource%2Fgetty%2Fsearch
|
47
|
+
|
48
|
+
dependent state:
|
49
|
+
http://vocab.getty.edu/aat/300387176
|
50
|
+
|
51
|
+
|
52
|
+
union territory:
|
53
|
+
http://vocab.getty.edu/aat/300387122
|
54
|
+
|
55
|
+
national district:
|
56
|
+
http://vocab.getty.edu/aat/300387081
|
57
|
+
|
58
|
+
|
59
|
+
Roxbury:
|
60
|
+
http://vocab.getty.edu/tgn/7015002.json
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
#South Carolina - http://vocab.getty.edu/tgn/7007712
|
65
|
+
|
66
|
+
SELECT ?object_identifier
|
67
|
+
WHERE
|
68
|
+
{
|
69
|
+
?x <http://purl.org/dc/elements/1.1/identifier> 7007712 .
|
70
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
71
|
+
{
|
72
|
+
SELECT ?parent_country ?identifier_country ?aat_place_id
|
73
|
+
WHERE {
|
74
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
75
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_place_id .
|
76
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
77
|
+
}
|
78
|
+
GROUP BY ?parent_country
|
79
|
+
}
|
80
|
+
}
|
81
|
+
GROUP BY ?object_identifier
|
82
|
+
|
83
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/tgn/#{tgn_id}.json")
|
84
|
+
|
85
|
+
|
86
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
87
|
+
place_type_base[:aat_id] = ntriple['Object']['value']
|
88
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
89
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
90
|
+
place_type_base[:label_en] = ntriple['Object']['value']
|
91
|
+
else if ntriple['Object']['xml:lang'].blank?
|
92
|
+
place_type_base[:label_default] = ntriple['Object']['value']
|
93
|
+
|
94
|
+
|
95
|
+
tgn_main_term_info = {}
|
96
|
+
broader_place_type_list = ["http://vocab.getty.edu/tgn/"#{tgn_id}]
|
97
|
+
|
98
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
99
|
+
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
100
|
+
|
101
|
+
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
102
|
+
case ntriple['Predicate']['value']
|
103
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
104
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
105
|
+
tgn_main_term_info[:label_en] = ntriple['Object']['value']
|
106
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
107
|
+
tgn_main_term_info[:label_default] = ntriple['Object']['value']
|
16
108
|
end
|
109
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
110
|
+
tgn_main_term_info[:aat_place] = ntriple['Object']['value']
|
111
|
+
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
112
|
+
broader_place_type_list << ntriple['Object']['value']
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
|
118
|
+
|
119
|
+
broader_place_type_list.each do |place_uri|
|
120
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
121
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
122
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
123
|
+
}
|
124
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
125
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
126
|
+
}
|
127
|
+
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
128
|
+
} UNION
|
129
|
+
}
|
130
|
+
end
|
131
|
+
|
132
|
+
query = query[0..-12]
|
133
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
|
134
|
+
|
135
|
+
tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
136
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
137
|
+
|
138
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
139
|
+
#aat_response['identifier_place']['value']
|
140
|
+
#aat_response['place_label_default']['value']
|
141
|
+
#....
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
EXAMPLE SPARQL:
|
149
|
+
|
150
|
+
SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref
|
151
|
+
WHERE {
|
152
|
+
{<http://vocab.getty.edu/tgn/1000001> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
153
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
154
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
155
|
+
}
|
156
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
157
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
158
|
+
}
|
159
|
+
<http://vocab.getty.edu/tgn/1000001> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
160
|
+
} UNION
|
161
|
+
{<http://vocab.getty.edu/tgn/7012149> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
162
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
163
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
164
|
+
}
|
165
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
166
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
167
|
+
}
|
168
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
169
|
+
} UNION
|
170
|
+
{<http://vocab.getty.edu/tgn/7029392> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
171
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
172
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
173
|
+
}
|
174
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
175
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
176
|
+
}
|
177
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
178
|
+
} .
|
179
|
+
|
180
|
+
|
181
|
+
}
|
182
|
+
GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
=end
|
17
188
|
|
18
|
-
# retrieve data from Getty TGN to populate <mods:subject auth="tgn">
|
19
189
|
def self.get_tgn_data(tgn_id)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
190
|
+
return nil if Bplgeo::TGN.tgn_enabled != true
|
191
|
+
|
192
|
+
tgn_main_term_info = {}
|
193
|
+
#broader_place_type_list = ["http://vocab.getty.edu/tgn/#{tgn_id}"]
|
194
|
+
broader_place_type_list = []
|
195
|
+
|
196
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
197
|
+
|
198
|
+
return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
|
199
|
+
|
200
|
+
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
201
|
+
|
202
|
+
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
203
|
+
case ntriple['Predicate']['value']
|
204
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
205
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
206
|
+
tgn_main_term_info[:label_en] = ntriple['Object']['value']
|
207
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
208
|
+
tgn_main_term_info[:label_default] = ntriple['Object']['value']
|
209
|
+
end
|
210
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
211
|
+
tgn_main_term_info[:aat_place] = ntriple['Object']['value']
|
212
|
+
when 'http://schema.org/latitude'
|
213
|
+
tgn_main_term_info[:latitude] = ntriple['Object']['value']
|
214
|
+
when 'http://schema.org/longitude'
|
215
|
+
tgn_main_term_info[:longitude] = ntriple['Object']['value']
|
216
|
+
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
217
|
+
broader_place_type_list << ntriple['Object']['value']
|
33
218
|
end
|
34
219
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
220
|
+
end
|
221
|
+
|
222
|
+
# coordinates
|
223
|
+
coords = nil
|
224
|
+
if tgn_main_term_info[:latitude].present?
|
225
|
+
coords = {}
|
226
|
+
coords[:latitude] = tgn_main_term_info[:latitude]
|
227
|
+
coords[:longitude] = tgn_main_term_info[:longitude]
|
228
|
+
coords[:combined] = tgn_main_term_info[:latitude] + ',' + tgn_main_term_info[:longitude]
|
229
|
+
end
|
230
|
+
|
231
|
+
hier_geo = {}
|
232
|
+
tgn_term = tgn_main_term_info[:label_en].present? ? tgn_main_term_info[:label_en] : tgn_main_term_info[:label_default]
|
233
|
+
tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
|
234
|
+
|
235
|
+
#Initial Term
|
236
|
+
if tgn_term.present? && tgn_term_type.present?
|
237
|
+
case tgn_term_type
|
238
|
+
when '300128176' #continent
|
239
|
+
hier_geo[:continent] = tgn_term
|
240
|
+
when '300128207' #nations
|
241
|
+
hier_geo[:country] = tgn_term
|
242
|
+
when '300000774' #province
|
243
|
+
hier_geo[:province] = tgn_term
|
244
|
+
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
245
|
+
hier_geo[:region] = tgn_term
|
246
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
247
|
+
hier_geo[:state] = tgn_term
|
248
|
+
when '300387081' #national district
|
249
|
+
if tgn_term == 'District of Columbia'
|
250
|
+
hier_geo[:state] = tgn_term
|
251
|
+
else
|
252
|
+
hier_geo[:territory] = tgn_term
|
56
253
|
end
|
57
|
-
|
58
|
-
|
59
|
-
|
254
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
255
|
+
hier_geo[:territory] = tgn_term
|
256
|
+
when '300000771' #county
|
257
|
+
hier_geo[:county] = tgn_term
|
258
|
+
when '300008347' #inhabited place
|
259
|
+
hier_geo[:city] = tgn_term
|
260
|
+
when '300000745' #neighborhood
|
261
|
+
hier_geo[:city_section] = tgn_term
|
262
|
+
when '300008791', '300387062' #island
|
263
|
+
hier_geo[:island] = tgn_term
|
264
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
265
|
+
hier_geo[:area] = tgn_term
|
266
|
+
else
|
267
|
+
non_hier_geo = tgn_term
|
268
|
+
end
|
269
|
+
|
270
|
+
#Broader places
|
271
|
+
|
272
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
|
273
|
+
|
274
|
+
broader_place_type_list.each do |place_uri|
|
275
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
276
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
277
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
278
|
+
}
|
279
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
280
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
281
|
+
}
|
282
|
+
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
283
|
+
} UNION
|
284
|
+
}
|
60
285
|
end
|
61
|
-
|
286
|
+
|
287
|
+
query = query[0..-12]
|
288
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
|
289
|
+
|
290
|
+
tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
291
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
292
|
+
|
293
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
294
|
+
tgn_term_type = aat_response['aat_pref']['value'].split('/').last
|
295
|
+
if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
|
296
|
+
tgn_term = aat_response['place_label_en']['value']
|
297
|
+
else
|
298
|
+
tgn_term = aat_response['place_label_default']['value']
|
299
|
+
end
|
300
|
+
|
62
301
|
case tgn_term_type
|
63
|
-
when '
|
302
|
+
when '300128176' #continent
|
64
303
|
hier_geo[:continent] = tgn_term
|
65
|
-
when '
|
304
|
+
when '300128207' #nation
|
66
305
|
hier_geo[:country] = tgn_term
|
67
|
-
when '
|
306
|
+
when '300000774' #province
|
68
307
|
hier_geo[:province] = tgn_term
|
69
|
-
when '
|
308
|
+
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
70
309
|
hier_geo[:region] = tgn_term
|
71
|
-
when '
|
310
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
72
311
|
hier_geo[:state] = tgn_term
|
73
|
-
when '
|
312
|
+
when '300387081' #national district
|
74
313
|
if tgn_term == 'District of Columbia'
|
75
314
|
hier_geo[:state] = tgn_term
|
76
315
|
else
|
77
316
|
hier_geo[:territory] = tgn_term
|
78
317
|
end
|
79
|
-
when '
|
318
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
80
319
|
hier_geo[:territory] = tgn_term
|
81
|
-
when '
|
320
|
+
when '300000771' #county
|
82
321
|
hier_geo[:county] = tgn_term
|
83
|
-
when '
|
322
|
+
when '300008347' #inhabited place
|
84
323
|
hier_geo[:city] = tgn_term
|
85
|
-
when '
|
324
|
+
when '300000745' #neighborhood
|
86
325
|
hier_geo[:city_section] = tgn_term
|
87
|
-
when '
|
326
|
+
when '300008791', '300387062' #island
|
88
327
|
hier_geo[:island] = tgn_term
|
89
|
-
when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
328
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
90
329
|
hier_geo[:area] = tgn_term
|
91
|
-
else
|
92
|
-
non_hier_geo = tgn_term
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
# parent data for <mods:hierarchicalGeographic>
|
97
|
-
if tgnrec.at_xpath("//Parent_String")
|
98
|
-
parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
|
99
|
-
parents.each do |parent|
|
100
|
-
if parent.include? '(continent)'
|
101
|
-
hier_geo[:continent] = parent
|
102
|
-
elsif parent.include? '(nation)'
|
103
|
-
hier_geo[:country] = parent
|
104
|
-
elsif parent.include? '(province)'
|
105
|
-
hier_geo[:province] = parent
|
106
|
-
elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
|
107
|
-
hier_geo[:region] = parent
|
108
|
-
elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)') || (parent.include?('(national district)') && parent.include?('District of Columbia'))
|
109
|
-
hier_geo[:state] = parent
|
110
|
-
elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
|
111
|
-
hier_geo[:territory] = parent
|
112
|
-
elsif parent.include? '(county)'
|
113
|
-
hier_geo[:county] = parent
|
114
|
-
elsif parent.include? '(inhabited place)'
|
115
|
-
hier_geo[:city] = parent
|
116
|
-
elsif parent.include? '(neighborhood)'
|
117
|
-
hier_geo[:city_section] = parent
|
118
|
-
elsif parent.include? '(island)'
|
119
|
-
hier_geo[:island] = parent
|
120
|
-
elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
|
121
|
-
hier_geo[:area] = parent
|
122
|
-
end
|
123
|
-
end
|
124
|
-
hier_geo.each do |k,v|
|
125
|
-
hier_geo[k] = v.gsub(/ \(.*/,'')
|
126
330
|
end
|
127
331
|
end
|
128
332
|
|
@@ -141,8 +345,9 @@ module Bplgeo
|
|
141
345
|
|
142
346
|
end
|
143
347
|
|
348
|
+
|
144
349
|
def self.tgn_id_from_geo_hash(geo_hash)
|
145
|
-
return nil if Bplgeo::TGN.
|
350
|
+
return nil if Bplgeo::TGN.tgn_enabled != true
|
146
351
|
|
147
352
|
geo_hash = geo_hash.clone
|
148
353
|
|
@@ -166,139 +371,202 @@ module Bplgeo
|
|
166
371
|
|
167
372
|
neighborhood_part = geo_hash[:neighborhood_part]
|
168
373
|
|
169
|
-
|
170
|
-
match_term = nil
|
374
|
+
|
171
375
|
|
172
376
|
if city_part.blank? && state_part.blank?
|
173
377
|
# Limit to nations
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
378
|
+
query = %{SELECT ?object_identifier
|
379
|
+
WHERE
|
380
|
+
{
|
381
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
382
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
383
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
384
|
+
FILTER regex(?object_label, "^#{country_part}$", "i" )
|
385
|
+
}}
|
178
386
|
elsif state_part.present? && city_part.blank? && country_code == 7012149
|
179
387
|
#Limit to states
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
388
|
+
query = %{SELECT ?object_identifier
|
389
|
+
WHERE
|
390
|
+
{
|
391
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
392
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
|
393
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
394
|
+
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
395
|
+
|
396
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
|
397
|
+
}}
|
184
398
|
elsif state_part.present? && city_part.blank?
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
399
|
+
#Limit to regions
|
400
|
+
|
401
|
+
query = %{SELECT ?object_identifier
|
402
|
+
WHERE
|
403
|
+
{
|
404
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
405
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
406
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
407
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
408
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
409
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
410
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
411
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
412
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
413
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
414
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
415
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
416
|
+
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
417
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
418
|
+
{
|
419
|
+
SELECT ?parent_country ?identifier_country
|
420
|
+
WHERE {
|
421
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
422
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
423
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
424
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
425
|
+
}
|
426
|
+
|
427
|
+
}
|
428
|
+
}
|
429
|
+
GROUP BY ?object_identifier
|
430
|
+
}
|
431
|
+
|
432
|
+
#FIXME Temporary: For Bplgeo.parse('Aknīste (Latvia)', true), seems to be a neighborhood placed in state
|
433
|
+
# {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
190
434
|
elsif state_part.present? && city_part.present? && neighborhood_part.blank?
|
191
435
|
#Limited to only inhabited places at the moment...
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
436
|
+
query = %{SELECT ?object_identifier
|
437
|
+
WHERE
|
438
|
+
{
|
439
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
440
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
441
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
442
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
443
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
444
|
+
{
|
445
|
+
SELECT ?parent_country ?identifier_country
|
446
|
+
WHERE {
|
447
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
448
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
449
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
450
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
451
|
+
}
|
452
|
+
|
453
|
+
}
|
454
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
455
|
+
{
|
456
|
+
SELECT ?parent_state ?identifier_state
|
457
|
+
WHERE {
|
458
|
+
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
459
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
460
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
461
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
462
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
463
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
464
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
465
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
466
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
467
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
468
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
469
|
+
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
470
|
+
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
471
|
+
}
|
472
|
+
|
473
|
+
}
|
474
|
+
|
475
|
+
}
|
476
|
+
GROUP BY ?object_identifier
|
477
|
+
}
|
478
|
+
|
479
|
+
|
197
480
|
elsif state_part.present? && city_part.present? && neighborhood_part.present?
|
198
481
|
#Limited only to neighborhoods currently...
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
482
|
+
query = %{SELECT ?object_identifier
|
483
|
+
WHERE
|
484
|
+
{
|
485
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
486
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
|
487
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
488
|
+
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
489
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
490
|
+
{
|
491
|
+
SELECT ?parent_country ?identifier_country
|
492
|
+
WHERE {
|
493
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
494
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
495
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
496
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
497
|
+
}
|
498
|
+
|
499
|
+
}
|
500
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
501
|
+
{
|
502
|
+
SELECT ?parent_state ?identifier_state
|
503
|
+
WHERE {
|
504
|
+
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
505
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
506
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
507
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
508
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
509
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
510
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
511
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
512
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
513
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
514
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
515
|
+
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
516
|
+
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
517
|
+
}
|
518
|
+
|
519
|
+
}
|
520
|
+
|
521
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
|
522
|
+
{
|
523
|
+
SELECT ?parent_city ?identifier_city
|
524
|
+
WHERE {
|
525
|
+
?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
|
526
|
+
?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
527
|
+
?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
|
528
|
+
FILTER regex(?city_label, "^#{city_part}$", "i" )
|
529
|
+
}
|
530
|
+
|
531
|
+
}
|
532
|
+
|
533
|
+
}
|
534
|
+
GROUP BY ?object_identifier
|
535
|
+
}
|
536
|
+
|
537
|
+
|
204
538
|
else
|
205
539
|
return nil
|
206
540
|
end
|
207
541
|
|
208
542
|
begin
|
543
|
+
|
209
544
|
if retry_count > 0
|
210
545
|
sleep(sleep_time)
|
211
546
|
end
|
212
547
|
retry_count = retry_count + 1
|
213
548
|
|
214
|
-
|
215
|
-
|
549
|
+
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
216
550
|
|
217
551
|
end until (tgn_response.code != 500 || retry_count == max_retry)
|
218
552
|
|
219
|
-
unless tgn_response.code == 500
|
220
|
-
parsed_xml = Nokogiri::Slop(tgn_response.body)
|
221
553
|
|
222
|
-
#This is ugly and needs to be redone to achieve better recursive...
|
223
|
-
if parsed_xml.Vocabulary.Count.text == '0'
|
224
|
-
if neighborhood_part.present?
|
225
|
-
geo_hash[:neighborhood_part] = nil
|
226
|
-
geo_hash = tgn_id_from_geo_hash(geo_hash)
|
227
|
-
elsif city_part.present?
|
228
|
-
geo_hash[:city_part] = nil
|
229
|
-
geo_hash = tgn_id_from_geo_hash(geo_hash)
|
230
|
-
end
|
231
|
-
|
232
|
-
return nil
|
233
|
-
end
|
234
|
-
|
235
|
-
#If only one result, then not array. Otherwise array....
|
236
|
-
if parsed_xml.Vocabulary.Subject.first.blank?
|
237
|
-
subject = parsed_xml.Vocabulary.Subject
|
238
|
-
|
239
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
240
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
241
|
-
|
242
|
-
#FIXME: Term should check for the correct level... temporary fix...
|
243
|
-
if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
244
|
-
return_hash[:id] = subject.Subject_ID.text
|
245
|
-
#Check alternative term ids
|
246
|
-
elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
247
|
-
return_hash[:id] = subject.Subject_ID.text
|
248
|
-
elsif current_term == match_term && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
249
|
-
return_hash[:id] = subject.Subject_ID.text
|
250
|
-
elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
251
|
-
return_hash[:id] = subject.Subject_ID.text
|
252
|
-
end
|
253
|
-
else
|
254
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
255
|
-
|
256
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
257
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
258
|
-
|
259
|
-
if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
260
|
-
return_hash[:id] = subject.Subject_ID.text
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
if return_hash[:id].blank?
|
265
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
266
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
267
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
268
|
-
|
269
|
-
if alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
270
|
-
return_hash[:id] = subject.Subject_ID.text
|
271
|
-
end
|
272
|
-
end
|
273
|
-
end
|
274
554
|
|
275
|
-
if return_hash[:id].blank?
|
276
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
277
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
278
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
279
555
|
|
280
|
-
|
281
|
-
|
282
|
-
end
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
if return_hash[:id].blank?
|
287
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
288
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
289
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
556
|
+
unless tgn_response.code == 500
|
557
|
+
as_json = JSON.parse(tgn_response.body)
|
290
558
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
559
|
+
#This is ugly and needs to be redone to achieve better recursive...
|
560
|
+
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
561
|
+
return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
562
|
+
return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
|
563
|
+
else
|
564
|
+
return nil
|
296
565
|
end
|
297
|
-
|
298
566
|
end
|
299
567
|
|
300
568
|
if tgn_response.code == 500
|
301
|
-
raise 'TGN Server appears to not be responding for Geographic query: ' +
|
569
|
+
raise 'TGN Server appears to not be responding for Geographic query: ' + query
|
302
570
|
end
|
303
571
|
|
304
572
|
if return_hash.present?
|