bplgeo 0.0.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bplgeo.rb +26 -1
- data/lib/bplgeo/geonames.rb +31 -32
- data/lib/bplgeo/parser.rb +5 -5
- data/lib/bplgeo/standardizer.rb +37 -3
- data/lib/bplgeo/tgn.rb +461 -193
- data/lib/bplgeo/version.rb +1 -1
- data/test/bplgeo_test.rb +64 -21
- data/test/dummy/config/bplgeo.yml +12 -15
- data/test/dummy/config/bplgeo.yml.sample +6 -6
- data/test/dummy/log/development.log +23 -27
- data/test/dummy/log/test.log +608 -0
- data/test/tgn_test.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13b58c2286e536e0c23e08c3a8cfed3c586e18f2
|
4
|
+
data.tar.gz: 060706f023e34218322856b763c0a7e644b3ac2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 750cd801123b74916afb4626e0d948efb94940e9ee67fe5bfe7edc9609a9a279fad53b73a05ffc3ff7495416fb6e8de99a6ce835692746e45fa49f5f6c1ab40a
|
7
|
+
data.tar.gz: 6b55b9b1b838e2adedbde4f0c029d1e6c31af4b7685d4187344fdac87b420323d5c6831cf911fca60263dcc27893ab7a57406a30bc6da1dc75d02b1b83c4622c
|
data/lib/bplgeo.rb
CHANGED
@@ -25,8 +25,33 @@ module Bplgeo
|
|
25
25
|
return_hash = Bplgeo::Parser.parse_google_api(term, parse_term)
|
26
26
|
end
|
27
27
|
|
28
|
-
if return_hash.present?
|
28
|
+
if return_hash[:country_part].present?
|
29
|
+
#FIXME
|
29
30
|
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(return_hash)
|
31
|
+
|
32
|
+
if return_hash[:tgn].blank?
|
33
|
+
geo_hash_temp = Bplgeo::Standardizer.try_with_entered_names(return_hash)
|
34
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp) if geo_hash_temp.present?
|
35
|
+
|
36
|
+
if return_hash[:tgn].blank? && return_hash[:neighborhood_part].present?
|
37
|
+
|
38
|
+
geo_hash_temp = return_hash.clone
|
39
|
+
geo_hash_temp[:neighborhood_part] = nil
|
40
|
+
geo_hash_temp[:original_string_differs] = true
|
41
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
42
|
+
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
43
|
+
elsif return_hash[:city_part].present? && return_hash[:tgn].blank?
|
44
|
+
|
45
|
+
geo_hash_temp = return_hash.clone
|
46
|
+
geo_hash_temp[:city_part] = nil
|
47
|
+
geo_hash_temp[:original_string_differs] = true
|
48
|
+
return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
49
|
+
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
30
55
|
return_hash[:geonames] = Bplgeo::Geonames.geonames_id_from_geo_hash(return_hash)
|
31
56
|
end
|
32
57
|
|
data/lib/bplgeo/geonames.rb
CHANGED
@@ -69,15 +69,19 @@ module Bplgeo
|
|
69
69
|
#Don't do both neighborhood and city!
|
70
70
|
if geo_hash[:neighborhood_part].present?
|
71
71
|
geonames_search_array << geo_hash[:neighborhood_part]
|
72
|
+
exact_name_term = geo_hash[:neighborhood_part]
|
72
73
|
elsif geo_hash[:city_part].present?
|
73
74
|
geonames_search_array << geo_hash[:city_part]
|
75
|
+
exact_name_term = geo_hash[:neighborhood_part]
|
74
76
|
end
|
75
77
|
|
76
78
|
geonames_search_array << geo_hash[:state_part] if geo_hash[:state_part].present?
|
79
|
+
exact_name_term ||= geo_hash[:neighborhood_part]
|
77
80
|
geonames_search_array << geo_hash[:country_part] if geo_hash[:country_part].present?
|
81
|
+
exact_name_term ||= geo_hash[:country_part]
|
78
82
|
geonames_search_string = geonames_search_array.join(', ')
|
79
83
|
|
80
|
-
|
84
|
+
exact_name_term = geonames_search_array.first.strip
|
81
85
|
|
82
86
|
begin
|
83
87
|
if retry_count > 0
|
@@ -85,7 +89,7 @@ module Bplgeo
|
|
85
89
|
end
|
86
90
|
retry_count = retry_count + 1
|
87
91
|
|
88
|
-
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q
|
92
|
+
geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{Country.find_country_by_name(geo_hash[:country_part]).alpha2}")
|
89
93
|
|
90
94
|
end until (geonames_response.code != 500 || retry_count == max_retry)
|
91
95
|
|
@@ -93,43 +97,38 @@ module Bplgeo
|
|
93
97
|
|
94
98
|
parsed_xml = Nokogiri::Slop(geonames_response.body)
|
95
99
|
|
100
|
+
begin
|
101
|
+
raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.geonames.status
|
102
|
+
rescue
|
103
|
+
#Do nothing but FIXME to not use slop
|
104
|
+
end
|
105
|
+
|
96
106
|
#This is ugly and needs to be redone to achieve better recursive...
|
97
107
|
if parsed_xml.geonames.totalResultsCount.text == '0'
|
98
|
-
if neighborhood_part.present?
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
108
|
+
if geo_hash[:neighborhood_part].present?
|
109
|
+
geo_hash_temp = geo_hash.clone
|
110
|
+
geo_hash_temp[:neighborhood_part] = nil
|
111
|
+
return_hash = geonames_id_from_geo_hash(geo_hash_temp)
|
112
|
+
return return_hash if return_hash.present?
|
113
|
+
elsif geo_hash[:city_part].present?
|
114
|
+
geo_hash_temp = geo_hash.clone
|
115
|
+
geo_hash_temp[:city_part] = nil
|
116
|
+
return_hash = geonames_id_from_geo_hash(geo_hash_temp)
|
117
|
+
return return_hash if return_hash.present?
|
104
118
|
end
|
105
119
|
|
106
|
-
return
|
120
|
+
return nil
|
107
121
|
end
|
108
122
|
|
109
|
-
#Exact Match
|
110
|
-
parsed_xml.geonames.geoname.
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
117
|
-
break
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
if return_hash.blank?
|
122
|
-
#Starts With
|
123
|
-
parsed_xml.geonames.geoname.each do |geoname|
|
124
|
-
|
125
|
-
current_term = geoname.toponymName.text.to_ascii.downcase.strip
|
126
|
-
|
127
|
-
if current_term.starts_with?(match_term) && return_hash.blank?
|
128
|
-
return_hash[:id] = geoname.geonameId.text
|
129
|
-
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
130
|
-
end
|
131
|
-
end
|
123
|
+
#Exact Match ... FIXME to not use Slop
|
124
|
+
if parsed_xml.geonames.geoname.class == Nokogiri::XML::Element
|
125
|
+
return_hash[:id] = parsed_xml.geonames.geoname.geonameId.text
|
126
|
+
return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
|
127
|
+
elsif parsed_xml.geonames.geoname.class ==Nokogiri::XML::NodeSet
|
128
|
+
return_hash[:id] = parsed_xml.geonames.geoname.first.geonameId.text
|
129
|
+
return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
|
132
130
|
end
|
131
|
+
return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
|
133
132
|
|
134
133
|
end
|
135
134
|
|
data/lib/bplgeo/parser.rb
CHANGED
@@ -96,7 +96,7 @@ module Bplgeo
|
|
96
96
|
retry_count = 3
|
97
97
|
|
98
98
|
#Skip if no mapquest_key has been configured... possibly move this elsewhere?
|
99
|
-
return return_hash if self.
|
99
|
+
return return_hash if self.mapquest_key == '<mapquest_key>'
|
100
100
|
|
101
101
|
return_hash[:original_term] = term
|
102
102
|
|
@@ -199,10 +199,10 @@ module Bplgeo
|
|
199
199
|
if google_api_result.present?
|
200
200
|
#Types: street number, route, neighborhood, establishment, transit_station, bus_station
|
201
201
|
google_api_result.first.data["address_components"].each do |result|
|
202
|
-
if (result['types'] & ['street number', 'route', '
|
202
|
+
if (result['types'] & ['street number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (result['types'].include?('neighborhood') && !result['types'].include?('political'))
|
203
203
|
#return_hash[:term_differs_from_tgn] = true
|
204
204
|
#TODO: Not implemented for Google results right now.
|
205
|
-
return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
205
|
+
#return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
|
206
206
|
return_hash[:coords] = {:latitude=>google_api_result.first.data['geometry']['location']['lat'].to_s,
|
207
207
|
:longitude=>google_api_result.first.data['geometry']['location']['lng'].to_s,
|
208
208
|
:combined=>google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s}
|
@@ -212,8 +212,8 @@ module Bplgeo
|
|
212
212
|
return_hash[:state_part] = result['long_name'].to_ascii
|
213
213
|
elsif (result['types'] & ['locality']).present?
|
214
214
|
return_hash[:city_part] = result['long_name']
|
215
|
-
elsif (result['types'] & ['sublocality', 'political']).length == 2
|
216
|
-
|
215
|
+
elsif (result['types'] & ['sublocality', 'political']).length == 2 || result['types'].include?('neighborhood')
|
216
|
+
return_hash[:neighborhood_part] = result['long_name']
|
217
217
|
end
|
218
218
|
end
|
219
219
|
|
data/lib/bplgeo/standardizer.rb
CHANGED
@@ -12,6 +12,7 @@ module Bplgeo
|
|
12
12
|
|
13
13
|
state_abbr_list = ['Mass']
|
14
14
|
state_name_list = []
|
15
|
+
country_name_list = []
|
15
16
|
|
16
17
|
#Countries gem of https://github.com/hexorx/countries
|
17
18
|
Country.new('US').states.each do |state_abbr, state_names|
|
@@ -19,10 +20,14 @@ module Bplgeo
|
|
19
20
|
state_name_list << state_names["name"]
|
20
21
|
end
|
21
22
|
|
23
|
+
Country.all.each do |country_name_abbr_pair|
|
24
|
+
country_name_list << country_name_abbr_pair.first
|
25
|
+
end
|
26
|
+
|
22
27
|
#Parsing a subject geographic term.
|
23
28
|
if term.include?('--')
|
24
29
|
term.split('--').each_with_index do |split_term, index|
|
25
|
-
if state_name_list.any? { |state| split_term.include? state }
|
30
|
+
if state_name_list.any? { |state| split_term.include? state } || country_name_list.any? { |country| split_term.include? country }
|
26
31
|
geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
|
27
32
|
elsif state_abbr_list.any? { |abbr| split_term.include? abbr }
|
28
33
|
geo_term = split_term
|
@@ -32,13 +37,13 @@ module Bplgeo
|
|
32
37
|
#Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
|
33
38
|
elsif term.include?(' - ')
|
34
39
|
term.split(' - ').each do |split_term|
|
35
|
-
if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr }
|
40
|
+
if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr } || country_name_list.any? { |country| split_term.include? country }
|
36
41
|
geo_term = split_term
|
37
42
|
end
|
38
43
|
|
39
44
|
end
|
40
45
|
else
|
41
|
-
if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr }
|
46
|
+
if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr } || country_name_list.any? { |country| term.include? country }
|
42
47
|
geo_term = term
|
43
48
|
end
|
44
49
|
end
|
@@ -57,6 +62,9 @@ module Bplgeo
|
|
57
62
|
#Strip any leading periods or commas from junk terms
|
58
63
|
geo_term = geo_term.gsub(/^[\.,]+/, '').strip
|
59
64
|
|
65
|
+
#Replace any four TGN dashes from removing a junk term
|
66
|
+
geo_term = geo_term.gsub('----', '--')
|
67
|
+
|
60
68
|
#Replace any semicolons with commas... possible strip them?
|
61
69
|
geo_term = geo_term.gsub(';', ',')
|
62
70
|
|
@@ -211,5 +219,31 @@ module Bplgeo
|
|
211
219
|
# Produces a single-line, plain-text version of +value+.
#
# The value is stringified, tab / newline sequences and self-closing <br/>
# tags are each turned into one space, the result is run through
# ActionView's full sanitizer, HTML entities are decoded and surrounding
# whitespace is trimmed.
def self.utf8Encode(value)
  single_line = value.to_s
                     .gsub(/\r?\n?\t/, ' ')
                     .gsub(/\r?\n/, ' ')
                     .gsub(/<br[\s]*\/>/, ' ')
  sanitized = ActionView::Base.full_sanitizer.sanitize(single_line)
  HTMLEntities.new.decode(sanitized).strip
end
|
222
|
+
|
223
|
+
|
224
|
+
# Builds a copy of +geo_hash+ in which the most specific place part present
# (neighborhood, then city, then state) is replaced by the spelling found in
# the :standardized_term string, so a lookup can be retried with that
# spelling instead of the parsed one.
#
# Matching is done token by token after commas are turned into spaces and
# the term is split on whitespace, comparing lowercased, ASCII-folded text.
# Because of that split, only single-word place names can match.
#
# geo_hash - Hash read via :standardized_term, :neighborhood_part,
#            :city_part and :state_part. It is never modified; the work is
#            done on a shallow clone.
#
# Returns the cloned hash as soon as the first present part has been
# handled (whether or not a matching token was found), or nil when none of
# the three parts is present.
def self.try_with_entered_names(geo_hash)
  geo_hash_local = geo_hash.clone

  # to_s keeps a missing :standardized_term from raising NoMethodError; an
  # empty term simply yields no tokens and leaves the parts untouched.
  entered_tokens = geo_hash_local[:standardized_term].to_s.gsub(',', ' ').squish.split(' ')

  # Only the first present part is considered, most specific first.
  [:neighborhood_part, :city_part, :state_part].each do |part_key|
    parsed_name = geo_hash_local[part_key]
    next unless parsed_name.present?

    folded_name = parsed_name.downcase.to_ascii
    entered_name = entered_tokens.find { |token| token.downcase.to_ascii == folded_name }
    geo_hash_local[part_key] = entered_name.strip if entered_name
    return geo_hash_local
  end

  nil
end
|
247
|
+
|
214
248
|
end
|
215
249
|
end
|
data/lib/bplgeo/tgn.rb
CHANGED
@@ -7,122 +7,326 @@ module Bplgeo
|
|
7
7
|
@bplgeo_config ||= YAML::load(ERB.new(IO.read(File.join(root, 'config', 'bplgeo.yml'))).result)[env].with_indifferent_access
|
8
8
|
end
|
9
9
|
|
10
|
-
def self.
|
11
|
-
bplgeo_config[:
|
10
|
+
# Reports whether TGN lookups are enabled.
#
# Reads the :tgn_enabled entry of bplgeo_config and falls back to true only
# when that entry is missing (nil). The earlier `value || true` form returned
# true for every input, so an explicit `tgn_enabled: false` could never switch
# lookups off even though callers test `tgn_enabled != true`.
#
# Returns the configured value, or true when nothing is configured.
def self.tgn_enabled
  configured = bplgeo_config[:tgn_enabled]
  configured.nil? ? true : configured
end
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
=begin
|
15
|
+
81010/nation
|
16
|
+
81175/state
|
17
|
+
81165/region
|
18
|
+
84251/neighborhood
|
19
|
+
83002/inhabited place
|
20
|
+
|
21
|
+
nations
|
22
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207>
|
23
|
+
|
24
|
+
States (political divisions):
|
25
|
+
<http://vocab.getty.edu/tgn/7007517> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
|
26
|
+
|
27
|
+
Counties: (Suffolk - http://vocab.getty.edu/aat/300000771)
|
28
|
+
<http://vocab.getty.edu/tgn/1002923> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000771> .
|
29
|
+
|
30
|
+
Neighborhood: (Boston)
|
31
|
+
<http://vocab.getty.edu/tgn/7013445> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
32
|
+
|
33
|
+
|
34
|
+
Provinces:
|
35
|
+
http://vocab.getty.edu/aat/300000774
|
36
|
+
|
37
|
+
Departments:
|
38
|
+
http://vocab.getty.edu/aat/300000772
|
39
|
+
|
40
|
+
Governorates:
|
41
|
+
http://vocab.getty.edu/aat/300235093
|
42
|
+
|
43
|
+
Territories:
|
44
|
+
http://vocab.getty.edu/aat/300135982
|
45
|
+
|
46
|
+
+ http://vocab.getty.edu/resource/getty/search?q=territory&luceneIndex=Brief&indexDataset=AAT&_form=%2Fresource%2Fgetty%2Fsearch
|
47
|
+
|
48
|
+
dependent state:
|
49
|
+
http://vocab.getty.edu/aat/300387176
|
50
|
+
|
51
|
+
|
52
|
+
union territory:
|
53
|
+
http://vocab.getty.edu/aat/300387122
|
54
|
+
|
55
|
+
national district:
|
56
|
+
http://vocab.getty.edu/aat/300387081
|
57
|
+
|
58
|
+
|
59
|
+
Roxbury:
|
60
|
+
http://vocab.getty.edu/tgn/7015002.json
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
#South Carolina - http://vocab.getty.edu/tgn/7007712
|
65
|
+
|
66
|
+
SELECT ?object_identifier
|
67
|
+
WHERE
|
68
|
+
{
|
69
|
+
?x <http://purl.org/dc/elements/1.1/identifier> 7007712 .
|
70
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
71
|
+
{
|
72
|
+
SELECT ?parent_country ?identifier_country ?aat_place_id
|
73
|
+
WHERE {
|
74
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
75
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_place_id .
|
76
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
77
|
+
}
|
78
|
+
GROUP BY ?parent_country
|
79
|
+
}
|
80
|
+
}
|
81
|
+
GROUP BY ?object_identifier
|
82
|
+
|
83
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/tgn/#{tgn_id}.json")
|
84
|
+
|
85
|
+
|
86
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
87
|
+
place_type_base[:aat_id] = ntriple['Object']['value']
|
88
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
89
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
90
|
+
place_type_base[:label_en] = ntriple['Object']['value']
|
91
|
+
else if ntriple['Object']['xml:lang'].blank?
|
92
|
+
place_type_base[:label_default] = ntriple['Object']['value']
|
93
|
+
|
94
|
+
|
95
|
+
tgn_main_term_info = {}
|
96
|
+
broader_place_type_list = ["http://vocab.getty.edu/tgn/"#{tgn_id}]
|
97
|
+
|
98
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
99
|
+
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
100
|
+
|
101
|
+
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
102
|
+
case ntriple['Predicate']['value']
|
103
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
104
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
105
|
+
tgn_main_term_info[:label_en] = ntriple['Object']['value']
|
106
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
107
|
+
tgn_main_term_info[:label_default] = ntriple['Object']['value']
|
16
108
|
end
|
109
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
110
|
+
tgn_main_term_info[:aat_place] = ntriple['Object']['value']
|
111
|
+
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
112
|
+
broader_place_type_list << ntriple['Object']['value']
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
|
118
|
+
|
119
|
+
broader_place_type_list.each do |place_uri|
|
120
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
121
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
122
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
123
|
+
}
|
124
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
125
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
126
|
+
}
|
127
|
+
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
128
|
+
} UNION
|
129
|
+
}
|
130
|
+
end
|
131
|
+
|
132
|
+
query = query[0..-12]
|
133
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
|
134
|
+
|
135
|
+
tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
136
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
137
|
+
|
138
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
139
|
+
#aat_response['identifier_place']['value']
|
140
|
+
#aat_response['place_label_default']['value']
|
141
|
+
#....
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
EXAMPLE SPARQL:
|
149
|
+
|
150
|
+
SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref
|
151
|
+
WHERE {
|
152
|
+
{<http://vocab.getty.edu/tgn/1000001> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
153
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
154
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
155
|
+
}
|
156
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
157
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
158
|
+
}
|
159
|
+
<http://vocab.getty.edu/tgn/1000001> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
160
|
+
} UNION
|
161
|
+
{<http://vocab.getty.edu/tgn/7012149> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
162
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
163
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
164
|
+
}
|
165
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
166
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
167
|
+
}
|
168
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
169
|
+
} UNION
|
170
|
+
{<http://vocab.getty.edu/tgn/7029392> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
171
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
172
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
173
|
+
}
|
174
|
+
OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
175
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
176
|
+
}
|
177
|
+
<http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
178
|
+
} .
|
179
|
+
|
180
|
+
|
181
|
+
}
|
182
|
+
GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
=end
|
17
188
|
|
18
|
-
# retrieve data from Getty TGN to populate <mods:subject auth="tgn">
|
19
189
|
def self.get_tgn_data(tgn_id)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
190
|
+
return nil if Bplgeo::TGN.tgn_enabled != true
|
191
|
+
|
192
|
+
tgn_main_term_info = {}
|
193
|
+
#broader_place_type_list = ["http://vocab.getty.edu/tgn/#{tgn_id}"]
|
194
|
+
broader_place_type_list = []
|
195
|
+
|
196
|
+
primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
|
197
|
+
|
198
|
+
return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
|
199
|
+
|
200
|
+
as_json_tgn_response = JSON.parse(primary_tgn_response.body)
|
201
|
+
|
202
|
+
as_json_tgn_response['results']['bindings'].each do |ntriple|
|
203
|
+
case ntriple['Predicate']['value']
|
204
|
+
when 'http://www.w3.org/2004/02/skos/core#prefLabel'
|
205
|
+
if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
|
206
|
+
tgn_main_term_info[:label_en] = ntriple['Object']['value']
|
207
|
+
elsif ntriple['Object']['xml:lang'].blank?
|
208
|
+
tgn_main_term_info[:label_default] = ntriple['Object']['value']
|
209
|
+
end
|
210
|
+
when 'http://vocab.getty.edu/ontology#placeTypePreferred'
|
211
|
+
tgn_main_term_info[:aat_place] = ntriple['Object']['value']
|
212
|
+
when 'http://schema.org/latitude'
|
213
|
+
tgn_main_term_info[:latitude] = ntriple['Object']['value']
|
214
|
+
when 'http://schema.org/longitude'
|
215
|
+
tgn_main_term_info[:longitude] = ntriple['Object']['value']
|
216
|
+
when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
|
217
|
+
broader_place_type_list << ntriple['Object']['value']
|
33
218
|
end
|
34
219
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
220
|
+
end
|
221
|
+
|
222
|
+
# coordinates
|
223
|
+
coords = nil
|
224
|
+
if tgn_main_term_info[:latitude].present?
|
225
|
+
coords = {}
|
226
|
+
coords[:latitude] = tgn_main_term_info[:latitude]
|
227
|
+
coords[:longitude] = tgn_main_term_info[:longitude]
|
228
|
+
coords[:combined] = tgn_main_term_info[:latitude] + ',' + tgn_main_term_info[:longitude]
|
229
|
+
end
|
230
|
+
|
231
|
+
hier_geo = {}
|
232
|
+
tgn_term = tgn_main_term_info[:label_en].present? ? tgn_main_term_info[:label_en] : tgn_main_term_info[:label_default]
|
233
|
+
tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
|
234
|
+
|
235
|
+
#Initial Term
|
236
|
+
if tgn_term.present? && tgn_term_type.present?
|
237
|
+
case tgn_term_type
|
238
|
+
when '300128176' #continent
|
239
|
+
hier_geo[:continent] = tgn_term
|
240
|
+
when '300128207' #nations
|
241
|
+
hier_geo[:country] = tgn_term
|
242
|
+
when '300000774' #province
|
243
|
+
hier_geo[:province] = tgn_term
|
244
|
+
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
245
|
+
hier_geo[:region] = tgn_term
|
246
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
247
|
+
hier_geo[:state] = tgn_term
|
248
|
+
when '300387081' #national district
|
249
|
+
if tgn_term == 'District of Columbia'
|
250
|
+
hier_geo[:state] = tgn_term
|
251
|
+
else
|
252
|
+
hier_geo[:territory] = tgn_term
|
56
253
|
end
|
57
|
-
|
58
|
-
|
59
|
-
|
254
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
255
|
+
hier_geo[:territory] = tgn_term
|
256
|
+
when '300000771' #county
|
257
|
+
hier_geo[:county] = tgn_term
|
258
|
+
when '300008347' #inhabited place
|
259
|
+
hier_geo[:city] = tgn_term
|
260
|
+
when '300000745' #neighborhood
|
261
|
+
hier_geo[:city_section] = tgn_term
|
262
|
+
when '300008791', '300387062' #island
|
263
|
+
hier_geo[:island] = tgn_term
|
264
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
265
|
+
hier_geo[:area] = tgn_term
|
266
|
+
else
|
267
|
+
non_hier_geo = tgn_term
|
268
|
+
end
|
269
|
+
|
270
|
+
#Broader places
|
271
|
+
|
272
|
+
query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
|
273
|
+
|
274
|
+
broader_place_type_list.each do |place_uri|
|
275
|
+
query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
|
276
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
|
277
|
+
FILTER langMatches( lang(?place_label_en), "en" )
|
278
|
+
}
|
279
|
+
OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
|
280
|
+
FILTER langMatches( lang(?place_label_default), "" )
|
281
|
+
}
|
282
|
+
<#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
|
283
|
+
} UNION
|
284
|
+
}
|
60
285
|
end
|
61
|
-
|
286
|
+
|
287
|
+
query = query[0..-12]
|
288
|
+
query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
|
289
|
+
|
290
|
+
tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
291
|
+
as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
|
292
|
+
|
293
|
+
as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
|
294
|
+
tgn_term_type = aat_response['aat_pref']['value'].split('/').last
|
295
|
+
if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
|
296
|
+
tgn_term = aat_response['place_label_en']['value']
|
297
|
+
else
|
298
|
+
tgn_term = aat_response['place_label_default']['value']
|
299
|
+
end
|
300
|
+
|
62
301
|
case tgn_term_type
|
63
|
-
when '
|
302
|
+
when '300128176' #continent
|
64
303
|
hier_geo[:continent] = tgn_term
|
65
|
-
when '
|
304
|
+
when '300128207' #nation
|
66
305
|
hier_geo[:country] = tgn_term
|
67
|
-
when '
|
306
|
+
when '300000774' #province
|
68
307
|
hier_geo[:province] = tgn_term
|
69
|
-
when '
|
308
|
+
when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
|
70
309
|
hier_geo[:region] = tgn_term
|
71
|
-
when '
|
310
|
+
when '300000776', '300000772', '300235093' #state, department, governorate
|
72
311
|
hier_geo[:state] = tgn_term
|
73
|
-
when '
|
312
|
+
when '300387081' #national district
|
74
313
|
if tgn_term == 'District of Columbia'
|
75
314
|
hier_geo[:state] = tgn_term
|
76
315
|
else
|
77
316
|
hier_geo[:territory] = tgn_term
|
78
317
|
end
|
79
|
-
when '
|
318
|
+
when '300135982', '300387176', '300387122' #territory, dependent state, union territory
|
80
319
|
hier_geo[:territory] = tgn_term
|
81
|
-
when '
|
320
|
+
when '300000771' #county
|
82
321
|
hier_geo[:county] = tgn_term
|
83
|
-
when '
|
322
|
+
when '300008347' #inhabited place
|
84
323
|
hier_geo[:city] = tgn_term
|
85
|
-
when '
|
324
|
+
when '300000745' #neighborhood
|
86
325
|
hier_geo[:city_section] = tgn_term
|
87
|
-
when '
|
326
|
+
when '300008791', '300387062' #island
|
88
327
|
hier_geo[:island] = tgn_term
|
89
|
-
when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
328
|
+
when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
|
90
329
|
hier_geo[:area] = tgn_term
|
91
|
-
else
|
92
|
-
non_hier_geo = tgn_term
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
# parent data for <mods:hierarchicalGeographic>
|
97
|
-
if tgnrec.at_xpath("//Parent_String")
|
98
|
-
parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
|
99
|
-
parents.each do |parent|
|
100
|
-
if parent.include? '(continent)'
|
101
|
-
hier_geo[:continent] = parent
|
102
|
-
elsif parent.include? '(nation)'
|
103
|
-
hier_geo[:country] = parent
|
104
|
-
elsif parent.include? '(province)'
|
105
|
-
hier_geo[:province] = parent
|
106
|
-
elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
|
107
|
-
hier_geo[:region] = parent
|
108
|
-
elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)') || (parent.include?('(national district)') && parent.include?('District of Columbia'))
|
109
|
-
hier_geo[:state] = parent
|
110
|
-
elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
|
111
|
-
hier_geo[:territory] = parent
|
112
|
-
elsif parent.include? '(county)'
|
113
|
-
hier_geo[:county] = parent
|
114
|
-
elsif parent.include? '(inhabited place)'
|
115
|
-
hier_geo[:city] = parent
|
116
|
-
elsif parent.include? '(neighborhood)'
|
117
|
-
hier_geo[:city_section] = parent
|
118
|
-
elsif parent.include? '(island)'
|
119
|
-
hier_geo[:island] = parent
|
120
|
-
elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
|
121
|
-
hier_geo[:area] = parent
|
122
|
-
end
|
123
|
-
end
|
124
|
-
hier_geo.each do |k,v|
|
125
|
-
hier_geo[k] = v.gsub(/ \(.*/,'')
|
126
330
|
end
|
127
331
|
end
|
128
332
|
|
@@ -141,8 +345,9 @@ module Bplgeo
|
|
141
345
|
|
142
346
|
end
|
143
347
|
|
348
|
+
|
144
349
|
def self.tgn_id_from_geo_hash(geo_hash)
|
145
|
-
return nil if Bplgeo::TGN.
|
350
|
+
return nil if Bplgeo::TGN.tgn_enabled != true
|
146
351
|
|
147
352
|
geo_hash = geo_hash.clone
|
148
353
|
|
@@ -166,139 +371,202 @@ module Bplgeo
|
|
166
371
|
|
167
372
|
neighborhood_part = geo_hash[:neighborhood_part]
|
168
373
|
|
169
|
-
|
170
|
-
match_term = nil
|
374
|
+
|
171
375
|
|
172
376
|
if city_part.blank? && state_part.blank?
|
173
377
|
# Limit to nations
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
378
|
+
query = %{SELECT ?object_identifier
|
379
|
+
WHERE
|
380
|
+
{
|
381
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
382
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
383
|
+
?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
|
384
|
+
FILTER regex(?object_label, "^#{country_part}$", "i" )
|
385
|
+
}}
|
178
386
|
elsif state_part.present? && city_part.blank? && country_code == 7012149
|
179
387
|
#Limit to states
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
388
|
+
query = %{SELECT ?object_identifier
|
389
|
+
WHERE
|
390
|
+
{
|
391
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
392
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
|
393
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
394
|
+
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
395
|
+
|
396
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
|
397
|
+
}}
|
184
398
|
elsif state_part.present? && city_part.blank?
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
399
|
+
#Limit to regions
|
400
|
+
|
401
|
+
query = %{SELECT ?object_identifier
|
402
|
+
WHERE
|
403
|
+
{
|
404
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
405
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
406
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
407
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
408
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
409
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
410
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
411
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
412
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
413
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
414
|
+
{?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
415
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
416
|
+
FILTER regex(?object_label, "^#{state_part}$", "i" )
|
417
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
418
|
+
{
|
419
|
+
SELECT ?parent_country ?identifier_country
|
420
|
+
WHERE {
|
421
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
422
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
423
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
424
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
425
|
+
}
|
426
|
+
|
427
|
+
}
|
428
|
+
}
|
429
|
+
GROUP BY ?object_identifier
|
430
|
+
}
|
431
|
+
|
432
|
+
#FIXME Temporary: For Bplgeo.parse('Aknīste (Latvia)', true), seems to be a neighborhood placed in state
|
433
|
+
# {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
190
434
|
elsif state_part.present? && city_part.present? && neighborhood_part.blank?
|
191
435
|
#Limited to only inhabited places at the moment...
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
436
|
+
query = %{SELECT ?object_identifier
|
437
|
+
WHERE
|
438
|
+
{
|
439
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
440
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
441
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
442
|
+
FILTER regex(?object_label, "^#{city_part}$", "i" )
|
443
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
444
|
+
{
|
445
|
+
SELECT ?parent_country ?identifier_country
|
446
|
+
WHERE {
|
447
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
448
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
449
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
450
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
451
|
+
}
|
452
|
+
|
453
|
+
}
|
454
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
455
|
+
{
|
456
|
+
SELECT ?parent_state ?identifier_state
|
457
|
+
WHERE {
|
458
|
+
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
459
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
460
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
461
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
462
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
463
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
464
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
465
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
466
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
467
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
468
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
469
|
+
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
470
|
+
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
471
|
+
}
|
472
|
+
|
473
|
+
}
|
474
|
+
|
475
|
+
}
|
476
|
+
GROUP BY ?object_identifier
|
477
|
+
}
|
478
|
+
|
479
|
+
|
197
480
|
elsif state_part.present? && city_part.present? && neighborhood_part.present?
|
198
481
|
#Limited only to neighborhoods currently...
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
482
|
+
query = %{SELECT ?object_identifier
|
483
|
+
WHERE
|
484
|
+
{
|
485
|
+
?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
|
486
|
+
?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
|
487
|
+
?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
|
488
|
+
FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
|
489
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
|
490
|
+
{
|
491
|
+
SELECT ?parent_country ?identifier_country
|
492
|
+
WHERE {
|
493
|
+
?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
|
494
|
+
?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
|
495
|
+
?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
|
496
|
+
FILTER regex(?country_label, "^#{country_part}$", "i" )
|
497
|
+
}
|
498
|
+
|
499
|
+
}
|
500
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
|
501
|
+
{
|
502
|
+
SELECT ?parent_state ?identifier_state
|
503
|
+
WHERE {
|
504
|
+
?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
|
505
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
|
506
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
|
507
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
|
508
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
|
509
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
|
510
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
|
511
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
|
512
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
|
513
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
|
514
|
+
{?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
|
515
|
+
?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
|
516
|
+
FILTER regex(?state_label, "^#{state_part}$", "i" )
|
517
|
+
}
|
518
|
+
|
519
|
+
}
|
520
|
+
|
521
|
+
?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
|
522
|
+
{
|
523
|
+
SELECT ?parent_city ?identifier_city
|
524
|
+
WHERE {
|
525
|
+
?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
|
526
|
+
?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
|
527
|
+
?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
|
528
|
+
FILTER regex(?city_label, "^#{city_part}$", "i" )
|
529
|
+
}
|
530
|
+
|
531
|
+
}
|
532
|
+
|
533
|
+
}
|
534
|
+
GROUP BY ?object_identifier
|
535
|
+
}
|
536
|
+
|
537
|
+
|
204
538
|
else
|
205
539
|
return nil
|
206
540
|
end
|
207
541
|
|
208
542
|
begin
|
543
|
+
|
209
544
|
if retry_count > 0
|
210
545
|
sleep(sleep_time)
|
211
546
|
end
|
212
547
|
retry_count = retry_count + 1
|
213
548
|
|
214
|
-
|
215
|
-
|
549
|
+
tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
|
216
550
|
|
217
551
|
end until (tgn_response.code != 500 || retry_count == max_retry)
|
218
552
|
|
219
|
-
unless tgn_response.code == 500
|
220
|
-
parsed_xml = Nokogiri::Slop(tgn_response.body)
|
221
553
|
|
222
|
-
#This is ugly and needs to be redone to achieve better recursive...
|
223
|
-
if parsed_xml.Vocabulary.Count.text == '0'
|
224
|
-
if neighborhood_part.present?
|
225
|
-
geo_hash[:neighborhood_part] = nil
|
226
|
-
geo_hash = tgn_id_from_geo_hash(geo_hash)
|
227
|
-
elsif city_part.present?
|
228
|
-
geo_hash[:city_part] = nil
|
229
|
-
geo_hash = tgn_id_from_geo_hash(geo_hash)
|
230
|
-
end
|
231
|
-
|
232
|
-
return nil
|
233
|
-
end
|
234
|
-
|
235
|
-
#If only one result, then not array. Otherwise array....
|
236
|
-
if parsed_xml.Vocabulary.Subject.first.blank?
|
237
|
-
subject = parsed_xml.Vocabulary.Subject
|
238
|
-
|
239
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
240
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
241
|
-
|
242
|
-
#FIXME: Term should check for the correct level... temporary fix...
|
243
|
-
if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
244
|
-
return_hash[:id] = subject.Subject_ID.text
|
245
|
-
#Check alternative term ids
|
246
|
-
elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
247
|
-
return_hash[:id] = subject.Subject_ID.text
|
248
|
-
elsif current_term == match_term && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
249
|
-
return_hash[:id] = subject.Subject_ID.text
|
250
|
-
elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
251
|
-
return_hash[:id] = subject.Subject_ID.text
|
252
|
-
end
|
253
|
-
else
|
254
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
255
|
-
|
256
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
257
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
258
|
-
|
259
|
-
if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
260
|
-
return_hash[:id] = subject.Subject_ID.text
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
if return_hash[:id].blank?
|
265
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
266
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
267
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
268
|
-
|
269
|
-
if alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
|
270
|
-
return_hash[:id] = subject.Subject_ID.text
|
271
|
-
end
|
272
|
-
end
|
273
|
-
end
|
274
554
|
|
275
|
-
if return_hash[:id].blank?
|
276
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
277
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
278
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
279
555
|
|
280
|
-
|
281
|
-
|
282
|
-
end
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
if return_hash[:id].blank?
|
287
|
-
parsed_xml.Vocabulary.Subject.each do |subject|
|
288
|
-
current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
|
289
|
-
alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
|
556
|
+
unless tgn_response.code == 500
|
557
|
+
as_json = JSON.parse(tgn_response.body)
|
290
558
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
559
|
+
#This is ugly and needs to be redone to achieve better recursion...
|
560
|
+
if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
|
561
|
+
return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
|
562
|
+
return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
|
563
|
+
else
|
564
|
+
return nil
|
296
565
|
end
|
297
|
-
|
298
566
|
end
|
299
567
|
|
300
568
|
if tgn_response.code == 500
|
301
|
-
raise 'TGN Server appears to not be responding for Geographic query: ' +
|
569
|
+
raise 'TGN Server appears to not be responding for Geographic query: ' + query
|
302
570
|
end
|
303
571
|
|
304
572
|
if return_hash.present?
|