bplgeo 0.0.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 21c1271a3fb208898888438395ebafbfcf1bd167
4
- data.tar.gz: 64bde835593a4f414441b03390dbe1327524b0e6
3
+ metadata.gz: 13b58c2286e536e0c23e08c3a8cfed3c586e18f2
4
+ data.tar.gz: 060706f023e34218322856b763c0a7e644b3ac2f
5
5
  SHA512:
6
- metadata.gz: 2a0254403d3d1a29c63ffbab106133083a3193d7c005716ff432385012a0eda2e0faef78eec927ace1bc931c396c8e434fd24bbfb878b397e128d4b79b78e723
7
- data.tar.gz: 1a5382d42fb7f802c12569bdb983921d6335b7fc9b7b5b9f43f9a97c255e5bcfedd911250f191a9df44f983aee9b0750afa30b8b1ed6919c02c278a77e02d8e9
6
+ metadata.gz: 750cd801123b74916afb4626e0d948efb94940e9ee67fe5bfe7edc9609a9a279fad53b73a05ffc3ff7495416fb6e8de99a6ce835692746e45fa49f5f6c1ab40a
7
+ data.tar.gz: 6b55b9b1b838e2adedbde4f0c029d1e6c31af4b7685d4187344fdac87b420323d5c6831cf911fca60263dcc27893ab7a57406a30bc6da1dc75d02b1b83c4622c
@@ -25,8 +25,33 @@ module Bplgeo
25
25
  return_hash = Bplgeo::Parser.parse_google_api(term, parse_term)
26
26
  end
27
27
 
28
- if return_hash.present?
28
+ if return_hash[:country_part].present?
29
+ #FIXME
29
30
  return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(return_hash)
31
+
32
+ if return_hash[:tgn].blank?
33
+ geo_hash_temp = Bplgeo::Standardizer.try_with_entered_names(return_hash)
34
+ return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp) if geo_hash_temp.present?
35
+
36
+ if return_hash[:tgn].blank? && return_hash[:neighborhood_part].present?
37
+
38
+ geo_hash_temp = return_hash.clone
39
+ geo_hash_temp[:neighborhood_part] = nil
40
+ geo_hash_temp[:original_string_differs] = true
41
+ return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
42
+ return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
43
+ elsif return_hash[:city_part].present? && return_hash[:tgn].blank?
44
+
45
+ geo_hash_temp = return_hash.clone
46
+ geo_hash_temp[:city_part] = nil
47
+ geo_hash_temp[:original_string_differs] = true
48
+ return_hash[:tgn] = Bplgeo::TGN.tgn_id_from_geo_hash(geo_hash_temp)
49
+ return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
50
+
51
+ end
52
+
53
+ end
54
+
30
55
  return_hash[:geonames] = Bplgeo::Geonames.geonames_id_from_geo_hash(return_hash)
31
56
  end
32
57
 
@@ -69,15 +69,19 @@ module Bplgeo
69
69
  #Don't do both neighborhood and city!
70
70
  if geo_hash[:neighborhood_part].present?
71
71
  geonames_search_array << geo_hash[:neighborhood_part]
72
+ exact_name_term = geo_hash[:neighborhood_part]
72
73
  elsif geo_hash[:city_part].present?
73
74
  geonames_search_array << geo_hash[:city_part]
75
+ exact_name_term = geo_hash[:neighborhood_part]
74
76
  end
75
77
 
76
78
  geonames_search_array << geo_hash[:state_part] if geo_hash[:state_part].present?
79
+ exact_name_term ||= geo_hash[:neighborhood_part]
77
80
  geonames_search_array << geo_hash[:country_part] if geo_hash[:country_part].present?
81
+ exact_name_term ||= geo_hash[:country_part]
78
82
  geonames_search_string = geonames_search_array.join(', ')
79
83
 
80
- match_term = geonames_search_array.first.to_ascii.downcase.strip
84
+ exact_name_term = geonames_search_array.first.strip
81
85
 
82
86
  begin
83
87
  if retry_count > 0
@@ -85,7 +89,7 @@ module Bplgeo
85
89
  end
86
90
  retry_count = retry_count + 1
87
91
 
88
- geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=" + CGI.escape(geonames_search_string))
92
+ geonames_response = Typhoeus::Request.get("http://api.geonames.org/search?username=#{self.geonames_username}&lang=en&style=FULL&q=#{CGI.escape(geonames_search_string)}&name_equals=#{CGI.escape(exact_name_term)}&country=#{Country.find_country_by_name(geo_hash[:country_part]).alpha2}")
89
93
 
90
94
  end until (geonames_response.code != 500 || retry_count == max_retry)
91
95
 
@@ -93,43 +97,38 @@ module Bplgeo
93
97
 
94
98
  parsed_xml = Nokogiri::Slop(geonames_response.body)
95
99
 
100
+ begin
101
+ raise "geonames status error message of: #{parsed_xml.to_s}" if parsed_xml.geonames.status
102
+ rescue
103
+ #Do nothing but FIXME to not use slop
104
+ end
105
+
96
106
  #This is ugly and needs to be redone to achieve better recursive...
97
107
  if parsed_xml.geonames.totalResultsCount.text == '0'
98
- if neighborhood_part.present?
99
- geo_hash[:neighborhood_part] = nil
100
- geo_hash = geonames_id_from_geo_hash(geo_hash)
101
- elsif city_part.present?
102
- geo_hash[:city_part] = nil
103
- geo_hash = geonames_id_from_geo_hash(geo_hash)
108
+ if geo_hash[:neighborhood_part].present?
109
+ geo_hash_temp = geo_hash.clone
110
+ geo_hash_temp[:neighborhood_part] = nil
111
+ return_hash = geonames_id_from_geo_hash(geo_hash_temp)
112
+ return return_hash if return_hash.present?
113
+ elsif geo_hash[:city_part].present?
114
+ geo_hash_temp = geo_hash.clone
115
+ geo_hash_temp[:city_part] = nil
116
+ return_hash = geonames_id_from_geo_hash(geo_hash_temp)
117
+ return return_hash if return_hash.present?
104
118
  end
105
119
 
106
- return geo_hash
120
+ return nil
107
121
  end
108
122
 
109
- #Exact Match
110
- parsed_xml.geonames.geoname.each do |geoname|
111
-
112
- current_term = geoname.toponymName.text.to_ascii.downcase.strip
113
-
114
- if current_term == match_term && return_hash.blank?
115
- return_hash[:id] = geoname.geonameId.text
116
- return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
117
- break
118
- end
119
- end
120
-
121
- if return_hash.blank?
122
- #Starts With
123
- parsed_xml.geonames.geoname.each do |geoname|
124
-
125
- current_term = geoname.toponymName.text.to_ascii.downcase.strip
126
-
127
- if current_term.starts_with?(match_term) && return_hash.blank?
128
- return_hash[:id] = geoname.geonameId.text
129
- return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
130
- end
131
- end
123
+ #Exact Match ... FIXME to not use Slop
124
+ if parsed_xml.geonames.geoname.class == Nokogiri::XML::Element
125
+ return_hash[:id] = parsed_xml.geonames.geoname.geonameId.text
126
+ return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
127
+ elsif parsed_xml.geonames.geoname.class ==Nokogiri::XML::NodeSet
128
+ return_hash[:id] = parsed_xml.geonames.geoname.first.geonameId.text
129
+ return_hash[:rdf] = "http://sws.geonames.org/#{return_hash[:id]}/about.rdf"
132
130
  end
131
+ return_hash[:original_string_differs] = Bplgeo::Standardizer.parsed_and_original_check(geo_hash)
133
132
 
134
133
  end
135
134
 
@@ -96,7 +96,7 @@ module Bplgeo
96
96
  retry_count = 3
97
97
 
98
98
  #Skip if no bing_key... possibly move this elsewhere?
99
- return return_hash if self.bing_key == '<mapquest_key>'
99
+ return return_hash if self.mapquest_key == '<mapquest_key>'
100
100
 
101
101
  return_hash[:original_term] = term
102
102
 
@@ -199,10 +199,10 @@ module Bplgeo
199
199
  if google_api_result.present?
200
200
  #Types: street number, route, neighborhood, establishment, transit_station, bus_station
201
201
  google_api_result.first.data["address_components"].each do |result|
202
- if (result['types'] & ['street number', 'route', 'neighborhood', 'establishment', 'transit_station', 'bus_station']).present?
202
+ if (result['types'] & ['street number', 'route', 'establishment', 'transit_station', 'bus_station']).present? || (result['types'].include?('neighborhood') && !result['types'].include?('political'))
203
203
  #return_hash[:term_differs_from_tgn] = true
204
204
  #TODO: Not implemented for Google results right now.
205
- return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
205
+ #return_hash[:street_part] = 'TODO: Not Implemented for Google Results'
206
206
  return_hash[:coords] = {:latitude=>google_api_result.first.data['geometry']['location']['lat'].to_s,
207
207
  :longitude=>google_api_result.first.data['geometry']['location']['lng'].to_s,
208
208
  :combined=>google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s}
@@ -212,8 +212,8 @@ module Bplgeo
212
212
  return_hash[:state_part] = result['long_name'].to_ascii
213
213
  elsif (result['types'] & ['locality']).present?
214
214
  return_hash[:city_part] = result['long_name']
215
- elsif (result['types'] & ['sublocality', 'political']).length == 2
216
- return_hash[:neighborhood_part] = result['long_name']
215
+ elsif (result['types'] & ['sublocality', 'political']).length == 2 || result['types'].include?('neighborhood')
216
+ return_hash[:neighborhood_part] = result['long_name']
217
217
  end
218
218
  end
219
219
 
@@ -12,6 +12,7 @@ module Bplgeo
12
12
 
13
13
  state_abbr_list = ['Mass']
14
14
  state_name_list = []
15
+ country_name_list = []
15
16
 
16
17
  #Countries gem of https://github.com/hexorx/countries
17
18
  Country.new('US').states.each do |state_abbr, state_names|
@@ -19,10 +20,14 @@ module Bplgeo
19
20
  state_name_list << state_names["name"]
20
21
  end
21
22
 
23
+ Country.all.each do |country_name_abbr_pair|
24
+ country_name_list << country_name_abbr_pair.first
25
+ end
26
+
22
27
  #Parsing a subject geographic term.
23
28
  if term.include?('--')
24
29
  term.split('--').each_with_index do |split_term, index|
25
- if state_name_list.any? { |state| split_term.include? state }
30
+ if state_name_list.any? { |state| split_term.include? state } || country_name_list.any? { |country| split_term.include? country }
26
31
  geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
27
32
  elsif state_abbr_list.any? { |abbr| split_term.include? abbr }
28
33
  geo_term = split_term
@@ -32,13 +37,13 @@ module Bplgeo
32
37
  #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
33
38
  elsif term.include?(' - ')
34
39
  term.split(' - ').each do |split_term|
35
- if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr }
40
+ if state_name_list.any? { |state| split_term.include? state } || state_abbr_list.any? { |abbr| split_term.include? abbr } || country_name_list.any? { |country| split_term.include? country }
36
41
  geo_term = split_term
37
42
  end
38
43
 
39
44
  end
40
45
  else
41
- if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr }
46
+ if state_name_list.any? { |state| term.include? state } || state_abbr_list.any? { |abbr| term.include? abbr } || country_name_list.any? { |country| term.include? country }
42
47
  geo_term = term
43
48
  end
44
49
  end
@@ -57,6 +62,9 @@ module Bplgeo
57
62
  #Strip any leading periods or commas from junk terms
58
63
  geo_term = geo_term.gsub(/^[\.,]+/, '').strip
59
64
 
65
+ #Replace any four TGN dashes from removing a junk term
66
+ geo_term = geo_term.gsub('----', '--')
67
+
60
68
  #Replace any semicolons with commas... possible strip them?
61
69
  geo_term = geo_term.gsub(';', ',')
62
70
 
@@ -211,5 +219,31 @@ module Bplgeo
211
219
  def self.utf8Encode(value)
212
220
  return HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' '))).strip
213
221
  end
222
+
223
+
224
+ def self.try_with_entered_names(geo_hash)
225
+ geo_hash_local = geo_hash.clone
226
+ if geo_hash_local[:neighborhood_part].present?
227
+ orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:neighborhood_part].downcase.to_ascii}
228
+ geo_hash_local[:neighborhood_part] = orig_string_check.first.strip if orig_string_check.present? && orig_string_check != geo_hash_local[:neighborhood_part]
229
+ return geo_hash_local
230
+ end
231
+
232
+ if geo_hash_local[:city_part].present?
233
+ orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:city_part].downcase.to_ascii}
234
+ geo_hash_local[:city_part] = orig_string_check.first.strip if orig_string_check.present?
235
+ return geo_hash_local
236
+ end
237
+
238
+
239
+ if geo_hash_local[:state_part].present?
240
+ orig_string_check = geo_hash_local[:standardized_term].gsub(',', ' ').squish.split(' ').select { |value| value.downcase.to_ascii == geo_hash_local[:state_part].downcase.to_ascii}
241
+ geo_hash_local[:state_part] = orig_string_check.first.strip if orig_string_check.present?
242
+ return geo_hash_local
243
+ end
244
+
245
+ return nil
246
+ end
247
+
214
248
  end
215
249
  end
@@ -7,122 +7,326 @@ module Bplgeo
7
7
  @bplgeo_config ||= YAML::load(ERB.new(IO.read(File.join(root, 'config', 'bplgeo.yml'))).result)[env].with_indifferent_access
8
8
  end
9
9
 
10
- def self.getty_username
11
- bplgeo_config[:getty_username] || '<username>'
10
+ def self.tgn_enabled
11
+ bplgeo_config[:tgn_enabled] || true
12
12
  end
13
13
 
14
- def self.getty_password
15
- bplgeo_config[:getty_password] || '<password>'
14
+ =begin
15
+ 81010/nation
16
+ 81175/state
17
+ 81165/region
18
+ 84251/neighborhood
19
+ 83002/inhabited place
20
+
21
+ nations
22
+ <http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207>
23
+
24
+ States (political divisions):
25
+ <http://vocab.getty.edu/tgn/7007517> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
26
+
27
+ Counties: (Suffolk - http://vocab.getty.edu/aat/300000771)
28
+ <http://vocab.getty.edu/tgn/1002923> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000771> .
29
+
30
+ Neighborhood: (Boston)
31
+ <http://vocab.getty.edu/tgn/7013445> <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
32
+
33
+
34
+ Provinces:
35
+ http://vocab.getty.edu/aat/300000774
36
+
37
+ Departments:
38
+ http://vocab.getty.edu/aat/300000772
39
+
40
+ Governates:
41
+ http://vocab.getty.edu/aat/300235093
42
+
43
+ Territories:
44
+ http://vocab.getty.edu/aat/300135982
45
+
46
+ + http://vocab.getty.edu/resource/getty/search?q=territory&luceneIndex=Brief&indexDataset=AAT&_form=%2Fresource%2Fgetty%2Fsearch
47
+
48
+ dependent state:
49
+ http://vocab.getty.edu/aat/300387176
50
+
51
+
52
+ union territory:
53
+ http://vocab.getty.edu/aat/300387122
54
+
55
+ national district:
56
+ http://vocab.getty.edu/aat/300387081
57
+
58
+
59
+ Roxbury:
60
+ http://vocab.getty.edu/tgn/7015002.json
61
+
62
+
63
+
64
+ #South Carolina - http://vocab.getty.edu/tgn/7007712
65
+
66
+ SELECT ?object_identifier
67
+ WHERE
68
+ {
69
+ ?x <http://purl.org/dc/elements/1.1/identifier> 7007712 .
70
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
71
+ {
72
+ SELECT ?parent_country ?identifier_country ?aat_place_id
73
+ WHERE {
74
+ ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
75
+ ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_place_id .
76
+ ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
77
+ }
78
+ GROUP BY ?parent_country
79
+ }
80
+ }
81
+ GROUP BY ?object_identifier
82
+
83
+ primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/tgn/#{tgn_id}.json")
84
+
85
+
86
+ when 'http://vocab.getty.edu/ontology#placeTypePreferred'
87
+ place_type_base[:aat_id] = ntriple['Object']['value']
88
+ when 'http://www.w3.org/2004/02/skos/core#prefLabel'
89
+ if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
90
+ place_type_base[:label_en] = ntriple['Object']['value']
91
+ else if ntriple['Object']['xml:lang'].blank?
92
+ place_type_base[:label_default] = ntriple['Object']['value']
93
+
94
+
95
+ tgn_main_term_info = {}
96
+ broader_place_type_list = ["http://vocab.getty.edu/tgn/"#{tgn_id}]
97
+
98
+ primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
99
+ as_json_tgn_response = JSON.parse(primary_tgn_response.body)
100
+
101
+ as_json_tgn_response['results']['bindings'].each do |ntriple|
102
+ case ntriple['Predicate']['value']
103
+ when 'http://www.w3.org/2004/02/skos/core#prefLabel'
104
+ if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
105
+ tgn_main_term_info[:label_en] = ntriple['Object']['value']
106
+ elsif ntriple['Object']['xml:lang'].blank?
107
+ tgn_main_term_info[:label_default] = ntriple['Object']['value']
16
108
  end
109
+ when 'http://vocab.getty.edu/ontology#placeTypePreferred'
110
+ tgn_main_term_info[:aat_place] = ntriple['Object']['value']
111
+ when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
112
+ broader_place_type_list << ntriple['Object']['value']
113
+ end
114
+
115
+ end
116
+
117
+ query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
118
+
119
+ broader_place_type_list.each do |place_uri|
120
+ query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
121
+ OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
122
+ FILTER langMatches( lang(?place_label_en), "en" )
123
+ }
124
+ OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
125
+ FILTER langMatches( lang(?place_label_default), "" )
126
+ }
127
+ <#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
128
+ } UNION
129
+ }
130
+ end
131
+
132
+ query = query[0..-12]
133
+ query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
134
+
135
+ tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
136
+ as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
137
+
138
+ as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
139
+ #aat_response['identifier_place']['value']
140
+ #aat_response['place_label_default']['value']
141
+ #....
142
+ end
143
+
144
+
145
+
146
+
147
+
148
+ EXAMPLE SPARQL:
149
+
150
+ SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref
151
+ WHERE {
152
+ {<http://vocab.getty.edu/tgn/1000001> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
153
+ OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
154
+ FILTER langMatches( lang(?place_label_en), "en" )
155
+ }
156
+ OPTIONAL {<http://vocab.getty.edu/tgn/1000001> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
157
+ FILTER langMatches( lang(?place_label_default), "" )
158
+ }
159
+ <http://vocab.getty.edu/tgn/1000001> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
160
+ } UNION
161
+ {<http://vocab.getty.edu/tgn/7012149> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
162
+ OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
163
+ FILTER langMatches( lang(?place_label_en), "en" )
164
+ }
165
+ OPTIONAL {<http://vocab.getty.edu/tgn/7012149> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
166
+ FILTER langMatches( lang(?place_label_default), "" )
167
+ }
168
+ <http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
169
+ } UNION
170
+ {<http://vocab.getty.edu/tgn/7029392> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
171
+ OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
172
+ FILTER langMatches( lang(?place_label_en), "en" )
173
+ }
174
+ OPTIONAL {<http://vocab.getty.edu/tgn/7029392> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
175
+ FILTER langMatches( lang(?place_label_default), "" )
176
+ }
177
+ <http://vocab.getty.edu/tgn/7012149> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
178
+ } .
179
+
180
+
181
+ }
182
+ GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref
183
+
184
+
185
+
186
+
187
+ =end
17
188
 
18
- # retrieve data from Getty TGN to populate <mods:subject auth="tgn">
19
189
  def self.get_tgn_data(tgn_id)
20
- tgn_response = Typhoeus::Request.get('http://vocabsservices.getty.edu/TGNService.asmx/TGNGetSubject?subjectID=' + tgn_id, userpwd: self.getty_username + ':' + self.getty_password)
21
- unless tgn_response.code == 500
22
- tgnrec = Nokogiri::XML(tgn_response.body)
23
- #puts tgnrec.to_s
24
-
25
- # coordinates
26
- if tgnrec.at_xpath("//Coordinates")
27
- coords = {}
28
- coords[:latitude] = tgnrec.at_xpath("//Latitude/Decimal").children.to_s
29
- coords[:longitude] = tgnrec.at_xpath("//Longitude/Decimal").children.to_s
30
- coords[:combined] = coords[:latitude] + ',' + coords[:longitude]
31
- else
32
- coords = nil
190
+ return nil if Bplgeo::TGN.tgn_enabled != true
191
+
192
+ tgn_main_term_info = {}
193
+ #broader_place_type_list = ["http://vocab.getty.edu/tgn/#{tgn_id}"]
194
+ broader_place_type_list = []
195
+
196
+ primary_tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/download/json", :params=>{:uri=>"http://vocab.getty.edu/tgn/#{tgn_id}.json"})
197
+
198
+ return nil if(primary_tgn_response.response_code == 404) #Couldn't find TGN... FIXME: additional check needed if TGN is down?
199
+
200
+ as_json_tgn_response = JSON.parse(primary_tgn_response.body)
201
+
202
+ as_json_tgn_response['results']['bindings'].each do |ntriple|
203
+ case ntriple['Predicate']['value']
204
+ when 'http://www.w3.org/2004/02/skos/core#prefLabel'
205
+ if ntriple['Object']['xml:lang'].present? && ntriple['Object']['xml:lang'] == 'en'
206
+ tgn_main_term_info[:label_en] = ntriple['Object']['value']
207
+ elsif ntriple['Object']['xml:lang'].blank?
208
+ tgn_main_term_info[:label_default] = ntriple['Object']['value']
209
+ end
210
+ when 'http://vocab.getty.edu/ontology#placeTypePreferred'
211
+ tgn_main_term_info[:aat_place] = ntriple['Object']['value']
212
+ when 'http://schema.org/latitude'
213
+ tgn_main_term_info[:latitude] = ntriple['Object']['value']
214
+ when 'http://schema.org/longitude'
215
+ tgn_main_term_info[:longitude] = ntriple['Object']['value']
216
+ when 'http://vocab.getty.edu/ontology#broaderPreferredExtended'
217
+ broader_place_type_list << ntriple['Object']['value']
33
218
  end
34
219
 
35
- hier_geo = {}
36
-
37
- #main term
38
- if tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text")
39
- tgn_term_type = tgnrec.at_xpath("//Preferred_Place_Type/Place_Type_ID").children.to_s
40
- pref_term_langs = tgnrec.xpath("//Terms/Preferred_Term/Term_Languages/Term_Language/Language")
41
- # if the preferred term is the preferred English form, use that
42
- if pref_term_langs.children.to_s.include? "English"
43
- tgn_term = tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
44
- else # use the non-preferred term which is the preferred English form
45
- if tgnrec.xpath("//Terms/Non-Preferred_Term")
46
- non_pref_terms = tgnrec.xpath("//Terms/Non-Preferred_Term")
47
- non_pref_terms.each do |non_pref_term|
48
- non_pref_term_langs = non_pref_term.children.css("Term_Language")
49
- # have to loop through these, as sometimes languages share form
50
- non_pref_term_langs.each do |non_pref_term_lang|
51
- if non_pref_term_lang.children.css("Preferred").children.to_s == "Preferred" && non_pref_term_lang.children.css("Language").children.to_s == "English"
52
- tgn_term = non_pref_term.children.css("Term_Text").children.to_s
53
- end
54
- end
55
- end
220
+ end
221
+
222
+ # coordinates
223
+ coords = nil
224
+ if tgn_main_term_info[:latitude].present?
225
+ coords = {}
226
+ coords[:latitude] = tgn_main_term_info[:latitude]
227
+ coords[:longitude] = tgn_main_term_info[:longitude]
228
+ coords[:combined] = tgn_main_term_info[:latitude] + ',' + tgn_main_term_info[:longitude]
229
+ end
230
+
231
+ hier_geo = {}
232
+ tgn_term = tgn_main_term_info[:label_en].present? ? tgn_main_term_info[:label_en] : tgn_main_term_info[:label_default]
233
+ tgn_term_type = tgn_main_term_info[:aat_place].split('/').last
234
+
235
+ #Initial Term
236
+ if tgn_term.present? && tgn_term_type.present?
237
+ case tgn_term_type
238
+ when '300128176' #continent
239
+ hier_geo[:continent] = tgn_term
240
+ when '300128207' #nations
241
+ hier_geo[:country] = tgn_term
242
+ when '300000774' #province
243
+ hier_geo[:province] = tgn_term
244
+ when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
245
+ hier_geo[:region] = tgn_term
246
+ when '300000776', '300000772', '300235093' #state, department, governorate
247
+ hier_geo[:state] = tgn_term
248
+ when '300387081' #national district
249
+ if tgn_term == 'District of Columbia'
250
+ hier_geo[:state] = tgn_term
251
+ else
252
+ hier_geo[:territory] = tgn_term
56
253
  end
57
- end
58
- # if no term is the preferred English form, just use the preferred term
59
- tgn_term ||= tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
254
+ when '300135982', '300387176', '300387122' #territory, dependent state, union territory
255
+ hier_geo[:territory] = tgn_term
256
+ when '300000771' #county
257
+ hier_geo[:county] = tgn_term
258
+ when '300008347' #inhabited place
259
+ hier_geo[:city] = tgn_term
260
+ when '300000745' #neighborhood
261
+ hier_geo[:city_section] = tgn_term
262
+ when '300008791', '300387062' #island
263
+ hier_geo[:island] = tgn_term
264
+ when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
265
+ hier_geo[:area] = tgn_term
266
+ else
267
+ non_hier_geo = tgn_term
268
+ end
269
+
270
+ #Broader places
271
+
272
+ query = "SELECT ?identifier_place ?place_label_default ?place_label_en ?aat_pref WHERE {"
273
+
274
+ broader_place_type_list.each do |place_uri|
275
+ query += %{{<#{place_uri}> <http://purl.org/dc/elements/1.1/identifier> ?identifier_place .
276
+ OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_en
277
+ FILTER langMatches( lang(?place_label_en), "en" )
278
+ }
279
+ OPTIONAL {<#{place_uri}> <http://www.w3.org/2004/02/skos/core#prefLabel> ?place_label_default
280
+ FILTER langMatches( lang(?place_label_default), "" )
281
+ }
282
+ <#{place_uri}> <http://vocab.getty.edu/ontology#placeTypePreferred> ?aat_pref
283
+ } UNION
284
+ }
60
285
  end
61
- if tgn_term && tgn_term_type
286
+
287
+ query = query[0..-12]
288
+ query += ". } GROUP BY ?identifier_place ?place_label_default ?place_label_en ?aat_pref"
289
+
290
+ tgn_response_for_aat = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
291
+ as_json_tgn_response_for_aat = JSON.parse(tgn_response_for_aat.body)
292
+
293
+ as_json_tgn_response_for_aat["results"]["bindings"].each do |aat_response|
294
+ tgn_term_type = aat_response['aat_pref']['value'].split('/').last
295
+ if aat_response['place_label_en'].present? && aat_response['place_label_en']['value'] != '-'
296
+ tgn_term = aat_response['place_label_en']['value']
297
+ else
298
+ tgn_term = aat_response['place_label_default']['value']
299
+ end
300
+
62
301
  case tgn_term_type
63
- when '29000/continent'
302
+ when '300128176' #continent
64
303
  hier_geo[:continent] = tgn_term
65
- when '81010/nation'
304
+ when '300128207' #nation
66
305
  hier_geo[:country] = tgn_term
67
- when '81161/province'
306
+ when '300000774' #province
68
307
  hier_geo[:province] = tgn_term
69
- when '81165/region', '82193/union', '80005/semi-independent political entity'
308
+ when '300236112', '300182722', '300387194', '300387052' #region, union, semi-independent political entity
70
309
  hier_geo[:region] = tgn_term
71
- when '81175/state', '81117/department', '82133/governorate'
310
+ when '300000776', '300000772', '300235093' #state, department, governorate
72
311
  hier_geo[:state] = tgn_term
73
- when '81125/national district'
312
+ when '300387081' #national district
74
313
  if tgn_term == 'District of Columbia'
75
314
  hier_geo[:state] = tgn_term
76
315
  else
77
316
  hier_geo[:territory] = tgn_term
78
317
  end
79
- when '81181/territory', '81021/dependent state', '81186/union territory'
318
+ when '300135982', '300387176', '300387122' #territory, dependent state, union territory
80
319
  hier_geo[:territory] = tgn_term
81
- when '81115/county'
320
+ when '300000771' #county
82
321
  hier_geo[:county] = tgn_term
83
- when '83002/inhabited place'
322
+ when '300008347' #inhabited place
84
323
  hier_geo[:city] = tgn_term
85
- when '84251/neighborhood'
324
+ when '300000745' #neighborhood
86
325
  hier_geo[:city_section] = tgn_term
87
- when '21471/island'
326
+ when '300008791', '300387062' #island
88
327
  hier_geo[:island] = tgn_term
89
- when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
328
+ when '300387575', '300387346', '300167671', '300387178', '300387082' #'81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
90
329
  hier_geo[:area] = tgn_term
91
- else
92
- non_hier_geo = tgn_term
93
- end
94
- end
95
-
96
- # parent data for <mods:hierarchicalGeographic>
97
- if tgnrec.at_xpath("//Parent_String")
98
- parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
99
- parents.each do |parent|
100
- if parent.include? '(continent)'
101
- hier_geo[:continent] = parent
102
- elsif parent.include? '(nation)'
103
- hier_geo[:country] = parent
104
- elsif parent.include? '(province)'
105
- hier_geo[:province] = parent
106
- elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
107
- hier_geo[:region] = parent
108
- elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)') || (parent.include?('(national district)') && parent.include?('District of Columbia'))
109
- hier_geo[:state] = parent
110
- elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
111
- hier_geo[:territory] = parent
112
- elsif parent.include? '(county)'
113
- hier_geo[:county] = parent
114
- elsif parent.include? '(inhabited place)'
115
- hier_geo[:city] = parent
116
- elsif parent.include? '(neighborhood)'
117
- hier_geo[:city_section] = parent
118
- elsif parent.include? '(island)'
119
- hier_geo[:island] = parent
120
- elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
121
- hier_geo[:area] = parent
122
- end
123
- end
124
- hier_geo.each do |k,v|
125
- hier_geo[k] = v.gsub(/ \(.*/,'')
126
330
  end
127
331
  end
128
332
 
@@ -141,8 +345,9 @@ module Bplgeo
141
345
 
142
346
  end
143
347
 
348
+
144
349
  def self.tgn_id_from_geo_hash(geo_hash)
145
- return nil if Bplgeo::TGN.getty_username == '<username>'
350
+ return nil if Bplgeo::TGN.tgn_enabled != true
146
351
 
147
352
  geo_hash = geo_hash.clone
148
353
 
@@ -166,139 +371,202 @@ module Bplgeo
166
371
 
167
372
  neighborhood_part = geo_hash[:neighborhood_part]
168
373
 
169
- top_match_term = ''
170
- match_term = nil
374
+
171
375
 
172
376
  if city_part.blank? && state_part.blank?
173
377
  # Limit to nations
174
- place_type = 81010
175
- top_match_term = ['']
176
- second_top_match_term = ''
177
- match_term = country_part.to_ascii.downcase
378
+ query = %{SELECT ?object_identifier
379
+ WHERE
380
+ {
381
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
382
+ ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
383
+ ?x <http://www.w3.org/2004/02/skos/core#prefLabel> ?object_label .
384
+ FILTER regex(?object_label, "^#{country_part}$", "i" )
385
+ }}
178
386
  elsif state_part.present? && city_part.blank? && country_code == 7012149
179
387
  #Limit to states
180
- place_type = 81175
181
- top_match_term = ["#{country_part.to_ascii.downcase} (nation)"]
182
- second_top_match_term = ["#{country_part.to_ascii.downcase} (nation)"]
183
- match_term = state_part.to_ascii.downcase
388
+ query = %{SELECT ?object_identifier
389
+ WHERE
390
+ {
391
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
392
+ ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776> .
393
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
394
+ FILTER regex(?object_label, "^#{state_part}$", "i" )
395
+
396
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> <http://vocab.getty.edu/tgn/7012149> .
397
+ }}
184
398
  elsif state_part.present? && city_part.blank?
185
- #Limit to regions
186
- place_type = 81165
187
- top_match_term = ["#{country_part.to_ascii.downcase} (nation)"]
188
- second_top_match_term = ["#{country_part.to_ascii.downcase} (nation)"]
189
- match_term = state_part.to_ascii.downcase
399
+ #Limit to regions
400
+
401
+ query = %{SELECT ?object_identifier
402
+ WHERE
403
+ {
404
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
405
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
406
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
407
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
408
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
409
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
410
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
411
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
412
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
413
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
414
+ {?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
415
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
416
+ FILTER regex(?object_label, "^#{state_part}$", "i" )
417
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
418
+ {
419
+ SELECT ?parent_country ?identifier_country
420
+ WHERE {
421
+ ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
422
+ ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
423
+ ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
424
+ FILTER regex(?country_label, "^#{country_part}$", "i" )
425
+ }
426
+
427
+ }
428
+ }
429
+ GROUP BY ?object_identifier
430
+ }
431
+
432
+ #FIXME Temporary: For Bplgeo.parse('Aknīste (Latvia)', true), seems to be a neighborhood placed in state
433
+ # {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
190
434
  elsif state_part.present? && city_part.present? && neighborhood_part.blank?
191
435
  #Limited to only inhabited places at the moment...
192
- place_type = 83002
193
- sp = state_part.to_ascii.downcase
194
- top_match_term = ["#{sp} (state)", "#{sp} (department)", "#{sp} (governorate)", "#{sp} (territory)", "#{sp} (dependent state)", "#{sp} (union territory)", "#{sp} (national district)", "#{sp} (province)"]
195
- second_top_match_term = ["#{country_part.to_ascii.downcase} (nation)"]
196
- match_term = city_part.to_ascii.downcase
436
+ query = %{SELECT ?object_identifier
437
+ WHERE
438
+ {
439
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
440
+ ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
441
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
442
+ FILTER regex(?object_label, "^#{city_part}$", "i" )
443
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
444
+ {
445
+ SELECT ?parent_country ?identifier_country
446
+ WHERE {
447
+ ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
448
+ ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
449
+ ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
450
+ FILTER regex(?country_label, "^#{country_part}$", "i" )
451
+ }
452
+
453
+ }
454
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
455
+ {
456
+ SELECT ?parent_state ?identifier_state
457
+ WHERE {
458
+ ?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
459
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
460
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
461
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
462
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
463
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
464
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
465
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
466
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
467
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
468
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
469
+ ?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
470
+ FILTER regex(?state_label, "^#{state_part}$", "i" )
471
+ }
472
+
473
+ }
474
+
475
+ }
476
+ GROUP BY ?object_identifier
477
+ }
478
+
479
+
197
480
  elsif state_part.present? && city_part.present? && neighborhood_part.present?
198
481
  #Limited to only neighborhoods currently...
199
- place_type = 84251
200
- top_match_term = ["#{city_part.to_ascii.downcase} (inhabited place)"]
201
- sp = neighborhood_part.to_ascii.downcase
202
- second_top_match_term = ["#{sp} (state)", "#{sp} (department)", "#{sp} (governorate)", "#{sp} (territory)", "#{sp} (dependent state)", "#{sp} (union territory)", "#{sp} (national district)", "#{sp} (province)"]
203
- match_term = neighborhood_part.to_ascii.downcase
482
+ query = %{SELECT ?object_identifier
483
+ WHERE
484
+ {
485
+ ?x <http://purl.org/dc/elements/1.1/identifier> ?object_identifier .
486
+ ?x <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000745> .
487
+ ?x <http://www.w3.org/2000/01/rdf-schema#label> ?object_label .
488
+ FILTER regex(?object_label, "^#{neighborhood_part}$", "i" )
489
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_country .
490
+ {
491
+ SELECT ?parent_country ?identifier_country
492
+ WHERE {
493
+ ?parent_country <http://purl.org/dc/elements/1.1/identifier> ?identifier_country .
494
+ ?parent_country <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300128207> .
495
+ ?parent_country <http://www.w3.org/2000/01/rdf-schema#label> ?country_label .
496
+ FILTER regex(?country_label, "^#{country_part}$", "i" )
497
+ }
498
+
499
+ }
500
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_state .
501
+ {
502
+ SELECT ?parent_state ?identifier_state
503
+ WHERE {
504
+ ?parent_state <http://purl.org/dc/elements/1.1/identifier> ?identifier_state .
505
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000774>} UNION
506
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000772>} UNION
507
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300235093>} UNION
508
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300135982>} UNION
509
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387176>} UNION
510
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387122>} UNION
511
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300000776>} UNION
512
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300236112>} UNION
513
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347>} UNION
514
+ {?parent_state <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300387081>} .
515
+ ?parent_state <http://www.w3.org/2000/01/rdf-schema#label> ?state_label .
516
+ FILTER regex(?state_label, "^#{state_part}$", "i" )
517
+ }
518
+
519
+ }
520
+
521
+ ?x <http://vocab.getty.edu/ontology#broaderPreferredExtended> ?parent_city .
522
+ {
523
+ SELECT ?parent_city ?identifier_city
524
+ WHERE {
525
+ ?parent_city <http://purl.org/dc/elements/1.1/identifier> ?identifier_city .
526
+ ?parent_city <http://vocab.getty.edu/ontology#placeTypePreferred> <http://vocab.getty.edu/aat/300008347> .
527
+ ?parent_city <http://www.w3.org/2000/01/rdf-schema#label> ?city_label .
528
+ FILTER regex(?city_label, "^#{city_part}$", "i" )
529
+ }
530
+
531
+ }
532
+
533
+ }
534
+ GROUP BY ?object_identifier
535
+ }
536
+
537
+
204
538
  else
205
539
  return nil
206
540
  end
207
541
 
208
542
  begin
543
+
209
544
  if retry_count > 0
210
545
  sleep(sleep_time)
211
546
  end
212
547
  retry_count = retry_count + 1
213
548
 
214
- tgn_response = Typhoeus::Request.get("http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?placetypeid=#{place_type}&nationid=#{country_code}&name=" + CGI.escape(match_term), userpwd: self.getty_username + ':' + self.getty_password)
215
-
549
+ tgn_response = Typhoeus::Request.get("http://vocab.getty.edu/sparql.json", :params=>{:query=>query})
216
550
 
217
551
  end until (tgn_response.code != 500 || retry_count == max_retry)
218
552
 
219
- unless tgn_response.code == 500
220
- parsed_xml = Nokogiri::Slop(tgn_response.body)
221
553
 
222
- #This is ugly and needs to be redone to achieve better recursive...
223
- if parsed_xml.Vocabulary.Count.text == '0'
224
- if neighborhood_part.present?
225
- geo_hash[:neighborhood_part] = nil
226
- geo_hash = tgn_id_from_geo_hash(geo_hash)
227
- elsif city_part.present?
228
- geo_hash[:city_part] = nil
229
- geo_hash = tgn_id_from_geo_hash(geo_hash)
230
- end
231
-
232
- return nil
233
- end
234
-
235
- #If only one result, then not array. Otherwise array....
236
- if parsed_xml.Vocabulary.Subject.first.blank?
237
- subject = parsed_xml.Vocabulary.Subject
238
-
239
- current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
240
- alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
241
-
242
- #FIXME: Term should check for the correct level... temporary fix...
243
- if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
244
- return_hash[:id] = subject.Subject_ID.text
245
- #Check alternative term ids
246
- elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
247
- return_hash[:id] = subject.Subject_ID.text
248
- elsif current_term == match_term && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
249
- return_hash[:id] = subject.Subject_ID.text
250
- elsif alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
251
- return_hash[:id] = subject.Subject_ID.text
252
- end
253
- else
254
- parsed_xml.Vocabulary.Subject.each do |subject|
255
-
256
- current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
257
- alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
258
-
259
- if current_term == match_term && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
260
- return_hash[:id] = subject.Subject_ID.text
261
- end
262
- end
263
-
264
- if return_hash[:id].blank?
265
- parsed_xml.Vocabulary.Subject.each do |subject|
266
- current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
267
- alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
268
-
269
- if alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
270
- return_hash[:id] = subject.Subject_ID.text
271
- end
272
- end
273
- end
274
554
 
275
- if return_hash[:id].blank?
276
- parsed_xml.Vocabulary.Subject.each do |subject|
277
- current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
278
- alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
279
555
 
280
- if current_term == match_term && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
281
- return_hash[:id] = subject.Subject_ID.text
282
- end
283
- end
284
- end
285
-
286
- if return_hash[:id].blank?
287
- parsed_xml.Vocabulary.Subject.each do |subject|
288
- current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
289
- alternative_terms = subject.elements.any? { |node| node.name == 'Term' } ? subject.Term : ''
556
+ unless tgn_response.code == 500
557
+ as_json = JSON.parse(tgn_response.body)
290
558
 
291
- if alternative_terms.present? && alternative_terms.children.any? { |alt_term| alt_term.text.to_ascii.downcase.strip == match_term} && second_top_match_term.any? { |top_match| subject.Preferred_Parent.text.to_ascii.downcase.include? top_match }
292
- return_hash[:id] = subject.Subject_ID.text
293
- end
294
- end
295
- end
559
+ #This is ugly and needs to be redone to achieve better recursion...
560
+ if as_json["results"]["bindings"].present? && as_json["results"]["bindings"].first["object_identifier"].present?
561
+ return_hash[:id] = as_json["results"]["bindings"].first["object_identifier"]["value"]
562
+ return_hash[:rdf] = "http://vocab.getty.edu/tgn/#{return_hash[:id]}.rdf"
563
+ else
564
+ return nil
296
565
  end
297
-
298
566
  end
299
567
 
300
568
  if tgn_response.code == 500
301
- raise 'TGN Server appears to not be responding for Geographic query: ' + term
569
+ raise 'TGN Server appears to not be responding for Geographic query: ' + query
302
570
  end
303
571
 
304
572
  if return_hash.present?