audumbla 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/audumbla.rb +4 -0
- data/lib/audumbla/enrichments.rb +1 -4
- data/lib/audumbla/enrichments/coarse_geocode.rb +244 -0
- data/lib/audumbla/field_enrichment.rb +13 -1
- data/lib/audumbla/version.rb +1 -1
- data/spec/fixtures/georgia.yml +276 -0
- data/spec/lib/audumbla/enrichments/coarse_geocode_spec.rb +105 -0
- metadata +50 -5
- data/lib/audumbla/enrichments/geocode.rb~ +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 100602e369b80c14118de38514d6273bad179cde
|
4
|
+
data.tar.gz: 4e2e0efbf018f7d632d460b9727f691578016a90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ad72a4e8bb400e99d79e74c2fff277ec80d55cbed4ffb3771641e8f0bba56a2c20e8483201cdda17a16738fb389e04299ce925e40dcc384db73183c05b6632f
|
7
|
+
data.tar.gz: 34352f392ae17552d14511d630df1d50cdccec42d98a6193da0f5267a4e047855c033a9592ad181d1105d5008877449d1c521c306995736d93b24d7df3fc3aa6
|
data/lib/audumbla.rb
CHANGED
data/lib/audumbla/enrichments.rb
CHANGED
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'twofishes'
|
2
|
+
require 'geokit'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module Audumbla::Enrichments
|
6
|
+
##
|
7
|
+
# Enriches a `DPLA::MAP::Place` node by running its data through external
|
8
|
+
# geocoders, using heuristics to determine a matching feature from GeoNames,
|
9
|
+
# and repopulating the `Place` with related data.
|
10
|
+
#
|
11
|
+
# If the existing `Place` contains data other than a `providedLabel`, that
|
12
|
+
# data will be used as context for evaluating interpretations. For example:
|
13
|
+
# a `Place` with an existing latitude and longitude will verify that the
|
14
|
+
# point is within the bounding box for a candidate match.
|
15
|
+
#
|
16
|
+
# `skos:exactMatch` are reserved for the GeoNames features returned by the
|
17
|
+
# geocoder. Other matching URIs (currently: LC authorities) are included as
|
18
|
+
# `skos:closeMatch`
|
19
|
+
#
|
20
|
+
# Configuration is handled through a YAML file passed into the initializer
|
21
|
+
# (default: 'geocode.yml'). The options are:
|
22
|
+
# - 'twofishes_host': the hostname for the twofishes server (default:
|
23
|
+
# 'localhost')
|
24
|
+
# - 'twofishes_port': the port of the twofishes geocode endpoint (default:
|
25
|
+
# 8080)
|
26
|
+
# - 'twofishes_timeout': request timeout in seconds (default: 10)
|
27
|
+
# - 'twofishes_retries': request retry maximum for twofishes (default: 2)
|
28
|
+
# - 'distance_threshold': the maximum distance between a set of coordinates
|
29
|
+
# in the input object and a candidate match before we judge it a
|
30
|
+
# false positive, given in kilometers. (default: 100)
|
31
|
+
# - 'max_interpretations': the number of geocoded "interpretations" to
|
32
|
+
# request from the server; these are the places that will be considered
|
33
|
+
# by the internal heuristics (default: 5).
|
34
|
+
#
|
35
|
+
# @example enriching from a `#providedLabel`
|
36
|
+
#
|
37
|
+
# place = DPLA::MAP::Place.new.tap { |p| p.providedLabel = 'Georgia' }
|
38
|
+
# CoarseGeocode.new.enrich_value(place).dump :ttl
|
39
|
+
# # [
|
40
|
+
# # a <http://www.europeana.eu/schemas/edm/Place>;
|
41
|
+
# # <http://dp.la/about/map/providedLabel> "Georgia";
|
42
|
+
# # <http://www.geonames.org/ontology#countryCode> "US";
|
43
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 3.275042e1;
|
44
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#long> -8.350018e1;
|
45
|
+
# # <http://www.w3.org/2004/02/skos/core#closeMatch> <http://id.loc.gov/authorities/names/n79023113>;
|
46
|
+
# # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/4197000/>;
|
47
|
+
# # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia, United States"
|
48
|
+
# # ] .
|
49
|
+
#
|
50
|
+
# @example enriching from a `#providedLabel` with lat/lng guidance
|
51
|
+
#
|
52
|
+
# place = DPLA::MAP::Place.new.tap do |p|
|
53
|
+
# p.providedLabel = 'Georgia'
|
54
|
+
# p.lat = 41.9997
|
55
|
+
# p.long = 43.4998
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# CoarseGeocode.new.enrich_value(place).dump :ttl
|
59
|
+
# # [
|
60
|
+
# # a <http://www.europeana.eu/schemas/edm/Place>;
|
61
|
+
# # <http://dp.la/about/map/providedLabel> "Georgia";
|
62
|
+
# # <http://www.geonames.org/ontology#countryCode> "GE";
|
63
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 4.199998e1;
|
64
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#long> 4.34999e1;
|
65
|
+
# # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/614540/>;
|
66
|
+
# # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia"
|
67
|
+
# # ] .
|
68
|
+
#
|
69
|
+
class CoarseGeocode
|
70
|
+
include Audumbla::FieldEnrichment
|
71
|
+
|
72
|
+
DEFAULT_DISTANCE_THRESHOLD_KMS = 100
|
73
|
+
DEFAULT_MAX_INTERPRETATIONS = 5
|
74
|
+
DEFAULT_TWOFISHES_HOST = 'localhost'
|
75
|
+
DEFAULT_TWOFISHES_PORT = 8080
|
76
|
+
DEFAULT_TWOFISHES_TIMEOUT = 10
|
77
|
+
DEFAULT_TWOFISHES_RETRIES = 2
|
78
|
+
|
79
|
+
##
|
80
|
+
# @param [String] config_file a path to a config file for the geocoder;
|
81
|
+
# default: 'geocode.yml'
|
82
|
+
def initialize(config_file = 'geocode.yml')
|
83
|
+
config = YAML.load_file(config_file)
|
84
|
+
|
85
|
+
@distance_threshold = config.fetch('distance_threshold',
|
86
|
+
DEFAULT_DISTANCE_THRESHOLD_KMS)
|
87
|
+
@max_interpretations = config.fetch('max_interpretations',
|
88
|
+
DEFAULT_MAX_INTERPRETATIONS)
|
89
|
+
|
90
|
+
Twofishes.configure do |twofish|
|
91
|
+
twofish.host = config.fetch('twofishes_host', DEFAULT_TWOFISHES_HOST)
|
92
|
+
twofish.port = config.fetch('twofishes_port', DEFAULT_TWOFISHES_PORT)
|
93
|
+
twofish.timeout = config.fetch('twofishes_timeout',
|
94
|
+
DEFAULT_TWOFISHES_TIMEOUT)
|
95
|
+
twofish.retries = config.fetch('twofishes_retries',
|
96
|
+
DEFAULT_TWOFISHES_RETRIES)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
##
|
101
|
+
# Enriches the given value against the TwoFishes coarse geocoder. This
|
102
|
+
# process adds a `skos:exactMatch` for a matching GeoNames URI, if any, and
|
103
|
+
# populates the remaining place data to the degree possible from the matched
|
104
|
+
# feature.
|
105
|
+
#
|
106
|
+
# Considers a number of matches specified by `@max_interpretations` and
|
107
|
+
# returned by Twofishes, via `#match?`.
|
108
|
+
#
|
109
|
+
# @param [DPLA::MAP::Place] value the place to geocode
|
110
|
+
#
|
111
|
+
# @return [DPLA::MAP::Place] the initial place, enriched via coarse geocoding
|
112
|
+
def enrich_value(value)
|
113
|
+
return value unless value.is_a? DPLA::MAP::Place
|
114
|
+
interpretations = geocode(value.providedLabel.first,
|
115
|
+
[],
|
116
|
+
maxInterpretations: @max_interpretations)
|
117
|
+
match = interpretations.find { |interp| match?(interp, value) }
|
118
|
+
match.nil? ? value : enrich_place(value, match.feature)
|
119
|
+
end
|
120
|
+
|
121
|
+
##
|
122
|
+
# Checks that we are satisfied with the geocoder's best matches prior to
|
123
|
+
# acceptance. Most tweaks to the geocoding process should be taken care
|
124
|
+
# of at the geocoder itself, but a simple accept/reject of the points
|
125
|
+
# offered is possible here. This allows existing data about the place
|
126
|
+
# to be used as context.
|
127
|
+
#
|
128
|
+
# For example, this method returns false if `place` contains latitude
|
129
|
+
# and longitude, but the candidate match has a geometry far away from those
|
130
|
+
# given. "far away" is defined by `@distance_threshold` from the center of the
|
131
|
+
# candidate feature to the point given by `#lat` and `#long` in `place`.
|
132
|
+
#
|
133
|
+
# @param [GeocodeInterpretation] interpretation a twofishes interpretation
|
134
|
+
# @param [#lat#long] place a place to verify a match against
|
135
|
+
#
|
136
|
+
# @return [Boolean] true if the interpretation is accepted
|
137
|
+
def match?(interpretation, place)
|
138
|
+
return true if place.lat.empty? || place.long.empty?
|
139
|
+
|
140
|
+
point = Geokit::LatLng.new(place.lat.first, place.long.first)
|
141
|
+
if interpretation.geometry.bounds.nil?
|
142
|
+
# measure distance between point centers
|
143
|
+
distance = twofishes_point_to_geokit(interpretation.geometry.center)
|
144
|
+
.distance_to(point, unit: :kms)
|
145
|
+
return distance < @distance_threshold
|
146
|
+
end
|
147
|
+
|
148
|
+
twofishes_bounds_to_geokit(interpretation.geometry.bounds)
|
149
|
+
.contains?(point)
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
##
|
155
|
+
# Populates a DPLA::MAP::Place with data from a given feature. This
|
156
|
+
# overwrites existing data with the exception of the identity (URI or node
|
157
|
+
# id) and the `providedLabel`. `exactMatch`, `closeMatch`, `label`
|
158
|
+
# (skos:prefLabel) and all other geographic data is replaced.
|
159
|
+
#
|
160
|
+
# @param [DPLA::MAP::Place] place a place to enrich
|
161
|
+
# @param [GeocodeFeature] feature a twofishes feature whose data should be
|
162
|
+
# added to place.
|
163
|
+
#
|
164
|
+
# @return [DPLA::MAP::Place] the original place enriched
|
165
|
+
def enrich_place(place, feature)
|
166
|
+
place.label = feature.display_name
|
167
|
+
place.exactMatch = feature_to_geoname_uris(feature)
|
168
|
+
place.closeMatch = feature_to_close_matches(feature,
|
169
|
+
/^http\:\/\/id\.loc\.gov\/.*/)
|
170
|
+
place.countryCode = feature.cc
|
171
|
+
place.lat = feature.geometry.center.lat
|
172
|
+
place.long = feature.geometry.center.lng
|
173
|
+
|
174
|
+
place
|
175
|
+
end
|
176
|
+
|
177
|
+
##
|
178
|
+
# Extracts geonameids for the given feature and converts them into URIs
|
179
|
+
#
|
180
|
+
# @param [GeocodeFeature] feature the feature to identify
|
181
|
+
#
|
182
|
+
# @return [Array<RDF::URI>] a list of geoname URIs. Generally, this will only
|
183
|
+
# contain one exactly matching geonameid in URI form.
|
184
|
+
def feature_to_geoname_uris(feature)
|
185
|
+
geoname_ids = feature.ids.select { |id| id.source == :geonameid.to_s }
|
186
|
+
geoname_ids.map { |id| RDF::URI('http://sws.geonames.org') / id.id + '/' }
|
187
|
+
end
|
188
|
+
|
189
|
+
##
|
190
|
+
# Extracts URIs for closely matching terms in other authority or knowledge
|
191
|
+
# organization systems
|
192
|
+
#
|
193
|
+
# @param [GeocodeFeature] feature the feature to identify
|
194
|
+
# @param [Regexp] patterns a splat argument containing any number of
|
195
|
+
# patterns matching
|
196
|
+
#
|
197
|
+
# @return [Array<RDF::URI>] a list of matching ids
|
198
|
+
def feature_to_close_matches(feature, *patterns)
|
199
|
+
union = Regexp.union(patterns)
|
200
|
+
feature.attributes.urls.select { |str| union.match(str) }
|
201
|
+
.map { |id| RDF::URI(id) }
|
202
|
+
end
|
203
|
+
|
204
|
+
##
|
205
|
+
# Sends a geocode request. This is used in lieu of `Twofishes#geocode`,
|
206
|
+
# since that method does not allow passing parameters other than
|
207
|
+
# `responseIncludes`.
|
208
|
+
#
|
209
|
+
# @param [#to_s] location the string to try to match
|
210
|
+
# @param [Array] includes a list of twofishes include constants
|
211
|
+
# @param [Hash<Symbol, #to_s>] params property and value pairs for
|
212
|
+
# parameters to pass to the request
|
213
|
+
#
|
214
|
+
# @see Twofishes#geocode
|
215
|
+
# @see Twofishes::Client
|
216
|
+
def geocode(location, includes = [], params = {})
|
217
|
+
client = Twofishes::Client
|
218
|
+
client.send(:handle_response) do
|
219
|
+
request = GeocodeRequest.new(query: location, responseIncludes: includes)
|
220
|
+
params.each { |prop, val| request.send("#{prop}=".to_sym, val) }
|
221
|
+
client.thrift_client.geocode(request)
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
227
|
+
##
|
228
|
+
# @param [#lat#long] point a twofishes point to convert to Geokit
|
229
|
+
#
|
230
|
+
# @return [Geokit::LatLng]
|
231
|
+
def twofishes_point_to_geokit(point)
|
232
|
+
Geokit::LatLng.new(point.lat, point.lng)
|
233
|
+
end
|
234
|
+
|
235
|
+
##
|
236
|
+
# @param [#ne#sw] bounds a twofishes bounding box to convert to Geokit
|
237
|
+
#
|
238
|
+
# @return [Geokit::Bounds]
|
239
|
+
def twofishes_bounds_to_geokit(bounds)
|
240
|
+
Geokit::Bounds.new(twofishes_point_to_geokit(bounds.sw),
|
241
|
+
twofishes_point_to_geokit(bounds.ne))
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
@@ -42,7 +42,19 @@ module Audumbla
|
|
42
42
|
return record unless record.respond_to? field
|
43
43
|
values = record.send(field)
|
44
44
|
if field_chain.length == 1
|
45
|
-
new_values = values.map { |v| enrich_value(v) }
|
45
|
+
new_values = values.map { |v| enrich_value(v) }
|
46
|
+
# We call #flatten twice, since under some circumstances it fails on
|
47
|
+
# nested #to_ary calls the first time. This appears to be related to:
|
48
|
+
#
|
49
|
+
# http://yehudakatz.com/2010/01/02/the-craziest-fing-bug-ive-ever-seen/
|
50
|
+
# and
|
51
|
+
# https://bugs.ruby-lang.org/issues/2494
|
52
|
+
begin
|
53
|
+
new_values = new_values.flatten.compact
|
54
|
+
rescue
|
55
|
+
new_values = new_values.flatten.compact
|
56
|
+
end
|
57
|
+
|
46
58
|
record.send("#{field}=".to_sym, new_values)
|
47
59
|
else
|
48
60
|
resources(values).each { |v| enrich_field(v, field_chain[1..-1]) }
|
data/lib/audumbla/version.rb
CHANGED
@@ -0,0 +1,276 @@
|
|
1
|
+
--- !ruby/object:GeocodeResponse
|
2
|
+
interpretations:
|
3
|
+
- !ruby/object:GeocodeInterpretation
|
4
|
+
what: ''
|
5
|
+
where: georgia
|
6
|
+
feature: !ruby/object:GeocodeFeature
|
7
|
+
woeType: 8
|
8
|
+
cc: US
|
9
|
+
geometry: !ruby/object:FeatureGeometry
|
10
|
+
center: !ruby/object:GeocodePoint
|
11
|
+
lat: 32.75042
|
12
|
+
lng: -83.50018
|
13
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
14
|
+
ne: !ruby/object:GeocodePoint
|
15
|
+
lat: 35.000659
|
16
|
+
lng: -80.751429
|
17
|
+
sw: !ruby/object:GeocodePoint
|
18
|
+
lat: 30.355756999999997
|
19
|
+
lng: -85.605165
|
20
|
+
source: usa_adm1.shp
|
21
|
+
name: Georgia
|
22
|
+
displayName: Georgia, United States
|
23
|
+
ids:
|
24
|
+
- !ruby/object:FeatureId
|
25
|
+
source: geonameid
|
26
|
+
id: '4197000'
|
27
|
+
- !ruby/object:FeatureId
|
28
|
+
source: woeid
|
29
|
+
id: '2347569'
|
30
|
+
names:
|
31
|
+
- !ruby/object:FeatureName
|
32
|
+
flags:
|
33
|
+
- 2
|
34
|
+
name: GA
|
35
|
+
lang: abbr
|
36
|
+
- !ruby/object:FeatureName
|
37
|
+
flags:
|
38
|
+
- 16
|
39
|
+
name: State of Georgia
|
40
|
+
lang: en
|
41
|
+
- !ruby/object:FeatureName
|
42
|
+
flags:
|
43
|
+
- 64
|
44
|
+
- 16
|
45
|
+
name: Peach State
|
46
|
+
lang: en
|
47
|
+
- !ruby/object:FeatureName
|
48
|
+
flags:
|
49
|
+
- 128
|
50
|
+
- 16
|
51
|
+
- 1
|
52
|
+
name: Georgia
|
53
|
+
lang: en
|
54
|
+
highlightedName: "<b>Georgia</b>, United States"
|
55
|
+
matchedName: Georgia, United States
|
56
|
+
id: geonameid:4197000
|
57
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
58
|
+
adm0cap: false
|
59
|
+
adm1cap: false
|
60
|
+
scalerank: 20
|
61
|
+
labelrank: 0
|
62
|
+
natscale: 0
|
63
|
+
population: 8975842
|
64
|
+
sociallyRelevant: false
|
65
|
+
worldcity: false
|
66
|
+
urls:
|
67
|
+
- http://id.loc.gov/authorities/names/n79023113
|
68
|
+
- http://en.wikipedia.org/wiki/Georgia_(U.S._state)
|
69
|
+
longId: 72057594042124936
|
70
|
+
parentIds:
|
71
|
+
- 72057594044179937
|
72
|
+
- !ruby/object:GeocodeInterpretation
|
73
|
+
what: ''
|
74
|
+
where: georgia
|
75
|
+
feature: !ruby/object:GeocodeFeature
|
76
|
+
woeType: 12
|
77
|
+
cc: GE
|
78
|
+
geometry: !ruby/object:FeatureGeometry
|
79
|
+
center: !ruby/object:GeocodePoint
|
80
|
+
lat: 41.99998
|
81
|
+
lng: 43.4999
|
82
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
83
|
+
ne: !ruby/object:GeocodePoint
|
84
|
+
lat: 43.586627
|
85
|
+
lng: 46.736119
|
86
|
+
sw: !ruby/object:GeocodePoint
|
87
|
+
lat: 41.054942
|
88
|
+
lng: 40.006604
|
89
|
+
source: gn-adm0-new3.json
|
90
|
+
name: Georgia
|
91
|
+
displayName: Georgia
|
92
|
+
ids:
|
93
|
+
- !ruby/object:FeatureId
|
94
|
+
source: geonameid
|
95
|
+
id: '614540'
|
96
|
+
names:
|
97
|
+
- !ruby/object:FeatureName
|
98
|
+
flags:
|
99
|
+
- 2
|
100
|
+
name: GE
|
101
|
+
lang: abbr
|
102
|
+
- !ruby/object:FeatureName
|
103
|
+
flags:
|
104
|
+
- 1024
|
105
|
+
name: Georgian Soviet Socialist Republic
|
106
|
+
lang: en
|
107
|
+
- !ruby/object:FeatureName
|
108
|
+
flags:
|
109
|
+
- 128
|
110
|
+
- 64
|
111
|
+
- 1
|
112
|
+
name: Georgia
|
113
|
+
lang: en
|
114
|
+
highlightedName: "<b>Georgia</b>"
|
115
|
+
matchedName: Georgia
|
116
|
+
id: geonameid:614540
|
117
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
118
|
+
adm0cap: false
|
119
|
+
adm1cap: false
|
120
|
+
scalerank: 20
|
121
|
+
labelrank: 0
|
122
|
+
natscale: 0
|
123
|
+
population: 4630000
|
124
|
+
sociallyRelevant: false
|
125
|
+
worldcity: false
|
126
|
+
urls:
|
127
|
+
- http://ru.wikipedia.org/wiki/%D0%93%D1%80%D1%83%D0%B7%D0%B8%D1%8F
|
128
|
+
- http://en.wikipedia.org/wiki/Georgia_%28country%29
|
129
|
+
longId: 72057594038542476
|
130
|
+
parentIds:
|
131
|
+
- 72057594044183083
|
132
|
+
longIds:
|
133
|
+
- 72057594038542363
|
134
|
+
- !ruby/object:GeocodeInterpretation
|
135
|
+
what: ''
|
136
|
+
where: georgia
|
137
|
+
feature: !ruby/object:GeocodeFeature
|
138
|
+
woeType: 10
|
139
|
+
cc: US
|
140
|
+
geometry: !ruby/object:FeatureGeometry
|
141
|
+
center: !ruby/object:GeocodePoint
|
142
|
+
lat: 44.72824
|
143
|
+
lng: -73.12763
|
144
|
+
name: Town of Georgia
|
145
|
+
displayName: Town of Georgia, VT, United States
|
146
|
+
ids:
|
147
|
+
- !ruby/object:FeatureId
|
148
|
+
source: geonameid
|
149
|
+
id: '5236379'
|
150
|
+
- !ruby/object:FeatureId
|
151
|
+
source: woeid
|
152
|
+
id: '2409718'
|
153
|
+
names:
|
154
|
+
- !ruby/object:FeatureName
|
155
|
+
flags:
|
156
|
+
- 16
|
157
|
+
- 1
|
158
|
+
name: Town of Georgia
|
159
|
+
lang: en
|
160
|
+
- !ruby/object:FeatureName
|
161
|
+
flags:
|
162
|
+
- 16
|
163
|
+
- 8
|
164
|
+
- 1
|
165
|
+
name: Georgia
|
166
|
+
lang: en
|
167
|
+
highlightedName: "<b>Georgia</b>, VT, United States"
|
168
|
+
matchedName: Georgia, VT, United States
|
169
|
+
id: geonameid:5236379
|
170
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
171
|
+
adm0cap: false
|
172
|
+
adm1cap: false
|
173
|
+
scalerank: 20
|
174
|
+
labelrank: 0
|
175
|
+
natscale: 0
|
176
|
+
population: 0
|
177
|
+
sociallyRelevant: false
|
178
|
+
worldcity: false
|
179
|
+
urls: []
|
180
|
+
longId: 72057594043164315
|
181
|
+
parentIds:
|
182
|
+
- 72057594044179937
|
183
|
+
- 72057594043170219
|
184
|
+
- 72057594043164215
|
185
|
+
- !ruby/object:GeocodeInterpretation
|
186
|
+
what: ''
|
187
|
+
where: georgia
|
188
|
+
feature: !ruby/object:GeocodeFeature
|
189
|
+
woeType: 7
|
190
|
+
cc: US
|
191
|
+
geometry: !ruby/object:FeatureGeometry
|
192
|
+
center: !ruby/object:GeocodePoint
|
193
|
+
lat: 40.18733
|
194
|
+
lng: -74.28459
|
195
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
196
|
+
ne: !ruby/object:GeocodePoint
|
197
|
+
lat: 40.1990013123
|
198
|
+
lng: -74.2533340454
|
199
|
+
sw: !ruby/object:GeocodePoint
|
200
|
+
lat: 40.1450004578
|
201
|
+
lng: -74.3127212524
|
202
|
+
name: Georgia
|
203
|
+
displayName: Georgia, NJ, United States
|
204
|
+
ids:
|
205
|
+
- !ruby/object:FeatureId
|
206
|
+
source: geonameid
|
207
|
+
id: '5098392'
|
208
|
+
- !ruby/object:FeatureId
|
209
|
+
source: woeid
|
210
|
+
id: '2409714'
|
211
|
+
names:
|
212
|
+
- !ruby/object:FeatureName
|
213
|
+
flags:
|
214
|
+
- 16
|
215
|
+
- 1
|
216
|
+
name: Georgia
|
217
|
+
lang: en
|
218
|
+
highlightedName: "<b>Georgia</b>, NJ, United States"
|
219
|
+
matchedName: Georgia, NJ, United States
|
220
|
+
id: geonameid:5098392
|
221
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
222
|
+
adm0cap: false
|
223
|
+
adm1cap: false
|
224
|
+
scalerank: 20
|
225
|
+
labelrank: 0
|
226
|
+
natscale: 0
|
227
|
+
population: 0
|
228
|
+
sociallyRelevant: false
|
229
|
+
worldcity: false
|
230
|
+
urls:
|
231
|
+
- http://en.wikipedia.org/wiki/Georgia%2C_New_Jersey
|
232
|
+
longId: 72057594043026328
|
233
|
+
parentIds:
|
234
|
+
- 72057594044179937
|
235
|
+
- 72057594043029696
|
236
|
+
- 72057594043029241
|
237
|
+
- !ruby/object:GeocodeInterpretation
|
238
|
+
what: ''
|
239
|
+
where: georgia
|
240
|
+
feature: !ruby/object:GeocodeFeature
|
241
|
+
woeType: 0
|
242
|
+
cc: CM
|
243
|
+
geometry: !ruby/object:FeatureGeometry
|
244
|
+
center: !ruby/object:GeocodePoint
|
245
|
+
lat: 6.6
|
246
|
+
lng: 14.01667
|
247
|
+
name: Gorgia
|
248
|
+
displayName: Gorgia, Cameroon
|
249
|
+
ids:
|
250
|
+
- !ruby/object:FeatureId
|
251
|
+
source: geonameid
|
252
|
+
id: '2231063'
|
253
|
+
names:
|
254
|
+
- !ruby/object:FeatureName
|
255
|
+
flags:
|
256
|
+
- 16
|
257
|
+
- 1
|
258
|
+
name: Gorgia
|
259
|
+
lang: en
|
260
|
+
highlightedName: "<b>Georgia</b>, Cameroon"
|
261
|
+
matchedName: Georgia, Cameroon
|
262
|
+
id: geonameid:2231063
|
263
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
264
|
+
adm0cap: false
|
265
|
+
adm1cap: false
|
266
|
+
scalerank: 20
|
267
|
+
labelrank: 0
|
268
|
+
natscale: 0
|
269
|
+
population: 0
|
270
|
+
sociallyRelevant: false
|
271
|
+
worldcity: false
|
272
|
+
urls: []
|
273
|
+
longId: 72057594040158999
|
274
|
+
parentIds:
|
275
|
+
- 72057594040161323
|
276
|
+
- 72057594040163951
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Audumbla::Enrichments::CoarseGeocode do
|
4
|
+
it_behaves_like 'a field enrichment'
|
5
|
+
|
6
|
+
before do
|
7
|
+
allow(Twofishes::Client)
|
8
|
+
.to receive(:handle_response)
|
9
|
+
.and_return(Twofishes::Result.from_response(georgia_response))
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:georgia_response) { YAML::load_file('spec/fixtures/georgia.yml') }
|
13
|
+
|
14
|
+
describe '#enrich_value' do
|
15
|
+
let(:place) do
|
16
|
+
build(:place,
|
17
|
+
providedLabel: 'georgia',
|
18
|
+
label: nil,
|
19
|
+
exactMatch: nil,
|
20
|
+
countryCode: nil,
|
21
|
+
parentFeature: nil,
|
22
|
+
lat: nil,
|
23
|
+
long: nil,
|
24
|
+
alt: nil)
|
25
|
+
end
|
26
|
+
|
27
|
+
let(:prefLabel) { 'Georgia, United States' }
|
28
|
+
let(:geoname_uri) { RDF::URI('http://sws.geonames.org/4197000/') }
|
29
|
+
let(:country_code) { 'US' }
|
30
|
+
let(:lat) { 32.75042 }
|
31
|
+
let(:lng) { -83.50018 }
|
32
|
+
let(:lcname_uri) do
|
33
|
+
RDF::URI('http://id.loc.gov/authorities/names/n79023113')
|
34
|
+
end
|
35
|
+
|
36
|
+
describe '#enrich_value' do
|
37
|
+
it 'returns the same place entity' do
|
38
|
+
expect(subject.enrich_value(place)).to eq place
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'retains providedLabel' do
|
42
|
+
expect(subject.enrich_value(place))
|
43
|
+
.to have_attributes(providedLabel: contain_exactly('georgia'))
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'it gives the geoname as skos:exactMatch' do
|
47
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
48
|
+
.to contain_exactly(geoname_uri)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'adds LC closeMatches, if appropriate' do
|
52
|
+
expect(subject.enrich_value(place).closeMatch.map(&:rdf_subject))
|
53
|
+
.to contain_exactly(lcname_uri)
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'enriches place with new data' do
|
57
|
+
expect(subject.enrich_value(place))
|
58
|
+
.to have_attributes(
|
59
|
+
label: contain_exactly(prefLabel),
|
60
|
+
countryCode: contain_exactly(country_code),
|
61
|
+
lat: contain_exactly(be_within(0.01).of(lat)),
|
62
|
+
long: contain_exactly(be_within(0.01).of(lng))
|
63
|
+
)
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'with lat/lng' do
|
67
|
+
context 'and label' do
|
68
|
+
let(:place) do
|
69
|
+
build(:place,
|
70
|
+
providedLabel: 'georgia',
|
71
|
+
label: nil,
|
72
|
+
exactMatch: nil,
|
73
|
+
countryCode: nil,
|
74
|
+
parentFeature: nil,
|
75
|
+
lat: lat,
|
76
|
+
long: lng,
|
77
|
+
alt: nil)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'gives result matching lat/lng' do
|
81
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
82
|
+
.to contain_exactly(geoname_uri)
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'skips result not matching lat/lng' do
|
86
|
+
place.lat = 41.9997
|
87
|
+
place.long = 43.4998
|
88
|
+
|
89
|
+
georgia_country_uri = RDF::URI('http://sws.geonames.org/614540/')
|
90
|
+
|
91
|
+
# points are in bounding box for Georgia but not equal to center
|
92
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
93
|
+
.to contain_exactly(georgia_country_uri)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'selects no match if none match lat/lng' do
|
97
|
+
place.lat = 41.9997
|
98
|
+
place.long = -43.4998
|
99
|
+
expect(subject.enrich_value(place).exactMatch).to be_empty
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: audumbla
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Audrey Altman
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2015-
|
14
|
+
date: 2015-07-22 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: dpla-map
|
@@ -27,6 +27,34 @@ dependencies:
|
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 4.0.0.0.pre.10
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: twofishes
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
- !ruby/object:Gem::Dependency
|
45
|
+
name: geokit
|
46
|
+
requirement: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
30
58
|
- !ruby/object:Gem::Dependency
|
31
59
|
name: rspec
|
32
60
|
requirement: !ruby/object:Gem::Requirement
|
@@ -41,6 +69,20 @@ dependencies:
|
|
41
69
|
- - "~>"
|
42
70
|
- !ruby/object:Gem::Version
|
43
71
|
version: '3.0'
|
72
|
+
- !ruby/object:Gem::Dependency
|
73
|
+
name: webmock
|
74
|
+
requirement: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
type: :development
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
44
86
|
- !ruby/object:Gem::Dependency
|
45
87
|
name: pry
|
46
88
|
requirement: !ruby/object:Gem::Requirement
|
@@ -69,15 +111,17 @@ files:
|
|
69
111
|
- lib/audumbla/enrichment.rb~
|
70
112
|
- lib/audumbla/enrichments.rb
|
71
113
|
- lib/audumbla/enrichments.rb~
|
72
|
-
- lib/audumbla/enrichments/
|
114
|
+
- lib/audumbla/enrichments/coarse_geocode.rb
|
73
115
|
- lib/audumbla/enrichments/version.rb~
|
74
116
|
- lib/audumbla/field_enrichment.rb
|
75
117
|
- lib/audumbla/field_enrichment.rb~
|
76
118
|
- lib/audumbla/spec/enrichment.rb
|
77
119
|
- lib/audumbla/version.rb
|
78
120
|
- lib/audumbla/version.rb~
|
121
|
+
- spec/fixtures/georgia.yml
|
79
122
|
- spec/lib/audumbla/enrichment_spec.rb
|
80
123
|
- spec/lib/audumbla/enrichment_spec.rb~
|
124
|
+
- spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
|
81
125
|
- spec/lib/audumbla/enrichments/geocode_spec.rb~
|
82
126
|
- spec/lib/audumbla/field_enrichment_spec.rb
|
83
127
|
- spec/lib/audumbla/field_enrichment_spec.rb~
|
@@ -103,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
103
147
|
version: '0'
|
104
148
|
requirements: []
|
105
149
|
rubyforge_project:
|
106
|
-
rubygems_version: 2.
|
150
|
+
rubygems_version: 2.4.5
|
107
151
|
signing_key:
|
108
152
|
specification_version: 4
|
109
153
|
summary: A toolkit for enhancement of RDF Metadata
|
@@ -112,7 +156,8 @@ test_files:
|
|
112
156
|
- spec/lib/audumbla/field_enrichment_spec.rb
|
113
157
|
- spec/lib/audumbla/field_enrichment_spec.rb~
|
114
158
|
- spec/lib/audumbla/enrichment_spec.rb~
|
159
|
+
- spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
|
115
160
|
- spec/lib/audumbla/enrichments/geocode_spec.rb~
|
116
161
|
- spec/spec_helper.rb
|
162
|
+
- spec/fixtures/georgia.yml
|
117
163
|
- spec/spec_helper.rb~
|
118
|
-
has_rdoc:
|