audumbla 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/audumbla.rb +4 -0
- data/lib/audumbla/enrichments.rb +1 -4
- data/lib/audumbla/enrichments/coarse_geocode.rb +244 -0
- data/lib/audumbla/field_enrichment.rb +13 -1
- data/lib/audumbla/version.rb +1 -1
- data/spec/fixtures/georgia.yml +276 -0
- data/spec/lib/audumbla/enrichments/coarse_geocode_spec.rb +105 -0
- metadata +50 -5
- data/lib/audumbla/enrichments/geocode.rb~ +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 100602e369b80c14118de38514d6273bad179cde
|
4
|
+
data.tar.gz: 4e2e0efbf018f7d632d460b9727f691578016a90
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ad72a4e8bb400e99d79e74c2fff277ec80d55cbed4ffb3771641e8f0bba56a2c20e8483201cdda17a16738fb389e04299ce925e40dcc384db73183c05b6632f
|
7
|
+
data.tar.gz: 34352f392ae17552d14511d630df1d50cdccec42d98a6193da0f5267a4e047855c033a9592ad181d1105d5008877449d1c521c306995736d93b24d7df3fc3aa6
|
data/lib/audumbla.rb
CHANGED
data/lib/audumbla/enrichments.rb
CHANGED
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'twofishes'
|
2
|
+
require 'geokit'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module Audumbla::Enrichments
|
6
|
+
##
|
7
|
+
# Enriches a `DPLA::MAP::Place` node by running its data through external
|
8
|
+
# geocoders, using heuristics to determine a matching feature from GeoNames,
|
9
|
+
# and repopulating the `Place` with related data.
|
10
|
+
#
|
11
|
+
# If the existing `Place` contains data other than a `providedLabel`, that
|
12
|
+
# data will be used as context for evaluating interpretations. For example:
|
13
|
+
# a `Place` with an existing latitude and longitude will verify that the
|
14
|
+
# point is within the bounding box for a candidate match.
|
15
|
+
#
|
16
|
+
# `skos:exactMatch` is reserved for the GeoNames features returned by the
|
17
|
+
# geocoder. Other matching URIs (currently: LC authorities) are included as
|
18
|
+
# `skos:closeMatch`
|
19
|
+
#
|
20
|
+
# Configuration is handled through a YAML file passed into the initializer
|
21
|
+
# (default: 'geocode.yml'). The options are:
|
22
|
+
# - 'twofishes_host': the hostname for the twofishes server (default:
|
23
|
+
# 'localhost')
|
24
|
+
# - 'twofishes_port': the port of the twofishes geocode endpoint (default:
|
25
|
+
# 8080)
|
26
|
+
# - 'twofishes_timeout': request timeout in seconds (default: 10)
|
27
|
+
# - 'twofishes_retries': request retry maximum for twofishes (default: 2)
|
28
|
+
# - 'distance_threshold': the maximum distance between a set of coordinates
|
29
|
+
# in the input object and a candidate match before we judge it a
|
30
|
+
# false positive, given in kilometers. (default: 100)
|
31
|
+
# - 'max_interpretations': the number of geocoded "interpretations" to
|
32
|
+
# request from the server; these are the places that will be considered
|
33
|
+
# by the internal heuristics (default: 5).
|
34
|
+
#
|
35
|
+
# @example enriching from a `#providedLabel`
|
36
|
+
#
|
37
|
+
# place = DPLA::MAP::Place.new.tap { |p| p.providedLabel = 'Georgia' }
|
38
|
+
# CoarseGeocode.new.enrich_value(place).dump :ttl
|
39
|
+
# # [
|
40
|
+
# # a <http://www.europeana.eu/schemas/edm/Place>;
|
41
|
+
# # <http://dp.la/about/map/providedLabel> "Georgia";
|
42
|
+
# # <http://www.geonames.org/ontology#countryCode> "US";
|
43
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 3.275042e1;
|
44
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#long> -8.350018e1;
|
45
|
+
# # <http://www.w3.org/2004/02/skos/core#closeMatch> <http://id.loc.gov/authorities/names/n79023113>;
|
46
|
+
# # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/4197000/>;
|
47
|
+
# # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia, United States"
|
48
|
+
# # ] .
|
49
|
+
#
|
50
|
+
# @example enriching from a `#providedLabel` with lat/lng guidance
|
51
|
+
#
|
52
|
+
# place = DPLA::MAP::Place.new.tap do |p|
|
53
|
+
# p.providedLabel = 'Georgia'
|
54
|
+
# p.lat = 41.9997
|
55
|
+
# p.long = 43.4998
|
56
|
+
# end
|
57
|
+
#
|
58
|
+
# CoarseGeocode.new.enrich_value(place).dump :ttl
|
59
|
+
# # [
|
60
|
+
# # a <http://www.europeana.eu/schemas/edm/Place>;
|
61
|
+
# # <http://dp.la/about/map/providedLabel> "Georgia";
|
62
|
+
# # <http://www.geonames.org/ontology#countryCode> "GE";
|
63
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 4.199998e1;
|
64
|
+
# # <http://www.w3.org/2003/01/geo/wgs84_pos#long> 4.34999e1;
|
65
|
+
# # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/614540/>;
|
66
|
+
# # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia"
|
67
|
+
# # ] .
|
68
|
+
#
|
69
|
+
class CoarseGeocode
|
70
|
+
include Audumbla::FieldEnrichment
|
71
|
+
|
72
|
+
DEFAULT_DISTANCE_THRESHOLD_KMS = 100
|
73
|
+
DEFAULT_MAX_INTERPRETATIONS = 5
|
74
|
+
DEFAULT_TWOFISHES_HOST = 'localhost'
|
75
|
+
DEFAULT_TWOFISHES_PORT = 8080
|
76
|
+
DEFAULT_TWOFISHES_TIMEOUT = 10
|
77
|
+
DEFAULT_TWOFISHES_RETRIES = 2
|
78
|
+
|
79
|
+
##
|
80
|
+
# @param [String] config_file a path to a config file for the geocoder;
|
81
|
+
# default: 'geocode.yml'
|
82
|
+
def initialize(config_file = 'geocode.yml')
|
83
|
+
config = YAML.load_file(config_file)
|
84
|
+
|
85
|
+
@distance_threshold = config.fetch('distance_threshold',
|
86
|
+
DEFAULT_DISTANCE_THRESHOLD_KMS)
|
87
|
+
@max_interpretations = config.fetch('max_interpretations',
|
88
|
+
DEFAULT_MAX_INTERPRETATIONS)
|
89
|
+
|
90
|
+
Twofishes.configure do |twofish|
|
91
|
+
twofish.host = config.fetch('twofishes_host', DEFAULT_TWOFISHES_HOST)
|
92
|
+
twofish.port = config.fetch('twofishes_port', DEFAULT_TWOFISHES_PORT)
|
93
|
+
twofish.timeout = config.fetch('twofishes_timeout',
|
94
|
+
DEFAULT_TWOFISHES_TIMEOUT)
|
95
|
+
twofish.retries = config.fetch('twofishes_retries',
|
96
|
+
DEFAULT_TWOFISHES_RETRIES)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
##
|
101
|
+
# Enriches the given value against the TwoFishes coarse geocoder. This
|
102
|
+
# process adds a `skos:exactMatch` for a matching GeoNames URI, if any, and
|
103
|
+
# populates the remaining place data to the degree possible from the matched
|
104
|
+
# feature.
|
105
|
+
#
|
106
|
+
# Considers a number of matches specified by `@max_interpretations` and
|
107
|
+
# returned by Twofishes, via `#match?`.
|
108
|
+
#
|
109
|
+
# @param [DPLA::MAP::Place] value the place to geocode
|
110
|
+
#
|
111
|
+
# @return [DPLA::MAP::Place] the initial place, enriched via coarse geocoding
|
112
|
+
def enrich_value(value)
|
113
|
+
return value unless value.is_a? DPLA::MAP::Place
|
114
|
+
interpretations = geocode(value.providedLabel.first,
|
115
|
+
[],
|
116
|
+
maxInterpretations: @max_interpretations)
|
117
|
+
match = interpretations.find { |interp| match?(interp, value) }
|
118
|
+
match.nil? ? value : enrich_place(value, match.feature)
|
119
|
+
end
|
120
|
+
|
121
|
+
##
|
122
|
+
# Checks that we are satisfied with the geocoder's best matches prior to
|
123
|
+
# acceptance. Most tweaks to the geocoding process should be taken care
|
124
|
+
# of at the geocoder itself, but a simple accept/reject of the points
|
125
|
+
# offered is possible here. This allows existing data about the place
|
126
|
+
# to be used as context.
|
127
|
+
#
|
128
|
+
# For example, this method returns false if `place` contains latitude
|
129
|
+
# and longitude, but the candidate match has a geometry far away from those
|
130
|
+
# given. "far away" is defined by `@distance_threshold` from the center of the
|
131
|
+
# candidate feature to the point given by `#lat` and `#long` in `place`.
|
132
|
+
#
|
133
|
+
# @param [GeocodeInterpretation] interpretation a twofishes interpretation
|
134
|
+
# @param [#lat, #long] place a place to verify a match against
|
135
|
+
#
|
136
|
+
# @return [Boolean] true if the interpretation is accepted
|
137
|
+
def match?(interpretation, place)
|
138
|
+
return true if place.lat.empty? || place.long.empty?
|
139
|
+
|
140
|
+
point = Geokit::LatLng.new(place.lat.first, place.long.first)
|
141
|
+
if interpretation.geometry.bounds.nil?
|
142
|
+
# measure distance between point centers
|
143
|
+
distance = twofishes_point_to_geokit(interpretation.geometry.center)
|
144
|
+
.distance_to(point, unit: :kms)
|
145
|
+
return distance < @distance_threshold
|
146
|
+
end
|
147
|
+
|
148
|
+
twofishes_bounds_to_geokit(interpretation.geometry.bounds)
|
149
|
+
.contains?(point)
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
##
|
155
|
+
# Populates a DPLA::MAP::Place with data from a given feature. This
|
156
|
+
# overwrites existing data with the exception of the identity (URI or node
|
157
|
+
# id) and the `providedLabel`. `exactMatch`, `closeMatch`, `label`
|
158
|
+
# (skos:prefLabel) and all other geographic data are replaced.
|
159
|
+
#
|
160
|
+
# @param [DPLA::MAP::Place] place a place to enrich
|
161
|
+
# @param [GeocodeFeature] feature a twofishes feature whose data should be
|
162
|
+
# added to place.
|
163
|
+
#
|
164
|
+
# @return [DPLA::MAP::Place] the original place enriched
|
165
|
+
def enrich_place(place, feature)
|
166
|
+
place.label = feature.display_name
|
167
|
+
place.exactMatch = feature_to_geoname_uris(feature)
|
168
|
+
place.closeMatch = feature_to_close_matches(feature,
|
169
|
+
/^http\:\/\/id\.loc\.gov\/.*/)
|
170
|
+
place.countryCode = feature.cc
|
171
|
+
place.lat = feature.geometry.center.lat
|
172
|
+
place.long = feature.geometry.center.lng
|
173
|
+
|
174
|
+
place
|
175
|
+
end
|
176
|
+
|
177
|
+
##
|
178
|
+
# Extracts geonameids for the given feature and converts them into URIs
|
179
|
+
#
|
180
|
+
# @param [GeocodeFeature] feature the feature to identify
|
181
|
+
#
|
182
|
+
# @return [Array<RDF::URI>] a list of geoname URIs. Generally, this will only
|
183
|
+
# contain one exactly matching geonameid in URI form.
|
184
|
+
def feature_to_geoname_uris(feature)
|
185
|
+
geoname_ids = feature.ids.select { |id| id.source == :geonameid.to_s }
|
186
|
+
geoname_ids.map { |id| RDF::URI('http://sws.geonames.org') / id.id + '/' }
|
187
|
+
end
|
188
|
+
|
189
|
+
##
|
190
|
+
# Extracts URIs for closely matching terms in other authority or knowledge
|
191
|
+
# organization systems
|
192
|
+
#
|
193
|
+
# @param [GeocodeFeature] feature the feature to identify
|
194
|
+
# @param [Regexp] patterns a splat argument containing any number of
|
195
|
+
# patterns matching
|
196
|
+
#
|
197
|
+
# @return [Array<RDF::URI>] a list of matching ids
|
198
|
+
def feature_to_close_matches(feature, *patterns)
|
199
|
+
union = Regexp.union(patterns)
|
200
|
+
feature.attributes.urls.select { |str| union.match(str) }
|
201
|
+
.map { |id| RDF::URI(id) }
|
202
|
+
end
|
203
|
+
|
204
|
+
##
|
205
|
+
# Sends a geocode request. This is used in lieu of `Twofishes#geocode`,
|
206
|
+
# since that method does not allow passing parameters other than
|
207
|
+
# `responseIncludes`.
|
208
|
+
#
|
209
|
+
# @param [#to_s] location the string to try to match
|
210
|
+
# @param [Array] includes a list of twofishes include constants
|
211
|
+
# @param [Hash<Symbol, #to_s>] params property and value pairs for
|
212
|
+
# parameters to pass to the request
|
213
|
+
#
|
214
|
+
# @see Twofishes#geocode
|
215
|
+
# @see Twofishes::Client
|
216
|
+
def geocode(location, includes = [], params = {})
|
217
|
+
client = Twofishes::Client
|
218
|
+
client.send(:handle_response) do
|
219
|
+
request = GeocodeRequest.new(query: location, responseIncludes: includes)
|
220
|
+
params.each { |prop, val| request.send("#{prop}=".to_sym, val) }
|
221
|
+
client.thrift_client.geocode(request)
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
227
|
+
##
|
228
|
+
# @param [#lat, #lng] point a twofishes point to convert to Geokit
|
229
|
+
#
|
230
|
+
# @return [Geokit::LatLng]
|
231
|
+
def twofishes_point_to_geokit(point)
|
232
|
+
Geokit::LatLng.new(point.lat, point.lng)
|
233
|
+
end
|
234
|
+
|
235
|
+
##
|
236
|
+
# @param [#ne#sw] bounds a twofishes bounding box to convert to Geokit
|
237
|
+
#
|
238
|
+
# @return [Geokit::Bounds]
|
239
|
+
def twofishes_bounds_to_geokit(bounds)
|
240
|
+
Geokit::Bounds.new(twofishes_point_to_geokit(bounds.sw),
|
241
|
+
twofishes_point_to_geokit(bounds.ne))
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
@@ -42,7 +42,19 @@ module Audumbla
|
|
42
42
|
return record unless record.respond_to? field
|
43
43
|
values = record.send(field)
|
44
44
|
if field_chain.length == 1
|
45
|
-
new_values = values.map { |v| enrich_value(v) }
|
45
|
+
new_values = values.map { |v| enrich_value(v) }
|
46
|
+
# We call #flatten twice, since under some circumstances it fails on
|
47
|
+
# nested #to_ary calls the first time. This appears to be related to:
|
48
|
+
#
|
49
|
+
# http://yehudakatz.com/2010/01/02/the-craziest-fing-bug-ive-ever-seen/
|
50
|
+
# and
|
51
|
+
# https://bugs.ruby-lang.org/issues/2494
|
52
|
+
begin
|
53
|
+
new_values = new_values.flatten.compact
|
54
|
+
rescue
|
55
|
+
new_values = new_values.flatten.compact
|
56
|
+
end
|
57
|
+
|
46
58
|
record.send("#{field}=".to_sym, new_values)
|
47
59
|
else
|
48
60
|
resources(values).each { |v| enrich_field(v, field_chain[1..-1]) }
|
data/lib/audumbla/version.rb
CHANGED
@@ -0,0 +1,276 @@
|
|
1
|
+
--- !ruby/object:GeocodeResponse
|
2
|
+
interpretations:
|
3
|
+
- !ruby/object:GeocodeInterpretation
|
4
|
+
what: ''
|
5
|
+
where: georgia
|
6
|
+
feature: !ruby/object:GeocodeFeature
|
7
|
+
woeType: 8
|
8
|
+
cc: US
|
9
|
+
geometry: !ruby/object:FeatureGeometry
|
10
|
+
center: !ruby/object:GeocodePoint
|
11
|
+
lat: 32.75042
|
12
|
+
lng: -83.50018
|
13
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
14
|
+
ne: !ruby/object:GeocodePoint
|
15
|
+
lat: 35.000659
|
16
|
+
lng: -80.751429
|
17
|
+
sw: !ruby/object:GeocodePoint
|
18
|
+
lat: 30.355756999999997
|
19
|
+
lng: -85.605165
|
20
|
+
source: usa_adm1.shp
|
21
|
+
name: Georgia
|
22
|
+
displayName: Georgia, United States
|
23
|
+
ids:
|
24
|
+
- !ruby/object:FeatureId
|
25
|
+
source: geonameid
|
26
|
+
id: '4197000'
|
27
|
+
- !ruby/object:FeatureId
|
28
|
+
source: woeid
|
29
|
+
id: '2347569'
|
30
|
+
names:
|
31
|
+
- !ruby/object:FeatureName
|
32
|
+
flags:
|
33
|
+
- 2
|
34
|
+
name: GA
|
35
|
+
lang: abbr
|
36
|
+
- !ruby/object:FeatureName
|
37
|
+
flags:
|
38
|
+
- 16
|
39
|
+
name: State of Georgia
|
40
|
+
lang: en
|
41
|
+
- !ruby/object:FeatureName
|
42
|
+
flags:
|
43
|
+
- 64
|
44
|
+
- 16
|
45
|
+
name: Peach State
|
46
|
+
lang: en
|
47
|
+
- !ruby/object:FeatureName
|
48
|
+
flags:
|
49
|
+
- 128
|
50
|
+
- 16
|
51
|
+
- 1
|
52
|
+
name: Georgia
|
53
|
+
lang: en
|
54
|
+
highlightedName: "<b>Georgia</b>, United States"
|
55
|
+
matchedName: Georgia, United States
|
56
|
+
id: geonameid:4197000
|
57
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
58
|
+
adm0cap: false
|
59
|
+
adm1cap: false
|
60
|
+
scalerank: 20
|
61
|
+
labelrank: 0
|
62
|
+
natscale: 0
|
63
|
+
population: 8975842
|
64
|
+
sociallyRelevant: false
|
65
|
+
worldcity: false
|
66
|
+
urls:
|
67
|
+
- http://id.loc.gov/authorities/names/n79023113
|
68
|
+
- http://en.wikipedia.org/wiki/Georgia_(U.S._state)
|
69
|
+
longId: 72057594042124936
|
70
|
+
parentIds:
|
71
|
+
- 72057594044179937
|
72
|
+
- !ruby/object:GeocodeInterpretation
|
73
|
+
what: ''
|
74
|
+
where: georgia
|
75
|
+
feature: !ruby/object:GeocodeFeature
|
76
|
+
woeType: 12
|
77
|
+
cc: GE
|
78
|
+
geometry: !ruby/object:FeatureGeometry
|
79
|
+
center: !ruby/object:GeocodePoint
|
80
|
+
lat: 41.99998
|
81
|
+
lng: 43.4999
|
82
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
83
|
+
ne: !ruby/object:GeocodePoint
|
84
|
+
lat: 43.586627
|
85
|
+
lng: 46.736119
|
86
|
+
sw: !ruby/object:GeocodePoint
|
87
|
+
lat: 41.054942
|
88
|
+
lng: 40.006604
|
89
|
+
source: gn-adm0-new3.json
|
90
|
+
name: Georgia
|
91
|
+
displayName: Georgia
|
92
|
+
ids:
|
93
|
+
- !ruby/object:FeatureId
|
94
|
+
source: geonameid
|
95
|
+
id: '614540'
|
96
|
+
names:
|
97
|
+
- !ruby/object:FeatureName
|
98
|
+
flags:
|
99
|
+
- 2
|
100
|
+
name: GE
|
101
|
+
lang: abbr
|
102
|
+
- !ruby/object:FeatureName
|
103
|
+
flags:
|
104
|
+
- 1024
|
105
|
+
name: Georgian Soviet Socialist Republic
|
106
|
+
lang: en
|
107
|
+
- !ruby/object:FeatureName
|
108
|
+
flags:
|
109
|
+
- 128
|
110
|
+
- 64
|
111
|
+
- 1
|
112
|
+
name: Georgia
|
113
|
+
lang: en
|
114
|
+
highlightedName: "<b>Georgia</b>"
|
115
|
+
matchedName: Georgia
|
116
|
+
id: geonameid:614540
|
117
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
118
|
+
adm0cap: false
|
119
|
+
adm1cap: false
|
120
|
+
scalerank: 20
|
121
|
+
labelrank: 0
|
122
|
+
natscale: 0
|
123
|
+
population: 4630000
|
124
|
+
sociallyRelevant: false
|
125
|
+
worldcity: false
|
126
|
+
urls:
|
127
|
+
- http://ru.wikipedia.org/wiki/%D0%93%D1%80%D1%83%D0%B7%D0%B8%D1%8F
|
128
|
+
- http://en.wikipedia.org/wiki/Georgia_%28country%29
|
129
|
+
longId: 72057594038542476
|
130
|
+
parentIds:
|
131
|
+
- 72057594044183083
|
132
|
+
longIds:
|
133
|
+
- 72057594038542363
|
134
|
+
- !ruby/object:GeocodeInterpretation
|
135
|
+
what: ''
|
136
|
+
where: georgia
|
137
|
+
feature: !ruby/object:GeocodeFeature
|
138
|
+
woeType: 10
|
139
|
+
cc: US
|
140
|
+
geometry: !ruby/object:FeatureGeometry
|
141
|
+
center: !ruby/object:GeocodePoint
|
142
|
+
lat: 44.72824
|
143
|
+
lng: -73.12763
|
144
|
+
name: Town of Georgia
|
145
|
+
displayName: Town of Georgia, VT, United States
|
146
|
+
ids:
|
147
|
+
- !ruby/object:FeatureId
|
148
|
+
source: geonameid
|
149
|
+
id: '5236379'
|
150
|
+
- !ruby/object:FeatureId
|
151
|
+
source: woeid
|
152
|
+
id: '2409718'
|
153
|
+
names:
|
154
|
+
- !ruby/object:FeatureName
|
155
|
+
flags:
|
156
|
+
- 16
|
157
|
+
- 1
|
158
|
+
name: Town of Georgia
|
159
|
+
lang: en
|
160
|
+
- !ruby/object:FeatureName
|
161
|
+
flags:
|
162
|
+
- 16
|
163
|
+
- 8
|
164
|
+
- 1
|
165
|
+
name: Georgia
|
166
|
+
lang: en
|
167
|
+
highlightedName: "<b>Georgia</b>, VT, United States"
|
168
|
+
matchedName: Georgia, VT, United States
|
169
|
+
id: geonameid:5236379
|
170
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
171
|
+
adm0cap: false
|
172
|
+
adm1cap: false
|
173
|
+
scalerank: 20
|
174
|
+
labelrank: 0
|
175
|
+
natscale: 0
|
176
|
+
population: 0
|
177
|
+
sociallyRelevant: false
|
178
|
+
worldcity: false
|
179
|
+
urls: []
|
180
|
+
longId: 72057594043164315
|
181
|
+
parentIds:
|
182
|
+
- 72057594044179937
|
183
|
+
- 72057594043170219
|
184
|
+
- 72057594043164215
|
185
|
+
- !ruby/object:GeocodeInterpretation
|
186
|
+
what: ''
|
187
|
+
where: georgia
|
188
|
+
feature: !ruby/object:GeocodeFeature
|
189
|
+
woeType: 7
|
190
|
+
cc: US
|
191
|
+
geometry: !ruby/object:FeatureGeometry
|
192
|
+
center: !ruby/object:GeocodePoint
|
193
|
+
lat: 40.18733
|
194
|
+
lng: -74.28459
|
195
|
+
bounds: !ruby/object:GeocodeBoundingBox
|
196
|
+
ne: !ruby/object:GeocodePoint
|
197
|
+
lat: 40.1990013123
|
198
|
+
lng: -74.2533340454
|
199
|
+
sw: !ruby/object:GeocodePoint
|
200
|
+
lat: 40.1450004578
|
201
|
+
lng: -74.3127212524
|
202
|
+
name: Georgia
|
203
|
+
displayName: Georgia, NJ, United States
|
204
|
+
ids:
|
205
|
+
- !ruby/object:FeatureId
|
206
|
+
source: geonameid
|
207
|
+
id: '5098392'
|
208
|
+
- !ruby/object:FeatureId
|
209
|
+
source: woeid
|
210
|
+
id: '2409714'
|
211
|
+
names:
|
212
|
+
- !ruby/object:FeatureName
|
213
|
+
flags:
|
214
|
+
- 16
|
215
|
+
- 1
|
216
|
+
name: Georgia
|
217
|
+
lang: en
|
218
|
+
highlightedName: "<b>Georgia</b>, NJ, United States"
|
219
|
+
matchedName: Georgia, NJ, United States
|
220
|
+
id: geonameid:5098392
|
221
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
222
|
+
adm0cap: false
|
223
|
+
adm1cap: false
|
224
|
+
scalerank: 20
|
225
|
+
labelrank: 0
|
226
|
+
natscale: 0
|
227
|
+
population: 0
|
228
|
+
sociallyRelevant: false
|
229
|
+
worldcity: false
|
230
|
+
urls:
|
231
|
+
- http://en.wikipedia.org/wiki/Georgia%2C_New_Jersey
|
232
|
+
longId: 72057594043026328
|
233
|
+
parentIds:
|
234
|
+
- 72057594044179937
|
235
|
+
- 72057594043029696
|
236
|
+
- 72057594043029241
|
237
|
+
- !ruby/object:GeocodeInterpretation
|
238
|
+
what: ''
|
239
|
+
where: georgia
|
240
|
+
feature: !ruby/object:GeocodeFeature
|
241
|
+
woeType: 0
|
242
|
+
cc: CM
|
243
|
+
geometry: !ruby/object:FeatureGeometry
|
244
|
+
center: !ruby/object:GeocodePoint
|
245
|
+
lat: 6.6
|
246
|
+
lng: 14.01667
|
247
|
+
name: Gorgia
|
248
|
+
displayName: Gorgia, Cameroon
|
249
|
+
ids:
|
250
|
+
- !ruby/object:FeatureId
|
251
|
+
source: geonameid
|
252
|
+
id: '2231063'
|
253
|
+
names:
|
254
|
+
- !ruby/object:FeatureName
|
255
|
+
flags:
|
256
|
+
- 16
|
257
|
+
- 1
|
258
|
+
name: Gorgia
|
259
|
+
lang: en
|
260
|
+
highlightedName: "<b>Georgia</b>, Cameroon"
|
261
|
+
matchedName: Georgia, Cameroon
|
262
|
+
id: geonameid:2231063
|
263
|
+
attributes: !ruby/object:GeocodeFeatureAttributes
|
264
|
+
adm0cap: false
|
265
|
+
adm1cap: false
|
266
|
+
scalerank: 20
|
267
|
+
labelrank: 0
|
268
|
+
natscale: 0
|
269
|
+
population: 0
|
270
|
+
sociallyRelevant: false
|
271
|
+
worldcity: false
|
272
|
+
urls: []
|
273
|
+
longId: 72057594040158999
|
274
|
+
parentIds:
|
275
|
+
- 72057594040161323
|
276
|
+
- 72057594040163951
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Audumbla::Enrichments::CoarseGeocode do
|
4
|
+
it_behaves_like 'a field enrichment'
|
5
|
+
|
6
|
+
before do
|
7
|
+
allow(Twofishes::Client)
|
8
|
+
.to receive(:handle_response)
|
9
|
+
.and_return(Twofishes::Result.from_response(georgia_response))
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:georgia_response) { YAML::load_file('spec/fixtures/georgia.yml') }
|
13
|
+
|
14
|
+
describe '#enrich_value' do
|
15
|
+
let(:place) do
|
16
|
+
build(:place,
|
17
|
+
providedLabel: 'georgia',
|
18
|
+
label: nil,
|
19
|
+
exactMatch: nil,
|
20
|
+
countryCode: nil,
|
21
|
+
parentFeature: nil,
|
22
|
+
lat: nil,
|
23
|
+
long: nil,
|
24
|
+
alt: nil)
|
25
|
+
end
|
26
|
+
|
27
|
+
let(:prefLabel) { 'Georgia, United States' }
|
28
|
+
let(:geoname_uri) { RDF::URI('http://sws.geonames.org/4197000/') }
|
29
|
+
let(:country_code) { 'US' }
|
30
|
+
let(:lat) { 32.75042 }
|
31
|
+
let(:lng) { -83.50018 }
|
32
|
+
let(:lcname_uri) do
|
33
|
+
RDF::URI('http://id.loc.gov/authorities/names/n79023113')
|
34
|
+
end
|
35
|
+
|
36
|
+
describe '#enrich_value' do
|
37
|
+
it 'returns the same place entity' do
|
38
|
+
expect(subject.enrich_value(place)).to eq place
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'retains providedLabel' do
|
42
|
+
expect(subject.enrich_value(place))
|
43
|
+
.to have_attributes(providedLabel: contain_exactly('georgia'))
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'it gives the geoname as skos:exactMatch' do
|
47
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
48
|
+
.to contain_exactly(geoname_uri)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'adds LC closeMatches, if appropriate' do
|
52
|
+
expect(subject.enrich_value(place).closeMatch.map(&:rdf_subject))
|
53
|
+
.to contain_exactly(lcname_uri)
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'enriches place with new data' do
|
57
|
+
expect(subject.enrich_value(place))
|
58
|
+
.to have_attributes(
|
59
|
+
label: contain_exactly(prefLabel),
|
60
|
+
countryCode: contain_exactly(country_code),
|
61
|
+
lat: contain_exactly(be_within(0.01).of(lat)),
|
62
|
+
long: contain_exactly(be_within(0.01).of(lng))
|
63
|
+
)
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'with lat/lng' do
|
67
|
+
context 'and label' do
|
68
|
+
let(:place) do
|
69
|
+
build(:place,
|
70
|
+
providedLabel: 'georgia',
|
71
|
+
label: nil,
|
72
|
+
exactMatch: nil,
|
73
|
+
countryCode: nil,
|
74
|
+
parentFeature: nil,
|
75
|
+
lat: lat,
|
76
|
+
long: lng,
|
77
|
+
alt: nil)
|
78
|
+
end
|
79
|
+
|
80
|
+
it 'gives result matching lat/lng' do
|
81
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
82
|
+
.to contain_exactly(geoname_uri)
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'skips result not matching lat/lng' do
|
86
|
+
place.lat = 41.9997
|
87
|
+
place.long = 43.4998
|
88
|
+
|
89
|
+
georgia_country_uri = RDF::URI('http://sws.geonames.org/614540/')
|
90
|
+
|
91
|
+
# points are in bounding box for Georgia but not equal to center
|
92
|
+
expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
|
93
|
+
.to contain_exactly(georgia_country_uri)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'selects no match if none match lat/lng' do
|
97
|
+
place.lat = 41.9997
|
98
|
+
place.long = -43.4998
|
99
|
+
expect(subject.enrich_value(place).exactMatch).to be_empty
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: audumbla
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Audrey Altman
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2015-
|
14
|
+
date: 2015-07-22 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: dpla-map
|
@@ -27,6 +27,34 @@ dependencies:
|
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 4.0.0.0.pre.10
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: twofishes
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
- !ruby/object:Gem::Dependency
|
45
|
+
name: geokit
|
46
|
+
requirement: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
30
58
|
- !ruby/object:Gem::Dependency
|
31
59
|
name: rspec
|
32
60
|
requirement: !ruby/object:Gem::Requirement
|
@@ -41,6 +69,20 @@ dependencies:
|
|
41
69
|
- - "~>"
|
42
70
|
- !ruby/object:Gem::Version
|
43
71
|
version: '3.0'
|
72
|
+
- !ruby/object:Gem::Dependency
|
73
|
+
name: webmock
|
74
|
+
requirement: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
type: :development
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
44
86
|
- !ruby/object:Gem::Dependency
|
45
87
|
name: pry
|
46
88
|
requirement: !ruby/object:Gem::Requirement
|
@@ -69,15 +111,17 @@ files:
|
|
69
111
|
- lib/audumbla/enrichment.rb~
|
70
112
|
- lib/audumbla/enrichments.rb
|
71
113
|
- lib/audumbla/enrichments.rb~
|
72
|
-
- lib/audumbla/enrichments/geocode.rb~
|
114
|
+
- lib/audumbla/enrichments/coarse_geocode.rb
|
73
115
|
- lib/audumbla/enrichments/version.rb~
|
74
116
|
- lib/audumbla/field_enrichment.rb
|
75
117
|
- lib/audumbla/field_enrichment.rb~
|
76
118
|
- lib/audumbla/spec/enrichment.rb
|
77
119
|
- lib/audumbla/version.rb
|
78
120
|
- lib/audumbla/version.rb~
|
121
|
+
- spec/fixtures/georgia.yml
|
79
122
|
- spec/lib/audumbla/enrichment_spec.rb
|
80
123
|
- spec/lib/audumbla/enrichment_spec.rb~
|
124
|
+
- spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
|
81
125
|
- spec/lib/audumbla/enrichments/geocode_spec.rb~
|
82
126
|
- spec/lib/audumbla/field_enrichment_spec.rb
|
83
127
|
- spec/lib/audumbla/field_enrichment_spec.rb~
|
@@ -103,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
103
147
|
version: '0'
|
104
148
|
requirements: []
|
105
149
|
rubyforge_project:
|
106
|
-
rubygems_version: 2.
|
150
|
+
rubygems_version: 2.4.5
|
107
151
|
signing_key:
|
108
152
|
specification_version: 4
|
109
153
|
summary: A toolkit for enhancement of RDF Metadata
|
@@ -112,7 +156,8 @@ test_files:
|
|
112
156
|
- spec/lib/audumbla/field_enrichment_spec.rb
|
113
157
|
- spec/lib/audumbla/field_enrichment_spec.rb~
|
114
158
|
- spec/lib/audumbla/enrichment_spec.rb~
|
159
|
+
- spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
|
115
160
|
- spec/lib/audumbla/enrichments/geocode_spec.rb~
|
116
161
|
- spec/spec_helper.rb
|
162
|
+
- spec/fixtures/georgia.yml
|
117
163
|
- spec/spec_helper.rb~
|
118
|
-
has_rdoc:
|