audumbla 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85d96776f960b63fce1a6e0533f8e53d9afe9ae8
4
- data.tar.gz: c9fa438ae3c3ff6e6ae06395f188b18872b44a6b
3
+ metadata.gz: 100602e369b80c14118de38514d6273bad179cde
4
+ data.tar.gz: 4e2e0efbf018f7d632d460b9727f691578016a90
5
5
  SHA512:
6
- metadata.gz: f409960eca0091c5405aecbdafb6dab5d1eaf1e3383ac23c3ac32a40a38b24210ec57f8456be9d4f949498537d4e2bc4b678551c836f121f8c255ddc9b627571
7
- data.tar.gz: d0d5b93cad7e668c0faae99ad5dca370baa9d3e15d7d22becb917aa62acfb438428bd66b9f9ab29c79aececd376950b52b244ce26c0b5c623a6f018685626d66
6
+ metadata.gz: 2ad72a4e8bb400e99d79e74c2fff277ec80d55cbed4ffb3771641e8f0bba56a2c20e8483201cdda17a16738fb389e04299ce925e40dcc384db73183c05b6632f
7
+ data.tar.gz: 34352f392ae17552d14511d630df1d50cdccec42d98a6193da0f5267a4e047855c033a9592ad181d1105d5008877449d1c521c306995736d93b24d7df3fc3aa6
@@ -1,4 +1,8 @@
1
1
  module Audumbla
2
2
  autoload :Enrichment, 'audumbla/enrichment'
3
3
  autoload :FieldEnrichment, 'audumbla/field_enrichment'
4
+
5
+ module Enrichments
6
+ autoload :CoarseGeocode, 'audumbla/enrichments/coarse_geocode'
7
+ end
4
8
  end
@@ -1,4 +1 @@
1
- module Audumbla
2
- module Enrichments
3
- end
4
- end
1
+
@@ -0,0 +1,244 @@
1
+ require 'twofishes'
2
+ require 'geokit'
3
+ require 'yaml'
4
+
5
+ module Audumbla::Enrichments
6
+ ##
7
+ # Enriches a `DPLA::MAP::Place` node by running its data through external
8
+ # geocoders, using heuristics to determine a matching feature from GeoNames,
9
+ # and repopulating the `Place` with related data.
10
+ #
11
+ # If the existing `Place` contains data other than a `providedLabel`, that
12
+ # data will be used as context for evaluating interpretations. For example:
13
+ # a `Place` with an existing latitude and longitude will verify that the
14
+ # point is within the bounding box for a candidate match.
15
+ #
16
+ # `skos:exactMatch` are reserved for the GeoNames features returned by the
17
+ # geocoder. Other matching URIs (currently: LC authorities) are included as
18
+ # `skos:closeMatch`
19
+ #
20
+ # Configuration is handled through a YAML file passed into the initializer
21
+ # (default: 'geocode.yml'). The options are:
22
+ # - 'twofishes_host': the hostname for the twofishes server (default:
23
+ # 'localhost')
24
+ # - 'twofishes_port': the port of the twofishes geocode endpoint (default:
25
+ # 8080)
26
+ # - 'twofishes_timeout': request timeout in seconds (default: 10)
27
+ # - 'twofishes_retries': request retry maximum for twofishes (default: 2)
28
+ # - 'distance_threshold': the maximum distance between a set of coordinates
29
+ # in the input object and a candidate match before we judge it a
30
+ # false positive, given in kilometers. (default: 100)
31
+ # - 'max_interpretations': the number of geocoded "interpretations" to
32
+ # request from the server; these are the places that will be considered
33
+ # by the internal heuristics (default: 5).
34
+ #
35
+ # @example enriching from a `#providedLabel`
36
+ #
37
+ # place = DPLA::MAP::Place.new.tap { |p| p.providedLabel = 'Georgia' }
38
+ # CoarseGeocode.new.enrich_value(place).dump :ttl
39
+ # # [
40
+ # # a <http://www.europeana.eu/schemas/edm/Place>;
41
+ # # <http://dp.la/about/map/providedLabel> "Georgia";
42
+ # # <http://www.geonames.org/ontology#countryCode> "US";
43
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 3.275042e1;
44
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#long> -8.350018e1;
45
+ # # <http://www.w3.org/2004/02/skos/core#closeMatch> <http://id.loc.gov/authorities/names/n79023113>;
46
+ # # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/4197000/>;
47
+ # # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia, United States"
48
+ # # ] .
49
+ #
50
+ # @example enriching from a `#providedLabel` with lat/lng guidance
51
+ #
52
+ # place = DPLA::MAP::Place.new.tap do |p|
53
+ # p.providedLabel = 'Georgia'
54
+ # p.lat = 41.9997
55
+ # p.long = 43.4998
56
+ # end
57
+ #
58
+ # CoarseGeocode.new.enrich_value(place).dump :ttl
59
+ # # [
60
+ # # a <http://www.europeana.eu/schemas/edm/Place>;
61
+ # # <http://dp.la/about/map/providedLabel> "Georgia";
62
+ # # <http://www.geonames.org/ontology#countryCode> "GE";
63
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 4.199998e1;
64
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#long> 4.34999e1;
65
+ # # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/614540/>;
66
+ # # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia"
67
+ # # ] .
68
+ #
69
+ class CoarseGeocode
70
+ include Audumbla::FieldEnrichment
71
+
72
+ DEFAULT_DISTANCE_THRESHOLD_KMS = 100
73
+ DEFAULT_MAX_INTERPRETATIONS = 5
74
+ DEFAULT_TWOFISHES_HOST = 'localhost'
75
+ DEFAULT_TWOFISHES_PORT = 8080
76
+ DEFAULT_TWOFISHES_TIMEOUT = 10
77
+ DEFAULT_TWOFISHES_RETRIES = 2
78
+
79
+ ##
80
+ # @param [String] config_file a path to a config file for the geocoder;
81
+ # default: 'geocode.yml'
82
+ def initialize(config_file = 'geocode.yml')
83
+ config = YAML.load_file(config_file)
84
+
85
+ @distance_threshold = config.fetch('distance_threshold',
86
+ DEFAULT_DISTANCE_THRESHOLD_KMS)
87
+ @max_interpretations = config.fetch('max_interpretations',
88
+ DEFAULT_MAX_INTERPRETATIONS)
89
+
90
+ Twofishes.configure do |twofish|
91
+ twofish.host = config.fetch('twofishes_host', DEFAULT_TWOFISHES_HOST)
92
+ twofish.port = config.fetch('twofishes_port', DEFAULT_TWOFISHES_PORT)
93
+ twofish.timeout = config.fetch('twofishes_timeout',
94
+ DEFAULT_TWOFISHES_TIMEOUT)
95
+ twofish.retries = config.fetch('twofishes_retries',
96
+ DEFAULT_TWOFISHES_RETRIES)
97
+ end
98
+ end
99
+
100
+ ##
101
+ # Enriches the given value against the TwoFishes coarse geocoder. This
102
+ # process adds a `skos:exactMatch` for a matching GeoNames URI, if any, and
103
+ # populates the remaining place data to the degree possible from the matched
104
+ # feature.
105
+ #
106
+ # Considers a number of matches specified by `@max_interpretations` and
107
+ # returned by Twofishes, via `#match?`.
108
+ #
109
+ # @param [DPLA::MAP::Place] value the place to geocode
110
+ #
111
+ # @return [DPLA::MAP::Place] the initial place, enriched via coarse geocoding
112
+ def enrich_value(value)
113
+ return value unless value.is_a? DPLA::MAP::Place
114
+ interpretations = geocode(value.providedLabel.first,
115
+ [],
116
+ maxInterpretations: @max_interpretations)
117
+ match = interpretations.find { |interp| match?(interp, value) }
118
+ match.nil? ? value : enrich_place(value, match.feature)
119
+ end
120
+
121
+ ##
122
+ # Checks that we are satisfied with the geocoder's best matches prior to
123
+ # acceptance. Most tweaks to the geocoding process should be taken care
124
+ # of at the geocoder itself, but a simple accept/reject of the points
125
+ # offered is possible here. This allows existing data about the place
126
+ # to be used as context.
127
+ #
128
+ # For example, this method returns false if `place` contains latitude
129
+ # and longitude, but the candidate match has a geometry far away from those
130
+ # given. "far away" is defined by `@distance_threshold` from the center of the
131
+ # candidate feature to the point given by `#lat` and `#long` in `place`.
132
+ #
133
+ # @param [GeocodeInterpretation] interpretation a twofishes interpretation
134
+ # @param [#lat#long] place a place to verify a match against
135
+ #
136
+ # @return [Boolean] true if the interpretation is accepted
137
+ def match?(interpretation, place)
138
+ return true if place.lat.empty? || place.long.empty?
139
+
140
+ point = Geokit::LatLng.new(place.lat.first, place.long.first)
141
+ if interpretation.geometry.bounds.nil?
142
+ # measure distance between point centers
143
+ distance = twofishes_point_to_geokit(interpretation.geometry.center)
144
+ .distance_to(point, unit: :kms)
145
+ return distance < @distance_threshold
146
+ end
147
+
148
+ twofishes_bounds_to_geokit(interpretation.geometry.bounds)
149
+ .contains?(point)
150
+ end
151
+
152
+ private
153
+
154
+ ##
155
+ # Populates a DPLA::MAP::Place with data from a given feature. This
156
+ # overwrites existing data with the exception of the identity (URI or node
157
+ # id) and the `providedLabel`. `exactMatch`, `closeMatch`, `label`
158
+ # (skos:prefLabel) and all other geographic data are replaced.
159
+ #
160
+ # @param [DPLA::MAP::Place] place a place to enrich
161
+ # @param [GeocodeFeature] feature a twofishes feature whose data should be
162
+ # added to place.
163
+ #
164
+ # @return [DPLA::MAP::Place] the original place enriched
165
+ def enrich_place(place, feature)
166
+ place.label = feature.display_name
167
+ place.exactMatch = feature_to_geoname_uris(feature)
168
+ place.closeMatch = feature_to_close_matches(feature,
169
+ /^http\:\/\/id\.loc\.gov\/.*/)
170
+ place.countryCode = feature.cc
171
+ place.lat = feature.geometry.center.lat
172
+ place.long = feature.geometry.center.lng
173
+
174
+ place
175
+ end
176
+
177
+ ##
178
+ # Extracts geonameids for the given feature and converts them into URIs
179
+ #
180
+ # @param [GeocodeFeature] feature the feature to identify
181
+ #
182
+ # @return [Array<RDF::URI>] a list of geoname URIs. Generally, this will only
183
+ # contain one exactly matching geonameid in URI form.
184
+ def feature_to_geoname_uris(feature)
185
+ geoname_ids = feature.ids.select { |id| id.source == :geonameid.to_s }
186
+ geoname_ids.map { |id| RDF::URI('http://sws.geonames.org') / id.id + '/' }
187
+ end
188
+
189
+ ##
190
+ # Extracts URIs for closely matching terms in other authority or knowledge
191
+ # organization systems
192
+ #
193
+ # @param [GeocodeFeature] feature the feature to identify
194
+ # @param [Regexp] patterns a splat argument containing any number of
195
+ # patterns matching
196
+ #
197
+ # @return [Array<RDF::URI>] a list of matching ids
198
+ def feature_to_close_matches(feature, *patterns)
199
+ union = Regexp.union(patterns)
200
+ feature.attributes.urls.select { |str| union.match(str) }
201
+ .map { |id| RDF::URI(id) }
202
+ end
203
+
204
+ ##
205
+ # Sends a geocode request. This is used in lieu of `Twofishes#geocode`,
206
+ # since that method does not allow passing parameters other than
207
+ # `responseIncludes`.
208
+ #
209
+ # @param [#to_s] location the string to try to match
210
+ # @param [Array] includes a list of twofishes include constants
211
+ # @param [Hash<Symbol, #to_s>] params property and value pairs for
212
+ # parameters to pass to the request
213
+ #
214
+ # @see Twofishes#geocode
215
+ # @see Twofishes::Client
216
+ def geocode(location, includes = [], params = {})
217
+ client = Twofishes::Client
218
+ client.send(:handle_response) do
219
+ request = GeocodeRequest.new(query: location, responseIncludes: includes)
220
+ params.each { |prop, val| request.send("#{prop}=".to_sym, val) }
221
+ client.thrift_client.geocode(request)
222
+ end
223
+ end
224
+
225
+ private
226
+
227
+ ##
228
+ # @param [#lat#long] point a twofishes point to convert to Geokit
229
+ #
230
+ # @return [Geokit::LatLng]
231
+ def twofishes_point_to_geokit(point)
232
+ Geokit::LatLng.new(point.lat, point.lng)
233
+ end
234
+
235
+ ##
236
+ # @param [#ne#sw] bounds a twofishes bounding box to convert to Geokit
237
+ #
238
+ # @return [Geokit::Bounds]
239
+ def twofishes_bounds_to_geokit(bounds)
240
+ Geokit::Bounds.new(twofishes_point_to_geokit(bounds.sw),
241
+ twofishes_point_to_geokit(bounds.ne))
242
+ end
243
+ end
244
+ end
@@ -42,7 +42,19 @@ module Audumbla
42
42
  return record unless record.respond_to? field
43
43
  values = record.send(field)
44
44
  if field_chain.length == 1
45
- new_values = values.map { |v| enrich_value(v) }.flatten.compact
45
+ new_values = values.map { |v| enrich_value(v) }
46
+ # We call #flatten twice, since under some circumstances it fails on
47
+ # nested #to_ary calls the first time. This appears to be related to:
48
+ #
49
+ # http://yehudakatz.com/2010/01/02/the-craziest-fing-bug-ive-ever-seen/
50
+ # and
51
+ # https://bugs.ruby-lang.org/issues/2494
52
+ begin
53
+ new_values = new_values.flatten.compact
54
+ rescue
55
+ new_values = new_values.flatten.compact
56
+ end
57
+
46
58
  record.send("#{field}=".to_sym, new_values)
47
59
  else
48
60
  resources(values).each { |v| enrich_field(v, field_chain[1..-1]) }
@@ -1,3 +1,3 @@
1
1
  module Audumbla
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -0,0 +1,276 @@
1
+ --- !ruby/object:GeocodeResponse
2
+ interpretations:
3
+ - !ruby/object:GeocodeInterpretation
4
+ what: ''
5
+ where: georgia
6
+ feature: !ruby/object:GeocodeFeature
7
+ woeType: 8
8
+ cc: US
9
+ geometry: !ruby/object:FeatureGeometry
10
+ center: !ruby/object:GeocodePoint
11
+ lat: 32.75042
12
+ lng: -83.50018
13
+ bounds: !ruby/object:GeocodeBoundingBox
14
+ ne: !ruby/object:GeocodePoint
15
+ lat: 35.000659
16
+ lng: -80.751429
17
+ sw: !ruby/object:GeocodePoint
18
+ lat: 30.355756999999997
19
+ lng: -85.605165
20
+ source: usa_adm1.shp
21
+ name: Georgia
22
+ displayName: Georgia, United States
23
+ ids:
24
+ - !ruby/object:FeatureId
25
+ source: geonameid
26
+ id: '4197000'
27
+ - !ruby/object:FeatureId
28
+ source: woeid
29
+ id: '2347569'
30
+ names:
31
+ - !ruby/object:FeatureName
32
+ flags:
33
+ - 2
34
+ name: GA
35
+ lang: abbr
36
+ - !ruby/object:FeatureName
37
+ flags:
38
+ - 16
39
+ name: State of Georgia
40
+ lang: en
41
+ - !ruby/object:FeatureName
42
+ flags:
43
+ - 64
44
+ - 16
45
+ name: Peach State
46
+ lang: en
47
+ - !ruby/object:FeatureName
48
+ flags:
49
+ - 128
50
+ - 16
51
+ - 1
52
+ name: Georgia
53
+ lang: en
54
+ highlightedName: "<b>Georgia</b>, United States"
55
+ matchedName: Georgia, United States
56
+ id: geonameid:4197000
57
+ attributes: !ruby/object:GeocodeFeatureAttributes
58
+ adm0cap: false
59
+ adm1cap: false
60
+ scalerank: 20
61
+ labelrank: 0
62
+ natscale: 0
63
+ population: 8975842
64
+ sociallyRelevant: false
65
+ worldcity: false
66
+ urls:
67
+ - http://id.loc.gov/authorities/names/n79023113
68
+ - http://en.wikipedia.org/wiki/Georgia_(U.S._state)
69
+ longId: 72057594042124936
70
+ parentIds:
71
+ - 72057594044179937
72
+ - !ruby/object:GeocodeInterpretation
73
+ what: ''
74
+ where: georgia
75
+ feature: !ruby/object:GeocodeFeature
76
+ woeType: 12
77
+ cc: GE
78
+ geometry: !ruby/object:FeatureGeometry
79
+ center: !ruby/object:GeocodePoint
80
+ lat: 41.99998
81
+ lng: 43.4999
82
+ bounds: !ruby/object:GeocodeBoundingBox
83
+ ne: !ruby/object:GeocodePoint
84
+ lat: 43.586627
85
+ lng: 46.736119
86
+ sw: !ruby/object:GeocodePoint
87
+ lat: 41.054942
88
+ lng: 40.006604
89
+ source: gn-adm0-new3.json
90
+ name: Georgia
91
+ displayName: Georgia
92
+ ids:
93
+ - !ruby/object:FeatureId
94
+ source: geonameid
95
+ id: '614540'
96
+ names:
97
+ - !ruby/object:FeatureName
98
+ flags:
99
+ - 2
100
+ name: GE
101
+ lang: abbr
102
+ - !ruby/object:FeatureName
103
+ flags:
104
+ - 1024
105
+ name: Georgian Soviet Socialist Republic
106
+ lang: en
107
+ - !ruby/object:FeatureName
108
+ flags:
109
+ - 128
110
+ - 64
111
+ - 1
112
+ name: Georgia
113
+ lang: en
114
+ highlightedName: "<b>Georgia</b>"
115
+ matchedName: Georgia
116
+ id: geonameid:614540
117
+ attributes: !ruby/object:GeocodeFeatureAttributes
118
+ adm0cap: false
119
+ adm1cap: false
120
+ scalerank: 20
121
+ labelrank: 0
122
+ natscale: 0
123
+ population: 4630000
124
+ sociallyRelevant: false
125
+ worldcity: false
126
+ urls:
127
+ - http://ru.wikipedia.org/wiki/%D0%93%D1%80%D1%83%D0%B7%D0%B8%D1%8F
128
+ - http://en.wikipedia.org/wiki/Georgia_%28country%29
129
+ longId: 72057594038542476
130
+ parentIds:
131
+ - 72057594044183083
132
+ longIds:
133
+ - 72057594038542363
134
+ - !ruby/object:GeocodeInterpretation
135
+ what: ''
136
+ where: georgia
137
+ feature: !ruby/object:GeocodeFeature
138
+ woeType: 10
139
+ cc: US
140
+ geometry: !ruby/object:FeatureGeometry
141
+ center: !ruby/object:GeocodePoint
142
+ lat: 44.72824
143
+ lng: -73.12763
144
+ name: Town of Georgia
145
+ displayName: Town of Georgia, VT, United States
146
+ ids:
147
+ - !ruby/object:FeatureId
148
+ source: geonameid
149
+ id: '5236379'
150
+ - !ruby/object:FeatureId
151
+ source: woeid
152
+ id: '2409718'
153
+ names:
154
+ - !ruby/object:FeatureName
155
+ flags:
156
+ - 16
157
+ - 1
158
+ name: Town of Georgia
159
+ lang: en
160
+ - !ruby/object:FeatureName
161
+ flags:
162
+ - 16
163
+ - 8
164
+ - 1
165
+ name: Georgia
166
+ lang: en
167
+ highlightedName: "<b>Georgia</b>, VT, United States"
168
+ matchedName: Georgia, VT, United States
169
+ id: geonameid:5236379
170
+ attributes: !ruby/object:GeocodeFeatureAttributes
171
+ adm0cap: false
172
+ adm1cap: false
173
+ scalerank: 20
174
+ labelrank: 0
175
+ natscale: 0
176
+ population: 0
177
+ sociallyRelevant: false
178
+ worldcity: false
179
+ urls: []
180
+ longId: 72057594043164315
181
+ parentIds:
182
+ - 72057594044179937
183
+ - 72057594043170219
184
+ - 72057594043164215
185
+ - !ruby/object:GeocodeInterpretation
186
+ what: ''
187
+ where: georgia
188
+ feature: !ruby/object:GeocodeFeature
189
+ woeType: 7
190
+ cc: US
191
+ geometry: !ruby/object:FeatureGeometry
192
+ center: !ruby/object:GeocodePoint
193
+ lat: 40.18733
194
+ lng: -74.28459
195
+ bounds: !ruby/object:GeocodeBoundingBox
196
+ ne: !ruby/object:GeocodePoint
197
+ lat: 40.1990013123
198
+ lng: -74.2533340454
199
+ sw: !ruby/object:GeocodePoint
200
+ lat: 40.1450004578
201
+ lng: -74.3127212524
202
+ name: Georgia
203
+ displayName: Georgia, NJ, United States
204
+ ids:
205
+ - !ruby/object:FeatureId
206
+ source: geonameid
207
+ id: '5098392'
208
+ - !ruby/object:FeatureId
209
+ source: woeid
210
+ id: '2409714'
211
+ names:
212
+ - !ruby/object:FeatureName
213
+ flags:
214
+ - 16
215
+ - 1
216
+ name: Georgia
217
+ lang: en
218
+ highlightedName: "<b>Georgia</b>, NJ, United States"
219
+ matchedName: Georgia, NJ, United States
220
+ id: geonameid:5098392
221
+ attributes: !ruby/object:GeocodeFeatureAttributes
222
+ adm0cap: false
223
+ adm1cap: false
224
+ scalerank: 20
225
+ labelrank: 0
226
+ natscale: 0
227
+ population: 0
228
+ sociallyRelevant: false
229
+ worldcity: false
230
+ urls:
231
+ - http://en.wikipedia.org/wiki/Georgia%2C_New_Jersey
232
+ longId: 72057594043026328
233
+ parentIds:
234
+ - 72057594044179937
235
+ - 72057594043029696
236
+ - 72057594043029241
237
+ - !ruby/object:GeocodeInterpretation
238
+ what: ''
239
+ where: georgia
240
+ feature: !ruby/object:GeocodeFeature
241
+ woeType: 0
242
+ cc: CM
243
+ geometry: !ruby/object:FeatureGeometry
244
+ center: !ruby/object:GeocodePoint
245
+ lat: 6.6
246
+ lng: 14.01667
247
+ name: Gorgia
248
+ displayName: Gorgia, Cameroon
249
+ ids:
250
+ - !ruby/object:FeatureId
251
+ source: geonameid
252
+ id: '2231063'
253
+ names:
254
+ - !ruby/object:FeatureName
255
+ flags:
256
+ - 16
257
+ - 1
258
+ name: Gorgia
259
+ lang: en
260
+ highlightedName: "<b>Georgia</b>, Cameroon"
261
+ matchedName: Georgia, Cameroon
262
+ id: geonameid:2231063
263
+ attributes: !ruby/object:GeocodeFeatureAttributes
264
+ adm0cap: false
265
+ adm1cap: false
266
+ scalerank: 20
267
+ labelrank: 0
268
+ natscale: 0
269
+ population: 0
270
+ sociallyRelevant: false
271
+ worldcity: false
272
+ urls: []
273
+ longId: 72057594040158999
274
+ parentIds:
275
+ - 72057594040161323
276
+ - 72057594040163951
@@ -0,0 +1,105 @@
1
+ require 'spec_helper'
2
+
3
+ describe Audumbla::Enrichments::CoarseGeocode do
4
+ it_behaves_like 'a field enrichment'
5
+
6
+ before do
7
+ allow(Twofishes::Client)
8
+ .to receive(:handle_response)
9
+ .and_return(Twofishes::Result.from_response(georgia_response))
10
+ end
11
+
12
+ let(:georgia_response) { YAML::load_file('spec/fixtures/georgia.yml') }
13
+
14
+ describe '#enrich_value' do
15
+ let(:place) do
16
+ build(:place,
17
+ providedLabel: 'georgia',
18
+ label: nil,
19
+ exactMatch: nil,
20
+ countryCode: nil,
21
+ parentFeature: nil,
22
+ lat: nil,
23
+ long: nil,
24
+ alt: nil)
25
+ end
26
+
27
+ let(:prefLabel) { 'Georgia, United States' }
28
+ let(:geoname_uri) { RDF::URI('http://sws.geonames.org/4197000/') }
29
+ let(:country_code) { 'US' }
30
+ let(:lat) { 32.75042 }
31
+ let(:lng) { -83.50018 }
32
+ let(:lcname_uri) do
33
+ RDF::URI('http://id.loc.gov/authorities/names/n79023113')
34
+ end
35
+
36
+ describe '#enrich_value' do
37
+ it 'returns the same place entity' do
38
+ expect(subject.enrich_value(place)).to eq place
39
+ end
40
+
41
+ it 'retains providedLabel' do
42
+ expect(subject.enrich_value(place))
43
+ .to have_attributes(providedLabel: contain_exactly('georgia'))
44
+ end
45
+
46
+ it 'it gives the geoname as skos:exactMatch' do
47
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
48
+ .to contain_exactly(geoname_uri)
49
+ end
50
+
51
+ it 'adds LC closeMatches, if appropriate' do
52
+ expect(subject.enrich_value(place).closeMatch.map(&:rdf_subject))
53
+ .to contain_exactly(lcname_uri)
54
+ end
55
+
56
+ it 'enriches place with new data' do
57
+ expect(subject.enrich_value(place))
58
+ .to have_attributes(
59
+ label: contain_exactly(prefLabel),
60
+ countryCode: contain_exactly(country_code),
61
+ lat: contain_exactly(be_within(0.01).of(lat)),
62
+ long: contain_exactly(be_within(0.01).of(lng))
63
+ )
64
+ end
65
+
66
+ context 'with lat/lng' do
67
+ context 'and label' do
68
+ let(:place) do
69
+ build(:place,
70
+ providedLabel: 'georgia',
71
+ label: nil,
72
+ exactMatch: nil,
73
+ countryCode: nil,
74
+ parentFeature: nil,
75
+ lat: lat,
76
+ long: lng,
77
+ alt: nil)
78
+ end
79
+
80
+ it 'gives result matching lat/lng' do
81
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
82
+ .to contain_exactly(geoname_uri)
83
+ end
84
+
85
+ it 'skips result not matching lat/lng' do
86
+ place.lat = 41.9997
87
+ place.long = 43.4998
88
+
89
+ georgia_country_uri = RDF::URI('http://sws.geonames.org/614540/')
90
+
91
+ # points are in bounding box for Georgia but not equal to center
92
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
93
+ .to contain_exactly(georgia_country_uri)
94
+ end
95
+
96
+ it 'selects no match if none match lat/lng' do
97
+ place.lat = 41.9997
98
+ place.long = -43.4998
99
+ expect(subject.enrich_value(place).exactMatch).to be_empty
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: audumbla
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Audrey Altman
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2015-05-29 00:00:00.000000000 Z
14
+ date: 2015-07-22 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: dpla-map
@@ -27,6 +27,34 @@ dependencies:
27
27
  - - "~>"
28
28
  - !ruby/object:Gem::Version
29
29
  version: 4.0.0.0.pre.10
30
+ - !ruby/object:Gem::Dependency
31
+ name: twofishes
32
+ requirement: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ - !ruby/object:Gem::Dependency
45
+ name: geokit
46
+ requirement: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
30
58
  - !ruby/object:Gem::Dependency
31
59
  name: rspec
32
60
  requirement: !ruby/object:Gem::Requirement
@@ -41,6 +69,20 @@ dependencies:
41
69
  - - "~>"
42
70
  - !ruby/object:Gem::Version
43
71
  version: '3.0'
72
+ - !ruby/object:Gem::Dependency
73
+ name: webmock
74
+ requirement: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ type: :development
80
+ prerelease: false
81
+ version_requirements: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
44
86
  - !ruby/object:Gem::Dependency
45
87
  name: pry
46
88
  requirement: !ruby/object:Gem::Requirement
@@ -69,15 +111,17 @@ files:
69
111
  - lib/audumbla/enrichment.rb~
70
112
  - lib/audumbla/enrichments.rb
71
113
  - lib/audumbla/enrichments.rb~
72
- - lib/audumbla/enrichments/geocode.rb~
114
+ - lib/audumbla/enrichments/coarse_geocode.rb
73
115
  - lib/audumbla/enrichments/version.rb~
74
116
  - lib/audumbla/field_enrichment.rb
75
117
  - lib/audumbla/field_enrichment.rb~
76
118
  - lib/audumbla/spec/enrichment.rb
77
119
  - lib/audumbla/version.rb
78
120
  - lib/audumbla/version.rb~
121
+ - spec/fixtures/georgia.yml
79
122
  - spec/lib/audumbla/enrichment_spec.rb
80
123
  - spec/lib/audumbla/enrichment_spec.rb~
124
+ - spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
81
125
  - spec/lib/audumbla/enrichments/geocode_spec.rb~
82
126
  - spec/lib/audumbla/field_enrichment_spec.rb
83
127
  - spec/lib/audumbla/field_enrichment_spec.rb~
@@ -103,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
147
  version: '0'
104
148
  requirements: []
105
149
  rubyforge_project:
106
- rubygems_version: 2.2.1
150
+ rubygems_version: 2.4.5
107
151
  signing_key:
108
152
  specification_version: 4
109
153
  summary: A toolkit for enhancement of RDF Metadata
@@ -112,7 +156,8 @@ test_files:
112
156
  - spec/lib/audumbla/field_enrichment_spec.rb
113
157
  - spec/lib/audumbla/field_enrichment_spec.rb~
114
158
  - spec/lib/audumbla/enrichment_spec.rb~
159
+ - spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
115
160
  - spec/lib/audumbla/enrichments/geocode_spec.rb~
116
161
  - spec/spec_helper.rb
162
+ - spec/fixtures/georgia.yml
117
163
  - spec/spec_helper.rb~
118
- has_rdoc:
@@ -1,11 +0,0 @@
1
- module Krikri::Enrichments
2
- ##
3
- #
4
- class Geocode
5
- include Krikri::FieldEnrichment
6
-
7
- def enrich_value(value)
8
- value
9
- end
10
- end
11
- end