audumbla 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85d96776f960b63fce1a6e0533f8e53d9afe9ae8
4
- data.tar.gz: c9fa438ae3c3ff6e6ae06395f188b18872b44a6b
3
+ metadata.gz: 100602e369b80c14118de38514d6273bad179cde
4
+ data.tar.gz: 4e2e0efbf018f7d632d460b9727f691578016a90
5
5
  SHA512:
6
- metadata.gz: f409960eca0091c5405aecbdafb6dab5d1eaf1e3383ac23c3ac32a40a38b24210ec57f8456be9d4f949498537d4e2bc4b678551c836f121f8c255ddc9b627571
7
- data.tar.gz: d0d5b93cad7e668c0faae99ad5dca370baa9d3e15d7d22becb917aa62acfb438428bd66b9f9ab29c79aececd376950b52b244ce26c0b5c623a6f018685626d66
6
+ metadata.gz: 2ad72a4e8bb400e99d79e74c2fff277ec80d55cbed4ffb3771641e8f0bba56a2c20e8483201cdda17a16738fb389e04299ce925e40dcc384db73183c05b6632f
7
+ data.tar.gz: 34352f392ae17552d14511d630df1d50cdccec42d98a6193da0f5267a4e047855c033a9592ad181d1105d5008877449d1c521c306995736d93b24d7df3fc3aa6
@@ -1,4 +1,8 @@
1
1
  module Audumbla
2
2
  autoload :Enrichment, 'audumbla/enrichment'
3
3
  autoload :FieldEnrichment, 'audumbla/field_enrichment'
4
+
5
+ module Enrichments
6
+ autoload :CoarseGeocode, 'audumbla/enrichments/coarse_geocode'
7
+ end
4
8
  end
@@ -1,4 +1 @@
1
- module Audumbla
2
- module Enrichments
3
- end
4
- end
1
+
@@ -0,0 +1,244 @@
1
+ require 'twofishes'
2
+ require 'geokit'
3
+ require 'yaml'
4
+
5
+ module Audumbla::Enrichments
6
+ ##
7
+ # Enriches a `DPLA::MAP::Place` node by running its data through external
8
+ # geocoders, using heuristics to determine a matching feature from GeoNames,
9
+ # and repopulating the `Place` with related data.
10
+ #
11
+ # If the existing `Place` contains data other than a `providedLabel`, that
12
+ # data will be used as context for evaluating interpretations. For example:
13
+ # a `Place` with an existing latitude and longitude will verify that the
14
+ # point is within the bounding box for a candidate match.
15
+ #
16
+ # `skos:exactMatch` is reserved for the GeoNames features returned by the
17
+ # geocoder. Other matching URIs (currently: LC authorities) are included as
18
+ # `skos:closeMatch`
19
+ #
20
+ # Configuration is handled through a YAML file passed into the initializer
21
+ # (default: 'geocode.yml'). The options are:
22
+ # - 'twofishes_host': the hostname for the twofishes server (default:
23
+ # 'localhost')
24
+ # - 'twofishes_port': the port of the twofishes geocode endpoint (default:
25
+ # 8080)
26
+ # - 'twofishes_timeout': request timeout in seconds (default: 10)
27
+ # - 'twofishes_retries': request retry maximum for twofishes (default: 2)
28
+ # - 'distance_threshold': the maximum distance between a set of coordinates
29
+ # in the input object and a candidate match before we judge it a
30
+ # false positive, given in kilometers. (default: 100)
31
+ # - 'max_interpretations': the number of geocoded "interpretations" to
32
+ # request from the server; these are the places that will be considered
33
+ # by the internal heuristics (default: 5).
34
+ #
35
+ # @example enriching from a `#providedLabel`
36
+ #
37
+ # place = DPLA::MAP::Place.new.tap { |p| p.providedLabel = 'Georgia' }
38
+ # CoarseGeocode.new.enrich_value(place).dump :ttl
39
+ # # [
40
+ # # a <http://www.europeana.eu/schemas/edm/Place>;
41
+ # # <http://dp.la/about/map/providedLabel> "Georgia";
42
+ # # <http://www.geonames.org/ontology#countryCode> "US";
43
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 3.275042e1;
44
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#long> -8.350018e1;
45
+ # # <http://www.w3.org/2004/02/skos/core#closeMatch> <http://id.loc.gov/authorities/names/n79023113>;
46
+ # # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/4197000/>;
47
+ # # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia, United States"
48
+ # # ] .
49
+ #
50
+ # @example enriching from a `#providedLabel` with lat/lng guidance
51
+ #
52
+ # place = DPLA::MAP::Place.new.tap do |p|
53
+ # p.providedLabel = 'Georgia'
54
+ # p.lat = 41.9997
55
+ # p.long = 43.4998
56
+ # end
57
+ #
58
+ # CoarseGeocode.new.enrich_value(place).dump :ttl
59
+ # # [
60
+ # # a <http://www.europeana.eu/schemas/edm/Place>;
61
+ # # <http://dp.la/about/map/providedLabel> "Georgia";
62
+ # # <http://www.geonames.org/ontology#countryCode> "GE";
63
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#lat> 4.199998e1;
64
+ # # <http://www.w3.org/2003/01/geo/wgs84_pos#long> 4.34999e1;
65
+ # # <http://www.w3.org/2004/02/skos/core#exactMatch> <http://sws.geonames.org/614540/>;
66
+ # # <http://www.w3.org/2004/02/skos/core#prefLabel> "Georgia"
67
+ # # ] .
68
+ #
69
+ class CoarseGeocode
70
+ include Audumbla::FieldEnrichment
71
+
72
+ DEFAULT_DISTANCE_THRESHOLD_KMS = 100
73
+ DEFAULT_MAX_INTERPRETATIONS = 5
74
+ DEFAULT_TWOFISHES_HOST = 'localhost'
75
+ DEFAULT_TWOFISHES_PORT = 8080
76
+ DEFAULT_TWOFISHES_TIMEOUT = 10
77
+ DEFAULT_TWOFISHES_RETRIES = 2
78
+
79
+ ##
80
+ # @param [String] config_file a path to a config file for the geocoder;
81
+ # default: 'geocode.yml'
82
+ def initialize(config_file = 'geocode.yml')
83
+ config = YAML.load_file(config_file)
84
+
85
+ @distance_threshold = config.fetch('distance_threshold',
86
+ DEFAULT_DISTANCE_THRESHOLD_KMS)
87
+ @max_interpretations = config.fetch('max_interpretations',
88
+ DEFAULT_MAX_INTERPRETATIONS)
89
+
90
+ Twofishes.configure do |twofish|
91
+ twofish.host = config.fetch('twofishes_host', DEFAULT_TWOFISHES_HOST)
92
+ twofish.port = config.fetch('twofishes_port', DEFAULT_TWOFISHES_PORT)
93
+ twofish.timeout = config.fetch('twofishes_timeout',
94
+ DEFAULT_TWOFISHES_TIMEOUT)
95
+ twofish.retries = config.fetch('twofishes_retries',
96
+ DEFAULT_TWOFISHES_RETRIES)
97
+ end
98
+ end
99
+
100
+ ##
101
+ # Enriches the given value against the TwoFishes coarse geocoder. This
102
+ # process adds a `skos:exactMatch` for a matching GeoNames URI, if any, and
103
+ # populates the remaining place data to the degree possible from the matched
104
+ # feature.
105
+ #
106
+ # Considers a number of matches specified by `@max_interpretations` and
107
+ # returned by Twofishes, via `#match?`.
108
+ #
109
+ # @param [DPLA::MAP::Place] value the place to geocode
110
+ #
111
+ # @return [DPLA::MAP::Place] the initial place, enriched via coarse geocoding
112
+ def enrich_value(value)
113
+ return value unless value.is_a? DPLA::MAP::Place
114
+ interpretations = geocode(value.providedLabel.first,
115
+ [],
116
+ maxInterpretations: @max_interpretations)
117
+ match = interpretations.find { |interp| match?(interp, value) }
118
+ match.nil? ? value : enrich_place(value, match.feature)
119
+ end
120
+
121
+ ##
122
+ # Checks that we are satisfied with the geocoder's best matches prior to
123
+ # acceptance. Most tweaks to the geocoding process should be taken care
124
+ # of at the geocoder itself, but a simple accept/reject of the points
125
+ # offered is possible here. This allows existing data about the place
126
+ # to be used as context.
127
+ #
128
+ # For example, this method returns false if `place` contains latitude
129
+ # and longitude, but the candidate match has a geometry far away from those
130
+ # given. "far away" is defined by `@distance_threshold` from the center of the
131
+ # candidate feature to the point given by `#lat` and `#long` in `place`.
132
+ #
133
+ # @param [GeocodeInterpretation] interpretation a twofishes interpretation
134
+ # @param [#lat#long] place a place to verify a match against
135
+ #
136
+ # @return [Boolean] true if the interpretation is accepted
137
+ def match?(interpretation, place)
138
+ return true if place.lat.empty? || place.long.empty?
139
+
140
+ point = Geokit::LatLng.new(place.lat.first, place.long.first)
141
+ if interpretation.geometry.bounds.nil?
142
+ # measure distance between point centers
143
+ distance = twofishes_point_to_geokit(interpretation.geometry.center)
144
+ .distance_to(point, unit: :kms)
145
+ return distance < @distance_threshold
146
+ end
147
+
148
+ twofishes_bounds_to_geokit(interpretation.geometry.bounds)
149
+ .contains?(point)
150
+ end
151
+
152
+ private
153
+
154
+ ##
155
+ # Populates a DPLA::MAP::Place with data from a given feature. This
156
+ # overwrites existing data with the exception of the identity (URI or node
157
+ # id) and the `providedLabel`. `exactMatch`, `closeMatch`, `label`
158
+ # (skos:prefLabel) and all other geographic data is replaced.
159
+ #
160
+ # @param [DPLA::MAP::Place] place a place to enrich
161
+ # @param [GeocodeFeature] feature a twofishes feature whose data should be
162
+ # added to place.
163
+ #
164
+ # @return [DPLA::MAP::Place] the original place enriched
165
+ def enrich_place(place, feature)
166
+ place.label = feature.display_name
167
+ place.exactMatch = feature_to_geoname_uris(feature)
168
+ place.closeMatch = feature_to_close_matches(feature,
169
+ /^http\:\/\/id\.loc\.gov\/.*/)
170
+ place.countryCode = feature.cc
171
+ place.lat = feature.geometry.center.lat
172
+ place.long = feature.geometry.center.lng
173
+
174
+ place
175
+ end
176
+
177
+ ##
178
+ # Extracts geonameids for the given feature and converts them into URIs
179
+ #
180
+ # @param [GeocodeFeature] feature the feature to identify
181
+ #
182
+ # @return [Array<RDF::URI>] a list of geoname URIs. Generally, this will only
183
+ # contain one exactly matching geonameid in URI form.
184
+ def feature_to_geoname_uris(feature)
185
+ geoname_ids = feature.ids.select { |id| id.source == :geonameid.to_s }
186
+ geoname_ids.map { |id| RDF::URI('http://sws.geonames.org') / id.id + '/' }
187
+ end
188
+
189
+ ##
190
+ # Extracts URIs for closely matching terms in other authority or knowledge
191
+ # organization systems
192
+ #
193
+ # @param [GeocodeFeature] feature the feature to identify
194
+ # @param [Regexp] patterns a splat argument containing any number of
195
+ # patterns to match against the feature's URLs
196
+ #
197
+ # @return [Array<RDF::URI>] a list of matching ids
198
+ def feature_to_close_matches(feature, *patterns)
199
+ union = Regexp.union(patterns)
200
+ feature.attributes.urls.select { |str| union.match(str) }
201
+ .map { |id| RDF::URI(id) }
202
+ end
203
+
204
+ ##
205
+ # Sends a geocode request. This is used in lieu of `Twofishes#geocode`,
206
+ # since that method does not allow passing parameters other than
207
+ # `responseIncludes`.
208
+ #
209
+ # @param [#to_s] location the string to try to match
210
+ # @param [Array] includes a list of twofishes include constants
211
+ # @param [Hash<Symbol, #to_s>] params property and value pairs for
212
+ # parameters to pass to the request
213
+ #
214
+ # @see Twofishes#geocode
215
+ # @see Twofishes::Client
216
+ def geocode(location, includes = [], params = {})
217
+ client = Twofishes::Client
218
+ client.send(:handle_response) do
219
+ request = GeocodeRequest.new(query: location, responseIncludes: includes)
220
+ params.each { |prop, val| request.send("#{prop}=".to_sym, val) }
221
+ client.thrift_client.geocode(request)
222
+ end
223
+ end
224
+
225
+ private
226
+
227
+ ##
228
+ # @param [#lat#long] point a twofishes point to convert to Geokit
229
+ #
230
+ # @return [Geokit::LatLng]
231
+ def twofishes_point_to_geokit(point)
232
+ Geokit::LatLng.new(point.lat, point.lng)
233
+ end
234
+
235
+ ##
236
+ # @param [#ne#sw] bounds a twofishes bounding box to convert to Geokit
237
+ #
238
+ # @return [Geokit::Bounds]
239
+ def twofishes_bounds_to_geokit(bounds)
240
+ Geokit::Bounds.new(twofishes_point_to_geokit(bounds.sw),
241
+ twofishes_point_to_geokit(bounds.ne))
242
+ end
243
+ end
244
+ end
@@ -42,7 +42,19 @@ module Audumbla
42
42
  return record unless record.respond_to? field
43
43
  values = record.send(field)
44
44
  if field_chain.length == 1
45
- new_values = values.map { |v| enrich_value(v) }.flatten.compact
45
+ new_values = values.map { |v| enrich_value(v) }
46
+ # We call #flatten twice, since under some circumstances it fails on
47
+ # nested #to_ary calls the first time. This appears to be related to:
48
+ #
49
+ # http://yehudakatz.com/2010/01/02/the-craziest-fing-bug-ive-ever-seen/
50
+ # and
51
+ # https://bugs.ruby-lang.org/issues/2494
52
+ begin
53
+ new_values = new_values.flatten.compact
54
+ rescue
55
+ new_values = new_values.flatten.compact
56
+ end
57
+
46
58
  record.send("#{field}=".to_sym, new_values)
47
59
  else
48
60
  resources(values).each { |v| enrich_field(v, field_chain[1..-1]) }
@@ -1,3 +1,3 @@
1
1
  module Audumbla
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -0,0 +1,276 @@
1
+ --- !ruby/object:GeocodeResponse
2
+ interpretations:
3
+ - !ruby/object:GeocodeInterpretation
4
+ what: ''
5
+ where: georgia
6
+ feature: !ruby/object:GeocodeFeature
7
+ woeType: 8
8
+ cc: US
9
+ geometry: !ruby/object:FeatureGeometry
10
+ center: !ruby/object:GeocodePoint
11
+ lat: 32.75042
12
+ lng: -83.50018
13
+ bounds: !ruby/object:GeocodeBoundingBox
14
+ ne: !ruby/object:GeocodePoint
15
+ lat: 35.000659
16
+ lng: -80.751429
17
+ sw: !ruby/object:GeocodePoint
18
+ lat: 30.355756999999997
19
+ lng: -85.605165
20
+ source: usa_adm1.shp
21
+ name: Georgia
22
+ displayName: Georgia, United States
23
+ ids:
24
+ - !ruby/object:FeatureId
25
+ source: geonameid
26
+ id: '4197000'
27
+ - !ruby/object:FeatureId
28
+ source: woeid
29
+ id: '2347569'
30
+ names:
31
+ - !ruby/object:FeatureName
32
+ flags:
33
+ - 2
34
+ name: GA
35
+ lang: abbr
36
+ - !ruby/object:FeatureName
37
+ flags:
38
+ - 16
39
+ name: State of Georgia
40
+ lang: en
41
+ - !ruby/object:FeatureName
42
+ flags:
43
+ - 64
44
+ - 16
45
+ name: Peach State
46
+ lang: en
47
+ - !ruby/object:FeatureName
48
+ flags:
49
+ - 128
50
+ - 16
51
+ - 1
52
+ name: Georgia
53
+ lang: en
54
+ highlightedName: "<b>Georgia</b>, United States"
55
+ matchedName: Georgia, United States
56
+ id: geonameid:4197000
57
+ attributes: !ruby/object:GeocodeFeatureAttributes
58
+ adm0cap: false
59
+ adm1cap: false
60
+ scalerank: 20
61
+ labelrank: 0
62
+ natscale: 0
63
+ population: 8975842
64
+ sociallyRelevant: false
65
+ worldcity: false
66
+ urls:
67
+ - http://id.loc.gov/authorities/names/n79023113
68
+ - http://en.wikipedia.org/wiki/Georgia_(U.S._state)
69
+ longId: 72057594042124936
70
+ parentIds:
71
+ - 72057594044179937
72
+ - !ruby/object:GeocodeInterpretation
73
+ what: ''
74
+ where: georgia
75
+ feature: !ruby/object:GeocodeFeature
76
+ woeType: 12
77
+ cc: GE
78
+ geometry: !ruby/object:FeatureGeometry
79
+ center: !ruby/object:GeocodePoint
80
+ lat: 41.99998
81
+ lng: 43.4999
82
+ bounds: !ruby/object:GeocodeBoundingBox
83
+ ne: !ruby/object:GeocodePoint
84
+ lat: 43.586627
85
+ lng: 46.736119
86
+ sw: !ruby/object:GeocodePoint
87
+ lat: 41.054942
88
+ lng: 40.006604
89
+ source: gn-adm0-new3.json
90
+ name: Georgia
91
+ displayName: Georgia
92
+ ids:
93
+ - !ruby/object:FeatureId
94
+ source: geonameid
95
+ id: '614540'
96
+ names:
97
+ - !ruby/object:FeatureName
98
+ flags:
99
+ - 2
100
+ name: GE
101
+ lang: abbr
102
+ - !ruby/object:FeatureName
103
+ flags:
104
+ - 1024
105
+ name: Georgian Soviet Socialist Republic
106
+ lang: en
107
+ - !ruby/object:FeatureName
108
+ flags:
109
+ - 128
110
+ - 64
111
+ - 1
112
+ name: Georgia
113
+ lang: en
114
+ highlightedName: "<b>Georgia</b>"
115
+ matchedName: Georgia
116
+ id: geonameid:614540
117
+ attributes: !ruby/object:GeocodeFeatureAttributes
118
+ adm0cap: false
119
+ adm1cap: false
120
+ scalerank: 20
121
+ labelrank: 0
122
+ natscale: 0
123
+ population: 4630000
124
+ sociallyRelevant: false
125
+ worldcity: false
126
+ urls:
127
+ - http://ru.wikipedia.org/wiki/%D0%93%D1%80%D1%83%D0%B7%D0%B8%D1%8F
128
+ - http://en.wikipedia.org/wiki/Georgia_%28country%29
129
+ longId: 72057594038542476
130
+ parentIds:
131
+ - 72057594044183083
132
+ longIds:
133
+ - 72057594038542363
134
+ - !ruby/object:GeocodeInterpretation
135
+ what: ''
136
+ where: georgia
137
+ feature: !ruby/object:GeocodeFeature
138
+ woeType: 10
139
+ cc: US
140
+ geometry: !ruby/object:FeatureGeometry
141
+ center: !ruby/object:GeocodePoint
142
+ lat: 44.72824
143
+ lng: -73.12763
144
+ name: Town of Georgia
145
+ displayName: Town of Georgia, VT, United States
146
+ ids:
147
+ - !ruby/object:FeatureId
148
+ source: geonameid
149
+ id: '5236379'
150
+ - !ruby/object:FeatureId
151
+ source: woeid
152
+ id: '2409718'
153
+ names:
154
+ - !ruby/object:FeatureName
155
+ flags:
156
+ - 16
157
+ - 1
158
+ name: Town of Georgia
159
+ lang: en
160
+ - !ruby/object:FeatureName
161
+ flags:
162
+ - 16
163
+ - 8
164
+ - 1
165
+ name: Georgia
166
+ lang: en
167
+ highlightedName: "<b>Georgia</b>, VT, United States"
168
+ matchedName: Georgia, VT, United States
169
+ id: geonameid:5236379
170
+ attributes: !ruby/object:GeocodeFeatureAttributes
171
+ adm0cap: false
172
+ adm1cap: false
173
+ scalerank: 20
174
+ labelrank: 0
175
+ natscale: 0
176
+ population: 0
177
+ sociallyRelevant: false
178
+ worldcity: false
179
+ urls: []
180
+ longId: 72057594043164315
181
+ parentIds:
182
+ - 72057594044179937
183
+ - 72057594043170219
184
+ - 72057594043164215
185
+ - !ruby/object:GeocodeInterpretation
186
+ what: ''
187
+ where: georgia
188
+ feature: !ruby/object:GeocodeFeature
189
+ woeType: 7
190
+ cc: US
191
+ geometry: !ruby/object:FeatureGeometry
192
+ center: !ruby/object:GeocodePoint
193
+ lat: 40.18733
194
+ lng: -74.28459
195
+ bounds: !ruby/object:GeocodeBoundingBox
196
+ ne: !ruby/object:GeocodePoint
197
+ lat: 40.1990013123
198
+ lng: -74.2533340454
199
+ sw: !ruby/object:GeocodePoint
200
+ lat: 40.1450004578
201
+ lng: -74.3127212524
202
+ name: Georgia
203
+ displayName: Georgia, NJ, United States
204
+ ids:
205
+ - !ruby/object:FeatureId
206
+ source: geonameid
207
+ id: '5098392'
208
+ - !ruby/object:FeatureId
209
+ source: woeid
210
+ id: '2409714'
211
+ names:
212
+ - !ruby/object:FeatureName
213
+ flags:
214
+ - 16
215
+ - 1
216
+ name: Georgia
217
+ lang: en
218
+ highlightedName: "<b>Georgia</b>, NJ, United States"
219
+ matchedName: Georgia, NJ, United States
220
+ id: geonameid:5098392
221
+ attributes: !ruby/object:GeocodeFeatureAttributes
222
+ adm0cap: false
223
+ adm1cap: false
224
+ scalerank: 20
225
+ labelrank: 0
226
+ natscale: 0
227
+ population: 0
228
+ sociallyRelevant: false
229
+ worldcity: false
230
+ urls:
231
+ - http://en.wikipedia.org/wiki/Georgia%2C_New_Jersey
232
+ longId: 72057594043026328
233
+ parentIds:
234
+ - 72057594044179937
235
+ - 72057594043029696
236
+ - 72057594043029241
237
+ - !ruby/object:GeocodeInterpretation
238
+ what: ''
239
+ where: georgia
240
+ feature: !ruby/object:GeocodeFeature
241
+ woeType: 0
242
+ cc: CM
243
+ geometry: !ruby/object:FeatureGeometry
244
+ center: !ruby/object:GeocodePoint
245
+ lat: 6.6
246
+ lng: 14.01667
247
+ name: Gorgia
248
+ displayName: Gorgia, Cameroon
249
+ ids:
250
+ - !ruby/object:FeatureId
251
+ source: geonameid
252
+ id: '2231063'
253
+ names:
254
+ - !ruby/object:FeatureName
255
+ flags:
256
+ - 16
257
+ - 1
258
+ name: Gorgia
259
+ lang: en
260
+ highlightedName: "<b>Georgia</b>, Cameroon"
261
+ matchedName: Georgia, Cameroon
262
+ id: geonameid:2231063
263
+ attributes: !ruby/object:GeocodeFeatureAttributes
264
+ adm0cap: false
265
+ adm1cap: false
266
+ scalerank: 20
267
+ labelrank: 0
268
+ natscale: 0
269
+ population: 0
270
+ sociallyRelevant: false
271
+ worldcity: false
272
+ urls: []
273
+ longId: 72057594040158999
274
+ parentIds:
275
+ - 72057594040161323
276
+ - 72057594040163951
@@ -0,0 +1,105 @@
1
+ require 'spec_helper'
2
+
3
+ describe Audumbla::Enrichments::CoarseGeocode do
4
+ it_behaves_like 'a field enrichment'
5
+
6
+ before do
7
+ allow(Twofishes::Client)
8
+ .to receive(:handle_response)
9
+ .and_return(Twofishes::Result.from_response(georgia_response))
10
+ end
11
+
12
+ let(:georgia_response) { YAML::load_file('spec/fixtures/georgia.yml') }
13
+
14
+ describe '#enrich_value' do
15
+ let(:place) do
16
+ build(:place,
17
+ providedLabel: 'georgia',
18
+ label: nil,
19
+ exactMatch: nil,
20
+ countryCode: nil,
21
+ parentFeature: nil,
22
+ lat: nil,
23
+ long: nil,
24
+ alt: nil)
25
+ end
26
+
27
+ let(:prefLabel) { 'Georgia, United States' }
28
+ let(:geoname_uri) { RDF::URI('http://sws.geonames.org/4197000/') }
29
+ let(:country_code) { 'US' }
30
+ let(:lat) { 32.75042 }
31
+ let(:lng) { -83.50018 }
32
+ let(:lcname_uri) do
33
+ RDF::URI('http://id.loc.gov/authorities/names/n79023113')
34
+ end
35
+
36
+ describe '#enrich_value' do
37
+ it 'returns the same place entity' do
38
+ expect(subject.enrich_value(place)).to eq place
39
+ end
40
+
41
+ it 'retains providedLabel' do
42
+ expect(subject.enrich_value(place))
43
+ .to have_attributes(providedLabel: contain_exactly('georgia'))
44
+ end
45
+
46
+ it 'it gives the geoname as skos:exactMatch' do
47
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
48
+ .to contain_exactly(geoname_uri)
49
+ end
50
+
51
+ it 'adds LC closeMatches, if appropriate' do
52
+ expect(subject.enrich_value(place).closeMatch.map(&:rdf_subject))
53
+ .to contain_exactly(lcname_uri)
54
+ end
55
+
56
+ it 'enriches place with new data' do
57
+ expect(subject.enrich_value(place))
58
+ .to have_attributes(
59
+ label: contain_exactly(prefLabel),
60
+ countryCode: contain_exactly(country_code),
61
+ lat: contain_exactly(be_within(0.01).of(lat)),
62
+ long: contain_exactly(be_within(0.01).of(lng))
63
+ )
64
+ end
65
+
66
+ context 'with lat/lng' do
67
+ context 'and label' do
68
+ let(:place) do
69
+ build(:place,
70
+ providedLabel: 'georgia',
71
+ label: nil,
72
+ exactMatch: nil,
73
+ countryCode: nil,
74
+ parentFeature: nil,
75
+ lat: lat,
76
+ long: lng,
77
+ alt: nil)
78
+ end
79
+
80
+ it 'gives result matching lat/lng' do
81
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
82
+ .to contain_exactly(geoname_uri)
83
+ end
84
+
85
+ it 'skips result not matching lat/lng' do
86
+ place.lat = 41.9997
87
+ place.long = 43.4998
88
+
89
+ georgia_country_uri = RDF::URI('http://sws.geonames.org/614540/')
90
+
91
+ # points are in bounding box for Georgia but not equal to center
92
+ expect(subject.enrich_value(place).exactMatch.map(&:rdf_subject))
93
+ .to contain_exactly(georgia_country_uri)
94
+ end
95
+
96
+ it 'selects no match if none match lat/lng' do
97
+ place.lat = 41.9997
98
+ place.long = -43.4998
99
+ expect(subject.enrich_value(place).exactMatch).to be_empty
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: audumbla
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Audrey Altman
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2015-05-29 00:00:00.000000000 Z
14
+ date: 2015-07-22 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: dpla-map
@@ -27,6 +27,34 @@ dependencies:
27
27
  - - "~>"
28
28
  - !ruby/object:Gem::Version
29
29
  version: 4.0.0.0.pre.10
30
+ - !ruby/object:Gem::Dependency
31
+ name: twofishes
32
+ requirement: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ - !ruby/object:Gem::Dependency
45
+ name: geokit
46
+ requirement: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ type: :runtime
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
30
58
  - !ruby/object:Gem::Dependency
31
59
  name: rspec
32
60
  requirement: !ruby/object:Gem::Requirement
@@ -41,6 +69,20 @@ dependencies:
41
69
  - - "~>"
42
70
  - !ruby/object:Gem::Version
43
71
  version: '3.0'
72
+ - !ruby/object:Gem::Dependency
73
+ name: webmock
74
+ requirement: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ type: :development
80
+ prerelease: false
81
+ version_requirements: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
44
86
  - !ruby/object:Gem::Dependency
45
87
  name: pry
46
88
  requirement: !ruby/object:Gem::Requirement
@@ -69,15 +111,17 @@ files:
69
111
  - lib/audumbla/enrichment.rb~
70
112
  - lib/audumbla/enrichments.rb
71
113
  - lib/audumbla/enrichments.rb~
72
- - lib/audumbla/enrichments/geocode.rb~
114
+ - lib/audumbla/enrichments/coarse_geocode.rb
73
115
  - lib/audumbla/enrichments/version.rb~
74
116
  - lib/audumbla/field_enrichment.rb
75
117
  - lib/audumbla/field_enrichment.rb~
76
118
  - lib/audumbla/spec/enrichment.rb
77
119
  - lib/audumbla/version.rb
78
120
  - lib/audumbla/version.rb~
121
+ - spec/fixtures/georgia.yml
79
122
  - spec/lib/audumbla/enrichment_spec.rb
80
123
  - spec/lib/audumbla/enrichment_spec.rb~
124
+ - spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
81
125
  - spec/lib/audumbla/enrichments/geocode_spec.rb~
82
126
  - spec/lib/audumbla/field_enrichment_spec.rb
83
127
  - spec/lib/audumbla/field_enrichment_spec.rb~
@@ -103,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
147
  version: '0'
104
148
  requirements: []
105
149
  rubyforge_project:
106
- rubygems_version: 2.2.1
150
+ rubygems_version: 2.4.5
107
151
  signing_key:
108
152
  specification_version: 4
109
153
  summary: A toolkit for enhancement of RDF Metadata
@@ -112,7 +156,8 @@ test_files:
112
156
  - spec/lib/audumbla/field_enrichment_spec.rb
113
157
  - spec/lib/audumbla/field_enrichment_spec.rb~
114
158
  - spec/lib/audumbla/enrichment_spec.rb~
159
+ - spec/lib/audumbla/enrichments/coarse_geocode_spec.rb
115
160
  - spec/lib/audumbla/enrichments/geocode_spec.rb~
116
161
  - spec/spec_helper.rb
162
+ - spec/fixtures/georgia.yml
117
163
  - spec/spec_helper.rb~
118
- has_rdoc:
@@ -1,11 +0,0 @@
1
- module Krikri::Enrichments
2
- ##
3
- #
4
- class Geocode
5
- include Krikri::FieldEnrichment
6
-
7
- def enrich_value(value)
8
- value
9
- end
10
- end
11
- end