geomash 0.2.1 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/geomash.yml.sample +3 -0
- data/lib/geomash/autoexpire_cache_dalli.rb +55 -0
- data/lib/geomash/autoexpire_cache_redis.rb +26 -0
- data/lib/geomash/constants.rb +6 -1
- data/lib/geomash/geonames.rb +19 -14
- data/lib/geomash/parser.rb +48 -18
- data/lib/geomash/standardizer.rb +46 -17
- data/lib/geomash/tgn.rb +274 -217
- data/lib/geomash/version.rb +1 -1
- data/lib/geomash.rb +8 -18
- data/test/geomash_test.rb +58 -4
- data/test/geonames_test.rb +1 -1
- data/test/standardizer_test.rb +37 -0
- metadata +15 -14
data/lib/geomash/version.rb
CHANGED
data/lib/geomash.rb
CHANGED
@@ -5,6 +5,8 @@ module Geomash
|
|
5
5
|
require "geomash/tgn"
|
6
6
|
require "geomash/geonames"
|
7
7
|
require "geomash/town_lookup"
|
8
|
+
require "geomash/autoexpire_cache_dalli"
|
9
|
+
require "geomash/autoexpire_cache_redis"
|
8
10
|
require "geocoder"
|
9
11
|
require "countries"
|
10
12
|
require "unidecoder"
|
@@ -55,25 +57,13 @@ module Geomash
|
|
55
57
|
#FIXME
|
56
58
|
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(return_hash)
|
57
59
|
|
58
|
-
if return_hash[:tgn].blank?
|
60
|
+
if return_hash[:tgn].blank? || (return_hash[:tgn][:original_string_differs] && return_hash[:state_part].present?)
|
59
61
|
geo_hash_temp = Geomash::Standardizer.try_with_entered_names(return_hash)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
geo_hash_temp[:neighborhood_part] = nil
|
66
|
-
geo_hash_temp[:original_string_differs] = true
|
67
|
-
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
68
|
-
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
69
|
-
elsif return_hash[:city_part].present? && return_hash[:tgn].blank?
|
70
|
-
|
71
|
-
geo_hash_temp = return_hash.clone
|
72
|
-
geo_hash_temp[:city_part] = nil
|
73
|
-
geo_hash_temp[:original_string_differs] = true
|
74
|
-
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
75
|
-
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
76
|
-
|
62
|
+
geo_hash_temp[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp) if geo_hash_temp.present?
|
63
|
+
if geo_hash_temp.present? && return_hash[:tgn].blank?
|
64
|
+
return_hash[:tgn] = geo_hash_temp[:tgn]
|
65
|
+
elsif geo_hash_temp.present? && geo_hash_temp[:tgn][:parse_depth] > return_hash[:tgn][:parse_depth]
|
66
|
+
return_hash[:tgn] = geo_hash_temp[:tgn]
|
77
67
|
end
|
78
68
|
|
79
69
|
end
|
data/test/geomash_test.rb
CHANGED
@@ -16,9 +16,9 @@ class GeomashTest < ActiveSupport::TestCase
|
|
16
16
|
assert_equal 'France', result[:country_part]
|
17
17
|
assert_equal nil, result[:street_part]
|
18
18
|
assert_equal '7010587', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
19
|
-
assert_equal
|
20
|
-
assert_equal '2987374', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
21
|
-
assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
19
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
20
|
+
#assert_equal '2987374', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
21
|
+
#assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
22
22
|
|
23
23
|
#Slight variation problem with neighborhood: 11. Bezirk (Vienna, Austria)--Biography
|
24
24
|
result = Geomash.parse('15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions', true)
|
@@ -43,6 +43,40 @@ class GeomashTest < ActiveSupport::TestCase
|
|
43
43
|
assert_equal '2953481', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
44
44
|
assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
45
45
|
|
46
|
+
result = Geomash.parse('Naroden Etnografski Muzeĭ (Sofia, Bulgaria)--Catalogs', true)
|
47
|
+
assert_equal 'Sofia', result[:city_part]
|
48
|
+
assert_equal 'Sofia', result[:state_part]
|
49
|
+
assert_equal 'Bulgaria', result[:country_part]
|
50
|
+
assert_equal nil, result[:neighborhood_part]
|
51
|
+
assert_equal nil, result[:street_part]
|
52
|
+
assert_equal '7009977', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
53
|
+
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
54
|
+
#assert_equal '727011', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
55
|
+
#assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
56
|
+
|
57
|
+
result = Geomash.parse('Lettering--United States--History--19th century', true)
|
58
|
+
assert_equal nil, result[:city_part]
|
59
|
+
assert_equal nil, result[:state_part]
|
60
|
+
assert_equal 'United States', result[:country_part]
|
61
|
+
assert_equal nil, result[:neighborhood_part]
|
62
|
+
assert_equal nil, result[:street_part]
|
63
|
+
assert_equal '7012149', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
64
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
65
|
+
assert_equal '6252001', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
66
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
67
|
+
|
68
|
+
result = Geomash.parse('Lettering--United States--History--19th century', true)
|
69
|
+
assert_equal nil, result[:city_part]
|
70
|
+
assert_equal nil, result[:state_part]
|
71
|
+
assert_equal 'United States', result[:country_part]
|
72
|
+
assert_equal nil, result[:neighborhood_part]
|
73
|
+
assert_equal nil, result[:street_part]
|
74
|
+
assert_equal '7012149', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
75
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
76
|
+
assert_equal '6252001', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
77
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
78
|
+
|
79
|
+
|
46
80
|
end
|
47
81
|
|
48
82
|
def test_parse_with_no_flag
|
@@ -126,6 +160,17 @@ class GeomashTest < ActiveSupport::TestCase
|
|
126
160
|
assert_equal nil, result[:street_part]
|
127
161
|
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
128
162
|
|
163
|
+
#Case of a country with no states
|
164
|
+
result = Geomash.parse('Tokyo, Japan')
|
165
|
+
assert_equal 'Tokyo', result[:city_part]
|
166
|
+
assert_equal nil, result[:state_part]
|
167
|
+
assert_equal 'Japan', result[:country_part]
|
168
|
+
assert_equal nil, result[:neighborhood_part]
|
169
|
+
assert_equal '7004472', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
170
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
171
|
+
assert_equal '1850147', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
172
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
173
|
+
|
129
174
|
#Should find the Michigan Atlanta over the Georgia Atlanta
|
130
175
|
#State part from an API giving me Atlanta????
|
131
176
|
result = Geomash.parse('Atlanta, MI')
|
@@ -138,7 +183,16 @@ class GeomashTest < ActiveSupport::TestCase
|
|
138
183
|
assert_equal '4984500', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
139
184
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
140
185
|
|
141
|
-
|
186
|
+
#TODO: This should also likely parse as North Korea as well...
|
187
|
+
result = Geomash.parse('Korea')
|
188
|
+
assert_equal nil, result[:city_part]
|
189
|
+
assert_equal nil, result[:state_part]
|
190
|
+
assert_equal 'South Korea', result[:country_part]
|
191
|
+
assert_equal nil, result[:neighborhood_part]
|
192
|
+
assert_equal '7000299', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
193
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
194
|
+
assert_equal '1835841', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
195
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
142
196
|
|
143
197
|
|
144
198
|
|
data/test/geonames_test.rb
CHANGED
@@ -11,7 +11,7 @@ class GeonamesTest < ActiveSupport::TestCase
|
|
11
11
|
assert_equal '-84.18404', result[:coords][:box][:west]
|
12
12
|
assert_equal '45.01697', result[:coords][:box][:north]
|
13
13
|
assert_equal '-84.11884', result[:coords][:box][:east]
|
14
|
-
assert_equal '44.
|
14
|
+
assert_equal '44.9886', result[:coords][:box][:south]
|
15
15
|
assert_equal 'Atlanta', result[:hier_geo][:ppla2]
|
16
16
|
assert_equal 'Montmorency County', result[:hier_geo][:adm2]
|
17
17
|
assert_equal 'Michigan', result[:hier_geo][:adm1]
|
data/test/standardizer_test.rb
CHANGED
@@ -22,5 +22,42 @@ class ParserTest < ActiveSupport::TestCase
|
|
22
22
|
|
23
23
|
end
|
24
24
|
|
25
|
+
def test_geographic_parser
|
26
|
+
#Nil results... problem cases
|
27
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Yuma Indians')
|
28
|
+
assert_equal '', result
|
29
|
+
|
30
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Yuma Indians--Woodworking')
|
31
|
+
assert_equal '', result
|
32
|
+
|
33
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Norway maple')
|
34
|
+
assert_equal '', result
|
35
|
+
|
36
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Some Value--German Engineering--Cars')
|
37
|
+
assert_equal '', result
|
38
|
+
|
39
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Art, Japanese')
|
40
|
+
assert_equal '', result
|
41
|
+
|
42
|
+
#Normal cases
|
43
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass')
|
44
|
+
assert_equal 'Palmer, Mass', result
|
45
|
+
|
46
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Naroden Etnografski Muzeĭ (Sofia, Bulgaria)--Catalogs')
|
47
|
+
assert_equal 'Naroden Etnografski Muzeĭ, Sofia, Bulgaria', result
|
48
|
+
|
49
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Germany')
|
50
|
+
assert_equal 'Germany', result
|
51
|
+
|
52
|
+
result = Geomash::Standardizer.parse_for_geographic_term('United States')
|
53
|
+
assert_equal 'United States', result
|
54
|
+
|
55
|
+
result = Geomash::Standardizer.parse_for_geographic_term('South Korea')
|
56
|
+
assert_equal 'South Korea', result
|
57
|
+
|
58
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Blah (North Korea)')
|
59
|
+
assert_equal 'Blah, North Korea', result
|
60
|
+
end
|
61
|
+
|
25
62
|
|
26
63
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geomash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -160,21 +160,23 @@ extra_rdoc_files: []
|
|
160
160
|
files:
|
161
161
|
- config/geomash.yml.sample
|
162
162
|
- lib/geomash.rb
|
163
|
-
- lib/geomash/geonames.rb
|
164
|
-
- lib/geomash/parser.rb
|
165
|
-
- lib/geomash/standardizer.rb
|
166
|
-
- lib/geomash/version.rb
|
167
163
|
- lib/geomash/tgn.rb
|
164
|
+
- lib/geomash/autoexpire_cache_dalli.rb
|
168
165
|
- lib/geomash/town_lookup.rb
|
166
|
+
- lib/geomash/geonames.rb
|
169
167
|
- lib/geomash/constants.rb
|
168
|
+
- lib/geomash/version.rb
|
169
|
+
- lib/geomash/standardizer.rb
|
170
|
+
- lib/geomash/autoexpire_cache_redis.rb
|
171
|
+
- lib/geomash/parser.rb
|
170
172
|
- Rakefile
|
171
173
|
- test/standardizer_test.rb
|
172
|
-
- test/parser_test.rb
|
173
174
|
- test/geomash_test.rb
|
174
|
-
- test/geonames_test.rb
|
175
|
-
- test/town_lookup_test.rb
|
176
175
|
- test/test_helper.rb
|
177
176
|
- test/tgn_test.rb
|
177
|
+
- test/town_lookup_test.rb
|
178
|
+
- test/parser_test.rb
|
179
|
+
- test/geonames_test.rb
|
178
180
|
homepage: http://www.bpl.org
|
179
181
|
licenses: []
|
180
182
|
metadata: {}
|
@@ -194,17 +196,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
194
196
|
version: '0'
|
195
197
|
requirements: []
|
196
198
|
rubyforge_project:
|
197
|
-
rubygems_version: 2.
|
199
|
+
rubygems_version: 2.0.3
|
198
200
|
signing_key:
|
199
201
|
specification_version: 4
|
200
202
|
summary: Parse string for potential geographic matches and return that data along
|
201
203
|
with the TGN ID and Geonames ID.
|
202
204
|
test_files:
|
203
205
|
- test/standardizer_test.rb
|
204
|
-
- test/parser_test.rb
|
205
206
|
- test/geomash_test.rb
|
206
|
-
- test/geonames_test.rb
|
207
|
-
- test/town_lookup_test.rb
|
208
207
|
- test/test_helper.rb
|
209
208
|
- test/tgn_test.rb
|
210
|
-
|
209
|
+
- test/town_lookup_test.rb
|
210
|
+
- test/parser_test.rb
|
211
|
+
- test/geonames_test.rb
|