geomash 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/geomash.yml.sample +3 -0
- data/lib/geomash/autoexpire_cache_dalli.rb +55 -0
- data/lib/geomash/autoexpire_cache_redis.rb +26 -0
- data/lib/geomash/constants.rb +6 -1
- data/lib/geomash/geonames.rb +19 -14
- data/lib/geomash/parser.rb +48 -18
- data/lib/geomash/standardizer.rb +46 -17
- data/lib/geomash/tgn.rb +274 -217
- data/lib/geomash/version.rb +1 -1
- data/lib/geomash.rb +8 -18
- data/test/geomash_test.rb +58 -4
- data/test/geonames_test.rb +1 -1
- data/test/standardizer_test.rb +37 -0
- metadata +15 -14
data/lib/geomash/version.rb
CHANGED
data/lib/geomash.rb
CHANGED
@@ -5,6 +5,8 @@ module Geomash
|
|
5
5
|
require "geomash/tgn"
|
6
6
|
require "geomash/geonames"
|
7
7
|
require "geomash/town_lookup"
|
8
|
+
require "geomash/autoexpire_cache_dalli"
|
9
|
+
require "geomash/autoexpire_cache_redis"
|
8
10
|
require "geocoder"
|
9
11
|
require "countries"
|
10
12
|
require "unidecoder"
|
@@ -55,25 +57,13 @@ module Geomash
|
|
55
57
|
#FIXME
|
56
58
|
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(return_hash)
|
57
59
|
|
58
|
-
if return_hash[:tgn].blank?
|
60
|
+
if return_hash[:tgn].blank? || (return_hash[:tgn][:original_string_differs] && return_hash[:state_part].present?)
|
59
61
|
geo_hash_temp = Geomash::Standardizer.try_with_entered_names(return_hash)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
geo_hash_temp[:neighborhood_part] = nil
|
66
|
-
geo_hash_temp[:original_string_differs] = true
|
67
|
-
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
68
|
-
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
69
|
-
elsif return_hash[:city_part].present? && return_hash[:tgn].blank?
|
70
|
-
|
71
|
-
geo_hash_temp = return_hash.clone
|
72
|
-
geo_hash_temp[:city_part] = nil
|
73
|
-
geo_hash_temp[:original_string_differs] = true
|
74
|
-
return_hash[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp)
|
75
|
-
return_hash[:tgn][:original_string_differs] = true if return_hash[:tgn].present?
|
76
|
-
|
62
|
+
geo_hash_temp[:tgn] = Geomash::TGN.tgn_id_from_geo_hash(geo_hash_temp) if geo_hash_temp.present?
|
63
|
+
if geo_hash_temp.present? && return_hash[:tgn].blank?
|
64
|
+
return_hash[:tgn] = geo_hash_temp[:tgn]
|
65
|
+
elsif geo_hash_temp.present? && geo_hash_temp[:tgn][:parse_depth] > return_hash[:tgn][:parse_depth]
|
66
|
+
return_hash[:tgn] = geo_hash_temp[:tgn]
|
77
67
|
end
|
78
68
|
|
79
69
|
end
|
data/test/geomash_test.rb
CHANGED
@@ -16,9 +16,9 @@ class GeomashTest < ActiveSupport::TestCase
|
|
16
16
|
assert_equal 'France', result[:country_part]
|
17
17
|
assert_equal nil, result[:street_part]
|
18
18
|
assert_equal '7010587', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
19
|
-
assert_equal
|
20
|
-
assert_equal '2987374', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
21
|
-
assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
19
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
20
|
+
#assert_equal '2987374', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
21
|
+
#assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
22
22
|
|
23
23
|
#Slight variation problem with neighborhood: 11. Bezirk (Vienna, Austria)--Biography
|
24
24
|
result = Geomash.parse('15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions', true)
|
@@ -43,6 +43,40 @@ class GeomashTest < ActiveSupport::TestCase
|
|
43
43
|
assert_equal '2953481', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
44
44
|
assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
45
45
|
|
46
|
+
result = Geomash.parse('Naroden Etnografski Muzeĭ (Sofia, Bulgaria)--Catalogs', true)
|
47
|
+
assert_equal 'Sofia', result[:city_part]
|
48
|
+
assert_equal 'Sofia', result[:state_part]
|
49
|
+
assert_equal 'Bulgaria', result[:country_part]
|
50
|
+
assert_equal nil, result[:neighborhood_part]
|
51
|
+
assert_equal nil, result[:street_part]
|
52
|
+
assert_equal '7009977', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
53
|
+
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
54
|
+
#assert_equal '727011', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
55
|
+
#assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
56
|
+
|
57
|
+
result = Geomash.parse('Lettering--United States--History--19th century', true)
|
58
|
+
assert_equal nil, result[:city_part]
|
59
|
+
assert_equal nil, result[:state_part]
|
60
|
+
assert_equal 'United States', result[:country_part]
|
61
|
+
assert_equal nil, result[:neighborhood_part]
|
62
|
+
assert_equal nil, result[:street_part]
|
63
|
+
assert_equal '7012149', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
64
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
65
|
+
assert_equal '6252001', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
66
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
67
|
+
|
68
|
+
result = Geomash.parse('Lettering--United States--History--19th century', true)
|
69
|
+
assert_equal nil, result[:city_part]
|
70
|
+
assert_equal nil, result[:state_part]
|
71
|
+
assert_equal 'United States', result[:country_part]
|
72
|
+
assert_equal nil, result[:neighborhood_part]
|
73
|
+
assert_equal nil, result[:street_part]
|
74
|
+
assert_equal '7012149', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
75
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
76
|
+
assert_equal '6252001', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
77
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
78
|
+
|
79
|
+
|
46
80
|
end
|
47
81
|
|
48
82
|
def test_parse_with_no_flag
|
@@ -126,6 +160,17 @@ class GeomashTest < ActiveSupport::TestCase
|
|
126
160
|
assert_equal nil, result[:street_part]
|
127
161
|
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
128
162
|
|
163
|
+
#Case of a country with no states
|
164
|
+
result = Geomash.parse('Tokyo, Japan')
|
165
|
+
assert_equal 'Tokyo', result[:city_part]
|
166
|
+
assert_equal nil, result[:state_part]
|
167
|
+
assert_equal 'Japan', result[:country_part]
|
168
|
+
assert_equal nil, result[:neighborhood_part]
|
169
|
+
assert_equal '7004472', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
170
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
171
|
+
assert_equal '1850147', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
172
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
173
|
+
|
129
174
|
#Should find the Michigan Atlanta over the Georgia Atlanta
|
130
175
|
#State part from an API giving me Atlanta????
|
131
176
|
result = Geomash.parse('Atlanta, MI')
|
@@ -138,7 +183,16 @@ class GeomashTest < ActiveSupport::TestCase
|
|
138
183
|
assert_equal '4984500', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
139
184
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
140
185
|
|
141
|
-
|
186
|
+
#TODO: This should also likely parse as North Korea as well...
|
187
|
+
result = Geomash.parse('Korea')
|
188
|
+
assert_equal nil, result[:city_part]
|
189
|
+
assert_equal nil, result[:state_part]
|
190
|
+
assert_equal 'South Korea', result[:country_part]
|
191
|
+
assert_equal nil, result[:neighborhood_part]
|
192
|
+
assert_equal '7000299', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
193
|
+
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
194
|
+
assert_equal '1835841', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
195
|
+
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
142
196
|
|
143
197
|
|
144
198
|
|
data/test/geonames_test.rb
CHANGED
@@ -11,7 +11,7 @@ class GeonamesTest < ActiveSupport::TestCase
|
|
11
11
|
assert_equal '-84.18404', result[:coords][:box][:west]
|
12
12
|
assert_equal '45.01697', result[:coords][:box][:north]
|
13
13
|
assert_equal '-84.11884', result[:coords][:box][:east]
|
14
|
-
assert_equal '44.
|
14
|
+
assert_equal '44.9886', result[:coords][:box][:south]
|
15
15
|
assert_equal 'Atlanta', result[:hier_geo][:ppla2]
|
16
16
|
assert_equal 'Montmorency County', result[:hier_geo][:adm2]
|
17
17
|
assert_equal 'Michigan', result[:hier_geo][:adm1]
|
data/test/standardizer_test.rb
CHANGED
@@ -22,5 +22,42 @@ class ParserTest < ActiveSupport::TestCase
|
|
22
22
|
|
23
23
|
end
|
24
24
|
|
25
|
+
def test_geographic_parser
|
26
|
+
#Nil results... problem cases
|
27
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Yuma Indians')
|
28
|
+
assert_equal '', result
|
29
|
+
|
30
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Yuma Indians--Woodworking')
|
31
|
+
assert_equal '', result
|
32
|
+
|
33
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Norway maple')
|
34
|
+
assert_equal '', result
|
35
|
+
|
36
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Some Value--German Engineering--Cars')
|
37
|
+
assert_equal '', result
|
38
|
+
|
39
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Art, Japanese')
|
40
|
+
assert_equal '', result
|
41
|
+
|
42
|
+
#Normal cases
|
43
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass')
|
44
|
+
assert_equal 'Palmer, Mass', result
|
45
|
+
|
46
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Naroden Etnografski Muzeĭ (Sofia, Bulgaria)--Catalogs')
|
47
|
+
assert_equal 'Naroden Etnografski Muzeĭ, Sofia, Bulgaria', result
|
48
|
+
|
49
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Germany')
|
50
|
+
assert_equal 'Germany', result
|
51
|
+
|
52
|
+
result = Geomash::Standardizer.parse_for_geographic_term('United States')
|
53
|
+
assert_equal 'United States', result
|
54
|
+
|
55
|
+
result = Geomash::Standardizer.parse_for_geographic_term('South Korea')
|
56
|
+
assert_equal 'South Korea', result
|
57
|
+
|
58
|
+
result = Geomash::Standardizer.parse_for_geographic_term('Blah (North Korea)')
|
59
|
+
assert_equal 'Blah, North Korea', result
|
60
|
+
end
|
61
|
+
|
25
62
|
|
26
63
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geomash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -160,21 +160,23 @@ extra_rdoc_files: []
|
|
160
160
|
files:
|
161
161
|
- config/geomash.yml.sample
|
162
162
|
- lib/geomash.rb
|
163
|
-
- lib/geomash/geonames.rb
|
164
|
-
- lib/geomash/parser.rb
|
165
|
-
- lib/geomash/standardizer.rb
|
166
|
-
- lib/geomash/version.rb
|
167
163
|
- lib/geomash/tgn.rb
|
164
|
+
- lib/geomash/autoexpire_cache_dalli.rb
|
168
165
|
- lib/geomash/town_lookup.rb
|
166
|
+
- lib/geomash/geonames.rb
|
169
167
|
- lib/geomash/constants.rb
|
168
|
+
- lib/geomash/version.rb
|
169
|
+
- lib/geomash/standardizer.rb
|
170
|
+
- lib/geomash/autoexpire_cache_redis.rb
|
171
|
+
- lib/geomash/parser.rb
|
170
172
|
- Rakefile
|
171
173
|
- test/standardizer_test.rb
|
172
|
-
- test/parser_test.rb
|
173
174
|
- test/geomash_test.rb
|
174
|
-
- test/geonames_test.rb
|
175
|
-
- test/town_lookup_test.rb
|
176
175
|
- test/test_helper.rb
|
177
176
|
- test/tgn_test.rb
|
177
|
+
- test/town_lookup_test.rb
|
178
|
+
- test/parser_test.rb
|
179
|
+
- test/geonames_test.rb
|
178
180
|
homepage: http://www.bpl.org
|
179
181
|
licenses: []
|
180
182
|
metadata: {}
|
@@ -194,17 +196,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
194
196
|
version: '0'
|
195
197
|
requirements: []
|
196
198
|
rubyforge_project:
|
197
|
-
rubygems_version: 2.
|
199
|
+
rubygems_version: 2.0.3
|
198
200
|
signing_key:
|
199
201
|
specification_version: 4
|
200
202
|
summary: Parse string for potential geographic matches and return that data along
|
201
203
|
with the TGN ID and Geonames ID.
|
202
204
|
test_files:
|
203
205
|
- test/standardizer_test.rb
|
204
|
-
- test/parser_test.rb
|
205
206
|
- test/geomash_test.rb
|
206
|
-
- test/geonames_test.rb
|
207
|
-
- test/town_lookup_test.rb
|
208
207
|
- test/test_helper.rb
|
209
208
|
- test/tgn_test.rb
|
210
|
-
|
209
|
+
- test/town_lookup_test.rb
|
210
|
+
- test/parser_test.rb
|
211
|
+
- test/geonames_test.rb
|