geomash 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/geomash.yml +3 -3
- data/lib/geomash/constants.rb +1 -0
- data/lib/geomash/parser.rb +6 -4
- data/lib/geomash/standardizer.rb +5 -2
- data/lib/geomash/tgn.rb +2 -6
- data/lib/geomash/version.rb +1 -1
- data/test/geomash_test.rb +16 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f51ecd1752538ca69b4f80d4069519842e9f78c4
|
4
|
+
data.tar.gz: f3f8a263990c5441b76532fc3d39f282b25c91c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f98af5d1b09972e6b16cc5b5beb0360cf965c2eb58494aaa075c565672ee294e909799bf22c46c0cfa24580e0838b75fae973c4c9845f655992e77e2e60e5969
|
7
|
+
data.tar.gz: 439939cef31ad2058efeb8546de0f20fbdfc2457484d23dda80c150ca42a7d8e36b835e61e917e54ce3276d44114dc345b1fcd396952a46f5eb2b6eb333fc4d6
|
data/config/geomash.yml
CHANGED
@@ -5,7 +5,7 @@ development:
|
|
5
5
|
mapquest_key: <mapquest_key>
|
6
6
|
#NOTE: Bing also fairly unreliable but mostly works...
|
7
7
|
bing_key: <bing_key>
|
8
|
-
timeout:
|
8
|
+
timeout: 15
|
9
9
|
parser_cache_enabled: false #See Readme before enabling this
|
10
10
|
test: &TEST_
|
11
11
|
tgn_enabled: true
|
@@ -14,7 +14,7 @@ test: &TEST_
|
|
14
14
|
mapquest_key: <mapquest_key>
|
15
15
|
#NOTE: Bing also fairly unreliable but mostly works...
|
16
16
|
bing_key: <bing_key>
|
17
|
-
timeout:
|
17
|
+
timeout: 15
|
18
18
|
parser_cache_enabled: false #See Readme before enabling this
|
19
19
|
production:
|
20
20
|
tgn_enabled: true
|
@@ -23,5 +23,5 @@ production:
|
|
23
23
|
mapquest_key: <mapquest_key>
|
24
24
|
#NOTE: Bing also fairly unreliable but mostly works...
|
25
25
|
bing_key: <bing_key>
|
26
|
-
timeout:
|
26
|
+
timeout: 15
|
27
27
|
parser_cache_enabled: false #See Readme before enabling this
|
data/lib/geomash/constants.rb
CHANGED
data/lib/geomash/parser.rb
CHANGED
@@ -282,16 +282,18 @@ module Geomash
|
|
282
282
|
return_hash[:state_part] = result['long_name'].to_ascii.gsub('-city', '')
|
283
283
|
elsif (result['types'] & ['locality']).present?
|
284
284
|
return_hash[:city_part] = result['long_name']
|
285
|
-
elsif (result['types'] & ['sublocality', 'political']).length == 2
|
286
|
-
return_hash[:neighborhood_part]
|
285
|
+
elsif (result['types'] & ['sublocality', 'political']).length == 2
|
286
|
+
return_hash[:neighborhood_part] ||= result['long_name'] #See term 'Roxbury (Boston, Mass.)' for why neighborhood should take precedence
|
287
|
+
elsif result['types'].include?('neighborhood')
|
288
|
+
return_hash[:neighborhood_part] = result['long_name']
|
287
289
|
end
|
288
290
|
end
|
289
291
|
|
290
292
|
return_hash[:term_differs_from_tgn] ||= google_api_result[best_match_index].data['partial_match'] unless google_api_result[best_match_index].data['partial_match'].blank?
|
291
293
|
end
|
292
294
|
|
293
|
-
#This changed in Google... need a better way to handle this
|
294
|
-
if return_hash[:state_part] == 'Nord-Pas-de-Calais Picardie'
|
295
|
+
#This changed in Google... twice now actually... need a better way to handle this
|
296
|
+
if return_hash[:state_part] == 'Nord-Pas-de-Calais Picardie' || return_hash[:state_part] == 'Nord-Pas-de-Calais-Picardie'
|
295
297
|
return_hash[:state_part] = 'Picardy'
|
296
298
|
end
|
297
299
|
|
data/lib/geomash/standardizer.rb
CHANGED
@@ -11,7 +11,7 @@ module Geomash
|
|
11
11
|
return ''
|
12
12
|
end
|
13
13
|
|
14
|
-
term_split_list = term.split(/[,\-\(\(]
|
14
|
+
term_split_list = term.split(/[,\-\(\(]|>/).reject{ |e| e.empty? }
|
15
15
|
term_split_list.each{ |e| e.gsub!(/[^\w\s]/, "") } #Remove punctuation
|
16
16
|
term_split_list.each{ |e| e.strip! } #Remove any extra remaining whitespace
|
17
17
|
term_split_list.reject{ |e| e.empty? }
|
@@ -72,7 +72,7 @@ module Geomash
|
|
72
72
|
end
|
73
73
|
else
|
74
74
|
#if term_split_list.length > 1
|
75
|
-
geo_term = term.gsub('(', ',').gsub(' ,', ', ')
|
75
|
+
geo_term = term.gsub('(', ',').gsub(' ,', ', ').gsub(' >', ',')
|
76
76
|
geo_term = geo_term.gsub(')', '')
|
77
77
|
#end
|
78
78
|
|
@@ -99,6 +99,9 @@ module Geomash
|
|
99
99
|
#Replace any semicolons with commas... possible strip them?
|
100
100
|
geo_term = geo_term.gsub(';', ',')
|
101
101
|
|
102
|
+
#Replace > with commas
|
103
|
+
geo_term = geo_term.gsub('>', ',').gsub('>', ',')
|
104
|
+
|
102
105
|
#Terms in paranthesis will cause some geographic parsers to freak out. Switch to commas instead.
|
103
106
|
if geo_term.match(/[\(\)]+/)
|
104
107
|
#Attempt to fix address if something like (word)
|
data/lib/geomash/tgn.rb
CHANGED
@@ -375,12 +375,8 @@ EXAMPLE SPARQL:
|
|
375
375
|
#Fix cases like http://vocab.getty.edu/aat/300132316 which are bays (bodies of water)
|
376
376
|
aat_term = aat_term.gsub(/ \(.+\)$/, '')
|
377
377
|
|
378
|
-
if (aat_term =~ /ies$/).present?
|
379
|
-
aat_term = aat_term.
|
380
|
-
elsif (aat_term =~ /es$/).present?
|
381
|
-
aat_term = aat_term.gsub(/es$/, '')
|
382
|
-
elsif (aat_term =~ /s$/).present?
|
383
|
-
aat_term = aat_term.gsub(/s$/, '')
|
378
|
+
if (aat_term =~ /ies$/).present? || (aat_term =~ /es$/).present? || (aat_term =~ /s$/).present?
|
379
|
+
aat_term = aat_term.singularize
|
384
380
|
end
|
385
381
|
|
386
382
|
#Fix cases like "Boston Harbor" as "Boston Harbor (harbor)" isn't that helpful
|
data/lib/geomash/version.rb
CHANGED
data/test/geomash_test.rb
CHANGED
@@ -10,15 +10,16 @@ require 'test_helper'
|
|
10
10
|
class GeomashTest < ActiveSupport::TestCase
|
11
11
|
|
12
12
|
def test_parse_with_flag
|
13
|
-
result = Geomash.parse('
|
14
|
-
assert_equal '
|
15
|
-
assert_equal '
|
16
|
-
assert_equal '
|
13
|
+
result = Geomash.parse('Massachusetts > Hampden (county) > Chicopee', true)
|
14
|
+
assert_equal 'Chicopee', result[:city_part]
|
15
|
+
assert_equal 'Massachusetts', result[:state_part]
|
16
|
+
assert_equal 'United States', result[:country_part]
|
17
|
+
assert_equal nil, result[:neighborhood_part]
|
17
18
|
assert_equal nil, result[:street_part]
|
18
|
-
assert_equal '
|
19
|
-
assert_equal
|
20
|
-
|
21
|
-
|
19
|
+
assert_equal '2049596', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
20
|
+
assert_equal true, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true #This should be false?
|
21
|
+
assert_equal '4933002', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
22
|
+
assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>' #This should be false?
|
22
23
|
|
23
24
|
#Slight variation problem with neighborhood: 11. Bezirk (Vienna, Austria)--Biography
|
24
25
|
result = Geomash.parse('15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions', true)
|
@@ -65,16 +66,15 @@ class GeomashTest < ActiveSupport::TestCase
|
|
65
66
|
assert_equal '6252001', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
66
67
|
assert_equal false, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
67
68
|
|
68
|
-
result = Geomash.parse('
|
69
|
-
assert_equal
|
70
|
-
assert_equal
|
71
|
-
assert_equal '
|
72
|
-
assert_equal nil, result[:neighborhood_part]
|
69
|
+
result = Geomash.parse('Abbeville (France)--History--20th century.', true)
|
70
|
+
assert_equal 'Abbeville', result[:city_part]
|
71
|
+
assert_equal 'Picardy', result[:state_part]
|
72
|
+
assert_equal 'France', result[:country_part]
|
73
73
|
assert_equal nil, result[:street_part]
|
74
|
-
assert_equal '
|
74
|
+
assert_equal '7010587', result[:tgn][:id] if Geomash::TGN.tgn_enabled == true
|
75
75
|
assert_equal false, result[:tgn][:original_string_differs] if Geomash::TGN.tgn_enabled == true
|
76
|
-
|
77
|
-
|
76
|
+
#assert_equal '2987374', result[:geonames][:id] if Geomash::Geonames.geonames_username != '<username>'
|
77
|
+
#assert_equal true, result[:geonames][:original_string_differs] if Geomash::Geonames.geonames_username != '<username>'
|
78
78
|
|
79
79
|
|
80
80
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geomash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|