geomash 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
module Geomash
  # Resolves a town name mentioned in free text to its TGN id, using the
  # per-state tables in Geomash::Constants::STATE_TOWN_TGN_IDS.
  class TownLookup
    # Finds the town of the given state that is mentioned in +string+.
    #
    # Only one result is returned. When several town names occur in the text
    # (e.g. "Boston" and "East Boston"), the name made of the most
    # space-separated words wins; among names with the same word count the
    # one listed first in the state's table is kept.
    #
    # @param state_key [String, Symbol] key into STATE_TOWN_TGN_IDS (converted with to_sym)
    # @param string [String] text to scan for town names
    # @return [Object, nil] the :tgn_id of the best matching town; nil when
    #   nothing matches, when the state has no table, or when an argument is nil
    def self.state_town_lookup(state_key, string)
      return nil if state_key.nil? || string.nil?

      towns = Geomash::Constants::STATE_TOWN_TGN_IDS[state_key.to_sym]
      # Unknown state: report "no match" instead of raising on nil.
      return nil if towns.nil?

      best_town = nil
      best_word_count = 0
      towns.each do |town|
        name = town[:location_name]
        next unless string.include?(name)

        word_count = name.split(' ').length
        # Strictly greater, so ties keep the earlier table entry.
        if word_count > best_word_count
          best_town = town
          best_word_count = word_count
        end
      end

      best_town && best_town[:tgn_id]
    end
  end
end
@@ -0,0 +1,3 @@
1
module Geomash
  # Version string of the geomash gem; frozen so it cannot be mutated in place.
  VERSION = "0.2.0".freeze
end
@@ -0,0 +1,146 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'test_helper'
3
+
4
+ # TODO: handle historical headings such as "Jews--Soviet Union--History--Catalogs"?
5
+ # "Registers of births, etc.--Canada, Western" is currently parsed completely wrong.
6
+
7
+ #Synagogues--Germany--Baden-Württemberg--Directories -> doesn't match as google returns Baden-Württemberg as
8
+ #Baden-Wurttemberg . No matches http://vocab.getty.edu/tgn/7003692
9
+
10
# End-to-end checks of Geomash.parse. TGN and Geonames expectations are only
# asserted when the corresponding service is configured (see the private
# predicates at the bottom of the class).
class GeomashTest < ActiveSupport::TestCase

  # Geomash.parse called with `true` as its second argument.
  def test_parse_with_flag
    result = Geomash.parse('Abbeville (France)--History--20th century.', true)
    assert_equal 'Abbeville', result[:city_part]
    assert_equal 'Picardy', result[:state_part]
    assert_equal 'France', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7010587', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2987374', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?

    # Slight variation problem with neighborhood: 11. Bezirk (Vienna, Austria)--Biography
    result = Geomash.parse('15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions', true)
    assert_equal 'Vienna', result[:city_part]
    assert_equal 'Vienna', result[:state_part]
    assert_equal 'Austria', result[:country_part]
    assert_equal 'Rudolfsheim-Fünfhaus', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7003321', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2779138', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Synagogues--Germany--Baden-Württemberg--Directories', true)
    assert_nil result[:city_part]
    assert_equal 'Baden-Wurttemberg', result[:state_part]
    assert_equal 'Germany', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7003692', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2953481', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?
  end

  # Geomash.parse called with the input string only.
  def test_parse_with_no_flag
    result = Geomash.parse('Boston, MA')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7013445', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4930956', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('New York, NY')
    assert_equal 'New York', result[:city_part]
    assert_equal 'New York', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7007567', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '5128581', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Washington, DC')
    assert_equal 'Washington', result[:city_part]
    assert_equal 'District of Columbia', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7013962', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4140963', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Roxbury (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7015002', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    # FIXME?
    assert_equal '4949151', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Roxbury, Mass.')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7015002', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    # FIXME?
    assert_equal '4949151', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Vietnam')
    assert_nil result[:city_part]
    assert_nil result[:state_part]
    assert_equal 'Vietnam', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_equal '1000145', result[:tgn][:id] if tgn_enabled?
    assert_nil result[:street_part]
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?

    result = Geomash.parse('Soviet Union')
    assert_nil result[:city_part]
    assert_nil result[:state_part]
    assert_nil result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_nil result[:tgn]
    assert_nil result[:street_part]

    result = Geomash.parse('Fenway (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Fenway/Kenmore', result[:neighborhood_part]
    assert_equal '7013445', result[:tgn][:id] if tgn_enabled?
    assert_nil result[:street_part]
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?

    # Should find the Michigan Atlanta over the Georgia Atlanta
    # State part from an API giving me Atlanta????
    result = Geomash.parse('Atlanta, MI')
    assert_equal 'Atlanta', result[:city_part]
    assert_equal 'Michigan', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_equal '2051159', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4984500', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?
  end

  private

  # True only when the TGN setting is exactly boolean true.
  def tgn_enabled?
    Geomash::TGN.tgn_enabled == true
  end

  # True when the Geonames username is anything other than the '<username>' placeholder.
  def geonames_enabled?
    Geomash::Geonames.geonames_username != '<username>'
  end
end
@@ -0,0 +1,24 @@
1
+ require 'test_helper'
2
+
3
class GeonamesTest < ActiveSupport::TestCase
  # Fetches Geonames record 4984500 and checks its coordinates, bounding box
  # and place hierarchy.
  def test_geonames_lookup_from_id
    # '<username>' is the unconfigured placeholder; report a skip rather than
    # passing silently with zero assertions.
    skip 'Geonames username is not configured' if Geomash::Geonames.geonames_username == '<username>'

    result = Geomash::Geonames.get_geonames_data('4984500')

    coords = result[:coords]
    assert_equal '45.00473', coords[:latitude]
    assert_equal '-84.14389', coords[:longitude]
    assert_equal '45.00473,-84.14389', coords[:combined]

    expected_box = [
      [:west, '-84.18404'],
      [:north, '45.01697'],
      [:east, '-84.11884'],
      [:south, '44.98859']
    ]
    expected_box.each do |edge, value|
      assert_equal value, result[:coords][:box][edge]
    end

    expected_hierarchy = [
      [:ppla2, 'Atlanta'],
      [:adm2, 'Montmorency County'],
      [:adm1, 'Michigan'],
      [:pcli, 'United States'],
      [:cont, 'North America'],
      [:area, 'Earth']
    ]
    expected_hierarchy.each do |level, name|
      assert_equal name, result[:hier_geo][level]
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'test_helper'
2
+
3
class ParserTest < ActiveSupport::TestCase
  # Checks the city/state/country/neighborhood/street parts returned by
  # Geomash::Parser.parse_google_api for a few Boston-area inputs.
  def test_google_parser
    result = Geomash::Parser.parse_google_api('Boston, MA')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]

    result = Geomash::Parser.parse_google_api('700 Boylston St, Boston, MA 02116')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]

    result = Geomash::Parser.parse_google_api('Roxbury (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]

    # FIXME!!! Is this alright?
    # Disabled case; note that it references Bplgeo::Parser rather than Geomash::Parser.
    #result = Bplgeo::Parser.parse_google_api('201 Dowman Dr., Atlanta, GA 30322')
    #assert_equal 'Atlanta', result[:city_part]
    #assert_equal 'Georgia', result[:state_part]
    #assert_equal 'United States', result[:country_part]
    #assert_equal 'true', result[:term_differs_from_tgn]
  end
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
# Tests for Geomash::Standardizer. Named StandardizerTest so this class no
# longer reopens the ParserTest class that covers Geomash::Parser.
class StandardizerTest < ActiveSupport::TestCase
  # Exercises Geomash::Standardizer.dedup_geo with and without its second
  # argument, before and after an extra place is appended to the input.
  def test_dedup_geo
    sample_array = ['Saigon, Vietnam', 'Saigon (Vietnam)', 'Vietnam', 'Vietnam, Party']
    result = Geomash::Standardizer.dedup_geo(sample_array)
    assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party'], result

    result = Geomash::Standardizer.dedup_geo(sample_array, true)
    assert_equal ['Saigon, Vietnam', 'Vietnam, Party'], result

    sample_array << 'Some Place, Vietnam'
    result = Geomash::Standardizer.dedup_geo(sample_array)
    assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result

    result = Geomash::Standardizer.dedup_geo(sample_array, true)
    assert_equal ['Saigon, Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result

    # Disabled case:
    #sample_array << 'Some Place, Vietnam, Saigon'
    #result = Geomash::Standardizer.dedup_geo(sample_array, true)
    #assert_equal ['Some Place, Vietnam, Saigon'], result
  end
end
@@ -0,0 +1,16 @@
1
# Run the suite under the Rails "test" environment.
ENV["RAILS_ENV"] = "test"

require 'rails'
require "rails/test_help"

require 'geomash'
Rails.backtrace_cleaner.remove_silencers!

# Require every Ruby file found under the support directory next to this helper.
support_glob = "#{File.dirname(__FILE__)}/support/**/*.rb"
Dir[support_glob].each { |support_file| require support_file }

# Point fixtures at the fixtures directory next to this helper, when the test
# case class exposes a fixture_path= writer.
test_case_class = ActiveSupport::TestCase
if test_case_class.method_defined?(:fixture_path=)
  test_case_class.fixture_path = File.expand_path("../fixtures", __FILE__)
end
data/test/tgn_test.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'test_helper'
2
+
3
class TGNTest < ActiveSupport::TestCase
  # Fetches TGN record 2051159 and checks its coordinates and place hierarchy.
  def test_tgn_lookup_from_id
    # Compare against boolean true (the check geomash_test.rb uses); the
    # previous comparison with the string 'true' did not match that check.
    # Report a skip when TGN is disabled instead of passing with zero assertions.
    skip 'TGN lookups are not enabled' unless Geomash::TGN.tgn_enabled == true

    result = Geomash::TGN.get_tgn_data('2051159')

    assert_equal '45', result[:coords][:latitude]
    assert_equal '-84.1333', result[:coords][:longitude]
    assert_equal '45,-84.1333', result[:coords][:combined]
    assert_equal 'Atlanta', result[:hier_geo][:city]
    assert_equal 'Montmorency', result[:hier_geo][:county]
    assert_equal 'Michigan', result[:hier_geo][:state]
    assert_equal 'United States', result[:hier_geo][:country]
    assert_equal 'North and Central America', result[:hier_geo][:continent]
  end
end
@@ -0,0 +1,11 @@
1
+ require 'test_helper'
2
+
3
class TownLookupTest < ActiveSupport::TestCase
  # A sentence naming "Boston" and one naming "East Boston" must resolve to
  # two different ids for the 'MA' table.
  def test_MA_lookup
    expectations = [
      ["This test was written in Boston, MA.", '7013445'],
      ["This test was written in East Boston, MA.", '7015009']
    ]

    expectations.each do |sentence, expected_id|
      result = Geomash::TownLookup.state_town_lookup('MA', sentence)
      assert_equal expected_id, result
    end
  end
end
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: geomash
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Boston Public Library
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: countries
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: geocoder
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: unidecoder
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: typhoeus
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: htmlentities
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sparql
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: sqlite3
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rails
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Parse string for potential geographic matches and return that data along
154
+ with the TGN ID and Geonames ID.
155
+ email:
156
+ - sanderson@bpl.org
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - config/geomash.yml.sample
162
+ - lib/geomash.rb
163
+ - lib/geomash/geonames.rb
164
+ - lib/geomash/parser.rb
165
+ - lib/geomash/standardizer.rb
166
+ - lib/geomash/version.rb
167
+ - lib/geomash/tgn.rb
168
+ - lib/geomash/town_lookup.rb
169
+ - lib/geomash/constants.rb
170
+ - Rakefile
171
+ - test/standardizer_test.rb
172
+ - test/parser_test.rb
173
+ - test/geomash_test.rb
174
+ - test/geonames_test.rb
175
+ - test/town_lookup_test.rb
176
+ - test/test_helper.rb
177
+ - test/tgn_test.rb
178
+ homepage: http://www.bpl.org
179
+ licenses: []
180
+ metadata: {}
181
+ post_install_message:
182
+ rdoc_options: []
183
+ require_paths:
184
+ - lib
185
+ required_ruby_version: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ required_rubygems_version: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ requirements: []
196
+ rubyforge_project:
197
+ rubygems_version: 2.1.10
198
+ signing_key:
199
+ specification_version: 4
200
+ summary: Parse string for potential geographic matches and return that data along
201
+ with the TGN ID and Geonames ID.
202
+ test_files:
203
+ - test/standardizer_test.rb
204
+ - test/parser_test.rb
205
+ - test/geomash_test.rb
206
+ - test/geonames_test.rb
207
+ - test/town_lookup_test.rb
208
+ - test/test_helper.rb
209
+ - test/tgn_test.rb
210
+ has_rdoc: