geomash 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
module Geomash
  # Looks up Getty TGN ids for town names that appear inside free text,
  # using the per-state tables in Geomash::Constants::STATE_TOWN_TGN_IDS.
  class TownLookup
    # Finds the town of the given state whose name occurs in +string+.
    #
    # Only returns one result for now. When several town names occur in the
    # text (e.g. "Boston" is a substring of "East Boston"), the name made of
    # the most space-separated words wins; on a tie the town listed first in
    # the state table is kept. Matching uses String#include?, so it is a
    # case-sensitive substring match.
    #
    # @param state_key [String, Symbol] key into STATE_TOWN_TGN_IDS (e.g. 'MA')
    # @param string [String] text to search for town names
    # @return [Object, nil] the :tgn_id of the best match, or nil when
    #   nothing matches, the state is unknown, or either argument is nil
    def self.state_town_lookup(state_key, string)
      return nil if state_key.nil? || string.nil?

      towns = Geomash::Constants::STATE_TOWN_TGN_IDS[state_key.to_sym]
      # Unknown state: previously this raised NoMethodError on nil.
      return nil if towns.nil?

      best_tgn_id = nil
      best_word_count = 0
      towns.each do |town|
        name = town[:location_name]
        next unless string.include?(name)

        word_count = name.split(' ').length
        # Strictly greater, so the first town wins among equal-length names.
        next unless word_count > best_word_count

        best_tgn_id = town[:tgn_id]
        best_word_count = word_count
      end

      best_tgn_id
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the geomash gem.
module Geomash
  # Version string of this release of the gem.
  VERSION = '0.2.0'
end
@@ -0,0 +1,146 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'test_helper'
3
+
4
+ #Historical stuff like Jews--Soviet Union--History--Catalogs ?
5
+ # Registers of births, etc.--Canada, Western totally borked
6
+
7
+ # Synagogues--Germany--Baden-Württemberg--Directories -> doesn't match because Google returns "Baden-Württemberg" as
8
+ # "Baden-Wurttemberg" (no umlaut). No matches: http://vocab.getty.edu/tgn/7003692
9
+
10
# End-to-end tests for Geomash.parse.
#
# Each case checks the parsed place parts (:city_part, :state_part,
# :country_part, :neighborhood_part, :street_part). The TGN assertions run
# only when Geomash::TGN.tgn_enabled is exactly true, and the Geonames
# assertions only when a Geonames username other than the '<username>'
# placeholder is configured.
class GeomashTest < ActiveSupport::TestCase
  # Subject-heading style inputs, parsed with the second argument set to true.
  # NOTE(review): the meaning of that flag is defined by Geomash.parse, which
  # is not visible here - confirm before relying on this description.
  def test_parse_with_flag
    result = Geomash.parse('Abbeville (France)--History--20th century.', true)
    assert_equal 'Abbeville', result[:city_part]
    assert_equal 'Picardy', result[:state_part]
    assert_equal 'France', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7010587', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2987374', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?

    # Slight variation problem with neighborhood: 11. Bezirk (Vienna, Austria)--Biography
    result = Geomash.parse('15. Bezirk (Rudolfsheim-Fünfhaus, Vienna, Austria)--Exhibitions', true)
    assert_equal 'Vienna', result[:city_part]
    assert_equal 'Vienna', result[:state_part]
    assert_equal 'Austria', result[:country_part]
    assert_equal 'Rudolfsheim-Fünfhaus', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7003321', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2779138', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Synagogues--Germany--Baden-Württemberg--Directories', true)
    assert_nil result[:city_part]
    assert_equal 'Baden-Wurttemberg', result[:state_part]
    assert_equal 'Germany', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7003692', result[:tgn][:id] if tgn_enabled?
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '2953481', result[:geonames][:id] if geonames_enabled?
    assert_equal true, result[:geonames][:original_string_differs] if geonames_enabled?
  end

  # Plain place strings, parsed without the second argument.
  def test_parse_with_no_flag
    result = Geomash.parse('Boston, MA')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7013445', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4930956', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('New York, NY')
    assert_equal 'New York', result[:city_part]
    assert_equal 'New York', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7007567', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '5128581', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Washington, DC')
    assert_equal 'Washington', result[:city_part]
    assert_equal 'District of Columbia', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]
    assert_equal '7013962', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4140963', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Roxbury (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7015002', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    # FIXME?
    assert_equal '4949151', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Roxbury, Mass.')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]
    assert_equal '7015002', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    # FIXME?
    assert_equal '4949151', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?

    result = Geomash.parse('Vietnam')
    assert_nil result[:city_part]
    assert_nil result[:state_part]
    assert_equal 'Vietnam', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_equal '1000145', result[:tgn][:id] if tgn_enabled?
    assert_nil result[:street_part]
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?

    # A country name that is expected to produce no parts and no TGN entry.
    result = Geomash.parse('Soviet Union')
    assert_nil result[:city_part]
    assert_nil result[:state_part]
    assert_nil result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_nil result[:tgn]
    assert_nil result[:street_part]

    result = Geomash.parse('Fenway (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Fenway/Kenmore', result[:neighborhood_part]
    assert_equal '7013445', result[:tgn][:id] if tgn_enabled?
    assert_nil result[:street_part]
    assert_equal true, result[:tgn][:original_string_differs] if tgn_enabled?

    # Should find the Michigan Atlanta over the Georgia Atlanta
    # State part from an API giving me Atlanta????
    result = Geomash.parse('Atlanta, MI')
    assert_equal 'Atlanta', result[:city_part]
    assert_equal 'Michigan', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:neighborhood_part]
    assert_equal '2051159', result[:tgn][:id] if tgn_enabled?
    assert_equal false, result[:tgn][:original_string_differs] if tgn_enabled?
    assert_equal '4984500', result[:geonames][:id] if geonames_enabled?
    assert_equal false, result[:geonames][:original_string_differs] if geonames_enabled?
  end

  private

  # True only when Geomash::TGN.tgn_enabled is exactly the boolean true;
  # the comparison is kept as-is so the set of executed assertions is unchanged.
  def tgn_enabled?
    Geomash::TGN.tgn_enabled == true
  end

  # True when the configured Geonames username is not the '<username>' placeholder.
  def geonames_enabled?
    Geomash::Geonames.geonames_username != '<username>'
  end
end
@@ -0,0 +1,24 @@
1
+ require 'test_helper'
2
+
3
# Checks the data returned by Geomash::Geonames.get_geonames_data for
# Geonames id '4984500'.
class GeonamesTest < ActiveSupport::TestCase
  def test_geonames_lookup_from_id
    # The '<username>' placeholder means no Geonames account is configured,
    # in which case the test makes no assertions.
    return if Geomash::Geonames.geonames_username == '<username>'

    geo_data = Geomash::Geonames.get_geonames_data('4984500')

    assert_equal '45.00473', geo_data[:coords][:latitude]
    assert_equal '-84.14389', geo_data[:coords][:longitude]
    assert_equal '45.00473,-84.14389', geo_data[:coords][:combined]

    # Bounding box edges, checked in the same order as before.
    [
      [:west, '-84.18404'],
      [:north, '45.01697'],
      [:east, '-84.11884'],
      [:south, '44.98859']
    ].each do |edge, expected|
      assert_equal expected, geo_data[:coords][:box][edge]
    end

    # Hierarchy entries from most specific to least specific.
    [
      [:ppla2, 'Atlanta'],
      [:adm2, 'Montmorency County'],
      [:adm1, 'Michigan'],
      [:pcli, 'United States'],
      [:cont, 'North America'],
      [:area, 'Earth']
    ].each do |level, expected|
      assert_equal expected, geo_data[:hier_geo][level]
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'test_helper'
2
+
3
# Tests for Geomash::Parser.parse_google_api: each case checks the place
# parts returned for an input string.
class ParserTest < ActiveSupport::TestCase
  def test_google_parser
    result = Geomash::Parser.parse_google_api('Boston, MA')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_nil result[:street_part]

    # Full street address: only city/state/country are asserted here.
    result = Geomash::Parser.parse_google_api('700 Boylston St, Boston, MA 02116')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]

    result = Geomash::Parser.parse_google_api('Roxbury (Boston, Mass.)')
    assert_equal 'Boston', result[:city_part]
    assert_equal 'Massachusetts', result[:state_part]
    assert_equal 'United States', result[:country_part]
    assert_equal 'Roxbury', result[:neighborhood_part]
    assert_nil result[:street_part]

    # FIXME!!! Is this alright?
    # NOTE(review): this disabled case still uses the Bplgeo namespace, while
    # the live cases above use Geomash - update it before re-enabling.
    #result = Bplgeo::Parser.parse_google_api('201 Dowman Dr., Atlanta, GA 30322')
    #assert_equal 'Atlanta', result[:city_part]
    #assert_equal 'Georgia', result[:state_part]
    #assert_equal 'United States', result[:country_part]
    #assert_equal 'true', result[:term_differs_from_tgn]
  end
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
# Tests for Geomash::Standardizer.dedup_geo.
# NOTE(review): this class is named ParserTest although it exercises the
# Standardizer; the Google parser test class is also named ParserTest, so
# loading both reopens one class. Consider renaming to StandardizerTest.
class ParserTest < ActiveSupport::TestCase
  def test_dedup_geo
    places = ['Saigon, Vietnam', 'Saigon (Vietnam)', 'Vietnam', 'Vietnam, Party']

    # Without the second argument the bare 'Vietnam' entry is expected to stay.
    assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party'],
                 Geomash::Standardizer.dedup_geo(places)

    # With the second argument set to true, 'Vietnam' is expected to be dropped.
    assert_equal ['Saigon, Vietnam', 'Vietnam, Party'],
                 Geomash::Standardizer.dedup_geo(places, true)

    # The same array object is extended and reused for the remaining checks.
    places << 'Some Place, Vietnam'

    assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'],
                 Geomash::Standardizer.dedup_geo(places)

    assert_equal ['Saigon, Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'],
                 Geomash::Standardizer.dedup_geo(places, true)

    #places << 'Some Place, Vietnam, Saigon'
    #result = Geomash::Standardizer.dedup_geo(places, true)
    #assert_equal ['Some Place, Vietnam, Saigon'], result
  end
end
@@ -0,0 +1,16 @@
1
# Test bootstrap: force the Rails test environment before anything loads.
ENV['RAILS_ENV'] = 'test'

require 'rails'
require 'rails/test_help'

require 'geomash'
# Drop the backtrace silencers so failures show complete stack traces.
Rails.backtrace_cleaner.remove_silencers!

# Require every Ruby file found under the support directory next to this file.
Dir.glob("#{File.dirname(__FILE__)}/support/**/*.rb").each do |support_file|
  require support_file
end

# Point fixtures at this directory's fixtures folder when the test case
# class exposes a fixture_path= instance method.
if ActiveSupport::TestCase.method_defined?(:fixture_path=)
  ActiveSupport::TestCase.fixture_path = File.expand_path('../fixtures', __FILE__)
end
data/test/tgn_test.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'test_helper'
2
+
3
# Checks the data returned by Geomash::TGN.get_tgn_data for TGN id '2051159'.
class TGNTest < ActiveSupport::TestCase
  def test_tgn_lookup_from_id
    # The other tests in this suite compare tgn_enabled against the boolean
    # true, while this one compared against the string 'true', so it could
    # never run under the same configuration. Normalising with to_s accepts
    # either representation.
    return unless Geomash::TGN.tgn_enabled.to_s == 'true'

    result = Geomash::TGN.get_tgn_data('2051159')

    assert_equal '45', result[:coords][:latitude]
    assert_equal '-84.1333', result[:coords][:longitude]
    assert_equal '45,-84.1333', result[:coords][:combined]
    assert_equal 'Atlanta', result[:hier_geo][:city]
    assert_equal 'Montmorency', result[:hier_geo][:county]
    assert_equal 'Michigan', result[:hier_geo][:state]
    assert_equal 'United States', result[:hier_geo][:country]
    assert_equal 'North and Central America', result[:hier_geo][:continent]
  end
end
@@ -0,0 +1,11 @@
1
+ require 'test_helper'
2
+
3
# Tests for Geomash::TownLookup.state_town_lookup using the 'MA' state key.
class TownLookupTest < ActiveSupport::TestCase
  def test_MA_lookup
    # Each pair is [input text, expected TGN id]. The second sentence contains
    # both "Boston" and "East Boston" and must resolve to the longer name.
    cases = [
      ["This test was written in Boston, MA.", '7013445'],
      ["This test was written in East Boston, MA.", '7015009']
    ]

    cases.each do |sentence, expected_tgn_id|
      assert_equal expected_tgn_id, Geomash::TownLookup.state_town_lookup('MA', sentence)
    end
  end
end
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: geomash
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Boston Public Library
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: countries
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: geocoder
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: unidecoder
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: typhoeus
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: htmlentities
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: sparql
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: sqlite3
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rails
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Parse string for potential geographic matches and return that data along
154
+ with the TGN ID and Geonames ID.
155
+ email:
156
+ - sanderson@bpl.org
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - config/geomash.yml.sample
162
+ - lib/geomash.rb
163
+ - lib/geomash/geonames.rb
164
+ - lib/geomash/parser.rb
165
+ - lib/geomash/standardizer.rb
166
+ - lib/geomash/version.rb
167
+ - lib/geomash/tgn.rb
168
+ - lib/geomash/town_lookup.rb
169
+ - lib/geomash/constants.rb
170
+ - Rakefile
171
+ - test/standardizer_test.rb
172
+ - test/parser_test.rb
173
+ - test/geomash_test.rb
174
+ - test/geonames_test.rb
175
+ - test/town_lookup_test.rb
176
+ - test/test_helper.rb
177
+ - test/tgn_test.rb
178
+ homepage: http://www.bpl.org
179
+ licenses: []
180
+ metadata: {}
181
+ post_install_message:
182
+ rdoc_options: []
183
+ require_paths:
184
+ - lib
185
+ required_ruby_version: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ required_rubygems_version: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ requirements: []
196
+ rubyforge_project:
197
+ rubygems_version: 2.1.10
198
+ signing_key:
199
+ specification_version: 4
200
+ summary: Parse string for potential geographic matches and return that data along
201
+ with the TGN ID and Geonames ID.
202
+ test_files:
203
+ - test/standardizer_test.rb
204
+ - test/parser_test.rb
205
+ - test/geomash_test.rb
206
+ - test/geonames_test.rb
207
+ - test/town_lookup_test.rb
208
+ - test/test_helper.rb
209
+ - test/tgn_test.rb
210
+ has_rdoc: