bplgeo 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bplgeo/standardizer.rb +4 -2
- data/lib/bplgeo/town_lookup.rb +1 -1
- data/lib/bplgeo/version.rb +1 -1
- data/lib/bplgeo.rb +1 -0
- data/test/dummy/bin/bundle +0 -0
- data/test/dummy/bin/rails +0 -0
- data/test/dummy/bin/rake +0 -0
- data/test/dummy/config/bplgeo.yml +3 -8
- data/test/dummy/log/test.log +37 -0
- data/test/standardizer_test.rb +26 -0
- data/test/town_lookup_test.rb +2 -2
- metadata +20 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21c1271a3fb208898888438395ebafbfcf1bd167
|
4
|
+
data.tar.gz: 64bde835593a4f414441b03390dbe1327524b0e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a0254403d3d1a29c63ffbab106133083a3193d7c005716ff432385012a0eda2e0faef78eec927ace1bc931c396c8e434fd24bbfb878b397e128d4b79b78e723
|
7
|
+
data.tar.gz: 1a5382d42fb7f802c12569bdb983921d6335b7fc9b7b5b9f43f9a97c255e5bcfedd911250f191a9df44f983aee9b0750afa30b8b1ed6919c02c278a77e02d8e9
|
data/lib/bplgeo/standardizer.rb
CHANGED
@@ -80,7 +80,7 @@ module Bplgeo
|
|
80
80
|
#Attempt to dedup a list of geographic areas.
|
81
81
|
#FIXME: Horrendous first pass.
|
82
82
|
#Aggressive flag removes less specific matches, e.g. ['Hanoi, Vietnam' and 'Vietnam'] would return just ['Hanoi, Vietnam']
|
83
|
-
def self.dedup_geo(geo_list,
|
83
|
+
def self.dedup_geo(geo_list, aggressive=false)
|
84
84
|
geo_list = geo_list.clone
|
85
85
|
|
86
86
|
base_word_geo_list = []
|
@@ -113,7 +113,9 @@ module Bplgeo
|
|
113
113
|
}
|
114
114
|
|
115
115
|
matched_words_count.each_with_index do |matched_count, matched_index|
|
116
|
-
|
116
|
+
matched_count ||= 0
|
117
|
+
|
118
|
+
if (matched_count == base_word_geo_list[matched_index].split(' ').size) && ((base_word_geo_list[matched_index].split(' ').size < base_word_geo_list[index].split(' ').size && aggressive) || (base_word_geo_list[matched_index].split(' ').size == base_word_geo_list[index].split(' ').size))
|
117
119
|
if current_best_term.split(',').size < geo_list[matched_index].split(',').size || (current_best_term.size+1 < geo_list[matched_index].size && !geo_list[matched_index].include?('('))
|
118
120
|
current_best_term = geo_list[matched_index]
|
119
121
|
indexes_to_remove << current_best_term_index
|
data/lib/bplgeo/town_lookup.rb
CHANGED
@@ -9,7 +9,7 @@ module Bplgeo
|
|
9
9
|
matching_towns.each do |matching_town|
|
10
10
|
if matching_town[:location_name].split(' ').length > matched_terms_count
|
11
11
|
return_tgn_id = matching_town[:tgn_id]
|
12
|
-
matched_terms_count =
|
12
|
+
matched_terms_count = matching_town[:location_name].split(' ').length
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
data/lib/bplgeo/version.rb
CHANGED
data/lib/bplgeo.rb
CHANGED
data/test/dummy/bin/bundle
CHANGED
File without changes
|
data/test/dummy/bin/rails
CHANGED
File without changes
|
data/test/dummy/bin/rake
CHANGED
File without changes
|
@@ -1,23 +1,18 @@
|
|
1
1
|
development:
|
2
2
|
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
3
|
-
|
3
|
+
getty_username: bplib
|
4
4
|
getty_password: 8{83N78kO;B)2
|
5
|
-
geonames_username: boston_library
|
6
5
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
7
6
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
8
|
-
timeout: 100
|
9
7
|
test: &TEST
|
10
8
|
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
11
|
-
|
9
|
+
getty_username: bplib
|
12
10
|
getty_password: 8{83N78kO;B)2
|
13
|
-
geonames_username: boston_library
|
14
11
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
15
12
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
16
|
-
timeout: 100
|
17
13
|
production:
|
14
|
+
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
18
15
|
getty_username: bplib
|
19
16
|
getty_password: 8{83N78kO;B)2
|
20
|
-
geonames_username: boston_library
|
21
17
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
22
18
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
23
|
-
timeout: 7
|
@@ -0,0 +1,37 @@
|
|
1
|
+
[1m[36m (0.4ms)[0m [1mbegin transaction[0m
|
2
|
+
----------------------
|
3
|
+
BplgeoTest: test_parse
|
4
|
+
----------------------
|
5
|
+
ETHON: Libcurl initialized
|
6
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=12.7465
|
7
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.660015
|
8
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=2.51674
|
9
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.5838760000000001
|
10
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.378902
|
11
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
12
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
13
|
+
------------------------------------------
|
14
|
+
GeonamesTest: test_geonames_lookup_from_id
|
15
|
+
------------------------------------------
|
16
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
17
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
18
|
+
--------------------------
|
19
|
+
ParserTest: test_dedup_geo
|
20
|
+
--------------------------
|
21
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
22
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
23
|
+
------------------------------
|
24
|
+
ParserTest: test_google_parser
|
25
|
+
------------------------------
|
26
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
27
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
28
|
+
--------------------------------
|
29
|
+
TGNTest: test_tgn_lookup_from_id
|
30
|
+
--------------------------------
|
31
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=1.122996
|
32
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
33
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
34
|
+
------------------------------
|
35
|
+
TownLookupTest: test_MA_lookup
|
36
|
+
------------------------------
|
37
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class ParserTest < ActiveSupport::TestCase
|
4
|
+
def test_dedup_geo
|
5
|
+
sample_array = ['Saigon, Vietnam', 'Saigon (Vietnam)', 'Vietnam', 'Vietnam, Party']
|
6
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array)
|
7
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party'], result
|
8
|
+
|
9
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
10
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam, Party'], result
|
11
|
+
|
12
|
+
sample_array << 'Some Place, Vietnam'
|
13
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array)
|
14
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result
|
15
|
+
|
16
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
17
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result
|
18
|
+
|
19
|
+
#sample_array << 'Some Place, Vietnam, Saigon'
|
20
|
+
#result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
21
|
+
#assert_equal ['Some Place, Vietnam, Saigon'], result
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
end
|
data/test/town_lookup_test.rb
CHANGED
@@ -5,7 +5,7 @@ class TownLookupTest < ActiveSupport::TestCase
|
|
5
5
|
result = Bplgeo::TownLookup.state_town_lookup('MA', "This test was written in Boston, MA.")
|
6
6
|
assert_equal '7013445', result
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
result = Bplgeo::TownLookup.state_town_lookup('MA', "This test was written in East Boston, MA.")
|
9
|
+
assert_equal '7015009', result
|
10
10
|
end
|
11
11
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bplgeo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - '>='
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: htmlentities
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: sqlite3
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,6 +141,7 @@ files:
|
|
127
141
|
- lib/bplgeo/parser.rb
|
128
142
|
- Rakefile
|
129
143
|
- test/bplgeo_test.rb
|
144
|
+
- test/standardizer_test.rb
|
130
145
|
- test/dummy/Rakefile
|
131
146
|
- test/dummy/config.ru
|
132
147
|
- test/dummy/public/500.html
|
@@ -160,6 +175,7 @@ files:
|
|
160
175
|
- test/dummy/bin/rails
|
161
176
|
- test/dummy/bin/bundle
|
162
177
|
- test/dummy/bin/rake
|
178
|
+
- test/dummy/log/test.log
|
163
179
|
- test/dummy/log/development.log
|
164
180
|
- test/dummy/README.rdoc
|
165
181
|
- test/test_helper.rb
|
@@ -193,6 +209,7 @@ summary: Parse string for potential geographic matches and return that data alon
|
|
193
209
|
with the TGN ID and Geonames ID.
|
194
210
|
test_files:
|
195
211
|
- test/bplgeo_test.rb
|
212
|
+
- test/standardizer_test.rb
|
196
213
|
- test/dummy/Rakefile
|
197
214
|
- test/dummy/config.ru
|
198
215
|
- test/dummy/public/500.html
|
@@ -226,6 +243,7 @@ test_files:
|
|
226
243
|
- test/dummy/bin/rails
|
227
244
|
- test/dummy/bin/bundle
|
228
245
|
- test/dummy/bin/rake
|
246
|
+
- test/dummy/log/test.log
|
229
247
|
- test/dummy/log/development.log
|
230
248
|
- test/dummy/README.rdoc
|
231
249
|
- test/test_helper.rb
|
@@ -233,4 +251,3 @@ test_files:
|
|
233
251
|
- test/town_lookup_test.rb
|
234
252
|
- test/parser_test.rb
|
235
253
|
- test/geonames_test.rb
|
236
|
-
has_rdoc:
|