bplgeo 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bplgeo/standardizer.rb +4 -2
- data/lib/bplgeo/town_lookup.rb +1 -1
- data/lib/bplgeo/version.rb +1 -1
- data/lib/bplgeo.rb +1 -0
- data/test/dummy/bin/bundle +0 -0
- data/test/dummy/bin/rails +0 -0
- data/test/dummy/bin/rake +0 -0
- data/test/dummy/config/bplgeo.yml +3 -8
- data/test/dummy/log/test.log +37 -0
- data/test/standardizer_test.rb +26 -0
- data/test/town_lookup_test.rb +2 -2
- metadata +20 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21c1271a3fb208898888438395ebafbfcf1bd167
|
4
|
+
data.tar.gz: 64bde835593a4f414441b03390dbe1327524b0e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a0254403d3d1a29c63ffbab106133083a3193d7c005716ff432385012a0eda2e0faef78eec927ace1bc931c396c8e434fd24bbfb878b397e128d4b79b78e723
|
7
|
+
data.tar.gz: 1a5382d42fb7f802c12569bdb983921d6335b7fc9b7b5b9f43f9a97c255e5bcfedd911250f191a9df44f983aee9b0750afa30b8b1ed6919c02c278a77e02d8e9
|
data/lib/bplgeo/standardizer.rb
CHANGED
@@ -80,7 +80,7 @@ module Bplgeo
|
|
80
80
|
#Attempt to dedup a list of geographic areas.
|
81
81
|
#FIXME: Horrendous first pass.
|
82
82
|
#Aggresive flag removes less specific matches. IE. ['Hanoi, Vietnam' and 'Vietnam'] would return just ['Hanoi, Vietnam']
|
83
|
-
def self.dedup_geo(geo_list,
|
83
|
+
def self.dedup_geo(geo_list, aggressive=false)
|
84
84
|
geo_list = geo_list.clone
|
85
85
|
|
86
86
|
base_word_geo_list = []
|
@@ -113,7 +113,9 @@ module Bplgeo
|
|
113
113
|
}
|
114
114
|
|
115
115
|
matched_words_count.each_with_index do |matched_count, matched_index|
|
116
|
-
|
116
|
+
matched_count ||= 0
|
117
|
+
|
118
|
+
if (matched_count == base_word_geo_list[matched_index].split(' ').size) && ((base_word_geo_list[matched_index].split(' ').size < base_word_geo_list[index].split(' ').size && aggressive) || (base_word_geo_list[matched_index].split(' ').size == base_word_geo_list[index].split(' ').size))
|
117
119
|
if current_best_term.split(',').size < geo_list[matched_index].split(',').size || (current_best_term.size+1 < geo_list[matched_index].size && !geo_list[matched_index].include?('('))
|
118
120
|
current_best_term = geo_list[matched_index]
|
119
121
|
indexes_to_remove << current_best_term_index
|
data/lib/bplgeo/town_lookup.rb
CHANGED
@@ -9,7 +9,7 @@ module Bplgeo
|
|
9
9
|
matching_towns.each do |matching_town|
|
10
10
|
if matching_town[:location_name].split(' ').length > matched_terms_count
|
11
11
|
return_tgn_id = matching_town[:tgn_id]
|
12
|
-
matched_terms_count =
|
12
|
+
matched_terms_count = matching_town[:location_name].split(' ').length
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
data/lib/bplgeo/version.rb
CHANGED
data/lib/bplgeo.rb
CHANGED
data/test/dummy/bin/bundle
CHANGED
File without changes
|
data/test/dummy/bin/rails
CHANGED
File without changes
|
data/test/dummy/bin/rake
CHANGED
File without changes
|
data/test/dummy/config/bplgeo.yml
CHANGED
@@ -1,23 +1,18 @@
|
|
1
1
|
development:
|
2
2
|
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
3
|
-
|
3
|
+
getty_username: bplib
|
4
4
|
getty_password: 8{83N78kO;B)2
|
5
|
-
geonames_username: boston_library
|
6
5
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
7
6
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
8
|
-
timeout: 100
|
9
7
|
test: &TEST
|
10
8
|
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
11
|
-
|
9
|
+
getty_username: bplib
|
12
10
|
getty_password: 8{83N78kO;B)2
|
13
|
-
geonames_username: boston_library
|
14
11
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
15
12
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
16
|
-
timeout: 100
|
17
13
|
production:
|
14
|
+
#scanned_image_drive: /home/bluewolf/mapped/scan_images/BPLDC/_
|
18
15
|
getty_username: bplib
|
19
16
|
getty_password: 8{83N78kO;B)2
|
20
|
-
geonames_username: boston_library
|
21
17
|
mapquest_key: Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a
|
22
18
|
bing_key: Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn
|
23
|
-
timeout: 7
|
data/test/dummy/log/test.log
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
[1m[36m (0.4ms)[0m [1mbegin transaction[0m
|
2
|
+
----------------------
|
3
|
+
BplgeoTest: test_parse
|
4
|
+
----------------------
|
5
|
+
ETHON: Libcurl initialized
|
6
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=12.7465
|
7
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.660015
|
8
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=2.51674
|
9
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.5838760000000001
|
10
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=0.378902
|
11
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
12
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
13
|
+
------------------------------------------
|
14
|
+
GeonamesTest: test_geonames_lookup_from_id
|
15
|
+
------------------------------------------
|
16
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
17
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
18
|
+
--------------------------
|
19
|
+
ParserTest: test_dedup_geo
|
20
|
+
--------------------------
|
21
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
22
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
23
|
+
------------------------------
|
24
|
+
ParserTest: test_google_parser
|
25
|
+
------------------------------
|
26
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
27
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
28
|
+
--------------------------------
|
29
|
+
TGNTest: test_tgn_lookup_from_id
|
30
|
+
--------------------------------
|
31
|
+
ETHON: performed EASY url= response_code=200 return_code=ok total_time=1.122996
|
32
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
33
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
34
|
+
------------------------------
|
35
|
+
TownLookupTest: test_MA_lookup
|
36
|
+
------------------------------
|
37
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
data/test/standardizer_test.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class ParserTest < ActiveSupport::TestCase
|
4
|
+
def test_dedup_geo
|
5
|
+
sample_array = ['Saigon, Vietnam', 'Saigon (Vietnam)', 'Vietnam', 'Vietnam, Party']
|
6
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array)
|
7
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party'], result
|
8
|
+
|
9
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
10
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam, Party'], result
|
11
|
+
|
12
|
+
sample_array << 'Some Place, Vietnam'
|
13
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array)
|
14
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result
|
15
|
+
|
16
|
+
result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
17
|
+
assert_equal ['Saigon, Vietnam', 'Vietnam, Party', 'Some Place, Vietnam'], result
|
18
|
+
|
19
|
+
#sample_array << 'Some Place, Vietnam, Saigon'
|
20
|
+
#result = Bplgeo::Standardizer.dedup_geo(sample_array, true)
|
21
|
+
#assert_equal ['Some Place, Vietnam, Saigon'], result
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
end
|
data/test/town_lookup_test.rb
CHANGED
@@ -5,7 +5,7 @@ class TownLookupTest < ActiveSupport::TestCase
|
|
5
5
|
result = Bplgeo::TownLookup.state_town_lookup('MA', "This test was written in Boston, MA.")
|
6
6
|
assert_equal '7013445', result
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
result = Bplgeo::TownLookup.state_town_lookup('MA', "This test was written in East Boston, MA.")
|
9
|
+
assert_equal '7015009', result
|
10
10
|
end
|
11
11
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bplgeo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - '>='
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: htmlentities
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: sqlite3
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,6 +141,7 @@ files:
|
|
127
141
|
- lib/bplgeo/parser.rb
|
128
142
|
- Rakefile
|
129
143
|
- test/bplgeo_test.rb
|
144
|
+
- test/standardizer_test.rb
|
130
145
|
- test/dummy/Rakefile
|
131
146
|
- test/dummy/config.ru
|
132
147
|
- test/dummy/public/500.html
|
@@ -160,6 +175,7 @@ files:
|
|
160
175
|
- test/dummy/bin/rails
|
161
176
|
- test/dummy/bin/bundle
|
162
177
|
- test/dummy/bin/rake
|
178
|
+
- test/dummy/log/test.log
|
163
179
|
- test/dummy/log/development.log
|
164
180
|
- test/dummy/README.rdoc
|
165
181
|
- test/test_helper.rb
|
@@ -193,6 +209,7 @@ summary: Parse string for potential geographic matches and return that data alon
|
|
193
209
|
with the TGN ID and Geonames ID.
|
194
210
|
test_files:
|
195
211
|
- test/bplgeo_test.rb
|
212
|
+
- test/standardizer_test.rb
|
196
213
|
- test/dummy/Rakefile
|
197
214
|
- test/dummy/config.ru
|
198
215
|
- test/dummy/public/500.html
|
@@ -226,6 +243,7 @@ test_files:
|
|
226
243
|
- test/dummy/bin/rails
|
227
244
|
- test/dummy/bin/bundle
|
228
245
|
- test/dummy/bin/rake
|
246
|
+
- test/dummy/log/test.log
|
229
247
|
- test/dummy/log/development.log
|
230
248
|
- test/dummy/README.rdoc
|
231
249
|
- test/test_helper.rb
|
@@ -233,4 +251,3 @@ test_files:
|
|
233
251
|
- test/town_lookup_test.rb
|
234
252
|
- test/parser_test.rb
|
235
253
|
- test/geonames_test.rb
|
236
|
-
has_rdoc:
|