loose_tight_dictionary 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -4,13 +4,9 @@ Match things based on string similarity (using the Pair Distance algorithm) and
4
4
 
5
5
  = Quickstart
6
6
 
7
- >> right_records = [ 'seamus', 'andy', 'ben' ]
7
+ >> d = LooseTightDictionary.new %w(seamus andy ben)
8
8
  => [...]
9
- >> left_record = 'Shamus Heaney'
10
- => [...]
11
- >> d = LooseTightDictionary.new right_records
12
- => [...]
13
- >> puts d.left_to_right left_record
9
+ >> puts d.find 'Shamus Heaney'
14
10
  => 'seamus'
15
11
 
16
12
  Try running the included example file:
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -218,6 +218,7 @@ class LooseTightDictionary
218
218
  inline_check left_record, right_record
219
219
  right_record
220
220
  end
221
+ alias_method :find, :left_to_right
221
222
 
222
223
  def optimize(t_options_left, t_options_right)
223
224
  cart_prod(t_options_left, t_options_right).max do |a, b|
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{loose_tight_dictionary}
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere"]
12
- s.date = %q{2010-09-30}
12
+ s.date = %q{2011-03-02}
13
13
  s.description = %q{Create dictionaries that link rows between two tables (left and right) using loose matching (string similarity) by default and tight matching (regexp) by request.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -65,212 +65,207 @@ class TestLooseTightDictionary < Test::Unit::TestCase
65
65
  :tee => $tee
66
66
  end
67
67
 
68
- if ENV['NEW'] == 'true' or ENV['ALL'] == 'true'
69
- end
70
-
71
- if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
72
- should "optionally only pay attention to things that match blockings" do
73
- assert_equal @a_right, ltd.left_to_right(@a_left)
68
+ should "optionally only pay attention to things that match blockings" do
69
+ assert_equal @a_right, ltd.left_to_right(@a_left)
74
70
 
75
- clear_ltd
76
- @blocking_only = true
77
- assert_equal nil, ltd.left_to_right(@a_left)
71
+ clear_ltd
72
+ @blocking_only = true
73
+ assert_equal nil, ltd.left_to_right(@a_left)
78
74
 
79
- clear_ltd
80
- @blocking_only = true
81
- @blockings.push ['/dash/i']
82
- assert_equal @a_right, ltd.left_to_right(@a_left)
83
- end
84
-
85
- # the example from the readme, considerably uglier here
86
- should "check a simple table" do
87
- @right = [ 'seamus', 'andy', 'ben' ]
88
- @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
89
- left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
90
-
91
- assert_nothing_raised do
92
- ltd.check left
93
- end
94
- end
95
-
96
- should "treat a String as a full record if passed through" do
97
- dash = 'DHC8-400'
98
- b747 = 'B747200/300'
99
- dc9 = 'DC-9-10'
100
- right_records = [ dash, b747, dc9 ]
101
- simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
102
- assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
103
- assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
104
- assert_equal dc9, simple_ltd.left_to_right('McDonnell Douglas MD81/DC-9')
75
+ clear_ltd
76
+ @blocking_only = true
77
+ @blockings.push ['/dash/i']
78
+ assert_equal @a_right, ltd.left_to_right(@a_left)
79
+ end
80
+
81
+ # the example from the readme, considerably uglier here
82
+ should "check a simple table" do
83
+ @right = [ 'seamus', 'andy', 'ben' ]
84
+ @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
85
+ left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
86
+
87
+ assert_nothing_raised do
88
+ ltd.check left
105
89
  end
106
-
107
- should "call it a mismatch if you hit a blank positive" do
108
- @positives.push [@a_left[0], '']
109
- assert_raises(LooseTightDictionary::Mismatch) do
110
- ltd.left_to_right @a_left
111
- end
90
+ end
91
+
92
+ should "treat a String as a full record if passed through" do
93
+ dash = 'DHC8-400'
94
+ b747 = 'B747200/300'
95
+ dc9 = 'DC-9-10'
96
+ right_records = [ dash, b747, dc9 ]
97
+ simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
98
+ assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
99
+ assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
100
+ assert_equal dc9, simple_ltd.find('McDonnell Douglas MD81/DC-9')
101
+ end
102
+
103
+ should "call it a mismatch if you hit a blank positive" do
104
+ @positives.push [@a_left[0], '']
105
+ assert_raises(LooseTightDictionary::Mismatch) do
106
+ ltd.left_to_right @a_left
112
107
  end
108
+ end
113
109
 
114
- should "call it a false positive if you hit a blank negative" do
115
- @negatives.push [@a_left[0], '']
116
- assert_raises(LooseTightDictionary::FalsePositive) do
117
- ltd.left_to_right @a_left
118
- end
119
- end
120
-
121
- should "have a false match without blocking" do
122
- # @d_left will be our victim
123
- @right.push @d_lookalike
124
- @tightenings.push @t_1
125
-
126
- assert_equal @d_lookalike, ltd.left_to_right(@d_left)
110
+ should "call it a false positive if you hit a blank negative" do
111
+ @negatives.push [@a_left[0], '']
112
+ assert_raises(LooseTightDictionary::FalsePositive) do
113
+ ltd.left_to_right @a_left
127
114
  end
115
+ end
116
+
117
+ should "have a false match without blocking" do
118
+ # @d_left will be our victim
119
+ @right.push @d_lookalike
120
+ @tightenings.push @t_1
128
121
 
129
- should "do blocking if the left matches a block" do
130
- # @d_left will be our victim
131
- @right.push @d_lookalike
132
- @tightenings.push @t_1
133
- @blockings.push ['/(bombardier|de ?havilland)/i']
134
-
135
- assert_equal @d_right, ltd.left_to_right(@d_left)
136
- end
122
+ assert_equal @d_lookalike, ltd.left_to_right(@d_left)
123
+ end
124
+
125
+ should "do blocking if the left matches a block" do
126
+ # @d_left will be our victim
127
+ @right.push @d_lookalike
128
+ @tightenings.push @t_1
129
+ @blockings.push ['/(bombardier|de ?havilland)/i']
137
130
 
138
- should "treat blocks as exclusive" do
139
- @right = [ @d_left ]
140
- @tightenings.push @t_1
141
- @blockings.push ['/(bombardier|de ?havilland)/i']
131
+ assert_equal @d_right, ltd.left_to_right(@d_left)
132
+ end
133
+
134
+ should "treat blocks as exclusive" do
135
+ @right = [ @d_left ]
136
+ @tightenings.push @t_1
137
+ @blockings.push ['/(bombardier|de ?havilland)/i']
142
138
 
143
- assert_equal nil, ltd.left_to_right(@d_lookalike)
144
- end
139
+ assert_equal nil, ltd.left_to_right(@d_lookalike)
140
+ end
141
+
142
+ should "only use identities if they stem from the same regexp" do
143
+ @identities.push @r_1
144
+ @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
145
+ @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
146
+ x_left = [ 'CESSNA D-333 CITATION V']
147
+ x_right = [ 'CESSNA D-333' ]
148
+ @right.push x_right
145
149
 
146
- should "only use identities if they stem from the same regexp" do
147
- @identities.push @r_1
148
- @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
149
- @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
150
- x_left = [ 'CESSNA D-333 CITATION V']
151
- x_right = [ 'CESSNA D-333' ]
152
- @right.push x_right
153
-
154
- assert_equal x_right, ltd.left_to_right(x_left)
155
- end
150
+ assert_equal x_right, ltd.left_to_right(x_left)
151
+ end
152
+
153
+ should "use the best score from all of the tightenings" do
154
+ x_left = ["BOEING 737100"]
155
+ x_right = ["BOEING BOEING 737-100/200"]
156
+ x_right_wrong = ["BOEING BOEING 737-900"]
157
+ @right.push x_right
158
+ @right.push x_right_wrong
159
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
160
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
156
161
 
157
- should "use the best score from all of the tightenings" do
158
- x_left = ["BOEING 737100"]
159
- x_right = ["BOEING BOEING 737-100/200"]
160
- x_right_wrong = ["BOEING BOEING 737-900"]
161
- @right.push x_right
162
- @right.push x_right_wrong
163
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
164
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
165
-
166
- assert_equal x_right, ltd.left_to_right(x_left)
167
- end
162
+ assert_equal x_right, ltd.left_to_right(x_left)
163
+ end
164
+
165
+ should "compare using prefixes if tightened key is shorter than correct match" do
166
+ x_left = ["BOEING 720"]
167
+ x_right = ["BOEING BOEING 720-000"]
168
+ x_right_wrong = ["BOEING BOEING 717-200"]
169
+ @right.push x_right
170
+ @right.push x_right_wrong
171
+ @tightenings.push @t_1
172
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
173
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
168
174
 
169
- should "compare using prefixes if tightened key is shorter than correct match" do
170
- x_left = ["BOEING 720"]
171
- x_right = ["BOEING BOEING 720-000"]
172
- x_right_wrong = ["BOEING BOEING 717-200"]
173
- @right.push x_right
174
- @right.push x_right_wrong
175
- @tightenings.push @t_1
176
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
177
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
178
-
179
- assert_equal x_right, ltd.left_to_right(x_left)
180
- end
175
+ assert_equal x_right, ltd.left_to_right(x_left)
176
+ end
177
+
178
+ should "use the shortest original input" do
179
+ x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
180
+ x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
181
+ x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
181
182
 
182
- should "use the shortest original input" do
183
- x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
184
- x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
185
- x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
186
-
187
- @right.push x_right_long
188
- @right.push x_right
189
- @tightenings.push @t_1
190
-
191
- assert_equal x_right, ltd.left_to_right(x_left)
192
- end
183
+ @right.push x_right_long
184
+ @right.push x_right
185
+ @tightenings.push @t_1
193
186
 
194
- should "perform lookups left to right" do
195
- assert_equal @a_right, ltd.left_to_right(@a_left)
187
+ assert_equal x_right, ltd.left_to_right(x_left)
188
+ end
189
+
190
+ should "perform lookups left to right" do
191
+ assert_equal @a_right, ltd.left_to_right(@a_left)
192
+ end
193
+
194
+ should "succeed if there are no checks" do
195
+ assert_nothing_raised do
196
+ ltd.check @left
196
197
  end
198
+ end
199
+
200
+ should "succeed if the positive checks just work" do
201
+ @positives.push [ @a_left[0], @a_right[0] ]
197
202
 
198
- should "succeed if there are no checks" do
199
- assert_nothing_raised do
200
- ltd.check @left
201
- end
203
+ assert_nothing_raised do
204
+ ltd.check @left
205
+ end
206
+ end
207
+
208
+ should "fail if positive checks don't work" do
209
+ @positives.push [ @d_left[0], @d_right[0] ]
210
+
211
+ assert_raises(LooseTightDictionary::Mismatch) do
212
+ ltd.check @left
213
+ end
214
+ end
215
+
216
+ should "succeed if proper tightening is applied" do
217
+ @positives.push [ @d_left[0], @d_right[0] ]
218
+ @tightenings.push @t_1
219
+
220
+ assert_nothing_raised do
221
+ ltd.check @left
202
222
  end
223
+ end
224
+
225
+ should "use a Google Docs spreadsheet as a source of tightenings" do
226
+ @positives.push [ @d_left[0], @d_right[0] ]
227
+ @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
203
228
 
204
- should "succeed if the positive checks just work" do
205
- @positives.push [ @a_left[0], @a_right[0] ]
206
-
207
- assert_nothing_raised do
208
- ltd.check @left
209
- end
229
+ # sabshere 9/30/10 this shouldn't raise anything
230
+ # but the tightenings have been changed... we should be using test-only tightenings, not production ones
231
+ # assert_nothing_raised do
232
+ assert_raises(LooseTightDictionary::Mismatch) do
233
+ ltd.check @left
210
234
  end
235
+ end
211
236
 
212
- should "fail if positive checks don't work" do
213
- @positives.push [ @d_left[0], @d_right[0] ]
237
+ should "fail if negative checks don't work" do
238
+ @negatives.push [ @b_left[0], @c_right[0] ]
214
239
 
215
- assert_raises(LooseTightDictionary::Mismatch) do
216
- ltd.check @left
217
- end
240
+ assert_raises(LooseTightDictionary::FalsePositive) do
241
+ ltd.check @left
218
242
  end
243
+ end
219
244
 
220
- should "succeed if proper tightening is applied" do
221
- @positives.push [ @d_left[0], @d_right[0] ]
222
- @tightenings.push @t_1
245
+ should "do inline checking" do
246
+ @negatives.push [ @b_left[0], @c_right[0] ]
223
247
 
224
- assert_nothing_raised do
225
- ltd.check @left
226
- end
248
+ assert_raises(LooseTightDictionary::FalsePositive) do
249
+ ltd.left_to_right @b_left
227
250
  end
251
+ end
228
252
 
229
- should "use a Google Docs spreadsheet as a source of tightenings" do
230
- @positives.push [ @d_left[0], @d_right[0] ]
231
- @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
232
-
233
- # sabshere 9/30/10 this shouldn't raise anything
234
- # but the tightenings have been changed... we should be using test-only tightenings, not production ones
235
- # assert_nothing_raised do
236
- assert_raises(LooseTightDictionary::Mismatch) do
237
- ltd.check @left
238
- end
239
- end
240
-
241
- should "fail if negative checks don't work" do
242
- @negatives.push [ @b_left[0], @c_right[0] ]
243
-
244
- assert_raises(LooseTightDictionary::FalsePositive) do
245
- ltd.check @left
246
- end
247
- end
248
-
249
- should "do inline checking" do
250
- @negatives.push [ @b_left[0], @c_right[0] ]
251
-
252
- assert_raises(LooseTightDictionary::FalsePositive) do
253
- ltd.left_to_right @b_left
254
- end
255
- end
253
+ should "fail if negative checks don't work, even with tightening" do
254
+ @negatives.push [ @b_left[0], @c_right[0] ]
255
+ @tightenings.push @t_1
256
256
 
257
- should "fail if negative checks don't work, even with tightening" do
258
- @negatives.push [ @b_left[0], @c_right[0] ]
259
- @tightenings.push @t_1
260
-
261
- assert_raises(LooseTightDictionary::FalsePositive) do
262
- ltd.check @left
263
- end
257
+ assert_raises(LooseTightDictionary::FalsePositive) do
258
+ ltd.check @left
264
259
  end
260
+ end
261
+
262
+ should "succeed if proper identity is applied" do
263
+ @negatives.push [ @b_left[0], @c_right[0] ]
264
+ @positives.push [ @d_left[0], @d_right[0] ]
265
+ @identities.push @r_1
265
266
 
266
- should "succeed if proper identity is applied" do
267
- @negatives.push [ @b_left[0], @c_right[0] ]
268
- @positives.push [ @d_left[0], @d_right[0] ]
269
- @identities.push @r_1
270
-
271
- assert_nothing_raised do
272
- ltd.check @left
273
- end
267
+ assert_nothing_raised do
268
+ ltd.check @left
274
269
  end
275
270
  end
276
271
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-30 00:00:00 -05:00
18
+ date: 2011-03-02 00:00:00 -06:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency