loose_tight_dictionary 0.0.9 → 0.0.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -4,13 +4,9 @@ Match things based on string similarity (using the Pair Distance algorithm) and
4
4
 
5
5
  = Quickstart
6
6
 
7
- >> right_records = [ 'seamus', 'andy', 'ben' ]
7
+ >> d = LooseTightDictionary.new %w(seamus andy ben)
8
8
  => [...]
9
- >> left_record = 'Shamus Heaney'
10
- => [...]
11
- >> d = LooseTightDictionary.new right_records
12
- => [...]
13
- >> puts d.left_to_right left_record
9
+ >> puts d.find 'Shamus Heaney'
14
10
  => 'seamus'
15
11
 
16
12
  Try running the included example file:
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -218,6 +218,7 @@ class LooseTightDictionary
218
218
  inline_check left_record, right_record
219
219
  right_record
220
220
  end
221
+ alias_method :find, :left_to_right
221
222
 
222
223
  def optimize(t_options_left, t_options_right)
223
224
  cart_prod(t_options_left, t_options_right).max do |a, b|
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{loose_tight_dictionary}
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere"]
12
- s.date = %q{2010-09-30}
12
+ s.date = %q{2011-03-02}
13
13
  s.description = %q{Create dictionaries that link rows between two tables (left and right) using loose matching (string similarity) by default and tight matching (regexp) by request.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -65,212 +65,207 @@ class TestLooseTightDictionary < Test::Unit::TestCase
65
65
  :tee => $tee
66
66
  end
67
67
 
68
- if ENV['NEW'] == 'true' or ENV['ALL'] == 'true'
69
- end
70
-
71
- if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
72
- should "optionally only pay attention to things that match blockings" do
73
- assert_equal @a_right, ltd.left_to_right(@a_left)
68
+ should "optionally only pay attention to things that match blockings" do
69
+ assert_equal @a_right, ltd.left_to_right(@a_left)
74
70
 
75
- clear_ltd
76
- @blocking_only = true
77
- assert_equal nil, ltd.left_to_right(@a_left)
71
+ clear_ltd
72
+ @blocking_only = true
73
+ assert_equal nil, ltd.left_to_right(@a_left)
78
74
 
79
- clear_ltd
80
- @blocking_only = true
81
- @blockings.push ['/dash/i']
82
- assert_equal @a_right, ltd.left_to_right(@a_left)
83
- end
84
-
85
- # the example from the readme, considerably uglier here
86
- should "check a simple table" do
87
- @right = [ 'seamus', 'andy', 'ben' ]
88
- @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
89
- left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
90
-
91
- assert_nothing_raised do
92
- ltd.check left
93
- end
94
- end
95
-
96
- should "treat a String as a full record if passed through" do
97
- dash = 'DHC8-400'
98
- b747 = 'B747200/300'
99
- dc9 = 'DC-9-10'
100
- right_records = [ dash, b747, dc9 ]
101
- simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
102
- assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
103
- assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
104
- assert_equal dc9, simple_ltd.left_to_right('McDonnell Douglas MD81/DC-9')
75
+ clear_ltd
76
+ @blocking_only = true
77
+ @blockings.push ['/dash/i']
78
+ assert_equal @a_right, ltd.left_to_right(@a_left)
79
+ end
80
+
81
+ # the example from the readme, considerably uglier here
82
+ should "check a simple table" do
83
+ @right = [ 'seamus', 'andy', 'ben' ]
84
+ @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
85
+ left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
86
+
87
+ assert_nothing_raised do
88
+ ltd.check left
105
89
  end
106
-
107
- should "call it a mismatch if you hit a blank positive" do
108
- @positives.push [@a_left[0], '']
109
- assert_raises(LooseTightDictionary::Mismatch) do
110
- ltd.left_to_right @a_left
111
- end
90
+ end
91
+
92
+ should "treat a String as a full record if passed through" do
93
+ dash = 'DHC8-400'
94
+ b747 = 'B747200/300'
95
+ dc9 = 'DC-9-10'
96
+ right_records = [ dash, b747, dc9 ]
97
+ simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
98
+ assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
99
+ assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
100
+ assert_equal dc9, simple_ltd.find('McDonnell Douglas MD81/DC-9')
101
+ end
102
+
103
+ should "call it a mismatch if you hit a blank positive" do
104
+ @positives.push [@a_left[0], '']
105
+ assert_raises(LooseTightDictionary::Mismatch) do
106
+ ltd.left_to_right @a_left
112
107
  end
108
+ end
113
109
 
114
- should "call it a false positive if you hit a blank negative" do
115
- @negatives.push [@a_left[0], '']
116
- assert_raises(LooseTightDictionary::FalsePositive) do
117
- ltd.left_to_right @a_left
118
- end
119
- end
120
-
121
- should "have a false match without blocking" do
122
- # @d_left will be our victim
123
- @right.push @d_lookalike
124
- @tightenings.push @t_1
125
-
126
- assert_equal @d_lookalike, ltd.left_to_right(@d_left)
110
+ should "call it a false positive if you hit a blank negative" do
111
+ @negatives.push [@a_left[0], '']
112
+ assert_raises(LooseTightDictionary::FalsePositive) do
113
+ ltd.left_to_right @a_left
127
114
  end
115
+ end
116
+
117
+ should "have a false match without blocking" do
118
+ # @d_left will be our victim
119
+ @right.push @d_lookalike
120
+ @tightenings.push @t_1
128
121
 
129
- should "do blocking if the left matches a block" do
130
- # @d_left will be our victim
131
- @right.push @d_lookalike
132
- @tightenings.push @t_1
133
- @blockings.push ['/(bombardier|de ?havilland)/i']
134
-
135
- assert_equal @d_right, ltd.left_to_right(@d_left)
136
- end
122
+ assert_equal @d_lookalike, ltd.left_to_right(@d_left)
123
+ end
124
+
125
+ should "do blocking if the left matches a block" do
126
+ # @d_left will be our victim
127
+ @right.push @d_lookalike
128
+ @tightenings.push @t_1
129
+ @blockings.push ['/(bombardier|de ?havilland)/i']
137
130
 
138
- should "treat blocks as exclusive" do
139
- @right = [ @d_left ]
140
- @tightenings.push @t_1
141
- @blockings.push ['/(bombardier|de ?havilland)/i']
131
+ assert_equal @d_right, ltd.left_to_right(@d_left)
132
+ end
133
+
134
+ should "treat blocks as exclusive" do
135
+ @right = [ @d_left ]
136
+ @tightenings.push @t_1
137
+ @blockings.push ['/(bombardier|de ?havilland)/i']
142
138
 
143
- assert_equal nil, ltd.left_to_right(@d_lookalike)
144
- end
139
+ assert_equal nil, ltd.left_to_right(@d_lookalike)
140
+ end
141
+
142
+ should "only use identities if they stem from the same regexp" do
143
+ @identities.push @r_1
144
+ @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
145
+ @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
146
+ x_left = [ 'CESSNA D-333 CITATION V']
147
+ x_right = [ 'CESSNA D-333' ]
148
+ @right.push x_right
145
149
 
146
- should "only use identities if they stem from the same regexp" do
147
- @identities.push @r_1
148
- @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
149
- @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
150
- x_left = [ 'CESSNA D-333 CITATION V']
151
- x_right = [ 'CESSNA D-333' ]
152
- @right.push x_right
153
-
154
- assert_equal x_right, ltd.left_to_right(x_left)
155
- end
150
+ assert_equal x_right, ltd.left_to_right(x_left)
151
+ end
152
+
153
+ should "use the best score from all of the tightenings" do
154
+ x_left = ["BOEING 737100"]
155
+ x_right = ["BOEING BOEING 737-100/200"]
156
+ x_right_wrong = ["BOEING BOEING 737-900"]
157
+ @right.push x_right
158
+ @right.push x_right_wrong
159
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
160
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
156
161
 
157
- should "use the best score from all of the tightenings" do
158
- x_left = ["BOEING 737100"]
159
- x_right = ["BOEING BOEING 737-100/200"]
160
- x_right_wrong = ["BOEING BOEING 737-900"]
161
- @right.push x_right
162
- @right.push x_right_wrong
163
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
164
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
165
-
166
- assert_equal x_right, ltd.left_to_right(x_left)
167
- end
162
+ assert_equal x_right, ltd.left_to_right(x_left)
163
+ end
164
+
165
+ should "compare using prefixes if tightened key is shorter than correct match" do
166
+ x_left = ["BOEING 720"]
167
+ x_right = ["BOEING BOEING 720-000"]
168
+ x_right_wrong = ["BOEING BOEING 717-200"]
169
+ @right.push x_right
170
+ @right.push x_right_wrong
171
+ @tightenings.push @t_1
172
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
173
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
168
174
 
169
- should "compare using prefixes if tightened key is shorter than correct match" do
170
- x_left = ["BOEING 720"]
171
- x_right = ["BOEING BOEING 720-000"]
172
- x_right_wrong = ["BOEING BOEING 717-200"]
173
- @right.push x_right
174
- @right.push x_right_wrong
175
- @tightenings.push @t_1
176
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
177
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
178
-
179
- assert_equal x_right, ltd.left_to_right(x_left)
180
- end
175
+ assert_equal x_right, ltd.left_to_right(x_left)
176
+ end
177
+
178
+ should "use the shortest original input" do
179
+ x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
180
+ x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
181
+ x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
181
182
 
182
- should "use the shortest original input" do
183
- x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
184
- x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
185
- x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
186
-
187
- @right.push x_right_long
188
- @right.push x_right
189
- @tightenings.push @t_1
190
-
191
- assert_equal x_right, ltd.left_to_right(x_left)
192
- end
183
+ @right.push x_right_long
184
+ @right.push x_right
185
+ @tightenings.push @t_1
193
186
 
194
- should "perform lookups left to right" do
195
- assert_equal @a_right, ltd.left_to_right(@a_left)
187
+ assert_equal x_right, ltd.left_to_right(x_left)
188
+ end
189
+
190
+ should "perform lookups left to right" do
191
+ assert_equal @a_right, ltd.left_to_right(@a_left)
192
+ end
193
+
194
+ should "succeed if there are no checks" do
195
+ assert_nothing_raised do
196
+ ltd.check @left
196
197
  end
198
+ end
199
+
200
+ should "succeed if the positive checks just work" do
201
+ @positives.push [ @a_left[0], @a_right[0] ]
197
202
 
198
- should "succeed if there are no checks" do
199
- assert_nothing_raised do
200
- ltd.check @left
201
- end
203
+ assert_nothing_raised do
204
+ ltd.check @left
205
+ end
206
+ end
207
+
208
+ should "fail if positive checks don't work" do
209
+ @positives.push [ @d_left[0], @d_right[0] ]
210
+
211
+ assert_raises(LooseTightDictionary::Mismatch) do
212
+ ltd.check @left
213
+ end
214
+ end
215
+
216
+ should "succeed if proper tightening is applied" do
217
+ @positives.push [ @d_left[0], @d_right[0] ]
218
+ @tightenings.push @t_1
219
+
220
+ assert_nothing_raised do
221
+ ltd.check @left
202
222
  end
223
+ end
224
+
225
+ should "use a Google Docs spreadsheet as a source of tightenings" do
226
+ @positives.push [ @d_left[0], @d_right[0] ]
227
+ @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
203
228
 
204
- should "succeed if the positive checks just work" do
205
- @positives.push [ @a_left[0], @a_right[0] ]
206
-
207
- assert_nothing_raised do
208
- ltd.check @left
209
- end
229
+ # sabshere 9/30/10 this shouldn't raise anything
230
+ # but the tightenings have been changed... we should be using test-only tightenings, not production ones
231
+ # assert_nothing_raised do
232
+ assert_raises(LooseTightDictionary::Mismatch) do
233
+ ltd.check @left
210
234
  end
235
+ end
211
236
 
212
- should "fail if positive checks don't work" do
213
- @positives.push [ @d_left[0], @d_right[0] ]
237
+ should "fail if negative checks don't work" do
238
+ @negatives.push [ @b_left[0], @c_right[0] ]
214
239
 
215
- assert_raises(LooseTightDictionary::Mismatch) do
216
- ltd.check @left
217
- end
240
+ assert_raises(LooseTightDictionary::FalsePositive) do
241
+ ltd.check @left
218
242
  end
243
+ end
219
244
 
220
- should "succeed if proper tightening is applied" do
221
- @positives.push [ @d_left[0], @d_right[0] ]
222
- @tightenings.push @t_1
245
+ should "do inline checking" do
246
+ @negatives.push [ @b_left[0], @c_right[0] ]
223
247
 
224
- assert_nothing_raised do
225
- ltd.check @left
226
- end
248
+ assert_raises(LooseTightDictionary::FalsePositive) do
249
+ ltd.left_to_right @b_left
227
250
  end
251
+ end
228
252
 
229
- should "use a Google Docs spreadsheet as a source of tightenings" do
230
- @positives.push [ @d_left[0], @d_right[0] ]
231
- @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
232
-
233
- # sabshere 9/30/10 this shouldn't raise anything
234
- # but the tightenings have been changed... we should be using test-only tightenings, not production ones
235
- # assert_nothing_raised do
236
- assert_raises(LooseTightDictionary::Mismatch) do
237
- ltd.check @left
238
- end
239
- end
240
-
241
- should "fail if negative checks don't work" do
242
- @negatives.push [ @b_left[0], @c_right[0] ]
243
-
244
- assert_raises(LooseTightDictionary::FalsePositive) do
245
- ltd.check @left
246
- end
247
- end
248
-
249
- should "do inline checking" do
250
- @negatives.push [ @b_left[0], @c_right[0] ]
251
-
252
- assert_raises(LooseTightDictionary::FalsePositive) do
253
- ltd.left_to_right @b_left
254
- end
255
- end
253
+ should "fail if negative checks don't work, even with tightening" do
254
+ @negatives.push [ @b_left[0], @c_right[0] ]
255
+ @tightenings.push @t_1
256
256
 
257
- should "fail if negative checks don't work, even with tightening" do
258
- @negatives.push [ @b_left[0], @c_right[0] ]
259
- @tightenings.push @t_1
260
-
261
- assert_raises(LooseTightDictionary::FalsePositive) do
262
- ltd.check @left
263
- end
257
+ assert_raises(LooseTightDictionary::FalsePositive) do
258
+ ltd.check @left
264
259
  end
260
+ end
261
+
262
+ should "succeed if proper identity is applied" do
263
+ @negatives.push [ @b_left[0], @c_right[0] ]
264
+ @positives.push [ @d_left[0], @d_right[0] ]
265
+ @identities.push @r_1
265
266
 
266
- should "succeed if proper identity is applied" do
267
- @negatives.push [ @b_left[0], @c_right[0] ]
268
- @positives.push [ @d_left[0], @d_right[0] ]
269
- @identities.push @r_1
270
-
271
- assert_nothing_raised do
272
- ltd.check @left
273
- end
267
+ assert_nothing_raised do
268
+ ltd.check @left
274
269
  end
275
270
  end
276
271
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loose_tight_dictionary
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-30 00:00:00 -05:00
18
+ date: 2011-03-02 00:00:00 -06:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency