fuzzy_match 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rspec +2 -0
- data/CHANGELOG +14 -0
- data/Gemfile +8 -0
- data/README.markdown +58 -38
- data/Rakefile +0 -9
- data/bin/fuzzy_match +106 -0
- data/fuzzy_match.gemspec +4 -4
- data/groupings-screenshot.png +0 -0
- data/highlevel.graffle +0 -0
- data/highlevel.png +0 -0
- data/lib/fuzzy_match/record.rb +58 -0
- data/lib/fuzzy_match/result.rb +11 -8
- data/lib/fuzzy_match/rule/grouping.rb +70 -12
- data/lib/fuzzy_match/rule/identity.rb +3 -3
- data/lib/fuzzy_match/rule.rb +1 -1
- data/lib/fuzzy_match/score/amatch.rb +0 -4
- data/lib/fuzzy_match/score/pure_ruby.rb +2 -8
- data/lib/fuzzy_match/score.rb +4 -0
- data/lib/fuzzy_match/similarity.rb +10 -32
- data/lib/fuzzy_match/version.rb +1 -1
- data/lib/fuzzy_match.rb +78 -94
- data/{test/test_amatch.rb → spec/amatch_spec.rb} +1 -2
- data/{test/test_cache.rb → spec/cache_spec.rb} +7 -7
- data/spec/foo.rb +9 -0
- data/spec/fuzzy_match_spec.rb +354 -0
- data/spec/grouping_spec.rb +60 -0
- data/spec/identity_spec.rb +29 -0
- data/{test/test_wrapper.rb → spec/record_spec.rb} +3 -7
- data/spec/spec_helper.rb +21 -0
- metadata +56 -50
- data/bin/fuzzy_match_checker +0 -71
- data/examples/bts_aircraft/5-2-A.htm +0 -10305
- data/examples/bts_aircraft/5-2-B.htm +0 -9576
- data/examples/bts_aircraft/5-2-D.htm +0 -7094
- data/examples/bts_aircraft/5-2-E.htm +0 -2349
- data/examples/bts_aircraft/5-2-G.htm +0 -2922
- data/examples/bts_aircraft/groupings.csv +0 -1
- data/examples/bts_aircraft/identities.csv +0 -1
- data/examples/bts_aircraft/negatives.csv +0 -1
- data/examples/bts_aircraft/normalizers.csv +0 -1
- data/examples/bts_aircraft/number_260.csv +0 -334
- data/examples/bts_aircraft/positives.csv +0 -1
- data/examples/bts_aircraft/test_bts_aircraft.rb +0 -116
- data/examples/first_name_matching.rb +0 -15
- data/examples/icao-bts.xls +0 -0
- data/lib/fuzzy_match/rule/normalizer.rb +0 -20
- data/lib/fuzzy_match/rule/stop_word.rb +0 -11
- data/lib/fuzzy_match/wrapper.rb +0 -73
- data/test/helper.rb +0 -12
- data/test/test_fuzzy_match.rb +0 -304
- data/test/test_fuzzy_match_convoluted.rb.disabled +0 -268
- data/test/test_grouping.rb +0 -28
- data/test/test_identity.rb +0 -34
- data/test/test_normalizer.rb +0 -10
@@ -0,0 +1,354 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe FuzzyMatch do
|
5
|
+
describe '#find' do
|
6
|
+
it %{finds the best match using string similarity} do
|
7
|
+
d = FuzzyMatch.new %w{ RATZ CATZ }
|
8
|
+
d.find('RITZ').should == 'RATZ'
|
9
|
+
end
|
10
|
+
|
11
|
+
it %{doesn't mind crazy characters} do
|
12
|
+
d = FuzzyMatch.new %w{ RATZ CATZ }
|
13
|
+
d.find('RíTZ').should == 'RATZ'
|
14
|
+
end
|
15
|
+
|
16
|
+
it %{not return any result if the maximum score is zero} do
|
17
|
+
FuzzyMatch.new(['a']).find('b').should be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
it %{finds exact matches} do
|
21
|
+
d = FuzzyMatch.new [ 'X' ]
|
22
|
+
d.find('X').should == 'X'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe '#find_all' do
|
27
|
+
it %{return all records in sorted order} do
|
28
|
+
d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
29
|
+
d.find_all('X').should == ['X', 'X22' ]
|
30
|
+
d.find_all('A').should == []
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#find_best' do
|
35
|
+
it %{returns one or more records with the best score} do
|
36
|
+
d = FuzzyMatch.new [ 'X', 'X', 'X22', 'Y', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
37
|
+
d.find_best('X').should == ['X', 'X' ]
|
38
|
+
d.find_best('A').should == []
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe '#find_all_with_score' do
|
43
|
+
it %{return records with 2 scores} do
|
44
|
+
d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
45
|
+
d.find_all_with_score('X').should == [ ['X', 1, 1], ['X22', 0, 0.33333333333333337] ]
|
46
|
+
d.find_all_with_score('A').should == []
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe '#find_with_score' do
|
51
|
+
it %{return record with dice's and lev's scores} do
|
52
|
+
d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
53
|
+
d.find_with_score('X').should == ['X', 1, 1]
|
54
|
+
d.find_with_score('A').should be_nil
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#explain' do
|
59
|
+
before do
|
60
|
+
require 'stringio'
|
61
|
+
@capture = StringIO.new
|
62
|
+
@old_stdout = $stdout
|
63
|
+
$stdout = @capture
|
64
|
+
end
|
65
|
+
after do
|
66
|
+
$stdout = @old_stdout
|
67
|
+
end
|
68
|
+
|
69
|
+
it %{print a basic explanation to stdout} do
|
70
|
+
d = FuzzyMatch.new %w{ RATZ CATZ }
|
71
|
+
d.explain('RITZ')
|
72
|
+
@capture.rewind
|
73
|
+
@capture.read.should include('CATZ')
|
74
|
+
end
|
75
|
+
|
76
|
+
it %{explains match failures} do
|
77
|
+
FuzzyMatch.new(['aaa']).explain('bbb')
|
78
|
+
@capture.rewind
|
79
|
+
@capture.read.should =~ %r{No winner assigned.*aaa.*bbb}
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "groupings replacings normalizers" do
|
84
|
+
it %{sometimes gets false results without them} do
|
85
|
+
d = FuzzyMatch.new ['BOEING 737-100/200', 'BOEING 737-900']
|
86
|
+
d.find('BOEING 737100 number 900').should == 'BOEING 737-900'
|
87
|
+
end
|
88
|
+
|
89
|
+
it %{can be used to improve results} do
|
90
|
+
d = FuzzyMatch.new ['BOEING 737-100/200', 'BOEING 737-900'], groupings: [ [/boeing/i, /7(\d\d)-?(\d\d\d)?/]]
|
91
|
+
d.find('BOEING 737100 number 900').should == 'BOEING 737-100/200'
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
describe "identities" do
|
96
|
+
it %{sometimes gets false results without them} do
|
97
|
+
# false positive without identity
|
98
|
+
d = FuzzyMatch.new %w{ foo bar }
|
99
|
+
d.find('baz').should == 'bar'
|
100
|
+
end
|
101
|
+
|
102
|
+
it %{can be used to improve results} do
|
103
|
+
d = FuzzyMatch.new %w{ foo bar }, :identities => [ /ba(.)/ ]
|
104
|
+
d.find('baz').should be_nil
|
105
|
+
end
|
106
|
+
|
107
|
+
it %{is sort of like backreferences} do
|
108
|
+
one = '1 sauk ONEONEONEONEONE'
|
109
|
+
two = '2 sauk TWOTWOTWOTWO'
|
110
|
+
d = FuzzyMatch.new([one, two])
|
111
|
+
d.find('1 sauk TWOTWOTWOTWO').should == two # wrong
|
112
|
+
d = FuzzyMatch.new([one, two], identities: [/\A(\d+)\s+(\w+)/])
|
113
|
+
d.find('1 sauk TWOTWOTWOTWO').should == one # correct
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
describe 'groupings' do
|
118
|
+
it %{sometimes gets false results without them} do
|
119
|
+
d = FuzzyMatch.new [ 'Barack Obama', 'George Bush' ]
|
120
|
+
d.find('Barack Bush').should == 'Barack Obama' # luke i am your father
|
121
|
+
d.find('George Obama').should == 'George Bush' # nooooooooooooooooooo
|
122
|
+
end
|
123
|
+
|
124
|
+
it %{can be used to improve results} do
|
125
|
+
d = FuzzyMatch.new [ 'Barack Obama', 'George Bush' ], :groupings => [ /Obama/, /Bush/ ]
|
126
|
+
d.find('Barack Bush').should == 'George Bush'
|
127
|
+
d.find('George Obama').should == 'Barack Obama'
|
128
|
+
end
|
129
|
+
|
130
|
+
it %{stays within the group} do
|
131
|
+
d = FuzzyMatch.new [ 'AB', 'CD' ]
|
132
|
+
d.find('ABCDCD').should == 'CD'
|
133
|
+
d = FuzzyMatch.new [ 'AB', 'CD' ], groupings: [/A/]
|
134
|
+
d.find('ABCDCD').should == 'AB'
|
135
|
+
end
|
136
|
+
|
137
|
+
describe 'with chains' do
|
138
|
+
describe 'hotel example' do
|
139
|
+
before do
|
140
|
+
@grandh = 'Grand Hyatt'
|
141
|
+
@h = 'Hyatt'
|
142
|
+
@hgarden = 'Hyatt Garden'
|
143
|
+
@grandhotel = 'Grand Hotel'
|
144
|
+
@fz = FuzzyMatch.new([@grandh, @h, @hgarden, @grandhotel], groupings: [ [ /hyatt/i, /garden/i, /grand/i ] ], stop_words: [ /hotel/i ])
|
145
|
+
end
|
146
|
+
|
147
|
+
it %{works as expected} do
|
148
|
+
@fz.find('Grand Hyatt').should == @grandh
|
149
|
+
@fz.find('Grand Hyatt Foobar').should == @grandh
|
150
|
+
@fz.find('Hyatt Garden').should == @hgarden
|
151
|
+
@fz.find('Hyatt Garden Foobar').should == @hgarden
|
152
|
+
end
|
153
|
+
|
154
|
+
it %{enforces some stuff} do
|
155
|
+
# nope
|
156
|
+
@fz.find('Grund Hyatt').should == @h
|
157
|
+
@fz.find('Grund Hyatt Foobar').should == @h
|
158
|
+
@fz.find('Hyatt Gurden').should == @h
|
159
|
+
@fz.find('Hyatt Gurden Foobar').should == @h
|
160
|
+
# hmm - hyatt misspelled, so totally prevented from matching hyatt
|
161
|
+
@fz.find('Grund Hyutt').should == @grandhotel
|
162
|
+
@fz.find('Grund Hyutt Foobar').should == @grandhotel
|
163
|
+
# precedence
|
164
|
+
@fz.find('Grand Hyatt Garden').should == @hgarden
|
165
|
+
@fz.find('Grand Hyatt Garden Foobar').should == @hgarden
|
166
|
+
# sanity
|
167
|
+
@fz.find('Grund Hyatt Garden').should == @hgarden
|
168
|
+
@fz.find('Grund Hyatt Garden Foobar').should == @hgarden
|
169
|
+
@fz.find('Grand Hyatt Gurden').should == @grandh
|
170
|
+
@fz.find('Grand Hyatt Gurden Foobar').should == @grandh
|
171
|
+
end
|
172
|
+
|
173
|
+
it %{is sticky} do
|
174
|
+
@fz.find('Grand Hotel').should == @grandhotel
|
175
|
+
@fz.find('Hotel Garden').should be_nil
|
176
|
+
@fz.find('Grand Hotel Garden').should == @grandhotel
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
it "helps with subgroups" do
|
181
|
+
d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :groupings => [ [/boeing/i, /(7\d{2})/] ]
|
182
|
+
d.find_all('Boeing 747').should == [ 'Boeing 747', 'Boeing 747SR' ]
|
183
|
+
|
184
|
+
d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :groupings => [ [/boeing/i, /(7\d{2})/] ]
|
185
|
+
d.find_all('Boeing ER6').should == ["Boeing ER6"]
|
186
|
+
|
187
|
+
d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :groupings => [ [/boeing/i, /(7|E\d{2})/i] ]
|
188
|
+
d.find_all('Boeing ER6').should == [ 'Boeing ER6' ]
|
189
|
+
d.find_all('Boeing 747').should == [ 'Boeing 747', 'Boeing 747SR' ]
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
describe "the :must_match_grouping option" do
|
195
|
+
it %{optionally only attempt matches with records that fit into a grouping} do
|
196
|
+
d = FuzzyMatch.new [ 'Barack Obama', 'George Bush' ], :groupings => [ /Obama/, /Bush/ ], :must_match_grouping => true
|
197
|
+
d.find('George Clinton').should be_nil
|
198
|
+
|
199
|
+
d = FuzzyMatch.new [ 'Barack Obama', 'George Bush' ], :groupings => [ /Obama/, /Bush/ ]
|
200
|
+
d.find('George Clinton', :must_match_grouping => true).should be_nil
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
describe "the :read option" do
|
205
|
+
it %{interpret a Numeric as an array index} do
|
206
|
+
ab = ['a', 'b']
|
207
|
+
ba = ['b', 'a']
|
208
|
+
haystack = [ab, ba]
|
209
|
+
by_first = FuzzyMatch.new haystack, :read => 0
|
210
|
+
by_last = FuzzyMatch.new haystack, :read => 1
|
211
|
+
by_first.find('a').should == ab
|
212
|
+
by_last.find('b').should == ab
|
213
|
+
by_first.find('b').should == ba
|
214
|
+
by_last.find('a').should == ba
|
215
|
+
end
|
216
|
+
|
217
|
+
it %{interpret a Symbol, etc. as hash key} do
|
218
|
+
ab = { :one => 'a', :two => 'b' }
|
219
|
+
ba = { :one => 'b', :two => 'a' }
|
220
|
+
haystack = [ab, ba]
|
221
|
+
by_first = FuzzyMatch.new haystack, :read => :one
|
222
|
+
by_last = FuzzyMatch.new haystack, :read => :two
|
223
|
+
by_first.find('a').should == ab
|
224
|
+
by_last.find('b').should == ab
|
225
|
+
by_first.find('b').should == ba
|
226
|
+
by_last.find('a').should == ba
|
227
|
+
end
|
228
|
+
|
229
|
+
MyStruct = Struct.new(:one, :two)
|
230
|
+
it %{interpret a Symbol as a method id (if the object responds to it)} do
|
231
|
+
ab = MyStruct.new('a', 'b')
|
232
|
+
ba = MyStruct.new('b', 'a')
|
233
|
+
haystack = [ab, ba]
|
234
|
+
by_first = FuzzyMatch.new haystack, :read => :one
|
235
|
+
by_last = FuzzyMatch.new haystack, :read => :two
|
236
|
+
by_first.read.should == :one
|
237
|
+
by_last.read.should == :two
|
238
|
+
by_first.find('a').should == ab
|
239
|
+
by_last.find('b').should == ab
|
240
|
+
by_first.find('b').should == ba
|
241
|
+
by_last.find('a').should == ba
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
describe 'the :must_match_at_least_one_word option' do
|
246
|
+
it %{optionally require that the matching record share at least one word with the needle} do
|
247
|
+
d = FuzzyMatch.new %w{ RATZ CATZ }, :must_match_at_least_one_word => true
|
248
|
+
d.find('RITZ').should be_nil
|
249
|
+
|
250
|
+
d = FuzzyMatch.new ["Foo's Bar"], :must_match_at_least_one_word => true
|
251
|
+
d.find("Foo's").should == "Foo's Bar"
|
252
|
+
d.find("'s").should be_nil
|
253
|
+
d.find("Foo").should be_nil
|
254
|
+
|
255
|
+
d = FuzzyMatch.new ["Bolivia, Plurinational State of"], :must_match_at_least_one_word => true
|
256
|
+
d.find("Bolivia").should == "Bolivia, Plurinational State of"
|
257
|
+
end
|
258
|
+
|
259
|
+
it %{use STOP WORDS} do
|
260
|
+
d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ]
|
261
|
+
d.find('A HTL', :must_match_at_least_one_word => true).should == 'B HTL'
|
262
|
+
|
263
|
+
d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true
|
264
|
+
d.find('A HTL').should == 'B HTL'
|
265
|
+
|
266
|
+
d = FuzzyMatch.new [ 'A HOTEL', 'B HTL' ], :must_match_at_least_one_word => true, :stop_words => [ %r{HO?TE?L} ]
|
267
|
+
d.find('A HTL').should == 'A HOTEL'
|
268
|
+
end
|
269
|
+
|
270
|
+
it %{not be fooled by substrings (but rather compare whole words to whole words)} do
|
271
|
+
d = FuzzyMatch.new [ 'PENINSULA HOTELS' ], :must_match_at_least_one_word => true
|
272
|
+
d.find('DOLCE LA HULPE BXL FI').should be_nil
|
273
|
+
end
|
274
|
+
|
275
|
+
it %{not be case-sensitive when checking for sharing of words} do
|
276
|
+
d = FuzzyMatch.new [ 'A', 'B' ]
|
277
|
+
d.find('a', :must_match_at_least_one_word => true).should == 'A'
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
describe "the :gather_last_result option" do
|
282
|
+
it %{not gather metadata about the last result by default} do
|
283
|
+
d = FuzzyMatch.new %w{ NISSAN HONDA }
|
284
|
+
d.find('MISSAM')
|
285
|
+
lambda do
|
286
|
+
d.last_result
|
287
|
+
end.should raise_error(::RuntimeError, /gather_last_result/)
|
288
|
+
end
|
289
|
+
|
290
|
+
it %{optionally gather metadata about the last result} do
|
291
|
+
d = FuzzyMatch.new %w{ NISSAN HONDA }
|
292
|
+
d.find 'MISSAM', :gather_last_result => true
|
293
|
+
d.last_result.score.should == 0.6
|
294
|
+
d.last_result.winner.should == 'NISSAN'
|
295
|
+
end
|
296
|
+
end
|
297
|
+
|
298
|
+
describe 'quirks' do
|
299
|
+
it %{should not return false negatives because of one-letter similarities} do
|
300
|
+
# dices coefficient doesn't think these two are similar at all because it looks at pairs
|
301
|
+
FuzzyMatch.score_class.new('X foo', 'X bar').dices_coefficient_similar.should == 0
|
302
|
+
# so we must compensate for that somewhere
|
303
|
+
d = FuzzyMatch.new ['X foo', 'randomness']
|
304
|
+
d.find('X bar').should == 'X foo'
|
305
|
+
# without making false positives
|
306
|
+
d.find('Y bar').should be_nil
|
307
|
+
end
|
308
|
+
|
309
|
+
it %{finds possible matches even when pair distance fails} do
|
310
|
+
d = FuzzyMatch.new ['XX', '2 A']
|
311
|
+
d.find('2A').should == '2 A'
|
312
|
+
d = FuzzyMatch.new ['XX', '2A']
|
313
|
+
d.find('2 A').should == '2A'
|
314
|
+
end
|
315
|
+
|
316
|
+
it %{weird blow ups} do
|
317
|
+
d = FuzzyMatch.new ['XX', '2 A']
|
318
|
+
d.find('A').should == '2 A'
|
319
|
+
d = FuzzyMatch.new ['XX', 'A']
|
320
|
+
d.find('2 A').should == 'A'
|
321
|
+
end
|
322
|
+
|
323
|
+
it %{from the wild 1} do
|
324
|
+
d = FuzzyMatch.new ["Doyle Collection", "Trump Collection", "Luxury Collection", "Autograph Collection"]
|
325
|
+
d.find("Algonquin Autograph Collection").should == "Autograph Collection"
|
326
|
+
end
|
327
|
+
|
328
|
+
end
|
329
|
+
|
330
|
+
describe 'deprecations' do
|
331
|
+
it %{takes :must_match_blocking as :must_match_grouping} do
|
332
|
+
d = FuzzyMatch.new [], :must_match_blocking => :a
|
333
|
+
d.default_options[:must_match_grouping].should == :a
|
334
|
+
end
|
335
|
+
|
336
|
+
it %{takes :haystack_reader as :read} do
|
337
|
+
d = FuzzyMatch.new [], :haystack_reader => :c
|
338
|
+
d.read.should == :c
|
339
|
+
end
|
340
|
+
|
341
|
+
it %{takes :blockings as :groupings} do
|
342
|
+
d = FuzzyMatch.new [], :blockings => [ /X/, /Y/ ]
|
343
|
+
d.groupings.should == [ FuzzyMatch::Rule::Grouping.new(/X/), FuzzyMatch::Rule::Grouping.new(/Y/) ]
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
it %{defaults to a pure-ruby engine, but also has amatch} do
|
348
|
+
if defined?($testing_amatch) and $testing_amatch
|
349
|
+
FuzzyMatch.engine.should == :amatch
|
350
|
+
else
|
351
|
+
FuzzyMatch.engine.should == :pure_ruby
|
352
|
+
end
|
353
|
+
end
|
354
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FuzzyMatch::Rule::Grouping do
|
4
|
+
it %{matches a single string argument} do
|
5
|
+
b = FuzzyMatch::Rule::Grouping.new %r{apple}
|
6
|
+
b.xmatch?(r('2 apples')).should == true
|
7
|
+
end
|
8
|
+
|
9
|
+
it %{embraces case insensitivity} do
|
10
|
+
b = FuzzyMatch::Rule::Grouping.new %r{apple}i
|
11
|
+
b.xmatch?(r('2 Apples')).should == true
|
12
|
+
end
|
13
|
+
|
14
|
+
it %{xjoins two string arguments} do
|
15
|
+
b = FuzzyMatch::Rule::Grouping.new %r{apple}
|
16
|
+
b.xjoin?(r('apple'), r('2 apples')).should == true
|
17
|
+
end
|
18
|
+
|
19
|
+
it %{fails to xjoin two string arguments} do
|
20
|
+
b = FuzzyMatch::Rule::Grouping.new %r{apple}
|
21
|
+
b.xjoin?(r('orange'), r('2 apples')).should == false
|
22
|
+
end
|
23
|
+
|
24
|
+
it %{returns nil instead of false when it has no information} do
|
25
|
+
b = FuzzyMatch::Rule::Grouping.new %r{apple}
|
26
|
+
b.xjoin?(r('orange'), r('orange')).should be_nil
|
27
|
+
end
|
28
|
+
|
29
|
+
it %{has chains} do
|
30
|
+
h, gr, ga = FuzzyMatch::Rule::Grouping.make([/hyatt/, /grand/, /garden/])
|
31
|
+
h.xjoin?(r('hyatt'), r('hyatt')).should == true
|
32
|
+
|
33
|
+
h.xjoin?(r('grund hyatt'), r('grand hyatt')).should == true
|
34
|
+
gr.xjoin?(r('grund hyatt'), r('grand hyatt')).should == false
|
35
|
+
ga.xjoin?(r('grund hyatt'), r('grand hyatt')).should be_nil
|
36
|
+
|
37
|
+
h.xjoin?(r('hyatt gurden'), r('hyatt garden')).should == true
|
38
|
+
gr.xjoin?(r('hyatt gurden'), r('hyatt garden')).should be_nil
|
39
|
+
ga.xjoin?(r('hyatt gurden'), r('hyatt garden')).should == false
|
40
|
+
|
41
|
+
h.xjoin?(r('grand hyatt'), r('grand hyatt')).should == false # sacrificing itself
|
42
|
+
gr.xjoin?(r('grand hyatt'), r('grand hyatt')).should == true
|
43
|
+
ga.xjoin?(r('grand hyatt'), r('grand hyatt')).should be_nil
|
44
|
+
|
45
|
+
h.xjoin?(r('hyatt garden'), r('hyatt garden')).should == false # sacrificing itself
|
46
|
+
gr.xjoin?(r('hyatt garden'), r('hyatt garden')).should be_nil
|
47
|
+
ga.xjoin?(r('hyatt garden'), r('hyatt garden')).should == true
|
48
|
+
|
49
|
+
h.xjoin?(r('grand hyatt garden'), r('grand hyatt garden')).should == false # sacrificing itself
|
50
|
+
gr.xjoin?(r('grand hyatt garden'), r('grand hyatt garden')).should == true
|
51
|
+
ga.xjoin?(r('grand hyatt garden'), r('grand hyatt garden')).should == true # NOT sacrificing itself?
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def r(str)
|
57
|
+
FuzzyMatch::Record.new str
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FuzzyMatch::Rule::Identity do
|
4
|
+
it %{determines whether two records COULD be identical} do
|
5
|
+
i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
|
6
|
+
i.identical?(r('A1'), r('A 1foobar')).should == true
|
7
|
+
end
|
8
|
+
|
9
|
+
it %{determines that two records MUST NOT be identical} do
|
10
|
+
i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
|
11
|
+
i.identical?(r('A1'), r('A 2foobar')).should == false
|
12
|
+
end
|
13
|
+
|
14
|
+
it %{returns nil indicating no information} do
|
15
|
+
i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}
|
16
|
+
i.identical?(r('B1'), r('A 2foobar')).should == nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it %{embraces case insensitivity} do
|
20
|
+
i = FuzzyMatch::Rule::Identity.new %r{(A)[ ]*(\d)}i
|
21
|
+
i.identical?(r('A1'), r('a 1foobar')).should == true
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def r(str)
|
27
|
+
FuzzyMatch::Record.new str
|
28
|
+
end
|
29
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require '
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe FuzzyMatch::
|
3
|
+
describe FuzzyMatch::Record do
|
4
4
|
it %{does not treat "'s" as a word} do
|
5
5
|
assert_split ["foo's", "bar"], "Foo's Bar"
|
6
6
|
end
|
@@ -20,10 +20,6 @@ describe FuzzyMatch::Wrapper do
|
|
20
20
|
private
|
21
21
|
|
22
22
|
def assert_split(ary, str)
|
23
|
-
FuzzyMatch::
|
24
|
-
end
|
25
|
-
|
26
|
-
def null_fuzzy_match
|
27
|
-
FuzzyMatch.new []
|
23
|
+
FuzzyMatch::Record.new(str).words.should == ary
|
28
24
|
end
|
29
25
|
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
9
|
+
config.run_all_when_everything_filtered = true
|
10
|
+
config.filter_run :focus
|
11
|
+
|
12
|
+
# Run specs in random order to surface order dependencies. If you find an
|
13
|
+
# order dependency and want to debug it, you can fix the order by providing
|
14
|
+
# the seed, which is printed after each run.
|
15
|
+
# --seed 1234
|
16
|
+
config.order = 'random'
|
17
|
+
end
|
18
|
+
|
19
|
+
require 'pry'
|
20
|
+
|
21
|
+
require 'fuzzy_match'
|