fuzzy_match 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
4
4
 
5
- Replaces [loose_tight_dictionary](https://github.com/seamusabshere/loose_tight_dictionary) because that was a confusing name.
5
+ Replaces {loose_tight_dictionary}[https://github.com/seamusabshere/loose_tight_dictionary] because that was a confusing name.
6
6
 
7
7
  == Quickstart
8
8
 
@@ -24,7 +24,7 @@ class FuzzyMatch
24
24
  # required options:
25
25
  # :primary_key - what to call on this class
26
26
  # :foreign_key - what to call on the other class
27
- def cache_fuzzy_match_matches_with(other_active_record_class, options)
27
+ def cache_fuzzy_match_with(other_active_record_class, options)
28
28
  other = other_active_record_class.to_s.singularize.camelcase
29
29
  me = name
30
30
  if me < other
@@ -1,17 +1,41 @@
1
+ require 'erb'
2
+
1
3
  class FuzzyMatch
2
4
  class Result #:nodoc: all
5
+ EXPLANATION = <<-ERB
6
+ You looked for <%= needle.render.inspect %>
7
+
8
+ <% if winner %>It was matched with "<%= winner %>"<% else %>No match was found<% end %>
9
+
10
+ # THE HAYSTACK
11
+
12
+ The haystack reader was <%= read.inspect %>.
13
+
14
+ The haystack contained <%= haystack.length %> records like <%= haystack[0, 3].map(&:render).map(&:inspect).join(', ') %>
15
+
16
+ # HOW IT WAS MATCHED
17
+ <% timeline.each_with_index do |event, index| %>
18
+ (<%= index+1 %>) <%= event %>
19
+ <% end %>
20
+ ERB
21
+
22
+ def timeline
23
+ @timeline ||= []
24
+ end
25
+
3
26
  attr_accessor :needle
27
+ attr_accessor :read
28
+ attr_accessor :haystack
29
+ attr_accessor :options
4
30
  attr_accessor :tighteners
5
31
  attr_accessor :blockings
6
32
  attr_accessor :identities
7
33
  attr_accessor :stop_words
8
- attr_accessor :candidates
9
- attr_accessor :joint
10
- attr_accessor :disjoint
11
- attr_accessor :possibly_identical
12
- attr_accessor :certainly_different
13
- attr_accessor :similarities
14
- attr_accessor :record
34
+ attr_accessor :winner
15
35
  attr_accessor :score
36
+
37
+ def explain
38
+ $stdout.puts ::ERB.new(EXPLANATION, 0, '%<').result(binding)
39
+ end
16
40
  end
17
41
  end
@@ -14,13 +14,13 @@ class FuzzyMatch
14
14
  end
15
15
 
16
16
  def inspect
17
- %{#<Score: dices_coefficient=#{dices_coefficient} levenshtein=#{levenshtein}>}
17
+ %{#<Score: dices_coefficient=#{dices_coefficient_similar} levenshtein=#{levenshtein_similar}>}
18
18
  end
19
19
 
20
20
  def <=>(other)
21
- by_dices_coefficient = (dices_coefficient <=> other.dices_coefficient)
21
+ by_dices_coefficient = (dices_coefficient_similar <=> other.dices_coefficient_similar)
22
22
  if by_dices_coefficient == 0
23
- levenshtein <=> other.levenshtein
23
+ levenshtein_similar <=> other.levenshtein_similar
24
24
  else
25
25
  by_dices_coefficient
26
26
  end
@@ -32,11 +32,11 @@ class FuzzyMatch
32
32
 
33
33
  if defined?(::Amatch)
34
34
 
35
- def dices_coefficient
35
+ def dices_coefficient_similar
36
36
  str1.pair_distance_similar str2
37
37
  end
38
38
 
39
- def levenshtein
39
+ def levenshtein_similar
40
40
  str1.levenshtein_similar str2
41
41
  end
42
42
 
@@ -44,7 +44,7 @@ class FuzzyMatch
44
44
 
45
45
  SPACE = ' '
46
46
  # http://stackoverflow.com/questions/653157/a-better-similarity-ranking-algorithm-for-variable-length-strings
47
- def dices_coefficient
47
+ def dices_coefficient_similar
48
48
  if str1 == str2
49
49
  return 1.0
50
50
  elsif str1.length == 1 and str2.length == 1
@@ -77,7 +77,7 @@ class FuzzyMatch
77
77
  # extracted/adapted from the text gem version 1.0.2
78
78
  # normalization added for utf-8 strings
79
79
  # lib/text/levenshtein.rb
80
- def levenshtein
80
+ def levenshtein_similar
81
81
  if utf8?
82
82
  unpack_rule = 'U*'
83
83
  else
@@ -118,8 +118,8 @@ class FuzzyMatch
118
118
  end
119
119
 
120
120
  extend ::ActiveSupport::Memoizable
121
- memoize :dices_coefficient
122
- memoize :levenshtein
121
+ memoize :dices_coefficient_similar
122
+ memoize :levenshtein_similar
123
123
  memoize :utf8?
124
124
  end
125
125
  end
@@ -1,3 +1,3 @@
1
1
  class FuzzyMatch
2
- VERSION = '1.0.5'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -3,20 +3,25 @@ class FuzzyMatch
3
3
  class Wrapper #:nodoc: all
4
4
  attr_reader :fuzzy_match
5
5
  attr_reader :record
6
- attr_reader :read
6
+ attr_reader :literal
7
+ attr_reader :rendered
7
8
 
8
- def initialize(fuzzy_match, record, read = nil)
9
+ def initialize(fuzzy_match, record, literal = false)
9
10
  @fuzzy_match = fuzzy_match
10
11
  @record = record
11
- @read = read
12
+ @literal = literal
12
13
  end
13
14
 
14
15
  def inspect
15
16
  "#<Wrapper render=#{render} variants=#{variants.length}>"
16
17
  end
18
+
19
+ def read
20
+ fuzzy_match.read unless literal
21
+ end
17
22
 
18
23
  def render
19
- return @render if rendered?
24
+ return @render if rendered
20
25
  str = case read
21
26
  when ::Proc
22
27
  read.call record
@@ -42,7 +47,8 @@ class FuzzyMatch
42
47
 
43
48
  alias :to_str :render
44
49
 
45
- WORD_BOUNDARY = %r{\s*\b\s*}
50
+ # "Foo's Bar" should be treated as [ "Foo's", "Bar" ], so we don't use traditional regexp word boundaries (\b)
51
+ WORD_BOUNDARY = %r{\s+}
46
52
  def words
47
53
  @words ||= render.split(WORD_BOUNDARY)
48
54
  end
@@ -59,9 +65,5 @@ class FuzzyMatch
59
65
  memo
60
66
  end.uniq
61
67
  end
62
-
63
- def rendered?
64
- @rendered == true
65
- end
66
68
  end
67
69
  end
data/lib/fuzzy_match.rb CHANGED
@@ -17,33 +17,72 @@ class FuzzyMatch
17
17
  autoload :Score, 'fuzzy_match/score'
18
18
  autoload :CachedResult, 'fuzzy_match/cached_result'
19
19
 
20
+ DEFAULT_OPTIONS = {
21
+ :first_blocking_decides => false,
22
+ :must_match_blocking => false,
23
+ :must_match_at_least_one_word => false,
24
+ :gather_last_result => false,
25
+ :find_all => false
26
+ }
27
+
20
28
  attr_reader :haystack
21
29
  attr_reader :blockings
22
30
  attr_reader :identities
23
31
  attr_reader :tighteners
24
32
  attr_reader :stop_words
25
- attr_reader :default_first_blocking_decides
26
- attr_reader :default_must_match_blocking
27
- attr_reader :default_must_match_at_least_one_word
33
+ attr_reader :read
34
+ attr_reader :default_options
28
35
 
29
- # haystack - a bunch of records
30
- # options
36
+ # haystack - a bunch of records that will compete to see who best matches the needle
37
+ #
38
+ # rules (can only be specified at initialization or by using a setter)
31
39
  # * tighteners: regexps (see readme)
32
40
  # * identities: regexps
33
41
  # * blockings: regexps
34
42
  # * stop_words: regexps
35
43
  # * read: how to interpret each entry in the 'haystack', either a Proc or a symbol
36
- def initialize(records, options = {})
37
- options = options.symbolize_keys
38
- @default_first_blocking_decides = options[:first_blocking_decides]
39
- @default_must_match_blocking = options[:must_match_blocking]
40
- @default_must_match_at_least_one_word = options[:must_match_at_least_one_word]
41
- @blockings = options.fetch(:blockings, []).map { |regexp_or_str| Blocking.new regexp_or_str }
42
- @identities = options.fetch(:identities, []).map { |regexp_or_str| Identity.new regexp_or_str }
43
- @tighteners = options.fetch(:tighteners, []).map { |regexp_or_str| Tightener.new regexp_or_str }
44
- @stop_words = options.fetch(:stop_words, []).map { |regexp_or_str| StopWord.new regexp_or_str }
45
- read = options[:read] || options[:haystack_reader]
46
- @haystack = records.map { |record| Wrapper.new self, record, read }
44
+ #
45
+ # options (can be specified at initialization or when calling #find)
46
+ # * first_blocking_decides
47
+ # * must_match_blocking
48
+ # * must_match_at_least_one_word
49
+ # * gather_last_result
50
+ # * find_all
51
+ def initialize(competitors, options_and_rules = {})
52
+ options_and_rules = options_and_rules.symbolize_keys
53
+
54
+ # rules
55
+ self.blockings = options_and_rules.delete(:blockings) || []
56
+ self.identities = options_and_rules.delete(:identities) || []
57
+ self.tighteners = options_and_rules.delete(:tighteners) || []
58
+ self.stop_words = options_and_rules.delete(:stop_words) || []
59
+ @read = options_and_rules.delete(:read) || options_and_rules.delete(:haystack_reader)
60
+
61
+ # options
62
+ @default_options = options_and_rules.reverse_merge(DEFAULT_OPTIONS).freeze
63
+
64
+ # do this last
65
+ self.haystack = competitors
66
+ end
67
+
68
+ def blockings=(ary)
69
+ @blockings = ary.map { |regexp_or_str| Blocking.new regexp_or_str }
70
+ end
71
+
72
+ def identities=(ary)
73
+ @identities = ary.map { |regexp_or_str| Identity.new regexp_or_str }
74
+ end
75
+
76
+ def tighteners=(ary)
77
+ @tighteners = ary.map { |regexp_or_str| Tightener.new regexp_or_str }
78
+ end
79
+
80
+ def stop_words=(ary)
81
+ @stop_words = ary.map { |regexp_or_str| StopWord.new regexp_or_str }
82
+ end
83
+
84
+ def haystack=(ary)
85
+ @haystack = ary.map { |competitor| Wrapper.new self, competitor }
47
86
  end
48
87
 
49
88
  def last_result
@@ -58,16 +97,24 @@ class FuzzyMatch
58
97
  def find(needle, options = {})
59
98
  raise ::RuntimeError, "[fuzzy_match] Dictionary has already been freed, can't perform more finds" if freed?
60
99
 
61
- options = options.symbolize_keys
62
- gather_last_result = options.fetch(:gather_last_result, false)
63
- is_find_all = options.fetch(:find_all, false)
64
- first_blocking_decides = options.fetch(:first_blocking_decides, default_first_blocking_decides)
65
- must_match_blocking = options.fetch(:must_match_blocking, default_must_match_blocking)
66
- must_match_at_least_one_word = options.fetch(:must_match_at_least_one_word, default_must_match_at_least_one_word)
100
+ options = options.symbolize_keys.reverse_merge default_options
101
+
102
+ gather_last_result = options[:gather_last_result]
103
+ is_find_all = options[:find_all]
104
+ first_blocking_decides = options[:first_blocking_decides]
105
+ must_match_blocking = options[:must_match_blocking]
106
+ must_match_at_least_one_word = options[:must_match_at_least_one_word]
67
107
 
68
108
  if gather_last_result
69
109
  free_last_result
70
110
  @last_result = Result.new
111
+ last_result.read = read
112
+ last_result.haystack = haystack
113
+ last_result.options = options
114
+ last_result.timeline << <<-EOS
115
+ Options were set, either by you or by falling back to defaults.
116
+ \tOptions: #{options.inspect}
117
+ EOS
71
118
  end
72
119
 
73
120
  if gather_last_result
@@ -77,13 +124,24 @@ class FuzzyMatch
77
124
  last_result.stop_words = stop_words
78
125
  end
79
126
 
80
- needle = Wrapper.new self, needle
127
+ needle = Wrapper.new self, needle, true
81
128
 
82
129
  if gather_last_result
83
130
  last_result.needle = needle
131
+ last_result.timeline << <<-EOS
132
+ The needle's #{needle.variants.length} variants were enumerated.
133
+ \tVariants: #{needle.variants.map(&:inspect).join(', ')}
134
+ EOS
84
135
  end
85
136
 
86
137
  if must_match_blocking and blockings.any? and blockings.none? { |blocking| blocking.match? needle }
138
+ if gather_last_result
139
+ last_result.timeline << <<-EOS
140
+ The needle didn't match any of the #{blockings.length} blocking, which was a requirement.
141
+ \tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
142
+ EOS
143
+ end
144
+
87
145
  if is_find_all
88
146
  return []
89
147
  else
@@ -91,83 +149,109 @@ class FuzzyMatch
91
149
  end
92
150
  end
93
151
 
94
- candidates = if must_match_at_least_one_word
95
- haystack.select do |straw|
152
+ if must_match_at_least_one_word
153
+ passed_word_requirement = haystack.select do |straw|
96
154
  (needle.words & straw.words).any?
97
155
  end
156
+ if gather_last_result
157
+ last_result.timeline << <<-EOS
158
+ Since :must_match_at_least_one_word => true, the competition was reduced to records sharing at least one word with the needle.
159
+ \tNeedle words: #{needle.words.map(&:inspect).join(', ')}
160
+ \tPassed (first 3): #{passed_word_requirement[0,3].map(&:render).map(&:inspect).join(', ')}
161
+ \tFailed (first 3): #{(haystack-passed_word_requirement)[0,3].map(&:render).map(&:inspect).join(', ')}
162
+ EOS
163
+ end
98
164
  else
99
- haystack
100
- end
101
-
102
- if gather_last_result
103
- last_result.candidates = candidates
165
+ passed_word_requirement = haystack
104
166
  end
105
167
 
106
- joint, disjoint = if blockings.any?
107
- candidates.partition do |straw|
168
+ if blockings.any?
169
+ joint = passed_word_requirement.select do |straw|
108
170
  if first_blocking_decides
109
171
  blockings.detect { |blocking| blocking.match? needle }.try :join?, needle, straw
110
172
  else
111
173
  blockings.any? { |blocking| blocking.join? needle, straw }
112
174
  end
113
175
  end
176
+ if gather_last_result
177
+ last_result.timeline << <<-EOS
178
+ Since there were blockings, the competition was reduced to records in the same block as the needle.
179
+ \tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
180
+ \tPassed (first 3): #{joint[0,3].map(&:render).map(&:inspect).join(', ')}
181
+ \tFailed (first 3): #{(passed_word_requirement-joint)[0,3].map(&:render).map(&:inspect).join(', ')}
182
+ EOS
183
+ end
114
184
  else
115
- [ candidates.dup, [] ]
185
+ joint = passed_word_requirement.dup
116
186
  end
117
187
 
118
188
  if joint.none?
119
189
  if must_match_blocking
190
+ if gather_last_result
191
+ last_result.timeline << <<-EOS
192
+ Since :must_match_at_least_one_word => true and none of the competition was in the same block as the needle, the search stopped.
193
+ EOS
194
+ end
120
195
  if is_find_all
121
196
  return []
122
197
  else
123
198
  return nil
124
199
  end
125
200
  else
126
- # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
127
- joint = disjoint
128
- disjoint = []
201
+ joint = passed_word_requirement.dup
129
202
  end
130
203
  end
131
-
132
- if gather_last_result
133
- last_result.joint = joint
134
- last_result.disjoint = disjoint
135
- end
136
-
137
- possibly_identical, certainly_different = if identities.any?
138
- joint.partition do |straw|
204
+
205
+ if identities.any?
206
+ possibly_identical = joint.select do |straw|
139
207
  identities.all? do |identity|
140
208
  answer = identity.identical? needle, straw
141
209
  answer.nil? or answer == true
142
210
  end
143
211
  end
212
+ if gather_last_result
213
+ last_result.timeline << <<-EOS
214
+ Since there were identities, the competition was reduced to records that might be identical to the needle (in other words, are not certainly different)
215
+ \Identities (first 3): #{identities[0,3].map(&:inspect).join(', ')}
216
+ \tPassed (first 3): #{possibly_identical[0,3].map(&:render).map(&:inspect).join(', ')}
217
+ \tFailed (first 3): #{(joint-possibly_identical)[0,3].map(&:render).map(&:inspect).join(', ')}
218
+ EOS
219
+ end
144
220
  else
145
- [ joint.dup, [] ]
221
+ possibly_identical = joint.dup
146
222
  end
147
-
223
+
224
+ similarities = possibly_identical.map { |straw| needle.similarity straw }.sort.reverse
225
+
148
226
  if gather_last_result
149
- last_result.possibly_identical = possibly_identical
150
- last_result.certainly_different = certainly_different
227
+ last_result.timeline << <<-EOS
228
+ The competition was sorted in order of similarity to the needle.
229
+ \tSimilar (first 3): #{(similarities)[0,3].map(&:wrapper2).map(&:render).map(&:inspect).join(', ')}
230
+ EOS
151
231
  end
152
232
 
153
233
  if is_find_all
154
- return possibly_identical.map { |straw| straw.record }
155
- end
156
-
157
- similarities = possibly_identical.map { |straw| needle.similarity straw }.sort
158
-
159
- if gather_last_result
160
- last_result.similarities = similarities
234
+ return similarities.map { |similarity| similarity.wrapper2.record }
161
235
  end
162
236
 
163
- if best_similarity = similarities[-1] and best_similarity.best_score.dices_coefficient > 0
164
- record = best_similarity.wrapper2.record
237
+ winner = nil
238
+
239
+ if best_similarity = similarities.first and best_similarity.best_score.dices_coefficient_similar > 0
240
+ winner = best_similarity.wrapper2.record
165
241
  if gather_last_result
166
- last_result.record = record
167
- last_result.score = best_similarity.best_score.dices_coefficient
242
+ last_result.winner = winner
243
+ last_result.score = best_similarity.best_score.dices_coefficient_similar
244
+ last_result.timeline << <<-EOS
245
+ A winner was determined because the similarity score #{best_similarity.best_score.dices_coefficient_similar} is greater than zero.
246
+ EOS
168
247
  end
169
- record
248
+ elsif gather_last_result
249
+ last_result.timeline << <<-EOS
250
+ No winner assigned because similarity score was zero.
251
+ EOS
170
252
  end
253
+
254
+ winner
171
255
  end
172
256
 
173
257
  # Explain is like MySQL's EXPLAIN command. You give it a needle and it tells you how it was located (successfully or not) in the haystack.
@@ -175,63 +259,10 @@ class FuzzyMatch
175
259
  # d = FuzzyMatch.new ['737', '747', '757' ]
176
260
  # d.explain 'boeing 737-100'
177
261
  def explain(needle, options = {})
178
- record = find needle, options.merge(:gather_last_result => true)
179
- log "#" * 150
180
- log "# Match #{needle.inspect} => #{record.inspect}"
181
- log "#" * 150
182
- log
183
- log "Needle"
184
- log "-" * 150
185
- log last_result.needle.render
186
- log
187
- log "Stop words"
188
- log last_result.stop_words.blank? ? '(none)' : last_result.stop_words.map { |stop_word| stop_word.inspect }.join("\n")
189
- log
190
- log "Candidates"
191
- log "-" * 150
192
- log last_result.candidates.map { |record| record.render }.join("\n")
193
- log
194
- log "Tighteners"
195
- log "-" * 150
196
- log last_result.tighteners.blank? ? '(none)' : last_result.tighteners.map { |tightener| tightener.inspect }.join("\n")
197
- log
198
- log "Blockings"
199
- log "-" * 150
200
- log last_result.blockings.blank? ? '(none)' : last_result.blockings.map { |blocking| blocking.inspect }.join("\n")
201
- log
202
- log "Identities"
203
- log "-" * 150
204
- log last_result.identities.blank? ? '(none)' : last_result.identities.map { |blocking| blocking.inspect }.join("\n")
205
- log
206
- log "Joint"
207
- log "-" * 150
208
- log last_result.joint.blank? ? '(none)' : last_result.joint.map { |joint| joint.render }.join("\n")
209
- log
210
- log "Disjoint"
211
- log "-" * 150
212
- log last_result.disjoint.blank? ? '(none)' : last_result.disjoint.map { |disjoint| disjoint.render }.join("\n")
213
- log
214
- log "Possibly identical"
215
- log "-" * 150
216
- log last_result.possibly_identical.blank? ? '(none)' : last_result.possibly_identical.map { |possibly_identical| possibly_identical.render }.join("\n")
217
- log
218
- log "Certainly different"
219
- log "-" * 150
220
- log last_result.certainly_different.blank? ? '(none)' : last_result.certainly_different.map { |certainly_different| certainly_different.render }.join("\n")
221
- log
222
- log "Similarities"
223
- log "-" * 150
224
- log last_result.similarities.blank? ? '(none)' : last_result.similarities.reverse[0..9].map { |similarity| similarity.inspect }.join("\n")
225
- log
226
- log "Match"
227
- log "-" * 150
228
- log record.inspect
229
- end
230
-
231
- def log(str = '') #:nodoc:
232
- $stderr.puts str
262
+ find needle, options.merge(:gather_last_result => true)
263
+ last_result.explain
233
264
  end
234
-
265
+
235
266
  def freed?
236
267
  @freed == true
237
268
  end
data/test/test_cache.rb CHANGED
@@ -26,7 +26,7 @@ require 'fuzzy_match/cached_result'
26
26
  class Aircraft < ActiveRecord::Base
27
27
  set_primary_key :icao_code
28
28
 
29
- cache_fuzzy_match_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
29
+ cache_fuzzy_match_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
30
30
 
31
31
  def aircraft_description
32
32
  [manufacturer_name, model_name].compact.join(' ')
@@ -53,7 +53,7 @@ end
53
53
  class FlightSegment < ActiveRecord::Base
54
54
  set_primary_key :row_hash
55
55
 
56
- cache_fuzzy_match_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
56
+ cache_fuzzy_match_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
57
57
 
58
58
  extend CohortScope
59
59
  self.minimum_cohort_size = 1
@@ -24,7 +24,7 @@ class TestFuzzyMatch < Test::Unit::TestCase
24
24
  d = FuzzyMatch.new %w{ NISSAN HONDA }
25
25
  d.find 'MISSAM', :gather_last_result => true
26
26
  assert_equal 0.6, d.last_result.score
27
- assert_equal 'NISSAN', d.last_result.record
27
+ assert_equal 'NISSAN', d.last_result.winner
28
28
  end
29
29
 
30
30
  def test_004_false_positive_without_tightener
@@ -91,7 +91,7 @@ class TestFuzzyMatch < Test::Unit::TestCase
91
91
 
92
92
  # first_blocking_decides refers to the needle
93
93
  d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ], :first_blocking_decides => true
94
- assert_equal [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], d.find_all('Boeing ER6')
94
+ assert_equal ["Boeing ER6", "Boeing 747", "Boeing 747SR"], d.find_all('Boeing ER6')
95
95
 
96
96
  d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing (7|E)/i, /boeing/i ], :first_blocking_decides => true
97
97
  assert_equal [ 'Boeing ER6' ], d.find_all('Boeing ER6')
@@ -108,6 +108,8 @@ class TestFuzzyMatch < Test::Unit::TestCase
108
108
  haystack = [ab, ba]
109
109
  by_first = FuzzyMatch.new haystack, :read => :one
110
110
  by_last = FuzzyMatch.new haystack, :read => :two
111
+ assert_equal :one, by_first.read
112
+ assert_equal :two, by_last.read
111
113
  assert_equal ab, by_first.find('a')
112
114
  assert_equal ab, by_last.find('b')
113
115
  assert_equal ba, by_first.find('b')
@@ -154,6 +156,10 @@ class TestFuzzyMatch < Test::Unit::TestCase
154
156
  def test_019_must_match_at_least_one_word
155
157
  d = FuzzyMatch.new %w{ RATZ CATZ }, :must_match_at_least_one_word => true
156
158
  assert_equal nil, d.find('RITZ')
159
+
160
+ d = FuzzyMatch.new ["Foo's Bar"], :must_match_at_least_one_word => true
161
+ assert_equal nil, d.find("Jacob's")
162
+ assert_equal "Foo's Bar", d.find("Foo's")
157
163
  end
158
164
 
159
165
  def test_020_stop_words
@@ -167,20 +173,19 @@ class TestFuzzyMatch < Test::Unit::TestCase
167
173
  assert_equal 'A HOTEL', d.find('A HTL')
168
174
  end
169
175
 
170
- def test_021_explain
176
+ def test_021_explain_prints_to_stdout
171
177
  require 'stringio'
172
178
  capture = StringIO.new
173
179
  begin
174
- old_stderr = $stderr
175
- $stderr = capture
180
+ old_stdout = $stdout
181
+ $stdout = capture
176
182
  d = FuzzyMatch.new %w{ RATZ CATZ }
177
183
  d.explain('RITZ')
178
184
  ensure
179
- $stderr = old_stderr
185
+ $stdout = old_stdout
180
186
  end
181
187
  capture.rewind
182
188
  assert capture.read.include?('CATZ')
183
- capture.close
184
189
  end
185
190
 
186
191
  def test_022_compare_words_with_words
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-13 00:00:00.000000000Z
12
+ date: 2012-01-16 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &2155828460 !ruby/object:Gem::Requirement
16
+ requirement: &2153333800 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2155828460
24
+ version_requirements: *2153333800
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: remote_table
27
- requirement: &2155827960 !ruby/object:Gem::Requirement
27
+ requirement: &2153333320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2155827960
35
+ version_requirements: *2153333320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activerecord
38
- requirement: &2155827320 !ruby/object:Gem::Requirement
38
+ requirement: &2153332560 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2155827320
46
+ version_requirements: *2153332560
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: mysql
49
- requirement: &2155826780 !ruby/object:Gem::Requirement
49
+ requirement: &2153331800 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155826780
57
+ version_requirements: *2153331800
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cohort_scope
60
- requirement: &2155826080 !ruby/object:Gem::Requirement
60
+ requirement: &2153325800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155826080
68
+ version_requirements: *2153325800
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: weighted_average
71
- requirement: &2155825480 !ruby/object:Gem::Requirement
71
+ requirement: &2153325220 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2155825480
79
+ version_requirements: *2153325220
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rake
82
- requirement: &2155825060 !ruby/object:Gem::Requirement
82
+ requirement: &2153322620 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2155825060
90
+ version_requirements: *2153322620
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: activesupport
93
- requirement: &2155824520 !ruby/object:Gem::Requirement
93
+ requirement: &2153322100 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *2155824520
101
+ version_requirements: *2153322100
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: to_regexp
104
- requirement: &2155819080 !ruby/object:Gem::Requirement
104
+ requirement: &2153321580 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,7 +109,7 @@ dependencies:
109
109
  version: 0.0.3
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *2155819080
112
+ version_requirements: *2153321580
113
113
  description: Find a needle in a haystack using string similarity and (optionally)
114
114
  regexp rules. Replaces loose_tight_dictionary.
115
115
  email: