fuzzy_match 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Find a needle in a haystack based on string similarity (using the Pair Distance algorithm and Levenshtein distance) and regular expressions.
4
4
 
5
- Replaces [loose_tight_dictionary](https://github.com/seamusabshere/loose_tight_dictionary) because that was a confusing name.
5
+ Replaces {loose_tight_dictionary}[https://github.com/seamusabshere/loose_tight_dictionary] because that was a confusing name.
6
6
 
7
7
  == Quickstart
8
8
 
@@ -24,7 +24,7 @@ class FuzzyMatch
24
24
  # required options:
25
25
  # :primary_key - what to call on this class
26
26
  # :foreign_key - what to call on the other class
27
- def cache_fuzzy_match_matches_with(other_active_record_class, options)
27
+ def cache_fuzzy_match_with(other_active_record_class, options)
28
28
  other = other_active_record_class.to_s.singularize.camelcase
29
29
  me = name
30
30
  if me < other
@@ -1,17 +1,41 @@
1
+ require 'erb'
2
+
1
3
  class FuzzyMatch
2
4
  class Result #:nodoc: all
5
+ EXPLANATION = <<-ERB
6
+ You looked for <%= needle.render.inspect %>
7
+
8
+ <% if winner %>It was matched with "<%= winner %>"<% else %>No match was found<% end %>
9
+
10
+ # THE HAYSTACK
11
+
12
+ The haystack reader was <%= read.inspect %>.
13
+
14
+ The haystack contained <%= haystack.length %> records like <%= haystack[0, 3].map(&:render).map(&:inspect).join(', ') %>
15
+
16
+ # HOW IT WAS MATCHED
17
+ <% timeline.each_with_index do |event, index| %>
18
+ (<%= index+1 %>) <%= event %>
19
+ <% end %>
20
+ ERB
21
+
22
+ def timeline
23
+ @timeline ||= []
24
+ end
25
+
3
26
  attr_accessor :needle
27
+ attr_accessor :read
28
+ attr_accessor :haystack
29
+ attr_accessor :options
4
30
  attr_accessor :tighteners
5
31
  attr_accessor :blockings
6
32
  attr_accessor :identities
7
33
  attr_accessor :stop_words
8
- attr_accessor :candidates
9
- attr_accessor :joint
10
- attr_accessor :disjoint
11
- attr_accessor :possibly_identical
12
- attr_accessor :certainly_different
13
- attr_accessor :similarities
14
- attr_accessor :record
34
+ attr_accessor :winner
15
35
  attr_accessor :score
36
+
37
+ def explain
38
+ $stdout.puts ::ERB.new(EXPLANATION, 0, '%<').result(binding)
39
+ end
16
40
  end
17
41
  end
@@ -14,13 +14,13 @@ class FuzzyMatch
14
14
  end
15
15
 
16
16
  def inspect
17
- %{#<Score: dices_coefficient=#{dices_coefficient} levenshtein=#{levenshtein}>}
17
+ %{#<Score: dices_coefficient=#{dices_coefficient_similar} levenshtein=#{levenshtein_similar}>}
18
18
  end
19
19
 
20
20
  def <=>(other)
21
- by_dices_coefficient = (dices_coefficient <=> other.dices_coefficient)
21
+ by_dices_coefficient = (dices_coefficient_similar <=> other.dices_coefficient_similar)
22
22
  if by_dices_coefficient == 0
23
- levenshtein <=> other.levenshtein
23
+ levenshtein_similar <=> other.levenshtein_similar
24
24
  else
25
25
  by_dices_coefficient
26
26
  end
@@ -32,11 +32,11 @@ class FuzzyMatch
32
32
 
33
33
  if defined?(::Amatch)
34
34
 
35
- def dices_coefficient
35
+ def dices_coefficient_similar
36
36
  str1.pair_distance_similar str2
37
37
  end
38
38
 
39
- def levenshtein
39
+ def levenshtein_similar
40
40
  str1.levenshtein_similar str2
41
41
  end
42
42
 
@@ -44,7 +44,7 @@ class FuzzyMatch
44
44
 
45
45
  SPACE = ' '
46
46
  # http://stackoverflow.com/questions/653157/a-better-similarity-ranking-algorithm-for-variable-length-strings
47
- def dices_coefficient
47
+ def dices_coefficient_similar
48
48
  if str1 == str2
49
49
  return 1.0
50
50
  elsif str1.length == 1 and str2.length == 1
@@ -77,7 +77,7 @@ class FuzzyMatch
77
77
  # extracted/adapted from the text gem version 1.0.2
78
78
  # normalization added for utf-8 strings
79
79
  # lib/text/levenshtein.rb
80
- def levenshtein
80
+ def levenshtein_similar
81
81
  if utf8?
82
82
  unpack_rule = 'U*'
83
83
  else
@@ -118,8 +118,8 @@ class FuzzyMatch
118
118
  end
119
119
 
120
120
  extend ::ActiveSupport::Memoizable
121
- memoize :dices_coefficient
122
- memoize :levenshtein
121
+ memoize :dices_coefficient_similar
122
+ memoize :levenshtein_similar
123
123
  memoize :utf8?
124
124
  end
125
125
  end
@@ -1,3 +1,3 @@
1
1
  class FuzzyMatch
2
- VERSION = '1.0.5'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -3,20 +3,25 @@ class FuzzyMatch
3
3
  class Wrapper #:nodoc: all
4
4
  attr_reader :fuzzy_match
5
5
  attr_reader :record
6
- attr_reader :read
6
+ attr_reader :literal
7
+ attr_reader :rendered
7
8
 
8
- def initialize(fuzzy_match, record, read = nil)
9
+ def initialize(fuzzy_match, record, literal = false)
9
10
  @fuzzy_match = fuzzy_match
10
11
  @record = record
11
- @read = read
12
+ @literal = literal
12
13
  end
13
14
 
14
15
  def inspect
15
16
  "#<Wrapper render=#{render} variants=#{variants.length}>"
16
17
  end
18
+
19
+ def read
20
+ fuzzy_match.read unless literal
21
+ end
17
22
 
18
23
  def render
19
- return @render if rendered?
24
+ return @render if rendered
20
25
  str = case read
21
26
  when ::Proc
22
27
  read.call record
@@ -42,7 +47,8 @@ class FuzzyMatch
42
47
 
43
48
  alias :to_str :render
44
49
 
45
- WORD_BOUNDARY = %r{\s*\b\s*}
50
+ # "Foo's Bar" should be treated as [ "Foo's", "Bar" ], so we don't use traditional regexp word boundaries (\b)
51
+ WORD_BOUNDARY = %r{\s+}
46
52
  def words
47
53
  @words ||= render.split(WORD_BOUNDARY)
48
54
  end
@@ -59,9 +65,5 @@ class FuzzyMatch
59
65
  memo
60
66
  end.uniq
61
67
  end
62
-
63
- def rendered?
64
- @rendered == true
65
- end
66
68
  end
67
69
  end
data/lib/fuzzy_match.rb CHANGED
@@ -17,33 +17,72 @@ class FuzzyMatch
17
17
  autoload :Score, 'fuzzy_match/score'
18
18
  autoload :CachedResult, 'fuzzy_match/cached_result'
19
19
 
20
+ DEFAULT_OPTIONS = {
21
+ :first_blocking_decides => false,
22
+ :must_match_blocking => false,
23
+ :must_match_at_least_one_word => false,
24
+ :gather_last_result => false,
25
+ :find_all => false
26
+ }
27
+
20
28
  attr_reader :haystack
21
29
  attr_reader :blockings
22
30
  attr_reader :identities
23
31
  attr_reader :tighteners
24
32
  attr_reader :stop_words
25
- attr_reader :default_first_blocking_decides
26
- attr_reader :default_must_match_blocking
27
- attr_reader :default_must_match_at_least_one_word
33
+ attr_reader :read
34
+ attr_reader :default_options
28
35
 
29
- # haystack - a bunch of records
30
- # options
36
+ # haystack - a bunch of records that will compete to see who best matches the needle
37
+ #
38
+ # rules (can only be specified at initialization or by using a setter)
31
39
  # * tighteners: regexps (see readme)
32
40
  # * identities: regexps
33
41
  # * blockings: regexps
34
42
  # * stop_words: regexps
35
43
  # * read: how to interpret each entry in the 'haystack', either a Proc or a symbol
36
- def initialize(records, options = {})
37
- options = options.symbolize_keys
38
- @default_first_blocking_decides = options[:first_blocking_decides]
39
- @default_must_match_blocking = options[:must_match_blocking]
40
- @default_must_match_at_least_one_word = options[:must_match_at_least_one_word]
41
- @blockings = options.fetch(:blockings, []).map { |regexp_or_str| Blocking.new regexp_or_str }
42
- @identities = options.fetch(:identities, []).map { |regexp_or_str| Identity.new regexp_or_str }
43
- @tighteners = options.fetch(:tighteners, []).map { |regexp_or_str| Tightener.new regexp_or_str }
44
- @stop_words = options.fetch(:stop_words, []).map { |regexp_or_str| StopWord.new regexp_or_str }
45
- read = options[:read] || options[:haystack_reader]
46
- @haystack = records.map { |record| Wrapper.new self, record, read }
44
+ #
45
+ # options (can be specified at initialization or when calling #find)
46
+ # * first_blocking_decides
47
+ # * must_match_blocking
48
+ # * must_match_at_least_one_word
49
+ # * gather_last_result
50
+ # * find_all
51
+ def initialize(competitors, options_and_rules = {})
52
+ options_and_rules = options_and_rules.symbolize_keys
53
+
54
+ # rules
55
+ self.blockings = options_and_rules.delete(:blockings) || []
56
+ self.identities = options_and_rules.delete(:identities) || []
57
+ self.tighteners = options_and_rules.delete(:tighteners) || []
58
+ self.stop_words = options_and_rules.delete(:stop_words) || []
59
+ @read = options_and_rules.delete(:read) || options_and_rules.delete(:haystack_reader)
60
+
61
+ # options
62
+ @default_options = options_and_rules.reverse_merge(DEFAULT_OPTIONS).freeze
63
+
64
+ # do this last
65
+ self.haystack = competitors
66
+ end
67
+
68
+ def blockings=(ary)
69
+ @blockings = ary.map { |regexp_or_str| Blocking.new regexp_or_str }
70
+ end
71
+
72
+ def identities=(ary)
73
+ @identities = ary.map { |regexp_or_str| Identity.new regexp_or_str }
74
+ end
75
+
76
+ def tighteners=(ary)
77
+ @tighteners = ary.map { |regexp_or_str| Tightener.new regexp_or_str }
78
+ end
79
+
80
+ def stop_words=(ary)
81
+ @stop_words = ary.map { |regexp_or_str| StopWord.new regexp_or_str }
82
+ end
83
+
84
+ def haystack=(ary)
85
+ @haystack = ary.map { |competitor| Wrapper.new self, competitor }
47
86
  end
48
87
 
49
88
  def last_result
@@ -58,16 +97,24 @@ class FuzzyMatch
58
97
  def find(needle, options = {})
59
98
  raise ::RuntimeError, "[fuzzy_match] Dictionary has already been freed, can't perform more finds" if freed?
60
99
 
61
- options = options.symbolize_keys
62
- gather_last_result = options.fetch(:gather_last_result, false)
63
- is_find_all = options.fetch(:find_all, false)
64
- first_blocking_decides = options.fetch(:first_blocking_decides, default_first_blocking_decides)
65
- must_match_blocking = options.fetch(:must_match_blocking, default_must_match_blocking)
66
- must_match_at_least_one_word = options.fetch(:must_match_at_least_one_word, default_must_match_at_least_one_word)
100
+ options = options.symbolize_keys.reverse_merge default_options
101
+
102
+ gather_last_result = options[:gather_last_result]
103
+ is_find_all = options[:find_all]
104
+ first_blocking_decides = options[:first_blocking_decides]
105
+ must_match_blocking = options[:must_match_blocking]
106
+ must_match_at_least_one_word = options[:must_match_at_least_one_word]
67
107
 
68
108
  if gather_last_result
69
109
  free_last_result
70
110
  @last_result = Result.new
111
+ last_result.read = read
112
+ last_result.haystack = haystack
113
+ last_result.options = options
114
+ last_result.timeline << <<-EOS
115
+ Options were set, either by you or by falling back to defaults.
116
+ \tOptions: #{options.inspect}
117
+ EOS
71
118
  end
72
119
 
73
120
  if gather_last_result
@@ -77,13 +124,24 @@ class FuzzyMatch
77
124
  last_result.stop_words = stop_words
78
125
  end
79
126
 
80
- needle = Wrapper.new self, needle
127
+ needle = Wrapper.new self, needle, true
81
128
 
82
129
  if gather_last_result
83
130
  last_result.needle = needle
131
+ last_result.timeline << <<-EOS
132
+ The needle's #{needle.variants.length} variants were enumerated.
133
+ \tVariants: #{needle.variants.map(&:inspect).join(', ')}
134
+ EOS
84
135
  end
85
136
 
86
137
  if must_match_blocking and blockings.any? and blockings.none? { |blocking| blocking.match? needle }
138
+ if gather_last_result
139
+ last_result.timeline << <<-EOS
140
+ The needle didn't match any of the #{blockings.length} blockings, which was a requirement.
141
+ \tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
142
+ EOS
143
+ end
144
+
87
145
  if is_find_all
88
146
  return []
89
147
  else
@@ -91,83 +149,109 @@ class FuzzyMatch
91
149
  end
92
150
  end
93
151
 
94
- candidates = if must_match_at_least_one_word
95
- haystack.select do |straw|
152
+ if must_match_at_least_one_word
153
+ passed_word_requirement = haystack.select do |straw|
96
154
  (needle.words & straw.words).any?
97
155
  end
156
+ if gather_last_result
157
+ last_result.timeline << <<-EOS
158
+ Since :must_match_at_least_one_word => true, the competition was reduced to records sharing at least one word with the needle.
159
+ \tNeedle words: #{needle.words.map(&:inspect).join(', ')}
160
+ \tPassed (first 3): #{passed_word_requirement[0,3].map(&:render).map(&:inspect).join(', ')}
161
+ \tFailed (first 3): #{(haystack-passed_word_requirement)[0,3].map(&:render).map(&:inspect).join(', ')}
162
+ EOS
163
+ end
98
164
  else
99
- haystack
100
- end
101
-
102
- if gather_last_result
103
- last_result.candidates = candidates
165
+ passed_word_requirement = haystack
104
166
  end
105
167
 
106
- joint, disjoint = if blockings.any?
107
- candidates.partition do |straw|
168
+ if blockings.any?
169
+ joint = passed_word_requirement.select do |straw|
108
170
  if first_blocking_decides
109
171
  blockings.detect { |blocking| blocking.match? needle }.try :join?, needle, straw
110
172
  else
111
173
  blockings.any? { |blocking| blocking.join? needle, straw }
112
174
  end
113
175
  end
176
+ if gather_last_result
177
+ last_result.timeline << <<-EOS
178
+ Since there were blockings, the competition was reduced to records in the same block as the needle.
179
+ \tBlockings (first 3): #{blockings[0,3].map(&:inspect).join(', ')}
180
+ \tPassed (first 3): #{joint[0,3].map(&:render).map(&:inspect).join(', ')}
181
+ \tFailed (first 3): #{(passed_word_requirement-joint)[0,3].map(&:render).map(&:inspect).join(', ')}
182
+ EOS
183
+ end
114
184
  else
115
- [ candidates.dup, [] ]
185
+ joint = passed_word_requirement.dup
116
186
  end
117
187
 
118
188
  if joint.none?
119
189
  if must_match_blocking
190
+ if gather_last_result
191
+ last_result.timeline << <<-EOS
192
+ Since :must_match_blocking => true and none of the competition was in the same block as the needle, the search stopped.
193
+ EOS
194
+ end
120
195
  if is_find_all
121
196
  return []
122
197
  else
123
198
  return nil
124
199
  end
125
200
  else
126
- # special case: the needle didn't fit anywhere, but must_match_blocking is false, so we'll try it against everything
127
- joint = disjoint
128
- disjoint = []
201
+ joint = passed_word_requirement.dup
129
202
  end
130
203
  end
131
-
132
- if gather_last_result
133
- last_result.joint = joint
134
- last_result.disjoint = disjoint
135
- end
136
-
137
- possibly_identical, certainly_different = if identities.any?
138
- joint.partition do |straw|
204
+
205
+ if identities.any?
206
+ possibly_identical = joint.select do |straw|
139
207
  identities.all? do |identity|
140
208
  answer = identity.identical? needle, straw
141
209
  answer.nil? or answer == true
142
210
  end
143
211
  end
212
+ if gather_last_result
213
+ last_result.timeline << <<-EOS
214
+ Since there were identities, the competition was reduced to records that might be identical to the needle (in other words, are not certainly different)
215
+ \tIdentities (first 3): #{identities[0,3].map(&:inspect).join(', ')}
216
+ \tPassed (first 3): #{possibly_identical[0,3].map(&:render).map(&:inspect).join(', ')}
217
+ \tFailed (first 3): #{(joint-possibly_identical)[0,3].map(&:render).map(&:inspect).join(', ')}
218
+ EOS
219
+ end
144
220
  else
145
- [ joint.dup, [] ]
221
+ possibly_identical = joint.dup
146
222
  end
147
-
223
+
224
+ similarities = possibly_identical.map { |straw| needle.similarity straw }.sort.reverse
225
+
148
226
  if gather_last_result
149
- last_result.possibly_identical = possibly_identical
150
- last_result.certainly_different = certainly_different
227
+ last_result.timeline << <<-EOS
228
+ The competition was sorted in order of similarity to the needle.
229
+ \tSimilar (first 3): #{(similarities)[0,3].map(&:wrapper2).map(&:render).map(&:inspect).join(', ')}
230
+ EOS
151
231
  end
152
232
 
153
233
  if is_find_all
154
- return possibly_identical.map { |straw| straw.record }
155
- end
156
-
157
- similarities = possibly_identical.map { |straw| needle.similarity straw }.sort
158
-
159
- if gather_last_result
160
- last_result.similarities = similarities
234
+ return similarities.map { |similarity| similarity.wrapper2.record }
161
235
  end
162
236
 
163
- if best_similarity = similarities[-1] and best_similarity.best_score.dices_coefficient > 0
164
- record = best_similarity.wrapper2.record
237
+ winner = nil
238
+
239
+ if best_similarity = similarities.first and best_similarity.best_score.dices_coefficient_similar > 0
240
+ winner = best_similarity.wrapper2.record
165
241
  if gather_last_result
166
- last_result.record = record
167
- last_result.score = best_similarity.best_score.dices_coefficient
242
+ last_result.winner = winner
243
+ last_result.score = best_similarity.best_score.dices_coefficient_similar
244
+ last_result.timeline << <<-EOS
245
+ A winner was determined because the similarity score #{best_similarity.best_score.dices_coefficient_similar} is greater than zero.
246
+ EOS
168
247
  end
169
- record
248
+ elsif gather_last_result
249
+ last_result.timeline << <<-EOS
250
+ No winner assigned because similarity score was zero.
251
+ EOS
170
252
  end
253
+
254
+ winner
171
255
  end
172
256
 
173
257
  # Explain is like mysql's EXPLAIN command. You give it a needle and it tells you about how it was located (successfully or not) in the haystack.
@@ -175,63 +259,10 @@ class FuzzyMatch
175
259
  # d = FuzzyMatch.new ['737', '747', '757' ]
176
260
  # d.explain 'boeing 737-100'
177
261
  def explain(needle, options = {})
178
- record = find needle, options.merge(:gather_last_result => true)
179
- log "#" * 150
180
- log "# Match #{needle.inspect} => #{record.inspect}"
181
- log "#" * 150
182
- log
183
- log "Needle"
184
- log "-" * 150
185
- log last_result.needle.render
186
- log
187
- log "Stop words"
188
- log last_result.stop_words.blank? ? '(none)' : last_result.stop_words.map { |stop_word| stop_word.inspect }.join("\n")
189
- log
190
- log "Candidates"
191
- log "-" * 150
192
- log last_result.candidates.map { |record| record.render }.join("\n")
193
- log
194
- log "Tighteners"
195
- log "-" * 150
196
- log last_result.tighteners.blank? ? '(none)' : last_result.tighteners.map { |tightener| tightener.inspect }.join("\n")
197
- log
198
- log "Blockings"
199
- log "-" * 150
200
- log last_result.blockings.blank? ? '(none)' : last_result.blockings.map { |blocking| blocking.inspect }.join("\n")
201
- log
202
- log "Identities"
203
- log "-" * 150
204
- log last_result.identities.blank? ? '(none)' : last_result.identities.map { |blocking| blocking.inspect }.join("\n")
205
- log
206
- log "Joint"
207
- log "-" * 150
208
- log last_result.joint.blank? ? '(none)' : last_result.joint.map { |joint| joint.render }.join("\n")
209
- log
210
- log "Disjoint"
211
- log "-" * 150
212
- log last_result.disjoint.blank? ? '(none)' : last_result.disjoint.map { |disjoint| disjoint.render }.join("\n")
213
- log
214
- log "Possibly identical"
215
- log "-" * 150
216
- log last_result.possibly_identical.blank? ? '(none)' : last_result.possibly_identical.map { |possibly_identical| possibly_identical.render }.join("\n")
217
- log
218
- log "Certainly different"
219
- log "-" * 150
220
- log last_result.certainly_different.blank? ? '(none)' : last_result.certainly_different.map { |certainly_different| certainly_different.render }.join("\n")
221
- log
222
- log "Similarities"
223
- log "-" * 150
224
- log last_result.similarities.blank? ? '(none)' : last_result.similarities.reverse[0..9].map { |similarity| similarity.inspect }.join("\n")
225
- log
226
- log "Match"
227
- log "-" * 150
228
- log record.inspect
229
- end
230
-
231
- def log(str = '') #:nodoc:
232
- $stderr.puts str
262
+ find needle, options.merge(:gather_last_result => true)
263
+ last_result.explain
233
264
  end
234
-
265
+
235
266
  def freed?
236
267
  @freed == true
237
268
  end
data/test/test_cache.rb CHANGED
@@ -26,7 +26,7 @@ require 'fuzzy_match/cached_result'
26
26
  class Aircraft < ActiveRecord::Base
27
27
  set_primary_key :icao_code
28
28
 
29
- cache_fuzzy_match_matches_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
29
+ cache_fuzzy_match_with :flight_segments, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
30
30
 
31
31
  def aircraft_description
32
32
  [manufacturer_name, model_name].compact.join(' ')
@@ -53,7 +53,7 @@ end
53
53
  class FlightSegment < ActiveRecord::Base
54
54
  set_primary_key :row_hash
55
55
 
56
- cache_fuzzy_match_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
56
+ cache_fuzzy_match_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :aircraft_description
57
57
 
58
58
  extend CohortScope
59
59
  self.minimum_cohort_size = 1
@@ -24,7 +24,7 @@ class TestFuzzyMatch < Test::Unit::TestCase
24
24
  d = FuzzyMatch.new %w{ NISSAN HONDA }
25
25
  d.find 'MISSAM', :gather_last_result => true
26
26
  assert_equal 0.6, d.last_result.score
27
- assert_equal 'NISSAN', d.last_result.record
27
+ assert_equal 'NISSAN', d.last_result.winner
28
28
  end
29
29
 
30
30
  def test_004_false_positive_without_tightener
@@ -91,7 +91,7 @@ class TestFuzzyMatch < Test::Unit::TestCase
91
91
 
92
92
  # first_blocking_decides refers to the needle
93
93
  d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing/i ], :first_blocking_decides => true
94
- assert_equal [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], d.find_all('Boeing ER6')
94
+ assert_equal ["Boeing ER6", "Boeing 747", "Boeing 747SR"], d.find_all('Boeing ER6')
95
95
 
96
96
  d = FuzzyMatch.new [ 'Boeing 747', 'Boeing 747SR', 'Boeing ER6' ], :blockings => [ /(boeing \d{3})/i, /boeing (7|E)/i, /boeing/i ], :first_blocking_decides => true
97
97
  assert_equal [ 'Boeing ER6' ], d.find_all('Boeing ER6')
@@ -108,6 +108,8 @@ class TestFuzzyMatch < Test::Unit::TestCase
108
108
  haystack = [ab, ba]
109
109
  by_first = FuzzyMatch.new haystack, :read => :one
110
110
  by_last = FuzzyMatch.new haystack, :read => :two
111
+ assert_equal :one, by_first.read
112
+ assert_equal :two, by_last.read
111
113
  assert_equal ab, by_first.find('a')
112
114
  assert_equal ab, by_last.find('b')
113
115
  assert_equal ba, by_first.find('b')
@@ -154,6 +156,10 @@ class TestFuzzyMatch < Test::Unit::TestCase
154
156
  def test_019_must_match_at_least_one_word
155
157
  d = FuzzyMatch.new %w{ RATZ CATZ }, :must_match_at_least_one_word => true
156
158
  assert_equal nil, d.find('RITZ')
159
+
160
+ d = FuzzyMatch.new ["Foo's Bar"], :must_match_at_least_one_word => true
161
+ assert_equal nil, d.find("Jacob's")
162
+ assert_equal "Foo's Bar", d.find("Foo's")
157
163
  end
158
164
 
159
165
  def test_020_stop_words
@@ -167,20 +173,19 @@ class TestFuzzyMatch < Test::Unit::TestCase
167
173
  assert_equal 'A HOTEL', d.find('A HTL')
168
174
  end
169
175
 
170
- def test_021_explain
176
+ def test_021_explain_prints_to_stdout
171
177
  require 'stringio'
172
178
  capture = StringIO.new
173
179
  begin
174
- old_stderr = $stderr
175
- $stderr = capture
180
+ old_stdout = $stdout
181
+ $stdout = capture
176
182
  d = FuzzyMatch.new %w{ RATZ CATZ }
177
183
  d.explain('RITZ')
178
184
  ensure
179
- $stderr = old_stderr
185
+ $stdout = old_stdout
180
186
  end
181
187
  capture.rewind
182
188
  assert capture.read.include?('CATZ')
183
- capture.close
184
189
  end
185
190
 
186
191
  def test_022_compare_words_with_words
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-13 00:00:00.000000000Z
12
+ date: 2012-01-16 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: shoulda
16
- requirement: &2155828460 !ruby/object:Gem::Requirement
16
+ requirement: &2153333800 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2155828460
24
+ version_requirements: *2153333800
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: remote_table
27
- requirement: &2155827960 !ruby/object:Gem::Requirement
27
+ requirement: &2153333320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2155827960
35
+ version_requirements: *2153333320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: activerecord
38
- requirement: &2155827320 !ruby/object:Gem::Requirement
38
+ requirement: &2153332560 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2155827320
46
+ version_requirements: *2153332560
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: mysql
49
- requirement: &2155826780 !ruby/object:Gem::Requirement
49
+ requirement: &2153331800 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2155826780
57
+ version_requirements: *2153331800
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cohort_scope
60
- requirement: &2155826080 !ruby/object:Gem::Requirement
60
+ requirement: &2153325800 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2155826080
68
+ version_requirements: *2153325800
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: weighted_average
71
- requirement: &2155825480 !ruby/object:Gem::Requirement
71
+ requirement: &2153325220 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2155825480
79
+ version_requirements: *2153325220
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rake
82
- requirement: &2155825060 !ruby/object:Gem::Requirement
82
+ requirement: &2153322620 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2155825060
90
+ version_requirements: *2153322620
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: activesupport
93
- requirement: &2155824520 !ruby/object:Gem::Requirement
93
+ requirement: &2153322100 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '3'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *2155824520
101
+ version_requirements: *2153322100
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: to_regexp
104
- requirement: &2155819080 !ruby/object:Gem::Requirement
104
+ requirement: &2153321580 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,7 +109,7 @@ dependencies:
109
109
  version: 0.0.3
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *2155819080
112
+ version_requirements: *2153321580
113
113
  description: Find a needle in a haystack using string similarity and (optionally)
114
114
  regexp rules. Replaces loose_tight_dictionary.
115
115
  email: