fuzzy_match 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTlmMmE3MDI3MWQ0NzY0NGE5N2Q4ZDY4MmI5NjUzZTg5YWU1OWE5OQ==
5
+ data.tar.gz: !binary |-
6
+ MzU3MDc3NjQ1NDczNWFhNWE0ZDdlZGRmYjlhYWQ3Y2YyZTNiMjRhMQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MDcyZDk5OGY5MTQwNTEyMDA1YmQ1MzdiYzYyZTczMWExNzI0NjhlYWNkYzcx
10
+ Y2UwNzgwN2I5YmI3MjVhMzIwMDljZmVmMTFkODQ2MjY2NDdkNzViZjlhNTcy
11
+ NjliMmU5NTAwOTBiYTYxNjk4ZDkwZWM0OWMzMTdjZTU4ZjQ4NDk=
12
+ data.tar.gz: !binary |-
13
+ MTg0MGVlYTc3NTY0NjMxZWMwNWFmZTdhYmRhOWM4MGJmN2QwYjc3NmNiYzQz
14
+ MzU3ODc2NDRjNDJjNzhiYmQwMmNkNmQ3MTdjYzMyNjM2YzBjMDEwOGYzNzgy
15
+ NmFkZTM0M2E2YzkwZjY1YjM4ZTEzY2Y1YmU5MmY3MDZjNTJhMzg=
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ 1.5.0 / 2013-04-03
2
+
3
+ * Breaking changes
4
+
5
+ * No longer automatically calls to_regexp on rules - you must pass Regexps to normalizers, groupings, etc.
6
+
7
+ * Enhancements
8
+
9
+ * FuzzyMatch#find_best returns all top results with the same score - thanks @ihough !
10
+ * Doesn't require the to_regexp gem for you - you can still use it yourself if you want to convert strings into regexps safely
11
+
1
12
  1.4.1 / 2013-01-17
2
13
 
3
14
  * Bug fixes
data/fuzzy_match.gemspec CHANGED
@@ -17,8 +17,6 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency 'to_regexp', '>=0.0.3'
21
-
22
20
  # needed if you use FuzzyMatch::CachedResult
23
21
  s.add_development_dependency 'active_record_inline_schema', '>=0.4.0'
24
22
 
@@ -3,8 +3,11 @@ class FuzzyMatch
3
3
  class Rule
4
4
  attr_reader :regexp
5
5
 
6
- def initialize(regexp_or_str)
7
- @regexp = regexp_or_str.to_regexp
6
+ def initialize(regexp)
7
+ unless regexp.is_a?(::Regexp)
8
+ raise ArgumentError, "[FuzzyMatch] Rules must be set with Regexp objects, but got #{regexp.inspect} (#{regexp.class.name})"
9
+ end
10
+ @regexp = regexp
8
11
  end
9
12
 
10
13
  def ==(other)
@@ -3,6 +3,8 @@ require 'fuzzy_match/score/amatch'
3
3
 
4
4
  class FuzzyMatch
5
5
  class Score
6
+ include Comparable
7
+
6
8
  attr_reader :str1
7
9
  attr_reader :str2
8
10
 
@@ -1,3 +1,3 @@
1
1
  class FuzzyMatch
2
- VERSION = '1.4.1'
2
+ VERSION = '1.5.0'
3
3
  end
data/lib/fuzzy_match.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'to_regexp'
2
-
3
1
  require 'fuzzy_match/rule'
4
2
  require 'fuzzy_match/rule/normalizer'
5
3
  require 'fuzzy_match/rule/stop_word'
@@ -42,7 +40,8 @@ class FuzzyMatch
42
40
  :gather_last_result => false,
43
41
  :find_all => false,
44
42
  :find_all_with_score => false,
45
- :threshold => 0
43
+ :threshold => 0,
44
+ :find_best => false,
46
45
  }
47
46
 
48
47
  self.engine = DEFAULT_ENGINE
@@ -94,19 +93,19 @@ class FuzzyMatch
94
93
  end
95
94
 
96
95
  def groupings=(ary)
97
- @groupings = ary.map { |regexp_or_str| Rule::Grouping.new regexp_or_str }
96
+ @groupings = ary.map { |regexp| Rule::Grouping.new regexp }
98
97
  end
99
98
 
100
99
  def identities=(ary)
101
- @identities = ary.map { |regexp_or_str| Rule::Identity.new regexp_or_str }
100
+ @identities = ary.map { |regexp| Rule::Identity.new regexp }
102
101
  end
103
102
 
104
103
  def normalizers=(ary)
105
- @normalizers = ary.map { |regexp_or_str| Rule::Normalizer.new regexp_or_str }
104
+ @normalizers = ary.map { |regexp| Rule::Normalizer.new regexp }
106
105
  end
107
106
 
108
107
  def stop_words=(ary)
109
- @stop_words = ary.map { |regexp_or_str| Rule::StopWord.new regexp_or_str }
108
+ @stop_words = ary.map { |regexp| Rule::StopWord.new regexp }
110
109
  end
111
110
 
112
111
  def haystack=(ary)
@@ -122,6 +121,11 @@ class FuzzyMatch
122
121
  find needle, options
123
122
  end
124
123
 
124
+ def find_best(needle, options = {})
125
+ options = options.merge(:find_best => true)
126
+ find needle, options
127
+ end
128
+
125
129
  def find_all_with_score(needle, options = {})
126
130
  options = options.merge(:find_all_with_score => true)
127
131
  find needle, options
@@ -133,7 +137,8 @@ class FuzzyMatch
133
137
  threshold = options[:threshold]
134
138
  gather_last_result = options[:gather_last_result]
135
139
  is_find_all_with_score = options[:find_all_with_score]
136
- is_find_all = options[:find_all] || is_find_all_with_score
140
+ is_find_best = options[:find_best]
141
+ is_find_all = options[:find_all] || is_find_all_with_score || is_find_best
137
142
  first_grouping_decides = options[:first_grouping_decides]
138
143
  must_match_grouping = options[:must_match_grouping]
139
144
  must_match_at_least_one_word = options[:must_match_at_least_one_word]
@@ -275,6 +280,23 @@ EOS
275
280
  return memo
276
281
  end
277
282
 
283
+ if is_find_best
284
+ memo = []
285
+ best_bs = nil
286
+ similarities.each do |similarity|
287
+ if similarity.satisfy?(needle, threshold)
288
+ bs = similarity.best_score
289
+ best_bs ||= bs
290
+ if bs >= best_bs
291
+ memo << similarity.wrapper2.record
292
+ else
293
+ break
294
+ end
295
+ end
296
+ end
297
+ return memo
298
+ end
299
+
278
300
  if is_find_all
279
301
  memo = []
280
302
  similarities.each do |similarity|
@@ -26,6 +26,14 @@ describe FuzzyMatch do
26
26
  end
27
27
  end
28
28
 
29
+ describe '#find_best' do
30
+ it %{returns one or more records with the best score} do
31
+ d = FuzzyMatch.new [ 'X', 'X', 'X22', 'Y', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
32
+ d.find_best('X').must_equal ['X', 'X' ]
33
+ d.find_best('A').must_equal []
34
+ end
35
+ end
36
+
29
37
  describe '#find_all_with_score' do
30
38
  it %{return records with 2 scores} do
31
39
  d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
@@ -21,12 +21,10 @@ describe FuzzyMatch::Rule::Identity do
21
21
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
22
22
  end
23
23
 
24
- it %{can be initialized from a string (via to_regexp gem)} do
25
- i = FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
26
- i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
27
-
28
- i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
29
- i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
24
+ it %{does not automatically convert strings to regexps} do
25
+ lambda do
26
+ FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
27
+ end.must_raise ArgumentError, /regexp/i
30
28
  end
31
29
 
32
30
  it %{embraces case insensitivity} do
metadata CHANGED
@@ -1,36 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
5
- prerelease:
4
+ version: 1.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Seamus Abshere
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-18 00:00:00.000000000 Z
11
+ date: 2013-04-03 00:00:00.000000000 Z
13
12
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: to_regexp
16
- requirement: !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: 0.0.3
22
- type: :runtime
23
- prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ! '>='
28
- - !ruby/object:Gem::Version
29
- version: 0.0.3
30
13
  - !ruby/object:Gem::Dependency
31
14
  name: active_record_inline_schema
32
15
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
16
  requirements:
35
17
  - - ! '>='
36
18
  - !ruby/object:Gem::Version
@@ -38,7 +20,6 @@ dependencies:
38
20
  type: :development
39
21
  prerelease: false
40
22
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
23
  requirements:
43
24
  - - ! '>='
44
25
  - !ruby/object:Gem::Version
@@ -46,7 +27,6 @@ dependencies:
46
27
  - !ruby/object:Gem::Dependency
47
28
  name: minitest
48
29
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
30
  requirements:
51
31
  - - ! '>='
52
32
  - !ruby/object:Gem::Version
@@ -54,7 +34,6 @@ dependencies:
54
34
  type: :development
55
35
  prerelease: false
56
36
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
37
  requirements:
59
38
  - - ! '>='
60
39
  - !ruby/object:Gem::Version
@@ -62,7 +41,6 @@ dependencies:
62
41
  - !ruby/object:Gem::Dependency
63
42
  name: activerecord
64
43
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
44
  requirements:
67
45
  - - ! '>='
68
46
  - !ruby/object:Gem::Version
@@ -70,7 +48,6 @@ dependencies:
70
48
  type: :development
71
49
  prerelease: false
72
50
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
51
  requirements:
75
52
  - - ! '>='
76
53
  - !ruby/object:Gem::Version
@@ -78,7 +55,6 @@ dependencies:
78
55
  - !ruby/object:Gem::Dependency
79
56
  name: mysql2
80
57
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
58
  requirements:
83
59
  - - ! '>='
84
60
  - !ruby/object:Gem::Version
@@ -86,7 +62,6 @@ dependencies:
86
62
  type: :development
87
63
  prerelease: false
88
64
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
65
  requirements:
91
66
  - - ! '>='
92
67
  - !ruby/object:Gem::Version
@@ -94,7 +69,6 @@ dependencies:
94
69
  - !ruby/object:Gem::Dependency
95
70
  name: cohort_analysis
96
71
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
72
  requirements:
99
73
  - - ! '>='
100
74
  - !ruby/object:Gem::Version
@@ -102,7 +76,6 @@ dependencies:
102
76
  type: :development
103
77
  prerelease: false
104
78
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
79
  requirements:
107
80
  - - ! '>='
108
81
  - !ruby/object:Gem::Version
@@ -110,7 +83,6 @@ dependencies:
110
83
  - !ruby/object:Gem::Dependency
111
84
  name: weighted_average
112
85
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
86
  requirements:
115
87
  - - ! '>='
116
88
  - !ruby/object:Gem::Version
@@ -118,7 +90,6 @@ dependencies:
118
90
  type: :development
119
91
  prerelease: false
120
92
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
93
  requirements:
123
94
  - - ! '>='
124
95
  - !ruby/object:Gem::Version
@@ -126,7 +97,6 @@ dependencies:
126
97
  - !ruby/object:Gem::Dependency
127
98
  name: yard
128
99
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
100
  requirements:
131
101
  - - ! '>='
132
102
  - !ruby/object:Gem::Version
@@ -134,7 +104,6 @@ dependencies:
134
104
  type: :development
135
105
  prerelease: false
136
106
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
107
  requirements:
139
108
  - - ! '>='
140
109
  - !ruby/object:Gem::Version
@@ -142,7 +111,6 @@ dependencies:
142
111
  - !ruby/object:Gem::Dependency
143
112
  name: amatch
144
113
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
114
  requirements:
147
115
  - - ! '>='
148
116
  - !ruby/object:Gem::Version
@@ -150,7 +118,6 @@ dependencies:
150
118
  type: :development
151
119
  prerelease: false
152
120
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
121
  requirements:
155
122
  - - ! '>='
156
123
  - !ruby/object:Gem::Version
@@ -158,7 +125,6 @@ dependencies:
158
125
  - !ruby/object:Gem::Dependency
159
126
  name: minitest-reporters
160
127
  requirement: !ruby/object:Gem::Requirement
161
- none: false
162
128
  requirements:
163
129
  - - ! '>='
164
130
  - !ruby/object:Gem::Version
@@ -166,7 +132,6 @@ dependencies:
166
132
  type: :development
167
133
  prerelease: false
168
134
  version_requirements: !ruby/object:Gem::Requirement
169
- none: false
170
135
  requirements:
171
136
  - - ! '>='
172
137
  - !ruby/object:Gem::Version
@@ -232,27 +197,26 @@ files:
232
197
  - test/test_wrapper.rb
233
198
  homepage: https://github.com/seamusabshere/fuzzy_match
234
199
  licenses: []
200
+ metadata: {}
235
201
  post_install_message:
236
202
  rdoc_options: []
237
203
  require_paths:
238
204
  - lib
239
205
  required_ruby_version: !ruby/object:Gem::Requirement
240
- none: false
241
206
  requirements:
242
207
  - - ! '>='
243
208
  - !ruby/object:Gem::Version
244
209
  version: '0'
245
210
  required_rubygems_version: !ruby/object:Gem::Requirement
246
- none: false
247
211
  requirements:
248
212
  - - ! '>='
249
213
  - !ruby/object:Gem::Version
250
214
  version: '0'
251
215
  requirements: []
252
216
  rubyforge_project: fuzzy_match
253
- rubygems_version: 1.8.24
217
+ rubygems_version: 2.0.3
254
218
  signing_key:
255
- specification_version: 3
219
+ specification_version: 4
256
220
  summary: Find a needle in a haystack using string similarity and (optionally) regexp
257
221
  rules. Replaces loose_tight_dictionary.
258
222
  test_files: