fuzzy_match 1.4.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTlmMmE3MDI3MWQ0NzY0NGE5N2Q4ZDY4MmI5NjUzZTg5YWU1OWE5OQ==
5
+ data.tar.gz: !binary |-
6
+ MzU3MDc3NjQ1NDczNWFhNWE0ZDdlZGRmYjlhYWQ3Y2YyZTNiMjRhMQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MDcyZDk5OGY5MTQwNTEyMDA1YmQ1MzdiYzYyZTczMWExNzI0NjhlYWNkYzcx
10
+ Y2UwNzgwN2I5YmI3MjVhMzIwMDljZmVmMTFkODQ2MjY2NDdkNzViZjlhNTcy
11
+ NjliMmU5NTAwOTBiYTYxNjk4ZDkwZWM0OWMzMTdjZTU4ZjQ4NDk=
12
+ data.tar.gz: !binary |-
13
+ MTg0MGVlYTc3NTY0NjMxZWMwNWFmZTdhYmRhOWM4MGJmN2QwYjc3NmNiYzQz
14
+ MzU3ODc2NDRjNDJjNzhiYmQwMmNkNmQ3MTdjYzMyNjM2YzBjMDEwOGYzNzgy
15
+ NmFkZTM0M2E2YzkwZjY1YjM4ZTEzY2Y1YmU5MmY3MDZjNTJhMzg=
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ 1.5.0 / 2013-04-03
2
+
3
+ * Breaking changes
4
+
5
+ * No longer automatically calls to_regexp on rules - you must pass Regexps to normalizers, groupings, etc.
6
+
7
+ * Enhancements
8
+
9
+ * FuzzyMatch#find_best returns all top results with the same score - thanks @ihough !
10
+ * Doesn't require the to_regexp gem for you - you can still use it if you want to convert strings into regexps safely
11
+
1
12
  1.4.1 / 2013-01-17
2
13
 
3
14
  * Bug fixes
data/fuzzy_match.gemspec CHANGED
@@ -17,8 +17,6 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency 'to_regexp', '>=0.0.3'
21
-
22
20
  # needed if you use FuzzyMatch::CachedResult
23
21
  s.add_development_dependency 'active_record_inline_schema', '>=0.4.0'
24
22
 
@@ -3,8 +3,11 @@ class FuzzyMatch
3
3
  class Rule
4
4
  attr_reader :regexp
5
5
 
6
- def initialize(regexp_or_str)
7
- @regexp = regexp_or_str.to_regexp
6
+ def initialize(regexp)
7
+ unless regexp.is_a?(::Regexp)
8
+ raise ArgumentError, "[FuzzyMatch] Rules must be set with Regexp objects, but got #{regexp.inspect} (#{regexp.class.name})"
9
+ end
10
+ @regexp = regexp
8
11
  end
9
12
 
10
13
  def ==(other)
@@ -3,6 +3,8 @@ require 'fuzzy_match/score/amatch'
3
3
 
4
4
  class FuzzyMatch
5
5
  class Score
6
+ include Comparable
7
+
6
8
  attr_reader :str1
7
9
  attr_reader :str2
8
10
 
@@ -1,3 +1,3 @@
1
1
  class FuzzyMatch
2
- VERSION = '1.4.1'
2
+ VERSION = '1.5.0'
3
3
  end
data/lib/fuzzy_match.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'to_regexp'
2
-
3
1
  require 'fuzzy_match/rule'
4
2
  require 'fuzzy_match/rule/normalizer'
5
3
  require 'fuzzy_match/rule/stop_word'
@@ -42,7 +40,8 @@ class FuzzyMatch
42
40
  :gather_last_result => false,
43
41
  :find_all => false,
44
42
  :find_all_with_score => false,
45
- :threshold => 0
43
+ :threshold => 0,
44
+ :find_best => false,
46
45
  }
47
46
 
48
47
  self.engine = DEFAULT_ENGINE
@@ -94,19 +93,19 @@ class FuzzyMatch
94
93
  end
95
94
 
96
95
  def groupings=(ary)
97
- @groupings = ary.map { |regexp_or_str| Rule::Grouping.new regexp_or_str }
96
+ @groupings = ary.map { |regexp| Rule::Grouping.new regexp }
98
97
  end
99
98
 
100
99
  def identities=(ary)
101
- @identities = ary.map { |regexp_or_str| Rule::Identity.new regexp_or_str }
100
+ @identities = ary.map { |regexp| Rule::Identity.new regexp }
102
101
  end
103
102
 
104
103
  def normalizers=(ary)
105
- @normalizers = ary.map { |regexp_or_str| Rule::Normalizer.new regexp_or_str }
104
+ @normalizers = ary.map { |regexp| Rule::Normalizer.new regexp }
106
105
  end
107
106
 
108
107
  def stop_words=(ary)
109
- @stop_words = ary.map { |regexp_or_str| Rule::StopWord.new regexp_or_str }
108
+ @stop_words = ary.map { |regexp| Rule::StopWord.new regexp }
110
109
  end
111
110
 
112
111
  def haystack=(ary)
@@ -122,6 +121,11 @@ class FuzzyMatch
122
121
  find needle, options
123
122
  end
124
123
 
124
+ def find_best(needle, options = {})
125
+ options = options.merge(:find_best => true)
126
+ find needle, options
127
+ end
128
+
125
129
  def find_all_with_score(needle, options = {})
126
130
  options = options.merge(:find_all_with_score => true)
127
131
  find needle, options
@@ -133,7 +137,8 @@ class FuzzyMatch
133
137
  threshold = options[:threshold]
134
138
  gather_last_result = options[:gather_last_result]
135
139
  is_find_all_with_score = options[:find_all_with_score]
136
- is_find_all = options[:find_all] || is_find_all_with_score
140
+ is_find_best = options[:find_best]
141
+ is_find_all = options[:find_all] || is_find_all_with_score || is_find_best
137
142
  first_grouping_decides = options[:first_grouping_decides]
138
143
  must_match_grouping = options[:must_match_grouping]
139
144
  must_match_at_least_one_word = options[:must_match_at_least_one_word]
@@ -275,6 +280,23 @@ EOS
275
280
  return memo
276
281
  end
277
282
 
283
+ if is_find_best
284
+ memo = []
285
+ best_bs = nil
286
+ similarities.each do |similarity|
287
+ if similarity.satisfy?(needle, threshold)
288
+ bs = similarity.best_score
289
+ best_bs ||= bs
290
+ if bs >= best_bs
291
+ memo << similarity.wrapper2.record
292
+ else
293
+ break
294
+ end
295
+ end
296
+ end
297
+ return memo
298
+ end
299
+
278
300
  if is_find_all
279
301
  memo = []
280
302
  similarities.each do |similarity|
@@ -26,6 +26,14 @@ describe FuzzyMatch do
26
26
  end
27
27
  end
28
28
 
29
+ describe '#find_best' do
30
+ it %{returns one or more records with the best score} do
31
+ d = FuzzyMatch.new [ 'X', 'X', 'X22', 'Y', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
32
+ d.find_best('X').must_equal ['X', 'X' ]
33
+ d.find_best('A').must_equal []
34
+ end
35
+ end
36
+
29
37
  describe '#find_all_with_score' do
30
38
  it %{return records with 2 scores} do
31
39
  d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
@@ -21,12 +21,10 @@ describe FuzzyMatch::Rule::Identity do
21
21
  i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
22
22
  end
23
23
 
24
- it %{can be initialized from a string (via to_regexp gem)} do
25
- i = FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
26
- i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
27
-
28
- i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
29
- i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
24
+ it %{does not automatically convert strings to regexps} do
25
+ lambda do
26
+ FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
27
+ end.must_raise ArgumentError, /regexp/i
30
28
  end
31
29
 
32
30
  it %{embraces case insensitivity} do
metadata CHANGED
@@ -1,36 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fuzzy_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
5
- prerelease:
4
+ version: 1.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Seamus Abshere
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-18 00:00:00.000000000 Z
11
+ date: 2013-04-03 00:00:00.000000000 Z
13
12
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: to_regexp
16
- requirement: !ruby/object:Gem::Requirement
17
- none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: 0.0.3
22
- type: :runtime
23
- prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ! '>='
28
- - !ruby/object:Gem::Version
29
- version: 0.0.3
30
13
  - !ruby/object:Gem::Dependency
31
14
  name: active_record_inline_schema
32
15
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
16
  requirements:
35
17
  - - ! '>='
36
18
  - !ruby/object:Gem::Version
@@ -38,7 +20,6 @@ dependencies:
38
20
  type: :development
39
21
  prerelease: false
40
22
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
23
  requirements:
43
24
  - - ! '>='
44
25
  - !ruby/object:Gem::Version
@@ -46,7 +27,6 @@ dependencies:
46
27
  - !ruby/object:Gem::Dependency
47
28
  name: minitest
48
29
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
30
  requirements:
51
31
  - - ! '>='
52
32
  - !ruby/object:Gem::Version
@@ -54,7 +34,6 @@ dependencies:
54
34
  type: :development
55
35
  prerelease: false
56
36
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
37
  requirements:
59
38
  - - ! '>='
60
39
  - !ruby/object:Gem::Version
@@ -62,7 +41,6 @@ dependencies:
62
41
  - !ruby/object:Gem::Dependency
63
42
  name: activerecord
64
43
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
44
  requirements:
67
45
  - - ! '>='
68
46
  - !ruby/object:Gem::Version
@@ -70,7 +48,6 @@ dependencies:
70
48
  type: :development
71
49
  prerelease: false
72
50
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
51
  requirements:
75
52
  - - ! '>='
76
53
  - !ruby/object:Gem::Version
@@ -78,7 +55,6 @@ dependencies:
78
55
  - !ruby/object:Gem::Dependency
79
56
  name: mysql2
80
57
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
58
  requirements:
83
59
  - - ! '>='
84
60
  - !ruby/object:Gem::Version
@@ -86,7 +62,6 @@ dependencies:
86
62
  type: :development
87
63
  prerelease: false
88
64
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
65
  requirements:
91
66
  - - ! '>='
92
67
  - !ruby/object:Gem::Version
@@ -94,7 +69,6 @@ dependencies:
94
69
  - !ruby/object:Gem::Dependency
95
70
  name: cohort_analysis
96
71
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
72
  requirements:
99
73
  - - ! '>='
100
74
  - !ruby/object:Gem::Version
@@ -102,7 +76,6 @@ dependencies:
102
76
  type: :development
103
77
  prerelease: false
104
78
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
79
  requirements:
107
80
  - - ! '>='
108
81
  - !ruby/object:Gem::Version
@@ -110,7 +83,6 @@ dependencies:
110
83
  - !ruby/object:Gem::Dependency
111
84
  name: weighted_average
112
85
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
86
  requirements:
115
87
  - - ! '>='
116
88
  - !ruby/object:Gem::Version
@@ -118,7 +90,6 @@ dependencies:
118
90
  type: :development
119
91
  prerelease: false
120
92
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
93
  requirements:
123
94
  - - ! '>='
124
95
  - !ruby/object:Gem::Version
@@ -126,7 +97,6 @@ dependencies:
126
97
  - !ruby/object:Gem::Dependency
127
98
  name: yard
128
99
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
100
  requirements:
131
101
  - - ! '>='
132
102
  - !ruby/object:Gem::Version
@@ -134,7 +104,6 @@ dependencies:
134
104
  type: :development
135
105
  prerelease: false
136
106
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
107
  requirements:
139
108
  - - ! '>='
140
109
  - !ruby/object:Gem::Version
@@ -142,7 +111,6 @@ dependencies:
142
111
  - !ruby/object:Gem::Dependency
143
112
  name: amatch
144
113
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
114
  requirements:
147
115
  - - ! '>='
148
116
  - !ruby/object:Gem::Version
@@ -150,7 +118,6 @@ dependencies:
150
118
  type: :development
151
119
  prerelease: false
152
120
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
121
  requirements:
155
122
  - - ! '>='
156
123
  - !ruby/object:Gem::Version
@@ -158,7 +125,6 @@ dependencies:
158
125
  - !ruby/object:Gem::Dependency
159
126
  name: minitest-reporters
160
127
  requirement: !ruby/object:Gem::Requirement
161
- none: false
162
128
  requirements:
163
129
  - - ! '>='
164
130
  - !ruby/object:Gem::Version
@@ -166,7 +132,6 @@ dependencies:
166
132
  type: :development
167
133
  prerelease: false
168
134
  version_requirements: !ruby/object:Gem::Requirement
169
- none: false
170
135
  requirements:
171
136
  - - ! '>='
172
137
  - !ruby/object:Gem::Version
@@ -232,27 +197,26 @@ files:
232
197
  - test/test_wrapper.rb
233
198
  homepage: https://github.com/seamusabshere/fuzzy_match
234
199
  licenses: []
200
+ metadata: {}
235
201
  post_install_message:
236
202
  rdoc_options: []
237
203
  require_paths:
238
204
  - lib
239
205
  required_ruby_version: !ruby/object:Gem::Requirement
240
- none: false
241
206
  requirements:
242
207
  - - ! '>='
243
208
  - !ruby/object:Gem::Version
244
209
  version: '0'
245
210
  required_rubygems_version: !ruby/object:Gem::Requirement
246
- none: false
247
211
  requirements:
248
212
  - - ! '>='
249
213
  - !ruby/object:Gem::Version
250
214
  version: '0'
251
215
  requirements: []
252
216
  rubyforge_project: fuzzy_match
253
- rubygems_version: 1.8.24
217
+ rubygems_version: 2.0.3
254
218
  signing_key:
255
- specification_version: 3
219
+ specification_version: 4
256
220
  summary: Find a needle in a haystack using string similarity and (optionally) regexp
257
221
  rules. Replaces loose_tight_dictionary.
258
222
  test_files: