fuzzy_match 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +11 -0
- data/fuzzy_match.gemspec +0 -2
- data/lib/fuzzy_match/rule.rb +5 -2
- data/lib/fuzzy_match/score.rb +2 -0
- data/lib/fuzzy_match/version.rb +1 -1
- data/lib/fuzzy_match.rb +30 -8
- data/test/test_fuzzy_match.rb +8 -0
- data/test/test_identity.rb +4 -6
- metadata +5 -41
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YTlmMmE3MDI3MWQ0NzY0NGE5N2Q4ZDY4MmI5NjUzZTg5YWU1OWE5OQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MzU3MDc3NjQ1NDczNWFhNWE0ZDdlZGRmYjlhYWQ3Y2YyZTNiMjRhMQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDcyZDk5OGY5MTQwNTEyMDA1YmQ1MzdiYzYyZTczMWExNzI0NjhlYWNkYzcx
|
10
|
+
Y2UwNzgwN2I5YmI3MjVhMzIwMDljZmVmMTFkODQ2MjY2NDdkNzViZjlhNTcy
|
11
|
+
NjliMmU5NTAwOTBiYTYxNjk4ZDkwZWM0OWMzMTdjZTU4ZjQ4NDk=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTg0MGVlYTc3NTY0NjMxZWMwNWFmZTdhYmRhOWM4MGJmN2QwYjc3NmNiYzQz
|
14
|
+
MzU3ODc2NDRjNDJjNzhiYmQwMmNkNmQ3MTdjYzMyNjM2YzBjMDEwOGYzNzgy
|
15
|
+
NmFkZTM0M2E2YzkwZjY1YjM4ZTEzY2Y1YmU5MmY3MDZjNTJhMzg=
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
1.5.0 / 2013-04-03
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* No longer automatically calls to_regexp on rules - you must pass Regexps to normalizers, groupings, etc.
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* FuzzyMatch#find_best returns all top results with the same score - thanks @ihough !
|
10
|
+
* No longer requires the to_regexp gem for you - you can still use it yourself if you want to convert strings into regexps safely
|
11
|
+
|
1
12
|
1.4.1 / 2013-01-17
|
2
13
|
|
3
14
|
* Bug fixes
|
data/fuzzy_match.gemspec
CHANGED
@@ -17,8 +17,6 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
|
-
s.add_runtime_dependency 'to_regexp', '>=0.0.3'
|
21
|
-
|
22
20
|
# needed if you use FuzzyMatch::CachedResult
|
23
21
|
s.add_development_dependency 'active_record_inline_schema', '>=0.4.0'
|
24
22
|
|
data/lib/fuzzy_match/rule.rb
CHANGED
@@ -3,8 +3,11 @@ class FuzzyMatch
|
|
3
3
|
class Rule
|
4
4
|
attr_reader :regexp
|
5
5
|
|
6
|
-
def initialize(
|
7
|
-
|
6
|
+
def initialize(regexp)
|
7
|
+
unless regexp.is_a?(::Regexp)
|
8
|
+
raise ArgumentError, "[FuzzyMatch] Rules must be set with Regexp objects, but got #{regexp.inspect} (#{regexp.class.name})"
|
9
|
+
end
|
10
|
+
@regexp = regexp
|
8
11
|
end
|
9
12
|
|
10
13
|
def ==(other)
|
data/lib/fuzzy_match/score.rb
CHANGED
data/lib/fuzzy_match/version.rb
CHANGED
data/lib/fuzzy_match.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'to_regexp'
|
2
|
-
|
3
1
|
require 'fuzzy_match/rule'
|
4
2
|
require 'fuzzy_match/rule/normalizer'
|
5
3
|
require 'fuzzy_match/rule/stop_word'
|
@@ -42,7 +40,8 @@ class FuzzyMatch
|
|
42
40
|
:gather_last_result => false,
|
43
41
|
:find_all => false,
|
44
42
|
:find_all_with_score => false,
|
45
|
-
:threshold => 0
|
43
|
+
:threshold => 0,
|
44
|
+
:find_best => false,
|
46
45
|
}
|
47
46
|
|
48
47
|
self.engine = DEFAULT_ENGINE
|
@@ -94,19 +93,19 @@ class FuzzyMatch
|
|
94
93
|
end
|
95
94
|
|
96
95
|
def groupings=(ary)
|
97
|
-
@groupings = ary.map { |
|
96
|
+
@groupings = ary.map { |regexp| Rule::Grouping.new regexp }
|
98
97
|
end
|
99
98
|
|
100
99
|
def identities=(ary)
|
101
|
-
@identities = ary.map { |
|
100
|
+
@identities = ary.map { |regexp| Rule::Identity.new regexp }
|
102
101
|
end
|
103
102
|
|
104
103
|
def normalizers=(ary)
|
105
|
-
@normalizers = ary.map { |
|
104
|
+
@normalizers = ary.map { |regexp| Rule::Normalizer.new regexp }
|
106
105
|
end
|
107
106
|
|
108
107
|
def stop_words=(ary)
|
109
|
-
@stop_words = ary.map { |
|
108
|
+
@stop_words = ary.map { |regexp| Rule::StopWord.new regexp }
|
110
109
|
end
|
111
110
|
|
112
111
|
def haystack=(ary)
|
@@ -122,6 +121,11 @@ class FuzzyMatch
|
|
122
121
|
find needle, options
|
123
122
|
end
|
124
123
|
|
124
|
+
def find_best(needle, options = {})
|
125
|
+
options = options.merge(:find_best => true)
|
126
|
+
find needle, options
|
127
|
+
end
|
128
|
+
|
125
129
|
def find_all_with_score(needle, options = {})
|
126
130
|
options = options.merge(:find_all_with_score => true)
|
127
131
|
find needle, options
|
@@ -133,7 +137,8 @@ class FuzzyMatch
|
|
133
137
|
threshold = options[:threshold]
|
134
138
|
gather_last_result = options[:gather_last_result]
|
135
139
|
is_find_all_with_score = options[:find_all_with_score]
|
136
|
-
|
140
|
+
is_find_best = options[:find_best]
|
141
|
+
is_find_all = options[:find_all] || is_find_all_with_score || is_find_best
|
137
142
|
first_grouping_decides = options[:first_grouping_decides]
|
138
143
|
must_match_grouping = options[:must_match_grouping]
|
139
144
|
must_match_at_least_one_word = options[:must_match_at_least_one_word]
|
@@ -275,6 +280,23 @@ EOS
|
|
275
280
|
return memo
|
276
281
|
end
|
277
282
|
|
283
|
+
if is_find_best
|
284
|
+
memo = []
|
285
|
+
best_bs = nil
|
286
|
+
similarities.each do |similarity|
|
287
|
+
if similarity.satisfy?(needle, threshold)
|
288
|
+
bs = similarity.best_score
|
289
|
+
best_bs ||= bs
|
290
|
+
if bs >= best_bs
|
291
|
+
memo << similarity.wrapper2.record
|
292
|
+
else
|
293
|
+
break
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
return memo
|
298
|
+
end
|
299
|
+
|
278
300
|
if is_find_all
|
279
301
|
memo = []
|
280
302
|
similarities.each do |similarity|
|
data/test/test_fuzzy_match.rb
CHANGED
@@ -26,6 +26,14 @@ describe FuzzyMatch do
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
+
describe '#find_best' do
|
30
|
+
it %{returns one or more records with the best score} do
|
31
|
+
d = FuzzyMatch.new [ 'X', 'X', 'X22', 'Y', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
32
|
+
d.find_best('X').must_equal ['X', 'X' ]
|
33
|
+
d.find_best('A').must_equal []
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
29
37
|
describe '#find_all_with_score' do
|
30
38
|
it %{return records with 2 scores} do
|
31
39
|
d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
data/test/test_identity.rb
CHANGED
@@ -21,12 +21,10 @@ describe FuzzyMatch::Rule::Identity do
|
|
21
21
|
i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
|
22
22
|
end
|
23
23
|
|
24
|
-
it %{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
|
29
|
-
i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
|
24
|
+
it %{does not automatically convert strings to regexps} do
|
25
|
+
lambda do
|
26
|
+
FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
|
27
|
+
end.must_raise ArgumentError, /regexp/i
|
30
28
|
end
|
31
29
|
|
32
30
|
it %{embraces case insensitivity} do
|
metadata
CHANGED
@@ -1,36 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzy_match
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Seamus Abshere
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-04-03 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: to_regexp
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 0.0.3
|
22
|
-
type: :runtime
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ! '>='
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 0.0.3
|
30
13
|
- !ruby/object:Gem::Dependency
|
31
14
|
name: active_record_inline_schema
|
32
15
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
16
|
requirements:
|
35
17
|
- - ! '>='
|
36
18
|
- !ruby/object:Gem::Version
|
@@ -38,7 +20,6 @@ dependencies:
|
|
38
20
|
type: :development
|
39
21
|
prerelease: false
|
40
22
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
23
|
requirements:
|
43
24
|
- - ! '>='
|
44
25
|
- !ruby/object:Gem::Version
|
@@ -46,7 +27,6 @@ dependencies:
|
|
46
27
|
- !ruby/object:Gem::Dependency
|
47
28
|
name: minitest
|
48
29
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
30
|
requirements:
|
51
31
|
- - ! '>='
|
52
32
|
- !ruby/object:Gem::Version
|
@@ -54,7 +34,6 @@ dependencies:
|
|
54
34
|
type: :development
|
55
35
|
prerelease: false
|
56
36
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
37
|
requirements:
|
59
38
|
- - ! '>='
|
60
39
|
- !ruby/object:Gem::Version
|
@@ -62,7 +41,6 @@ dependencies:
|
|
62
41
|
- !ruby/object:Gem::Dependency
|
63
42
|
name: activerecord
|
64
43
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
44
|
requirements:
|
67
45
|
- - ! '>='
|
68
46
|
- !ruby/object:Gem::Version
|
@@ -70,7 +48,6 @@ dependencies:
|
|
70
48
|
type: :development
|
71
49
|
prerelease: false
|
72
50
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
51
|
requirements:
|
75
52
|
- - ! '>='
|
76
53
|
- !ruby/object:Gem::Version
|
@@ -78,7 +55,6 @@ dependencies:
|
|
78
55
|
- !ruby/object:Gem::Dependency
|
79
56
|
name: mysql2
|
80
57
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
58
|
requirements:
|
83
59
|
- - ! '>='
|
84
60
|
- !ruby/object:Gem::Version
|
@@ -86,7 +62,6 @@ dependencies:
|
|
86
62
|
type: :development
|
87
63
|
prerelease: false
|
88
64
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
65
|
requirements:
|
91
66
|
- - ! '>='
|
92
67
|
- !ruby/object:Gem::Version
|
@@ -94,7 +69,6 @@ dependencies:
|
|
94
69
|
- !ruby/object:Gem::Dependency
|
95
70
|
name: cohort_analysis
|
96
71
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
72
|
requirements:
|
99
73
|
- - ! '>='
|
100
74
|
- !ruby/object:Gem::Version
|
@@ -102,7 +76,6 @@ dependencies:
|
|
102
76
|
type: :development
|
103
77
|
prerelease: false
|
104
78
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
79
|
requirements:
|
107
80
|
- - ! '>='
|
108
81
|
- !ruby/object:Gem::Version
|
@@ -110,7 +83,6 @@ dependencies:
|
|
110
83
|
- !ruby/object:Gem::Dependency
|
111
84
|
name: weighted_average
|
112
85
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
86
|
requirements:
|
115
87
|
- - ! '>='
|
116
88
|
- !ruby/object:Gem::Version
|
@@ -118,7 +90,6 @@ dependencies:
|
|
118
90
|
type: :development
|
119
91
|
prerelease: false
|
120
92
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
93
|
requirements:
|
123
94
|
- - ! '>='
|
124
95
|
- !ruby/object:Gem::Version
|
@@ -126,7 +97,6 @@ dependencies:
|
|
126
97
|
- !ruby/object:Gem::Dependency
|
127
98
|
name: yard
|
128
99
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
100
|
requirements:
|
131
101
|
- - ! '>='
|
132
102
|
- !ruby/object:Gem::Version
|
@@ -134,7 +104,6 @@ dependencies:
|
|
134
104
|
type: :development
|
135
105
|
prerelease: false
|
136
106
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
107
|
requirements:
|
139
108
|
- - ! '>='
|
140
109
|
- !ruby/object:Gem::Version
|
@@ -142,7 +111,6 @@ dependencies:
|
|
142
111
|
- !ruby/object:Gem::Dependency
|
143
112
|
name: amatch
|
144
113
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
114
|
requirements:
|
147
115
|
- - ! '>='
|
148
116
|
- !ruby/object:Gem::Version
|
@@ -150,7 +118,6 @@ dependencies:
|
|
150
118
|
type: :development
|
151
119
|
prerelease: false
|
152
120
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
121
|
requirements:
|
155
122
|
- - ! '>='
|
156
123
|
- !ruby/object:Gem::Version
|
@@ -158,7 +125,6 @@ dependencies:
|
|
158
125
|
- !ruby/object:Gem::Dependency
|
159
126
|
name: minitest-reporters
|
160
127
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
128
|
requirements:
|
163
129
|
- - ! '>='
|
164
130
|
- !ruby/object:Gem::Version
|
@@ -166,7 +132,6 @@ dependencies:
|
|
166
132
|
type: :development
|
167
133
|
prerelease: false
|
168
134
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
135
|
requirements:
|
171
136
|
- - ! '>='
|
172
137
|
- !ruby/object:Gem::Version
|
@@ -232,27 +197,26 @@ files:
|
|
232
197
|
- test/test_wrapper.rb
|
233
198
|
homepage: https://github.com/seamusabshere/fuzzy_match
|
234
199
|
licenses: []
|
200
|
+
metadata: {}
|
235
201
|
post_install_message:
|
236
202
|
rdoc_options: []
|
237
203
|
require_paths:
|
238
204
|
- lib
|
239
205
|
required_ruby_version: !ruby/object:Gem::Requirement
|
240
|
-
none: false
|
241
206
|
requirements:
|
242
207
|
- - ! '>='
|
243
208
|
- !ruby/object:Gem::Version
|
244
209
|
version: '0'
|
245
210
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
246
|
-
none: false
|
247
211
|
requirements:
|
248
212
|
- - ! '>='
|
249
213
|
- !ruby/object:Gem::Version
|
250
214
|
version: '0'
|
251
215
|
requirements: []
|
252
216
|
rubyforge_project: fuzzy_match
|
253
|
-
rubygems_version:
|
217
|
+
rubygems_version: 2.0.3
|
254
218
|
signing_key:
|
255
|
-
specification_version:
|
219
|
+
specification_version: 4
|
256
220
|
summary: Find a needle in a haystack using string similarity and (optionally) regexp
|
257
221
|
rules. Replaces loose_tight_dictionary.
|
258
222
|
test_files:
|