fuzzy_match 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +11 -0
- data/fuzzy_match.gemspec +0 -2
- data/lib/fuzzy_match/rule.rb +5 -2
- data/lib/fuzzy_match/score.rb +2 -0
- data/lib/fuzzy_match/version.rb +1 -1
- data/lib/fuzzy_match.rb +30 -8
- data/test/test_fuzzy_match.rb +8 -0
- data/test/test_identity.rb +4 -6
- metadata +5 -41
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YTlmMmE3MDI3MWQ0NzY0NGE5N2Q4ZDY4MmI5NjUzZTg5YWU1OWE5OQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MzU3MDc3NjQ1NDczNWFhNWE0ZDdlZGRmYjlhYWQ3Y2YyZTNiMjRhMQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDcyZDk5OGY5MTQwNTEyMDA1YmQ1MzdiYzYyZTczMWExNzI0NjhlYWNkYzcx
|
10
|
+
Y2UwNzgwN2I5YmI3MjVhMzIwMDljZmVmMTFkODQ2MjY2NDdkNzViZjlhNTcy
|
11
|
+
NjliMmU5NTAwOTBiYTYxNjk4ZDkwZWM0OWMzMTdjZTU4ZjQ4NDk=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTg0MGVlYTc3NTY0NjMxZWMwNWFmZTdhYmRhOWM4MGJmN2QwYjc3NmNiYzQz
|
14
|
+
MzU3ODc2NDRjNDJjNzhiYmQwMmNkNmQ3MTdjYzMyNjM2YzBjMDEwOGYzNzgy
|
15
|
+
NmFkZTM0M2E2YzkwZjY1YjM4ZTEzY2Y1YmU5MmY3MDZjNTJhMzg=
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
1.5.0 / 2013-04-03
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* No longer automatically calls to_regexp on rules - you must pass Regexps to normalizers, groupings, etc.
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* FuzzyMatch#find_best returns all top results with the same score - thanks @ihough !
|
10
|
+
* Doesn't require to_regexp gem for you - you can still use it if you want to convert strings into regexps safely, if you want, tho
|
11
|
+
|
1
12
|
1.4.1 / 2013-01-17
|
2
13
|
|
3
14
|
* Bug fixes
|
data/fuzzy_match.gemspec
CHANGED
@@ -17,8 +17,6 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
|
-
s.add_runtime_dependency 'to_regexp', '>=0.0.3'
|
21
|
-
|
22
20
|
# needed if you use FuzzyMatch::CachedResult
|
23
21
|
s.add_development_dependency 'active_record_inline_schema', '>=0.4.0'
|
24
22
|
|
data/lib/fuzzy_match/rule.rb
CHANGED
@@ -3,8 +3,11 @@ class FuzzyMatch
|
|
3
3
|
class Rule
|
4
4
|
attr_reader :regexp
|
5
5
|
|
6
|
-
def initialize(
|
7
|
-
|
6
|
+
def initialize(regexp)
|
7
|
+
unless regexp.is_a?(::Regexp)
|
8
|
+
raise ArgumentError, "[FuzzyMatch] Rules must be set with Regexp objects, but got #{regexp.inspect} (#{regexp.class.name})"
|
9
|
+
end
|
10
|
+
@regexp = regexp
|
8
11
|
end
|
9
12
|
|
10
13
|
def ==(other)
|
data/lib/fuzzy_match/score.rb
CHANGED
data/lib/fuzzy_match/version.rb
CHANGED
data/lib/fuzzy_match.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'to_regexp'
|
2
|
-
|
3
1
|
require 'fuzzy_match/rule'
|
4
2
|
require 'fuzzy_match/rule/normalizer'
|
5
3
|
require 'fuzzy_match/rule/stop_word'
|
@@ -42,7 +40,8 @@ class FuzzyMatch
|
|
42
40
|
:gather_last_result => false,
|
43
41
|
:find_all => false,
|
44
42
|
:find_all_with_score => false,
|
45
|
-
:threshold => 0
|
43
|
+
:threshold => 0,
|
44
|
+
:find_best => false,
|
46
45
|
}
|
47
46
|
|
48
47
|
self.engine = DEFAULT_ENGINE
|
@@ -94,19 +93,19 @@ class FuzzyMatch
|
|
94
93
|
end
|
95
94
|
|
96
95
|
def groupings=(ary)
|
97
|
-
@groupings = ary.map { |
|
96
|
+
@groupings = ary.map { |regexp| Rule::Grouping.new regexp }
|
98
97
|
end
|
99
98
|
|
100
99
|
def identities=(ary)
|
101
|
-
@identities = ary.map { |
|
100
|
+
@identities = ary.map { |regexp| Rule::Identity.new regexp }
|
102
101
|
end
|
103
102
|
|
104
103
|
def normalizers=(ary)
|
105
|
-
@normalizers = ary.map { |
|
104
|
+
@normalizers = ary.map { |regexp| Rule::Normalizer.new regexp }
|
106
105
|
end
|
107
106
|
|
108
107
|
def stop_words=(ary)
|
109
|
-
@stop_words = ary.map { |
|
108
|
+
@stop_words = ary.map { |regexp| Rule::StopWord.new regexp }
|
110
109
|
end
|
111
110
|
|
112
111
|
def haystack=(ary)
|
@@ -122,6 +121,11 @@ class FuzzyMatch
|
|
122
121
|
find needle, options
|
123
122
|
end
|
124
123
|
|
124
|
+
def find_best(needle, options = {})
|
125
|
+
options = options.merge(:find_best => true)
|
126
|
+
find needle, options
|
127
|
+
end
|
128
|
+
|
125
129
|
def find_all_with_score(needle, options = {})
|
126
130
|
options = options.merge(:find_all_with_score => true)
|
127
131
|
find needle, options
|
@@ -133,7 +137,8 @@ class FuzzyMatch
|
|
133
137
|
threshold = options[:threshold]
|
134
138
|
gather_last_result = options[:gather_last_result]
|
135
139
|
is_find_all_with_score = options[:find_all_with_score]
|
136
|
-
|
140
|
+
is_find_best = options[:find_best]
|
141
|
+
is_find_all = options[:find_all] || is_find_all_with_score || is_find_best
|
137
142
|
first_grouping_decides = options[:first_grouping_decides]
|
138
143
|
must_match_grouping = options[:must_match_grouping]
|
139
144
|
must_match_at_least_one_word = options[:must_match_at_least_one_word]
|
@@ -275,6 +280,23 @@ EOS
|
|
275
280
|
return memo
|
276
281
|
end
|
277
282
|
|
283
|
+
if is_find_best
|
284
|
+
memo = []
|
285
|
+
best_bs = nil
|
286
|
+
similarities.each do |similarity|
|
287
|
+
if similarity.satisfy?(needle, threshold)
|
288
|
+
bs = similarity.best_score
|
289
|
+
best_bs ||= bs
|
290
|
+
if bs >= best_bs
|
291
|
+
memo << similarity.wrapper2.record
|
292
|
+
else
|
293
|
+
break
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
return memo
|
298
|
+
end
|
299
|
+
|
278
300
|
if is_find_all
|
279
301
|
memo = []
|
280
302
|
similarities.each do |similarity|
|
data/test/test_fuzzy_match.rb
CHANGED
@@ -26,6 +26,14 @@ describe FuzzyMatch do
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
+
describe '#find_best' do
|
30
|
+
it %{returns one or more records with the best score} do
|
31
|
+
d = FuzzyMatch.new [ 'X', 'X', 'X22', 'Y', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
32
|
+
d.find_best('X').must_equal ['X', 'X' ]
|
33
|
+
d.find_best('A').must_equal []
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
29
37
|
describe '#find_all_with_score' do
|
30
38
|
it %{return records with 2 scores} do
|
31
39
|
d = FuzzyMatch.new [ 'X', 'X22', 'Y', 'Y4' ], :groupings => [ /X/, /Y/ ], :must_match_grouping => true
|
data/test/test_identity.rb
CHANGED
@@ -21,12 +21,10 @@ describe FuzzyMatch::Rule::Identity do
|
|
21
21
|
i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
|
22
22
|
end
|
23
23
|
|
24
|
-
it %{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
i = FuzzyMatch::Rule::Identity.new '/\A\\\?\/(.*)etc\/mysql\$$/'
|
29
|
-
i.regexp.must_equal %r{\A\\?/(.*)etc/mysql\$$}
|
24
|
+
it %{does not automatically convert strings to regexps} do
|
25
|
+
lambda do
|
26
|
+
FuzzyMatch::Rule::Identity.new '%r{\A\\\?/(.*)etc/mysql\$$}'
|
27
|
+
end.must_raise ArgumentError, /regexp/i
|
30
28
|
end
|
31
29
|
|
32
30
|
it %{embraces case insensitivity} do
|
metadata
CHANGED
@@ -1,36 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzy_match
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.5.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Seamus Abshere
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-04-03 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: to_regexp
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 0.0.3
|
22
|
-
type: :runtime
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ! '>='
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 0.0.3
|
30
13
|
- !ruby/object:Gem::Dependency
|
31
14
|
name: active_record_inline_schema
|
32
15
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
16
|
requirements:
|
35
17
|
- - ! '>='
|
36
18
|
- !ruby/object:Gem::Version
|
@@ -38,7 +20,6 @@ dependencies:
|
|
38
20
|
type: :development
|
39
21
|
prerelease: false
|
40
22
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
23
|
requirements:
|
43
24
|
- - ! '>='
|
44
25
|
- !ruby/object:Gem::Version
|
@@ -46,7 +27,6 @@ dependencies:
|
|
46
27
|
- !ruby/object:Gem::Dependency
|
47
28
|
name: minitest
|
48
29
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
30
|
requirements:
|
51
31
|
- - ! '>='
|
52
32
|
- !ruby/object:Gem::Version
|
@@ -54,7 +34,6 @@ dependencies:
|
|
54
34
|
type: :development
|
55
35
|
prerelease: false
|
56
36
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
37
|
requirements:
|
59
38
|
- - ! '>='
|
60
39
|
- !ruby/object:Gem::Version
|
@@ -62,7 +41,6 @@ dependencies:
|
|
62
41
|
- !ruby/object:Gem::Dependency
|
63
42
|
name: activerecord
|
64
43
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
44
|
requirements:
|
67
45
|
- - ! '>='
|
68
46
|
- !ruby/object:Gem::Version
|
@@ -70,7 +48,6 @@ dependencies:
|
|
70
48
|
type: :development
|
71
49
|
prerelease: false
|
72
50
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
51
|
requirements:
|
75
52
|
- - ! '>='
|
76
53
|
- !ruby/object:Gem::Version
|
@@ -78,7 +55,6 @@ dependencies:
|
|
78
55
|
- !ruby/object:Gem::Dependency
|
79
56
|
name: mysql2
|
80
57
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
58
|
requirements:
|
83
59
|
- - ! '>='
|
84
60
|
- !ruby/object:Gem::Version
|
@@ -86,7 +62,6 @@ dependencies:
|
|
86
62
|
type: :development
|
87
63
|
prerelease: false
|
88
64
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
65
|
requirements:
|
91
66
|
- - ! '>='
|
92
67
|
- !ruby/object:Gem::Version
|
@@ -94,7 +69,6 @@ dependencies:
|
|
94
69
|
- !ruby/object:Gem::Dependency
|
95
70
|
name: cohort_analysis
|
96
71
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
72
|
requirements:
|
99
73
|
- - ! '>='
|
100
74
|
- !ruby/object:Gem::Version
|
@@ -102,7 +76,6 @@ dependencies:
|
|
102
76
|
type: :development
|
103
77
|
prerelease: false
|
104
78
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
79
|
requirements:
|
107
80
|
- - ! '>='
|
108
81
|
- !ruby/object:Gem::Version
|
@@ -110,7 +83,6 @@ dependencies:
|
|
110
83
|
- !ruby/object:Gem::Dependency
|
111
84
|
name: weighted_average
|
112
85
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
86
|
requirements:
|
115
87
|
- - ! '>='
|
116
88
|
- !ruby/object:Gem::Version
|
@@ -118,7 +90,6 @@ dependencies:
|
|
118
90
|
type: :development
|
119
91
|
prerelease: false
|
120
92
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
93
|
requirements:
|
123
94
|
- - ! '>='
|
124
95
|
- !ruby/object:Gem::Version
|
@@ -126,7 +97,6 @@ dependencies:
|
|
126
97
|
- !ruby/object:Gem::Dependency
|
127
98
|
name: yard
|
128
99
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
100
|
requirements:
|
131
101
|
- - ! '>='
|
132
102
|
- !ruby/object:Gem::Version
|
@@ -134,7 +104,6 @@ dependencies:
|
|
134
104
|
type: :development
|
135
105
|
prerelease: false
|
136
106
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
107
|
requirements:
|
139
108
|
- - ! '>='
|
140
109
|
- !ruby/object:Gem::Version
|
@@ -142,7 +111,6 @@ dependencies:
|
|
142
111
|
- !ruby/object:Gem::Dependency
|
143
112
|
name: amatch
|
144
113
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
114
|
requirements:
|
147
115
|
- - ! '>='
|
148
116
|
- !ruby/object:Gem::Version
|
@@ -150,7 +118,6 @@ dependencies:
|
|
150
118
|
type: :development
|
151
119
|
prerelease: false
|
152
120
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
121
|
requirements:
|
155
122
|
- - ! '>='
|
156
123
|
- !ruby/object:Gem::Version
|
@@ -158,7 +125,6 @@ dependencies:
|
|
158
125
|
- !ruby/object:Gem::Dependency
|
159
126
|
name: minitest-reporters
|
160
127
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
128
|
requirements:
|
163
129
|
- - ! '>='
|
164
130
|
- !ruby/object:Gem::Version
|
@@ -166,7 +132,6 @@ dependencies:
|
|
166
132
|
type: :development
|
167
133
|
prerelease: false
|
168
134
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
135
|
requirements:
|
171
136
|
- - ! '>='
|
172
137
|
- !ruby/object:Gem::Version
|
@@ -232,27 +197,26 @@ files:
|
|
232
197
|
- test/test_wrapper.rb
|
233
198
|
homepage: https://github.com/seamusabshere/fuzzy_match
|
234
199
|
licenses: []
|
200
|
+
metadata: {}
|
235
201
|
post_install_message:
|
236
202
|
rdoc_options: []
|
237
203
|
require_paths:
|
238
204
|
- lib
|
239
205
|
required_ruby_version: !ruby/object:Gem::Requirement
|
240
|
-
none: false
|
241
206
|
requirements:
|
242
207
|
- - ! '>='
|
243
208
|
- !ruby/object:Gem::Version
|
244
209
|
version: '0'
|
245
210
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
246
|
-
none: false
|
247
211
|
requirements:
|
248
212
|
- - ! '>='
|
249
213
|
- !ruby/object:Gem::Version
|
250
214
|
version: '0'
|
251
215
|
requirements: []
|
252
216
|
rubyforge_project: fuzzy_match
|
253
|
-
rubygems_version:
|
217
|
+
rubygems_version: 2.0.3
|
254
218
|
signing_key:
|
255
|
-
specification_version:
|
219
|
+
specification_version: 4
|
256
220
|
summary: Find a needle in a haystack using string similarity and (optionally) regexp
|
257
221
|
rules. Replaces loose_tight_dictionary.
|
258
222
|
test_files:
|