redis-autosuggest 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/redis/autosuggest/config.rb +6 -1
- data/lib/redis/autosuggest/fuzzy.rb +30 -12
- data/lib/redis/autosuggest/version.rb +1 -1
- metadata +2 -2
@@ -55,12 +55,17 @@ class Redis
|
|
55
55
|
|
56
56
|
# Maximum number of items to be indexed per n-gram (fuzzy matching)
|
57
57
|
@ngram_item_limit = 200
|
58
|
+
|
59
|
+
# If this is set to true, fuzzy matching will only return suggestions
|
60
|
+
# that it has very high confidence in being correct.
|
61
|
+
@strict_fuzzy_matching = false
|
58
62
|
|
59
63
|
class << self
|
60
64
|
attr_reader :redis, :namespace
|
61
65
|
attr_accessor :db, :items, :itemids, :substrings, :max_per_substring,
|
62
66
|
:max_results, :max_str_size, :leaderboard, :use_leaderboard, :rails_sources,
|
63
|
-
:rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit
|
67
|
+
:rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit,
|
68
|
+
:strict_fuzzy_matching
|
64
69
|
|
65
70
|
def redis=(redis)
|
66
71
|
@redis = redis
|
@@ -2,7 +2,7 @@ class Redis
|
|
2
2
|
module Autosuggest
|
3
3
|
|
4
4
|
class << self
|
5
|
-
|
5
|
+
|
6
6
|
# Add an item's n-grams to the redis db. The n-grams will be used
|
7
7
|
# as candidates for autocompletions when Redis::Autosuggest.fuzzy_match
|
8
8
|
# is set to true.
|
@@ -13,26 +13,26 @@ class Redis
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
# Remove an item's n-grams from the Redis db
|
18
18
|
def remove_fuzzy(item)
|
19
19
|
yield_ngrams(item) do |ngram|
|
20
20
|
@ngrams.srem(ngram, "#{item}:#{compute_soundex_code(item)}")
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
# Compute the soundex code of a string (only works for single words
|
25
25
|
# so we have to merge multi-word strings)
|
26
26
|
def compute_soundex_code(str)
|
27
27
|
return Text::Soundex.soundex(alphabet_only(str))
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Build a candidate pool for all suitable fuzzy matches for a string
|
31
31
|
# by taking the union of all items in the Redis db that share an n-gram
|
32
32
|
# with the string. Use levenshtein distance, soundex code similarity,
|
33
33
|
# and the number of matching 2-grams to compute a score for each candidate.
|
34
34
|
# Then return the highest-scoring candidates.
|
35
|
-
def suggest_fuzzy(str, results=@max_results)
|
35
|
+
def suggest_fuzzy(str, results=@max_results, strict=@strict_fuzzy_matching)
|
36
36
|
str_mul = alphabet_only(str).size
|
37
37
|
str_soundex_code = compute_soundex_code(str)
|
38
38
|
str_2grams = ngram_list(str, 2)
|
@@ -59,20 +59,38 @@ class Redis
|
|
59
59
|
same_2grams = str_2grams & ngram_list(candidate_str, 2)
|
60
60
|
candidate_score *= Math.exp(same_2grams.size)
|
61
61
|
|
62
|
-
|
62
|
+
if candidate_score > 1
|
63
|
+
candidates << {
|
64
|
+
str: candidate_str,
|
65
|
+
score: candidate_score
|
66
|
+
}
|
67
|
+
end
|
63
68
|
end
|
64
69
|
# Sort results by score and return the highest scoring candidates
|
65
|
-
candidates = candidates.sort { |a, b| b[
|
66
|
-
# puts candidates.take(10).map { |
|
67
|
-
|
70
|
+
candidates = candidates.sort { |a, b| b[:score] <=> a[:score] }
|
71
|
+
# puts candidates.take(10).map { |cand| "#{cand[:str]} => #{cand[:score]}" }
|
72
|
+
# If strict fuzzy matching is used, only suggestion items with scores
|
73
|
+
# above a certain threshold will be returned.
|
74
|
+
if strict
|
75
|
+
suggestions = []
|
76
|
+
candidates.each do |cand|
|
77
|
+
# threshold ||= candidates[0][:score] / 10
|
78
|
+
threshold = Math.exp(str.size)
|
79
|
+
break if suggestions.size > results || cand[:score] < threshold
|
80
|
+
suggestions << cand
|
81
|
+
end
|
82
|
+
else
|
83
|
+
suggestions = candidates.take(results)
|
84
|
+
end
|
85
|
+
return suggestions.map { |cand| cand[:str] }
|
68
86
|
end
|
69
|
-
|
87
|
+
|
70
88
|
# Yield the n-grams of a specified size for a string one at a time
|
71
89
|
def yield_ngrams(str, ngram_size=@ngram_size)
|
72
90
|
ngram_list = ngram_list(str, ngram_size)
|
73
91
|
ngram_list.each { |ngram| yield ngram }
|
74
92
|
end
|
75
|
-
|
93
|
+
|
76
94
|
# Returns a list containing all of the n-grams of a specified size
|
77
95
|
# of a string. The list is ordered by the position of the n-gram
|
78
96
|
# in the string (duplicates included).
|
@@ -86,7 +104,7 @@ class Redis
|
|
86
104
|
end
|
87
105
|
ngram_list
|
88
106
|
end
|
89
|
-
|
107
|
+
|
90
108
|
# Remove all characters not in the range 'a-z' from a string
|
91
109
|
def alphabet_only(str)
|
92
110
|
return str.gsub(/[^abcdefghijklmnopqrstuvwxyz]/, '')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: redis
|