redis-autosuggest 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/redis/autosuggest/config.rb +6 -1
- data/lib/redis/autosuggest/fuzzy.rb +30 -12
- data/lib/redis/autosuggest/version.rb +1 -1
- metadata +2 -2
@@ -55,12 +55,17 @@ class Redis
|
|
55
55
|
|
56
56
|
# Maximum number of items to be indexed per n-gram (fuzzy matching)
|
57
57
|
@ngram_item_limit = 200
|
58
|
+
|
59
|
+
# If this is set to true, returned suggestions for fuzzy matching will only
|
60
|
+
# include those that it has a very high confidence of being correct.
|
61
|
+
@strict_fuzzy_matching = false
|
58
62
|
|
59
63
|
class << self
|
60
64
|
attr_reader :redis, :namespace
|
61
65
|
attr_accessor :db, :items, :itemids, :substrings, :max_per_substring,
|
62
66
|
:max_results, :max_str_size, :leaderboard, :use_leaderboard, :rails_sources,
|
63
|
-
:rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit
|
67
|
+
:rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit,
|
68
|
+
:strict_fuzzy_matching
|
64
69
|
|
65
70
|
def redis=(redis)
|
66
71
|
@redis = redis
|
@@ -2,7 +2,7 @@ class Redis
|
|
2
2
|
module Autosuggest
|
3
3
|
|
4
4
|
class << self
|
5
|
-
|
5
|
+
|
6
6
|
# Add an item's n-grams to the redis db. The n-grams will be used
|
7
7
|
# as candidates for autocompletions when Redis::Autosuggest.fuzzy_match
|
8
8
|
# is set to true.
|
@@ -13,26 +13,26 @@ class Redis
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
# Remove an item's n-grams from the Redis db
|
18
18
|
def remove_fuzzy(item)
|
19
19
|
yield_ngrams(item) do |ngram|
|
20
20
|
@ngrams.srem(ngram, "#{item}:#{compute_soundex_code(item)}")
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
# Compute the soundex code of a string (only works for single words
|
25
25
|
# so we have to merge multi-word strings)
|
26
26
|
def compute_soundex_code(str)
|
27
27
|
return Text::Soundex.soundex(alphabet_only(str))
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Build a candidate pool for all suitable fuzzy matches for a string
|
31
31
|
# by taking the union of all items in the Redis db that share an n-gram
|
32
32
|
# with the string. Use levenshtein distance, soundex code similarity,
|
33
33
|
# and the number of matching 2-grams to compute a score for each candidate.
|
34
34
|
# Then return the highest-scoring candidates.
|
35
|
-
def suggest_fuzzy(str, results=@max_results)
|
35
|
+
def suggest_fuzzy(str, results=@max_results, strict=@strict_fuzzy_matching)
|
36
36
|
str_mul = alphabet_only(str).size
|
37
37
|
str_soundex_code = compute_soundex_code(str)
|
38
38
|
str_2grams = ngram_list(str, 2)
|
@@ -59,20 +59,38 @@ class Redis
|
|
59
59
|
same_2grams = str_2grams & ngram_list(candidate_str, 2)
|
60
60
|
candidate_score *= Math.exp(same_2grams.size)
|
61
61
|
|
62
|
-
|
62
|
+
if candidate_score > 1
|
63
|
+
candidates << {
|
64
|
+
str: candidate_str,
|
65
|
+
score: candidate_score
|
66
|
+
}
|
67
|
+
end
|
63
68
|
end
|
64
69
|
# Sort results by score and return the highest scoring candidates
|
65
|
-
candidates = candidates.sort { |a, b| b[
|
66
|
-
# puts candidates.take(10).map { |
|
67
|
-
|
70
|
+
candidates = candidates.sort { |a, b| b[:score] <=> a[:score] }
|
71
|
+
# puts candidates.take(10).map { |cand| "#{cand[:str]} => #{cand[:score]}" }
|
72
|
+
# If strict fuzzy matching is used, only suggestion items with scores
|
73
|
+
# above a certain threshold will be returned.
|
74
|
+
if strict
|
75
|
+
suggestions = []
|
76
|
+
candidates.each do |cand|
|
77
|
+
# threshold ||= candidates[0][:score] / 10
|
78
|
+
threshold = Math.exp(str.size)
|
79
|
+
break if suggestions.size > results || cand[:score] < threshold
|
80
|
+
suggestions << cand
|
81
|
+
end
|
82
|
+
else
|
83
|
+
suggestions = candidates.take(results)
|
84
|
+
end
|
85
|
+
return suggestions.map { |cand| cand[:str] }
|
68
86
|
end
|
69
|
-
|
87
|
+
|
70
88
|
# Yield the n-grams of a specified size for a string one at a time
|
71
89
|
def yield_ngrams(str, ngram_size=@ngram_size)
|
72
90
|
ngram_list = ngram_list(str, ngram_size)
|
73
91
|
ngram_list.each { |ngram| yield ngram }
|
74
92
|
end
|
75
|
-
|
93
|
+
|
76
94
|
# Returns a list containing all of the n-grams of a specified size
|
77
95
|
# of a string. The list is ordered by the position of the n-gram
|
78
96
|
# in the string (duplicates included).
|
@@ -86,7 +104,7 @@ class Redis
|
|
86
104
|
end
|
87
105
|
ngram_list
|
88
106
|
end
|
89
|
-
|
107
|
+
|
90
108
|
# Remove all characters not in the range 'a-z' from a string
|
91
109
|
def alphabet_only(str)
|
92
110
|
return str.gsub(/[^abcdefghijklmnopqrstuvwxyz]/, '')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: redis
|