redis-autosuggest 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -55,12 +55,17 @@ class Redis
55
55
 
56
56
  # Maximum number of items to be indexed per n-gram (fuzzy matching)
57
57
  @ngram_item_limit = 200
58
+
59
+ # If this is set to true, fuzzy matching will only return suggestions
60
+ # that it has very high confidence in being correct.
61
+ @strict_fuzzy_matching = false
58
62
 
59
63
  class << self
60
64
  attr_reader :redis, :namespace
61
65
  attr_accessor :db, :items, :itemids, :substrings, :max_per_substring,
62
66
  :max_results, :max_str_size, :leaderboard, :use_leaderboard, :rails_sources,
63
- :rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit
67
+ :rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit,
68
+ :strict_fuzzy_matching
64
69
 
65
70
  def redis=(redis)
66
71
  @redis = redis
@@ -2,7 +2,7 @@ class Redis
2
2
  module Autosuggest
3
3
 
4
4
  class << self
5
-
5
+
6
6
  # Add an item's n-grams to the redis db. The n-grams will be used
7
7
  # as candidates for autocompletions when Redis::Autosuggest.fuzzy_match
8
8
  # is set to true.
@@ -13,26 +13,26 @@ class Redis
13
13
  end
14
14
  end
15
15
  end
16
-
16
+
17
17
  # Remove an item's n-grams from the Redis db
18
18
  def remove_fuzzy(item)
19
19
  yield_ngrams(item) do |ngram|
20
20
  @ngrams.srem(ngram, "#{item}:#{compute_soundex_code(item)}")
21
21
  end
22
22
  end
23
-
23
+
24
24
  # Compute the soundex code of a string. Soundex only works on single
25
25
  # words, so multi-word strings are merged into one word first.
26
26
  def compute_soundex_code(str)
27
27
  return Text::Soundex.soundex(alphabet_only(str))
28
28
  end
29
-
29
+
30
30
  # Build a candidate pool for all suitable fuzzy matches for a string
31
31
  # by taking the union of all items in the Redis db that share an n-gram
32
32
  # with the string. Use levenshtein distance, soundex code similarity,
33
33
  # and the number of matching 2-grams to compute a score for each candidate.
34
34
  # Then return the highest-scoring candidates.
35
- def suggest_fuzzy(str, results=@max_results)
35
+ def suggest_fuzzy(str, results=@max_results, strict=@strict_fuzzy_matching)
36
36
  str_mul = alphabet_only(str).size
37
37
  str_soundex_code = compute_soundex_code(str)
38
38
  str_2grams = ngram_list(str, 2)
@@ -59,20 +59,38 @@ class Redis
59
59
  same_2grams = str_2grams & ngram_list(candidate_str, 2)
60
60
  candidate_score *= Math.exp(same_2grams.size)
61
61
 
62
- candidates << [candidate_str, candidate_score] if candidate_score > 1
62
+ if candidate_score > 1
63
+ candidates << {
64
+ str: candidate_str,
65
+ score: candidate_score
66
+ }
67
+ end
63
68
  end
64
69
  # Sort results by score and return the highest scoring candidates
65
- candidates = candidates.sort { |a, b| b[1] <=> a[1] }
66
- # puts candidates.take(10).map { |tuple| "#{tuple[0]} => #{tuple[1]}" }
67
- return candidates.take(results).map { |a| a[0] }
70
+ candidates = candidates.sort { |a, b| b[:score] <=> a[:score] }
71
+ # puts candidates.take(10).map { |cand| "#{cand[:str]} => #{cand[:score]}" }
72
+ # If strict fuzzy matching is used, only candidates whose score is at
73
+ # least Math.exp(str.size) will be returned.
74
+ if strict
75
+ suggestions = []
76
+ candidates.each do |cand|
77
+ # threshold ||= candidates[0][:score] / 10
78
+ threshold = Math.exp(str.size)
79
+ break if suggestions.size > results || cand[:score] < threshold
80
+ suggestions << cand
81
+ end
82
+ else
83
+ suggestions = candidates.take(results)
84
+ end
85
+ return suggestions.map { |cand| cand[:str] }
68
86
  end
69
-
87
+
70
88
  # Yield the n-grams of a specified size for a string one at a time
71
89
  def yield_ngrams(str, ngram_size=@ngram_size)
72
90
  ngram_list = ngram_list(str, ngram_size)
73
91
  ngram_list.each { |ngram| yield ngram }
74
92
  end
75
-
93
+
76
94
  # Returns a list containing all of the n-grams of a specified size
77
95
  # of a string. The list is ordered by the position of the n-gram
78
96
  # in the string (duplicates included).
@@ -86,7 +104,7 @@ class Redis
86
104
  end
87
105
  ngram_list
88
106
  end
89
-
107
+
90
108
  # Remove all characters not in the range 'a-z' from a string
91
109
  def alphabet_only(str)
92
110
  return str.gsub(/[^abcdefghijklmnopqrstuvwxyz]/, '')
@@ -1,5 +1,5 @@
1
1
  class Redis
2
2
  module Autosuggest
3
- VERSION = "0.3.0"
3
+ VERSION = "0.3.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redis-autosuggest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-08 00:00:00.000000000 Z
12
+ date: 2013-01-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: redis