redis-autosuggest 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,12 +55,17 @@ class Redis
55
55
 
56
56
  # Maximum number of items to be indexed per n-gram (fuzzy matching)
57
57
  @ngram_item_limit = 200
58
+
59
+ # If this is set to true, fuzzy matching will only return suggestions
60
+ # that it has very high confidence are correct.
61
+ @strict_fuzzy_matching = false
58
62
 
59
63
  class << self
60
64
  attr_reader :redis, :namespace
61
65
  attr_accessor :db, :items, :itemids, :substrings, :max_per_substring,
62
66
  :max_results, :max_str_size, :leaderboard, :use_leaderboard, :rails_sources,
63
- :rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit
67
+ :rails_source_sizes, :ngrams, :fuzzy_match, :ngram_size, :ngram_item_limit,
68
+ :strict_fuzzy_matching
64
69
 
65
70
  def redis=(redis)
66
71
  @redis = redis
@@ -2,7 +2,7 @@ class Redis
2
2
  module Autosuggest
3
3
 
4
4
  class << self
5
-
5
+
6
6
  # Add an item's n-grams to the redis db. The n-grams will be used
7
7
  # as candidates for autocompletions when Redis::Autosuggest.fuzzy_match
8
8
  # is set to true.
@@ -13,26 +13,26 @@ class Redis
13
13
  end
14
14
  end
15
15
  end
16
-
16
+
17
17
  # Remove an item's n-grams from the Redis db
18
18
  def remove_fuzzy(item)
19
19
  yield_ngrams(item) do |ngram|
20
20
  @ngrams.srem(ngram, "#{item}:#{compute_soundex_code(item)}")
21
21
  end
22
22
  end
23
-
23
+
24
24
  # Compute the soundex code of a string (only works for single words
25
25
  # so we have to merge multi-word strings)
26
26
  def compute_soundex_code(str)
27
27
  return Text::Soundex.soundex(alphabet_only(str))
28
28
  end
29
-
29
+
30
30
  # Build a candidate pool for all suitable fuzzy matches for a string
31
31
  # by taking the union of all items in the Redis db that share an n-gram
32
32
  # with the string. Use levenshtein distance, soundex code similarity,
33
33
  # and the number of matching 2-grams to compute a score for each candidate.
34
34
  # Then return the highest-scoring candidates.
35
- def suggest_fuzzy(str, results=@max_results)
35
+ def suggest_fuzzy(str, results=@max_results, strict=@strict_fuzzy_matching)
36
36
  str_mul = alphabet_only(str).size
37
37
  str_soundex_code = compute_soundex_code(str)
38
38
  str_2grams = ngram_list(str, 2)
@@ -59,20 +59,38 @@ class Redis
59
59
  same_2grams = str_2grams & ngram_list(candidate_str, 2)
60
60
  candidate_score *= Math.exp(same_2grams.size)
61
61
 
62
- candidates << [candidate_str, candidate_score] if candidate_score > 1
62
+ if candidate_score > 1
63
+ candidates << {
64
+ str: candidate_str,
65
+ score: candidate_score
66
+ }
67
+ end
63
68
  end
64
69
  # Sort results by score and return the highest scoring candidates
65
- candidates = candidates.sort { |a, b| b[1] <=> a[1] }
66
- # puts candidates.take(10).map { |tuple| "#{tuple[0]} => #{tuple[1]}" }
67
- return candidates.take(results).map { |a| a[0] }
70
+ candidates = candidates.sort { |a, b| b[:score] <=> a[:score] }
71
+ # puts candidates.take(10).map { |cand| "#{cand[:str]} => #{cand[:score]}" }
72
+ # If strict fuzzy matching is used, only candidates whose score is at
73
+ # least the threshold computed below will be returned.
74
+ if strict
75
+ suggestions = []
76
+ candidates.each do |cand|
77
+ # threshold ||= candidates[0][:score] / 10
78
+ threshold = Math.exp(str.size)
79
+ break if suggestions.size > results || cand[:score] < threshold
80
+ suggestions << cand
81
+ end
82
+ else
83
+ suggestions = candidates.take(results)
84
+ end
85
+ return suggestions.map { |cand| cand[:str] }
68
86
  end
69
-
87
+
70
88
  # Yield the n-grams of a specified size for a string one at a time
71
89
  def yield_ngrams(str, ngram_size=@ngram_size)
72
90
  ngram_list = ngram_list(str, ngram_size)
73
91
  ngram_list.each { |ngram| yield ngram }
74
92
  end
75
-
93
+
76
94
  # Returns a list containing all of the n-grams of a specified size
77
95
  # of a string. The list is ordered by the position of the n-gram
78
96
  # in the string (duplicates included).
@@ -86,7 +104,7 @@ class Redis
86
104
  end
87
105
  ngram_list
88
106
  end
89
-
107
+
90
108
  # Remove all characters not in the range 'a-z' from a string
91
109
  def alphabet_only(str)
92
110
  return str.gsub(/[^abcdefghijklmnopqrstuvwxyz]/, '')
@@ -1,5 +1,5 @@
1
1
  class Redis
2
2
  module Autosuggest
3
- VERSION = "0.3.0"
3
+ VERSION = "0.3.1"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redis-autosuggest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-08 00:00:00.000000000 Z
12
+ date: 2013-01-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: redis