autosuggest 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bda23aeaee3abc15c45b52ddc2d64943de44a164f11d3a33de234171126b209c
4
- data.tar.gz: de6f64ac9ceb628e55e275080a45ac8cf0f5605a5e80767350c264adc92266a6
3
+ metadata.gz: '083927cb6e763a26bad61351189dd1a1c1e66d31da0a4844635ec5440f4620d0'
4
+ data.tar.gz: 94245fc7eaeb98f54669b561311d9dee4ac43788f88cef3768a33891c8b530ad
5
5
  SHA512:
6
- metadata.gz: 9a7a317190aba2237a7b98b6b2cce8ed656eeb44f7c57edd6e19644700fd2c8268c5086201d44d0186f1cabd513e365d471ff2abc4e14d0963989cac55b676f1
7
- data.tar.gz: d5dfe7b13390ef216d1f3ab614c4b6c83177d68ff4fef2545e0f34d92936f27cbd077fa44175393a487b6a398c216a51677add321521c97c4db9753e35c6bfbd
6
+ metadata.gz: f20b852dfcb4cf4249b4c4b2a72e7f58c2c2b642a11b14b33e632ee934736c721d70bcd87f7b6e20659ba5e82f05ce679b69d0e5e72f3b4c64d8187078d2b8c3
7
+ data.tar.gz: 6e99a02d77dc1ea2cf06e0903d6adebbbe73579496de092efff471369226dfb917df9bd31cee7697bc2d0fcd9392cbe6393186a6c4a747b49a6b03cee348108a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.2 (2021-11-22)
2
+
3
+ - Added `filter` option to `suggestions` method
4
+ - Improved performance
5
+
1
6
  ## 0.1.1 (2021-03-15)
2
7
 
3
8
  - Deprecated `blacklist_words` in favor of `block_words`
data/README.md CHANGED
@@ -14,16 +14,30 @@ Add this line to your application’s Gemfile:
14
14
  gem 'autosuggest'
15
15
  ```
16
16
 
17
- ## How It Works
17
+ ## Getting Started
18
18
 
19
- #### Start with the most popular queries
19
+ #### Prepare your data
20
+
21
+ Start with a hash of queries and their popularity, such as the number of users who have searched for each one.
22
+
23
+ ```ruby
24
+ top_queries = {
25
+ "bananas" => 353,
26
+ "apples" => 213,
27
+ "oranges" => 140
28
+ }
29
+ ```
30
+
31
+ With [Searchjoy](https://github.com/ankane/searchjoy), you can do:
20
32
 
21
33
  ```ruby
22
- top_queries = Search.group("LOWER(query)")
23
- .having("COUNT(DISTINCT user_id) >= 5")
24
- .count("DISTINCT user_id")
25
- # {"bananas" => 353, "apples" => 213, ...
34
+ top_queries = Searchjoy::Search.group(:normalized_query)
35
+ .having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
36
+ ```
26
37
 
38
+ Then pass them to Autosuggest.
39
+
40
+ ```ruby
27
41
  autosuggest = Autosuggest.new(top_queries)
28
42
  ```
29
43
 
@@ -76,7 +90,7 @@ autosuggest.block_words ["boom"]
76
90
  Get suggestions with:
77
91
 
78
92
  ```ruby
79
- autosuggest.suggestions
93
+ autosuggest.suggestions(filter: true)
80
94
  ```
81
95
 
82
96
  Filter queries without results and you’re set. We also prefer to have someone approve them by hand.
@@ -84,9 +98,8 @@ Filter queries without results and you’re set. We also prefer to have someone
84
98
  ## Full Example
85
99
 
86
100
  ```ruby
87
- top_queries = Search.group("LOWER(query)")
88
- .having("COUNT(DISTINCT user_id) >= 5")
89
- .count("DISTINCT user_id")
101
+ top_queries = Searchjoy::Search.group(:normalized_query)
102
+ .having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
90
103
  product_names = Product.pluck(:name)
91
104
  brand_names = Brand.pluck(:name)
92
105
 
@@ -99,7 +112,7 @@ autosuggest.block_words ["boom"]
99
112
 
100
113
  puts autosuggest.pretty_suggestions
101
114
  # or
102
- suggestions = autosuggest.suggestions
115
+ suggestions = autosuggest.suggestions(filter: true)
103
116
  ```
104
117
 
105
118
  ## History
@@ -1,3 +1,3 @@
1
1
  class Autosuggest
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
data/lib/autosuggest.rb CHANGED
@@ -15,14 +15,18 @@ class Autosuggest
15
15
  @concepts = {}
16
16
  @words = Set.new
17
17
  @non_duplicates = Set.new
18
- @blocked_words = Set.new
19
- @blacklisted_words = Set.new
18
+ @blocked_words = {}
19
+ @blacklisted_words = {}
20
20
  @preferred_queries = {}
21
- @profane_words = Set.new(Obscenity::Base.blacklist)
21
+ @profane_words = {}
22
+ @concept_tree = {}
23
+ add_nodes(@profane_words, Obscenity::Base.blacklist)
22
24
  end
23
25
 
24
26
  def add_concept(name, values)
25
- @concepts[name] = Set.new(values.compact.uniq.map(&:downcase))
27
+ values = values.compact.uniq
28
+ add_nodes(@concept_tree, values)
29
+ @concepts[name] = Set.new(values.map(&:downcase))
26
30
  end
27
31
 
28
32
  def parse_words(phrases, options = {})
@@ -50,16 +54,14 @@ class Autosuggest
50
54
  end
51
55
 
52
56
  def block_words(words)
53
- words.each do |word|
54
- @blocked_words << word.downcase
55
- end
57
+ add_nodes(@blocked_words, words)
58
+ words
56
59
  end
57
60
 
58
61
  def blacklist_words(words)
59
62
  warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
60
- words.each do |word|
61
- @blacklisted_words << word.downcase
62
- end
63
+ add_nodes(@blacklisted_words, words)
64
+ words
63
65
  end
64
66
 
65
67
  def prefer(queries)
@@ -68,10 +70,11 @@ class Autosuggest
68
70
  end
69
71
  end
70
72
 
71
- def suggestions
73
+ # TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
74
+ def suggestions(filter: false)
72
75
  stemmed_queries = {}
73
76
  added_queries = Set.new
74
- @top_queries.sort_by { |_query, count| -count }.map do |query, count|
77
+ results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
75
78
  query = query.to_s
76
79
 
77
80
  # TODO do not ignore silently
@@ -110,12 +113,14 @@ class Autosuggest
110
113
  concepts << name if values.include?(query)
111
114
  end
112
115
 
116
+ tokens = tokenize(query)
117
+
113
118
  # exclude misspellings that are not brands
114
- misspelling = @words.any? && misspellings?(query)
119
+ misspelling = @words.any? && misspellings?(tokens)
115
120
 
116
- profane = blocked?(query, @profane_words)
117
- blocked = blocked?(query, @blocked_words)
118
- blacklisted = blocked?(query, @blacklisted_words)
121
+ profane = blocked?(tokens, @profane_words)
122
+ blocked = blocked?(tokens, @blocked_words)
123
+ blacklisted = blocked?(tokens, @blacklisted_words)
119
124
 
120
125
  notes = []
121
126
  notes << "duplicate of #{duplicate}" if duplicate
@@ -140,6 +145,10 @@ class Autosuggest
140
145
  result[:notes] = notes
141
146
  result
142
147
  end
148
+ if filter
149
+ results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
150
+ end
151
+ results
143
152
  end
144
153
 
145
154
  def pretty_suggestions
@@ -152,38 +161,41 @@ class Autosuggest
152
161
 
153
162
  protected
154
163
 
155
- def misspellings?(query)
156
- recurse(tokenize(query)).each do |terms|
157
- if terms.all? { |t| @concepts.any? { |_, values| values.include?(t) } || @words.include?(t) }
158
- return false
164
+ def misspellings?(tokens)
165
+ pos = [0]
166
+ while i = pos.shift
167
+ return false if i == tokens.size
168
+
169
+ if @words.include?(tokens[i])
170
+ pos << i + 1
171
+ end
172
+
173
+ node = @concept_tree[tokens[i]]
174
+ j = i
175
+ while node
176
+ j += 1
177
+ pos << j if node[:eos]
178
+ break if j == tokens.size
179
+ node = node[tokens[j]]
159
180
  end
160
- end
161
- true
162
- end
163
181
 
164
- def blocked?(query, blocked_words)
165
- recurse(tokenize(query)).each do |terms|
166
- return true if terms.any? { |t| blocked_words.include?(t) }
182
+ pos.uniq!
167
183
  end
168
- false
184
+ true
169
185
  end
170
186
 
171
- def recurse(words)
172
- if words.size == 1
173
- [words]
174
- else
175
- result = [[words.join(" ")]]
176
- i = 0
177
- while i < words.size - 1
178
- recurse(words[0..i]).each do |v1|
179
- recurse(words[i + 1..-1]).each do |v2|
180
- result << v1 + v2
181
- end
182
- end
183
- i += 1
187
+ def blocked?(tokens, blocked_words)
188
+ tokens.each_with_index do |token, i|
189
+ node = blocked_words[token]
190
+ j = i
191
+ while node
192
+ return true if node[:eos]
193
+ j += 1
194
+ break if j == tokens.size
195
+ node = node[tokens[j]]
184
196
  end
185
- result.uniq
186
197
  end
198
+ false
187
199
  end
188
200
 
189
201
  def tokenize(str)
@@ -206,4 +218,15 @@ class Autosuggest
206
218
  def normalize_query(query)
207
219
  tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
208
220
  end
221
+
222
+ def add_nodes(var, words)
223
+ words.each do |word|
224
+ node = var
225
+ tokenize(word).each do |token|
226
+ node = (node[token] ||= {})
227
+ end
228
+ node[:eos] = true
229
+ end
230
+ var
231
+ end
209
232
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: autosuggest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-15 00:00:00.000000000 Z
11
+ date: 2021-11-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-stemmer
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  requirements: []
71
- rubygems_version: 3.2.3
71
+ rubygems_version: 3.2.22
72
72
  signing_key:
73
73
  specification_version: 4
74
74
  summary: Generate autocomplete suggestions based on what your users search