autosuggest 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bda23aeaee3abc15c45b52ddc2d64943de44a164f11d3a33de234171126b209c
4
- data.tar.gz: de6f64ac9ceb628e55e275080a45ac8cf0f5605a5e80767350c264adc92266a6
3
+ metadata.gz: '083927cb6e763a26bad61351189dd1a1c1e66d31da0a4844635ec5440f4620d0'
4
+ data.tar.gz: 94245fc7eaeb98f54669b561311d9dee4ac43788f88cef3768a33891c8b530ad
5
5
  SHA512:
6
- metadata.gz: 9a7a317190aba2237a7b98b6b2cce8ed656eeb44f7c57edd6e19644700fd2c8268c5086201d44d0186f1cabd513e365d471ff2abc4e14d0963989cac55b676f1
7
- data.tar.gz: d5dfe7b13390ef216d1f3ab614c4b6c83177d68ff4fef2545e0f34d92936f27cbd077fa44175393a487b6a398c216a51677add321521c97c4db9753e35c6bfbd
6
+ metadata.gz: f20b852dfcb4cf4249b4c4b2a72e7f58c2c2b642a11b14b33e632ee934736c721d70bcd87f7b6e20659ba5e82f05ce679b69d0e5e72f3b4c64d8187078d2b8c3
7
+ data.tar.gz: 6e99a02d77dc1ea2cf06e0903d6adebbbe73579496de092efff471369226dfb917df9bd31cee7697bc2d0fcd9392cbe6393186a6c4a747b49a6b03cee348108a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.2 (2021-11-22)
2
+
3
+ - Added `filter` option to `suggestions` method
4
+ - Improved performance
5
+
1
6
  ## 0.1.1 (2021-03-15)
2
7
 
3
8
  - Deprecated `blacklist_words` in favor of `block_words`
data/README.md CHANGED
@@ -14,16 +14,30 @@ Add this line to your application’s Gemfile:
14
14
  gem 'autosuggest'
15
15
  ```
16
16
 
17
- ## How It Works
17
+ ## Getting Started
18
18
 
19
- #### Start with the most popular queries
19
+ #### Prepare your data
20
+
21
+ Start with a hash of queries and their popularity, such as the number of users who have searched for each query.
22
+
23
+ ```ruby
24
+ top_queries = {
25
+ "bananas" => 353,
26
+ "apples" => 213,
27
+ "oranges" => 140
28
+ }
29
+ ```
30
+
31
+ With [Searchjoy](https://github.com/ankane/searchjoy), you can do:
20
32
 
21
33
  ```ruby
22
- top_queries = Search.group("LOWER(query)")
23
- .having("COUNT(DISTINCT user_id) >= 5")
24
- .count("DISTINCT user_id")
25
- # {"bananas" => 353, "apples" => 213, ...
34
+ top_queries = Searchjoy::Search.group(:normalized_query)
35
+ .having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
36
+ ```
26
37
 
38
+ Then pass them to Autosuggest.
39
+
40
+ ```ruby
27
41
  autosuggest = Autosuggest.new(top_queries)
28
42
  ```
29
43
 
@@ -76,7 +90,7 @@ autosuggest.block_words ["boom"]
76
90
  Get suggestions with:
77
91
 
78
92
  ```ruby
79
- autosuggest.suggestions
93
+ autosuggest.suggestions(filter: true)
80
94
  ```
81
95
 
82
96
  Filter queries without results and you’re set. We also prefer to have someone approve them by hand.
@@ -84,9 +98,8 @@ Filter queries without results and you’re set. We also prefer to have someone
84
98
  ## Full Example
85
99
 
86
100
  ```ruby
87
- top_queries = Search.group("LOWER(query)")
88
- .having("COUNT(DISTINCT user_id) >= 5")
89
- .count("DISTINCT user_id")
101
+ top_queries = Searchjoy::Search.group(:normalized_query)
102
+ .having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
90
103
  product_names = Product.pluck(:name)
91
104
  brand_names = Brand.pluck(:name)
92
105
 
@@ -99,7 +112,7 @@ autosuggest.block_words ["boom"]
99
112
 
100
113
  puts autosuggest.pretty_suggestions
101
114
  # or
102
- suggestions = autosuggest.suggestions
115
+ suggestions = autosuggest.suggestions(filter: true)
103
116
  ```
104
117
 
105
118
  ## History
@@ -1,3 +1,3 @@
1
1
  class Autosuggest
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
data/lib/autosuggest.rb CHANGED
@@ -15,14 +15,18 @@ class Autosuggest
15
15
  @concepts = {}
16
16
  @words = Set.new
17
17
  @non_duplicates = Set.new
18
- @blocked_words = Set.new
19
- @blacklisted_words = Set.new
18
+ @blocked_words = {}
19
+ @blacklisted_words = {}
20
20
  @preferred_queries = {}
21
- @profane_words = Set.new(Obscenity::Base.blacklist)
21
+ @profane_words = {}
22
+ @concept_tree = {}
23
+ add_nodes(@profane_words, Obscenity::Base.blacklist)
22
24
  end
23
25
 
24
26
  def add_concept(name, values)
25
- @concepts[name] = Set.new(values.compact.uniq.map(&:downcase))
27
+ values = values.compact.uniq
28
+ add_nodes(@concept_tree, values)
29
+ @concepts[name] = Set.new(values.map(&:downcase))
26
30
  end
27
31
 
28
32
  def parse_words(phrases, options = {})
@@ -50,16 +54,14 @@ class Autosuggest
50
54
  end
51
55
 
52
56
  def block_words(words)
53
- words.each do |word|
54
- @blocked_words << word.downcase
55
- end
57
+ add_nodes(@blocked_words, words)
58
+ words
56
59
  end
57
60
 
58
61
  def blacklist_words(words)
59
62
  warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
60
- words.each do |word|
61
- @blacklisted_words << word.downcase
62
- end
63
+ add_nodes(@blacklisted_words, words)
64
+ words
63
65
  end
64
66
 
65
67
  def prefer(queries)
@@ -68,10 +70,11 @@ class Autosuggest
68
70
  end
69
71
  end
70
72
 
71
- def suggestions
73
+ # TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
74
+ def suggestions(filter: false)
72
75
  stemmed_queries = {}
73
76
  added_queries = Set.new
74
- @top_queries.sort_by { |_query, count| -count }.map do |query, count|
77
+ results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
75
78
  query = query.to_s
76
79
 
77
80
  # TODO do not ignore silently
@@ -110,12 +113,14 @@ class Autosuggest
110
113
  concepts << name if values.include?(query)
111
114
  end
112
115
 
116
+ tokens = tokenize(query)
117
+
113
118
  # exclude misspellings that are not brands
114
- misspelling = @words.any? && misspellings?(query)
119
+ misspelling = @words.any? && misspellings?(tokens)
115
120
 
116
- profane = blocked?(query, @profane_words)
117
- blocked = blocked?(query, @blocked_words)
118
- blacklisted = blocked?(query, @blacklisted_words)
121
+ profane = blocked?(tokens, @profane_words)
122
+ blocked = blocked?(tokens, @blocked_words)
123
+ blacklisted = blocked?(tokens, @blacklisted_words)
119
124
 
120
125
  notes = []
121
126
  notes << "duplicate of #{duplicate}" if duplicate
@@ -140,6 +145,10 @@ class Autosuggest
140
145
  result[:notes] = notes
141
146
  result
142
147
  end
148
+ if filter
149
+ results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
150
+ end
151
+ results
143
152
  end
144
153
 
145
154
  def pretty_suggestions
@@ -152,38 +161,41 @@ class Autosuggest
152
161
 
153
162
  protected
154
163
 
155
- def misspellings?(query)
156
- recurse(tokenize(query)).each do |terms|
157
- if terms.all? { |t| @concepts.any? { |_, values| values.include?(t) } || @words.include?(t) }
158
- return false
164
+ def misspellings?(tokens)
165
+ pos = [0]
166
+ while i = pos.shift
167
+ return false if i == tokens.size
168
+
169
+ if @words.include?(tokens[i])
170
+ pos << i + 1
171
+ end
172
+
173
+ node = @concept_tree[tokens[i]]
174
+ j = i
175
+ while node
176
+ j += 1
177
+ pos << j if node[:eos]
178
+ break if j == tokens.size
179
+ node = node[tokens[j]]
159
180
  end
160
- end
161
- true
162
- end
163
181
 
164
- def blocked?(query, blocked_words)
165
- recurse(tokenize(query)).each do |terms|
166
- return true if terms.any? { |t| blocked_words.include?(t) }
182
+ pos.uniq!
167
183
  end
168
- false
184
+ true
169
185
  end
170
186
 
171
- def recurse(words)
172
- if words.size == 1
173
- [words]
174
- else
175
- result = [[words.join(" ")]]
176
- i = 0
177
- while i < words.size - 1
178
- recurse(words[0..i]).each do |v1|
179
- recurse(words[i + 1..-1]).each do |v2|
180
- result << v1 + v2
181
- end
182
- end
183
- i += 1
187
+ def blocked?(tokens, blocked_words)
188
+ tokens.each_with_index do |token, i|
189
+ node = blocked_words[token]
190
+ j = i
191
+ while node
192
+ return true if node[:eos]
193
+ j += 1
194
+ break if j == tokens.size
195
+ node = node[tokens[j]]
184
196
  end
185
- result.uniq
186
197
  end
198
+ false
187
199
  end
188
200
 
189
201
  def tokenize(str)
@@ -206,4 +218,15 @@ class Autosuggest
206
218
  def normalize_query(query)
207
219
  tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
208
220
  end
221
+
222
+ def add_nodes(var, words)
223
+ words.each do |word|
224
+ node = var
225
+ tokenize(word).each do |token|
226
+ node = (node[token] ||= {})
227
+ end
228
+ node[:eos] = true
229
+ end
230
+ var
231
+ end
209
232
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: autosuggest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-15 00:00:00.000000000 Z
11
+ date: 2021-11-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-stemmer
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  requirements: []
71
- rubygems_version: 3.2.3
71
+ rubygems_version: 3.2.22
72
72
  signing_key:
73
73
  specification_version: 4
74
74
  summary: Generate autocomplete suggestions based on what your users search