autosuggest 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +24 -11
- data/lib/autosuggest/version.rb +1 -1
- data/lib/autosuggest.rb +64 -41
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '083927cb6e763a26bad61351189dd1a1c1e66d31da0a4844635ec5440f4620d0'
|
4
|
+
data.tar.gz: 94245fc7eaeb98f54669b561311d9dee4ac43788f88cef3768a33891c8b530ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f20b852dfcb4cf4249b4c4b2a72e7f58c2c2b642a11b14b33e632ee934736c721d70bcd87f7b6e20659ba5e82f05ce679b69d0e5e72f3b4c64d8187078d2b8c3
|
7
|
+
data.tar.gz: 6e99a02d77dc1ea2cf06e0903d6adebbbe73579496de092efff471369226dfb917df9bd31cee7697bc2d0fcd9392cbe6393186a6c4a747b49a6b03cee348108a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -14,16 +14,30 @@ Add this line to your application’s Gemfile:
|
|
14
14
|
gem 'autosuggest'
|
15
15
|
```
|
16
16
|
|
17
|
-
##
|
17
|
+
## Getting Started
|
18
18
|
|
19
|
-
####
|
19
|
+
#### Prepare your data
|
20
|
+
|
21
|
+
Start with a hash of queries and their popularity, such as the number of users who have searched for each query.
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
top_queries = {
|
25
|
+
"bananas" => 353,
|
26
|
+
"apples" => 213,
|
27
|
+
"oranges" => 140
|
28
|
+
}
|
29
|
+
```
|
30
|
+
|
31
|
+
With [Searchjoy](https://github.com/ankane/searchjoy), you can do:
|
20
32
|
|
21
33
|
```ruby
|
22
|
-
top_queries = Search.group(
|
23
|
-
|
24
|
-
|
25
|
-
# {"bananas" => 353, "apples" => 213, ...
|
34
|
+
top_queries = Searchjoy::Search.group(:normalized_query)
|
35
|
+
.having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
|
36
|
+
```
|
26
37
|
|
38
|
+
Then pass them to Autosuggest.
|
39
|
+
|
40
|
+
```ruby
|
27
41
|
autosuggest = Autosuggest.new(top_queries)
|
28
42
|
```
|
29
43
|
|
@@ -76,7 +90,7 @@ autosuggest.block_words ["boom"]
|
|
76
90
|
Get suggestions with:
|
77
91
|
|
78
92
|
```ruby
|
79
|
-
autosuggest.suggestions
|
93
|
+
autosuggest.suggestions(filter: true)
|
80
94
|
```
|
81
95
|
|
82
96
|
Filter queries without results and you’re set. We also prefer to have someone approve them by hand.
|
@@ -84,9 +98,8 @@ Filter queries without results and you’re set. We also prefer to have someone
|
|
84
98
|
## Full Example
|
85
99
|
|
86
100
|
```ruby
|
87
|
-
top_queries = Search.group(
|
88
|
-
|
89
|
-
.count("DISTINCT user_id")
|
101
|
+
top_queries = Searchjoy::Search.group(:normalized_query)
|
102
|
+
.having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
|
90
103
|
product_names = Product.pluck(:name)
|
91
104
|
brand_names = Brand.pluck(:name)
|
92
105
|
|
@@ -99,7 +112,7 @@ autosuggest.block_words ["boom"]
|
|
99
112
|
|
100
113
|
puts autosuggest.pretty_suggestions
|
101
114
|
# or
|
102
|
-
suggestions = autosuggest.suggestions
|
115
|
+
suggestions = autosuggest.suggestions(filter: true)
|
103
116
|
```
|
104
117
|
|
105
118
|
## History
|
data/lib/autosuggest/version.rb
CHANGED
data/lib/autosuggest.rb
CHANGED
@@ -15,14 +15,18 @@ class Autosuggest
|
|
15
15
|
@concepts = {}
|
16
16
|
@words = Set.new
|
17
17
|
@non_duplicates = Set.new
|
18
|
-
@blocked_words =
|
19
|
-
@blacklisted_words =
|
18
|
+
@blocked_words = {}
|
19
|
+
@blacklisted_words = {}
|
20
20
|
@preferred_queries = {}
|
21
|
-
@profane_words =
|
21
|
+
@profane_words = {}
|
22
|
+
@concept_tree = {}
|
23
|
+
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
22
24
|
end
|
23
25
|
|
24
26
|
def add_concept(name, values)
|
25
|
-
|
27
|
+
values = values.compact.uniq
|
28
|
+
add_nodes(@concept_tree, values)
|
29
|
+
@concepts[name] = Set.new(values.map(&:downcase))
|
26
30
|
end
|
27
31
|
|
28
32
|
def parse_words(phrases, options = {})
|
@@ -50,16 +54,14 @@ class Autosuggest
|
|
50
54
|
end
|
51
55
|
|
52
56
|
def block_words(words)
|
53
|
-
words
|
54
|
-
|
55
|
-
end
|
57
|
+
add_nodes(@blocked_words, words)
|
58
|
+
words
|
56
59
|
end
|
57
60
|
|
58
61
|
def blacklist_words(words)
|
59
62
|
warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
|
60
|
-
words
|
61
|
-
|
62
|
-
end
|
63
|
+
add_nodes(@blacklisted_words, words)
|
64
|
+
words
|
63
65
|
end
|
64
66
|
|
65
67
|
def prefer(queries)
|
@@ -68,10 +70,11 @@ class Autosuggest
|
|
68
70
|
end
|
69
71
|
end
|
70
72
|
|
71
|
-
|
73
|
+
# TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
|
74
|
+
def suggestions(filter: false)
|
72
75
|
stemmed_queries = {}
|
73
76
|
added_queries = Set.new
|
74
|
-
@top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
77
|
+
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
75
78
|
query = query.to_s
|
76
79
|
|
77
80
|
# TODO do not ignore silently
|
@@ -110,12 +113,14 @@ class Autosuggest
|
|
110
113
|
concepts << name if values.include?(query)
|
111
114
|
end
|
112
115
|
|
116
|
+
tokens = tokenize(query)
|
117
|
+
|
113
118
|
# exclude misspellings that are not brands
|
114
|
-
misspelling = @words.any? && misspellings?(
|
119
|
+
misspelling = @words.any? && misspellings?(tokens)
|
115
120
|
|
116
|
-
profane = blocked?(
|
117
|
-
blocked = blocked?(
|
118
|
-
blacklisted = blocked?(
|
121
|
+
profane = blocked?(tokens, @profane_words)
|
122
|
+
blocked = blocked?(tokens, @blocked_words)
|
123
|
+
blacklisted = blocked?(tokens, @blacklisted_words)
|
119
124
|
|
120
125
|
notes = []
|
121
126
|
notes << "duplicate of #{duplicate}" if duplicate
|
@@ -140,6 +145,10 @@ class Autosuggest
|
|
140
145
|
result[:notes] = notes
|
141
146
|
result
|
142
147
|
end
|
148
|
+
if filter
|
149
|
+
results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
|
150
|
+
end
|
151
|
+
results
|
143
152
|
end
|
144
153
|
|
145
154
|
def pretty_suggestions
|
@@ -152,38 +161,41 @@ class Autosuggest
|
|
152
161
|
|
153
162
|
protected
|
154
163
|
|
155
|
-
def misspellings?(
|
156
|
-
|
157
|
-
|
158
|
-
|
164
|
+
def misspellings?(tokens)
|
165
|
+
pos = [0]
|
166
|
+
while i = pos.shift
|
167
|
+
return false if i == tokens.size
|
168
|
+
|
169
|
+
if @words.include?(tokens[i])
|
170
|
+
pos << i + 1
|
171
|
+
end
|
172
|
+
|
173
|
+
node = @concept_tree[tokens[i]]
|
174
|
+
j = i
|
175
|
+
while node
|
176
|
+
j += 1
|
177
|
+
pos << j if node[:eos]
|
178
|
+
break if j == tokens.size
|
179
|
+
node = node[tokens[j]]
|
159
180
|
end
|
160
|
-
end
|
161
|
-
true
|
162
|
-
end
|
163
181
|
|
164
|
-
|
165
|
-
recurse(tokenize(query)).each do |terms|
|
166
|
-
return true if terms.any? { |t| blocked_words.include?(t) }
|
182
|
+
pos.uniq!
|
167
183
|
end
|
168
|
-
|
184
|
+
true
|
169
185
|
end
|
170
186
|
|
171
|
-
def
|
172
|
-
|
173
|
-
[
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
result << v1 + v2
|
181
|
-
end
|
182
|
-
end
|
183
|
-
i += 1
|
187
|
+
def blocked?(tokens, blocked_words)
|
188
|
+
tokens.each_with_index do |token, i|
|
189
|
+
node = blocked_words[token]
|
190
|
+
j = i
|
191
|
+
while node
|
192
|
+
return true if node[:eos]
|
193
|
+
j += 1
|
194
|
+
break if j == tokens.size
|
195
|
+
node = node[tokens[j]]
|
184
196
|
end
|
185
|
-
result.uniq
|
186
197
|
end
|
198
|
+
false
|
187
199
|
end
|
188
200
|
|
189
201
|
def tokenize(str)
|
@@ -206,4 +218,15 @@ class Autosuggest
|
|
206
218
|
def normalize_query(query)
|
207
219
|
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
|
208
220
|
end
|
221
|
+
|
222
|
+
def add_nodes(var, words)
|
223
|
+
words.each do |word|
|
224
|
+
node = var
|
225
|
+
tokenize(word).each do |token|
|
226
|
+
node = (node[token] ||= {})
|
227
|
+
end
|
228
|
+
node[:eos] = true
|
229
|
+
end
|
230
|
+
var
|
231
|
+
end
|
209
232
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-stemmer
|
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
requirements: []
|
71
|
-
rubygems_version: 3.2.
|
71
|
+
rubygems_version: 3.2.22
|
72
72
|
signing_key:
|
73
73
|
specification_version: 4
|
74
74
|
summary: Generate autocomplete suggestions based on what your users search
|