autosuggest 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +24 -11
- data/lib/autosuggest/version.rb +1 -1
- data/lib/autosuggest.rb +64 -41
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '083927cb6e763a26bad61351189dd1a1c1e66d31da0a4844635ec5440f4620d0'
|
4
|
+
data.tar.gz: 94245fc7eaeb98f54669b561311d9dee4ac43788f88cef3768a33891c8b530ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f20b852dfcb4cf4249b4c4b2a72e7f58c2c2b642a11b14b33e632ee934736c721d70bcd87f7b6e20659ba5e82f05ce679b69d0e5e72f3b4c64d8187078d2b8c3
|
7
|
+
data.tar.gz: 6e99a02d77dc1ea2cf06e0903d6adebbbe73579496de092efff471369226dfb917df9bd31cee7697bc2d0fcd9392cbe6393186a6c4a747b49a6b03cee348108a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -14,16 +14,30 @@ Add this line to your application’s Gemfile:
|
|
14
14
|
gem 'autosuggest'
|
15
15
|
```
|
16
16
|
|
17
|
-
##
|
17
|
+
## Getting Started
|
18
18
|
|
19
|
-
####
|
19
|
+
#### Prepare your data
|
20
|
+
|
21
|
+
Start with a hash of queries and their popularity, like the number of users who have searched it.
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
top_queries = {
|
25
|
+
"bananas" => 353,
|
26
|
+
"apples" => 213,
|
27
|
+
"oranges" => 140
|
28
|
+
}
|
29
|
+
```
|
30
|
+
|
31
|
+
With [Searchjoy](https://github.com/ankane/searchjoy), you can do:
|
20
32
|
|
21
33
|
```ruby
|
22
|
-
top_queries = Search.group(
|
23
|
-
|
24
|
-
|
25
|
-
# {"bananas" => 353, "apples" => 213, ...
|
34
|
+
top_queries = Searchjoy::Search.group(:normalized_query)
|
35
|
+
.having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
|
36
|
+
```
|
26
37
|
|
38
|
+
Then pass them to Autosuggest.
|
39
|
+
|
40
|
+
```ruby
|
27
41
|
autosuggest = Autosuggest.new(top_queries)
|
28
42
|
```
|
29
43
|
|
@@ -76,7 +90,7 @@ autosuggest.block_words ["boom"]
|
|
76
90
|
Get suggestions with:
|
77
91
|
|
78
92
|
```ruby
|
79
|
-
autosuggest.suggestions
|
93
|
+
autosuggest.suggestions(filter: true)
|
80
94
|
```
|
81
95
|
|
82
96
|
Filter queries without results and you’re set. We also prefer to have someone manually approve them by hand.
|
@@ -84,9 +98,8 @@ Filter queries without results and you’re set. We also prefer to have someone
|
|
84
98
|
## Full Example
|
85
99
|
|
86
100
|
```ruby
|
87
|
-
top_queries = Search.group(
|
88
|
-
|
89
|
-
.count("DISTINCT user_id")
|
101
|
+
top_queries = Searchjoy::Search.group(:normalized_query)
|
102
|
+
.having("COUNT(DISTINCT user_id) >= 5").distinct.count(:user_id)
|
90
103
|
product_names = Product.pluck(:name)
|
91
104
|
brand_names = Brand.pluck(:name)
|
92
105
|
|
@@ -99,7 +112,7 @@ autosuggest.block_words ["boom"]
|
|
99
112
|
|
100
113
|
puts autosuggest.pretty_suggestions
|
101
114
|
# or
|
102
|
-
suggestions = autosuggest.suggestions
|
115
|
+
suggestions = autosuggest.suggestions(filter: true)
|
103
116
|
```
|
104
117
|
|
105
118
|
## History
|
data/lib/autosuggest/version.rb
CHANGED
data/lib/autosuggest.rb
CHANGED
@@ -15,14 +15,18 @@ class Autosuggest
|
|
15
15
|
@concepts = {}
|
16
16
|
@words = Set.new
|
17
17
|
@non_duplicates = Set.new
|
18
|
-
@blocked_words =
|
19
|
-
@blacklisted_words =
|
18
|
+
@blocked_words = {}
|
19
|
+
@blacklisted_words = {}
|
20
20
|
@preferred_queries = {}
|
21
|
-
@profane_words =
|
21
|
+
@profane_words = {}
|
22
|
+
@concept_tree = {}
|
23
|
+
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
22
24
|
end
|
23
25
|
|
24
26
|
def add_concept(name, values)
|
25
|
-
|
27
|
+
values = values.compact.uniq
|
28
|
+
add_nodes(@concept_tree, values)
|
29
|
+
@concepts[name] = Set.new(values.map(&:downcase))
|
26
30
|
end
|
27
31
|
|
28
32
|
def parse_words(phrases, options = {})
|
@@ -50,16 +54,14 @@ class Autosuggest
|
|
50
54
|
end
|
51
55
|
|
52
56
|
def block_words(words)
|
53
|
-
words
|
54
|
-
|
55
|
-
end
|
57
|
+
add_nodes(@blocked_words, words)
|
58
|
+
words
|
56
59
|
end
|
57
60
|
|
58
61
|
def blacklist_words(words)
|
59
62
|
warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
|
60
|
-
words
|
61
|
-
|
62
|
-
end
|
63
|
+
add_nodes(@blacklisted_words, words)
|
64
|
+
words
|
63
65
|
end
|
64
66
|
|
65
67
|
def prefer(queries)
|
@@ -68,10 +70,11 @@ class Autosuggest
|
|
68
70
|
end
|
69
71
|
end
|
70
72
|
|
71
|
-
|
73
|
+
# TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
|
74
|
+
def suggestions(filter: false)
|
72
75
|
stemmed_queries = {}
|
73
76
|
added_queries = Set.new
|
74
|
-
@top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
77
|
+
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
75
78
|
query = query.to_s
|
76
79
|
|
77
80
|
# TODO do not ignore silently
|
@@ -110,12 +113,14 @@ class Autosuggest
|
|
110
113
|
concepts << name if values.include?(query)
|
111
114
|
end
|
112
115
|
|
116
|
+
tokens = tokenize(query)
|
117
|
+
|
113
118
|
# exclude misspellings that are not brands
|
114
|
-
misspelling = @words.any? && misspellings?(
|
119
|
+
misspelling = @words.any? && misspellings?(tokens)
|
115
120
|
|
116
|
-
profane = blocked?(
|
117
|
-
blocked = blocked?(
|
118
|
-
blacklisted = blocked?(
|
121
|
+
profane = blocked?(tokens, @profane_words)
|
122
|
+
blocked = blocked?(tokens, @blocked_words)
|
123
|
+
blacklisted = blocked?(tokens, @blacklisted_words)
|
119
124
|
|
120
125
|
notes = []
|
121
126
|
notes << "duplicate of #{duplicate}" if duplicate
|
@@ -140,6 +145,10 @@ class Autosuggest
|
|
140
145
|
result[:notes] = notes
|
141
146
|
result
|
142
147
|
end
|
148
|
+
if filter
|
149
|
+
results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
|
150
|
+
end
|
151
|
+
results
|
143
152
|
end
|
144
153
|
|
145
154
|
def pretty_suggestions
|
@@ -152,38 +161,41 @@ class Autosuggest
|
|
152
161
|
|
153
162
|
protected
|
154
163
|
|
155
|
-
def misspellings?(
|
156
|
-
|
157
|
-
|
158
|
-
|
164
|
+
def misspellings?(tokens)
|
165
|
+
pos = [0]
|
166
|
+
while i = pos.shift
|
167
|
+
return false if i == tokens.size
|
168
|
+
|
169
|
+
if @words.include?(tokens[i])
|
170
|
+
pos << i + 1
|
171
|
+
end
|
172
|
+
|
173
|
+
node = @concept_tree[tokens[i]]
|
174
|
+
j = i
|
175
|
+
while node
|
176
|
+
j += 1
|
177
|
+
pos << j if node[:eos]
|
178
|
+
break if j == tokens.size
|
179
|
+
node = node[tokens[j]]
|
159
180
|
end
|
160
|
-
end
|
161
|
-
true
|
162
|
-
end
|
163
181
|
|
164
|
-
|
165
|
-
recurse(tokenize(query)).each do |terms|
|
166
|
-
return true if terms.any? { |t| blocked_words.include?(t) }
|
182
|
+
pos.uniq!
|
167
183
|
end
|
168
|
-
|
184
|
+
true
|
169
185
|
end
|
170
186
|
|
171
|
-
def
|
172
|
-
|
173
|
-
[
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
result << v1 + v2
|
181
|
-
end
|
182
|
-
end
|
183
|
-
i += 1
|
187
|
+
def blocked?(tokens, blocked_words)
|
188
|
+
tokens.each_with_index do |token, i|
|
189
|
+
node = blocked_words[token]
|
190
|
+
j = i
|
191
|
+
while node
|
192
|
+
return true if node[:eos]
|
193
|
+
j += 1
|
194
|
+
break if j == tokens.size
|
195
|
+
node = node[tokens[j]]
|
184
196
|
end
|
185
|
-
result.uniq
|
186
197
|
end
|
198
|
+
false
|
187
199
|
end
|
188
200
|
|
189
201
|
def tokenize(str)
|
@@ -206,4 +218,15 @@ class Autosuggest
|
|
206
218
|
def normalize_query(query)
|
207
219
|
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
|
208
220
|
end
|
221
|
+
|
222
|
+
def add_nodes(var, words)
|
223
|
+
words.each do |word|
|
224
|
+
node = var
|
225
|
+
tokenize(word).each do |token|
|
226
|
+
node = (node[token] ||= {})
|
227
|
+
end
|
228
|
+
node[:eos] = true
|
229
|
+
end
|
230
|
+
var
|
231
|
+
end
|
209
232
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-stemmer
|
@@ -68,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
requirements: []
|
71
|
-
rubygems_version: 3.2.
|
71
|
+
rubygems_version: 3.2.22
|
72
72
|
signing_key:
|
73
73
|
specification_version: 4
|
74
74
|
summary: Generate autocomplete suggestions based on what your users search
|