autosuggest 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE.txt +1 -1
- data/README.md +108 -12
- data/lib/autosuggest/generator.rb +226 -0
- data/lib/autosuggest/version.rb +2 -2
- data/lib/autosuggest.rb +5 -220
- data/lib/generators/autosuggest/suggestions_generator.rb +33 -0
- data/lib/generators/autosuggest/templates/migration.rb.tt +11 -0
- data/lib/generators/autosuggest/templates/model.rb.tt +3 -0
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe418b5cfaa006d454a8e061ae08b63821de147677175054be5e28ad254399c6
|
4
|
+
data.tar.gz: 10c28b44de11f53fccd66a3e6f547a7f97282e23b529452a1db827b3d2dd0624
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 236b65f1939693fd076445ebddff62535d1d1197e44561ade442e2c381f2d3c4bd06572daacc29718dbfb087abc1cf3c001d0152f21c9198d059d8c1d933985c
|
7
|
+
data.tar.gz: 1ca979179a176b6a7eca3ee8b9593e6479680ea4706e8ad1c004d8c88efe0a1f1a82b779dee050cb1aa3c6af749323a87fc9f93c9c0c4d8eeb530a9e6a9f11cd
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.2.0 (2023-01-29)
|
2
|
+
|
3
|
+
- Added `language` option
|
4
|
+
- Changed `suggestions` method to filter by default
|
5
|
+
- Changed `filter: true` to only return query and score
|
6
|
+
- Removed `blacklist_words` method
|
7
|
+
- Dropped support for Ruby < 2.7
|
8
|
+
|
9
|
+
## 0.1.3 (2021-11-23)
|
10
|
+
|
11
|
+
- Added model generator
|
12
|
+
|
1
13
|
## 0.1.2 (2021-11-22)
|
2
14
|
|
3
15
|
- Added `filter` option to `suggestions` method
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,6 +4,8 @@ Generate autocomplete suggestions based on what your users search
|
|
4
4
|
|
5
5
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
|
6
6
|
|
7
|
+
Autosuggest 0.2 was recently released! See [how to upgrade](#upgrading)
|
8
|
+
|
7
9
|
[![Build Status](https://github.com/ankane/autosuggest/workflows/build/badge.svg?branch=master)](https://github.com/ankane/autosuggest/actions)
|
8
10
|
|
9
11
|
## Installation
|
@@ -11,7 +13,7 @@ Generate autocomplete suggestions based on what your users search
|
|
11
13
|
Add this line to your application’s Gemfile:
|
12
14
|
|
13
15
|
```ruby
|
14
|
-
gem 'autosuggest'
|
16
|
+
gem "autosuggest"
|
15
17
|
```
|
16
18
|
|
17
19
|
## Getting Started
|
@@ -38,14 +40,20 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
38
40
|
Then pass them to Autosuggest.
|
39
41
|
|
40
42
|
```ruby
|
41
|
-
autosuggest = Autosuggest.new(top_queries)
|
43
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
42
44
|
```
|
43
45
|
|
44
46
|
#### Filter duplicates
|
45
47
|
|
46
48
|
[Stemming](https://en.wikipedia.org/wiki/Stemming) is used to detect duplicates like `apple` and `apples`.
|
47
49
|
|
48
|
-
|
50
|
+
Specify the stemming language (defaults to `english`) with:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
autosuggest = Autosuggest::Generator.new(top_queries, language: "spanish")
|
54
|
+
```
|
55
|
+
|
56
|
+
The most popular query is preferred by default. To override this, use:
|
49
57
|
|
50
58
|
```ruby
|
51
59
|
autosuggest.prefer ["apples"]
|
@@ -85,17 +93,90 @@ There are two ways to build the corpus, which can be used together.
|
|
85
93
|
autosuggest.block_words ["boom"]
|
86
94
|
```
|
87
95
|
|
88
|
-
####
|
96
|
+
#### Generate suggestions
|
97
|
+
|
98
|
+
Generate suggestions with:
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
suggestions = autosuggest.suggestions
|
102
|
+
```
|
103
|
+
|
104
|
+
#### Save suggestions
|
105
|
+
|
106
|
+
Save suggestions in your database or another data store.
|
107
|
+
|
108
|
+
With Rails, you can generate a simple model with:
|
109
|
+
|
110
|
+
```sh
|
111
|
+
rails generate autosuggest:suggestions
|
112
|
+
rails db:migrate
|
113
|
+
```
|
114
|
+
|
115
|
+
And update suggestions with:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
now = Time.now
|
119
|
+
records = suggestions.map { |s| s.slice(:query, :score).merge(updated_at: now) }
|
120
|
+
Autosuggest::Suggestion.transaction do
|
121
|
+
Autosuggest::Suggestion.upsert_all(records, unique_by: :query)
|
122
|
+
Autosuggest::Suggestion.where("updated_at < ?", now).delete_all
|
123
|
+
end
|
124
|
+
```
|
125
|
+
|
126
|
+
Leave out `unique_by` for MySQL, and use [activerecord-import](https://github.com/zdennis/activerecord-import) for upserts with Rails < 6.
|
127
|
+
|
128
|
+
#### Show suggestions
|
129
|
+
|
130
|
+
Use a JavaScript autocomplete library like [typeahead.js](https://github.com/twitter/typeahead.js) to show suggestions in the UI.
|
131
|
+
|
132
|
+
If you only have a few thousand suggestions, it’s much faster to load them all at once instead of as a user types (eliminates network requests).
|
133
|
+
|
134
|
+
With Rails, you can load all suggestions with:
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
Autosuggest::Suggestion.order(score: :desc).pluck(:query)
|
138
|
+
```
|
139
|
+
|
140
|
+
And suggestions matching user input with:
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
input = params[:query]
|
144
|
+
Autosuggest::Suggestion
|
145
|
+
.order(score: :desc)
|
146
|
+
.where("query LIKE ?", "%#{Autosuggest::Suggestion.sanitize_sql_like(input.downcase)}%")
|
147
|
+
.pluck(:query)
|
148
|
+
```
|
149
|
+
|
150
|
+
You can also cache suggestions for performance.
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
Rails.cache.fetch("suggestions", expires_in: 5.minutes) do
|
154
|
+
Autosuggest::Suggestion.order(score: :desc).pluck(:query)
|
155
|
+
end
|
156
|
+
```
|
157
|
+
|
158
|
+
#### Additional considerations
|
89
159
|
|
90
|
-
|
160
|
+
You may want to have someone manually approve suggestions:
|
91
161
|
|
92
162
|
```ruby
|
93
|
-
|
163
|
+
Autosuggest::Suggestion.where(status: "approved")
|
94
164
|
```
|
95
165
|
|
96
|
-
|
166
|
+
Or filter suggestions without results:
|
97
167
|
|
98
|
-
|
168
|
+
```ruby
|
169
|
+
Autosuggest::Suggestion.find_each do |suggestion|
|
170
|
+
suggestion.results_count = Product.search(suggestion.query, load: false).count
|
171
|
+
suggestion.save! if suggestion.changed?
|
172
|
+
end
|
173
|
+
|
174
|
+
Autosuggest::Suggestion.where("results_count > 0")
|
175
|
+
```
|
176
|
+
|
177
|
+
You can add additional fields to your model/data store to accomplish this.
|
178
|
+
|
179
|
+
## Example
|
99
180
|
|
100
181
|
```ruby
|
101
182
|
top_queries = Searchjoy::Search.group(:normalized_query)
|
@@ -103,16 +184,31 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
103
184
|
product_names = Product.pluck(:name)
|
104
185
|
brand_names = Brand.pluck(:name)
|
105
186
|
|
106
|
-
autosuggest = Autosuggest.new(top_queries)
|
187
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
107
188
|
autosuggest.parse_words product_names
|
108
189
|
autosuggest.add_concept "brand", brand_names
|
109
190
|
autosuggest.prefer brand_names
|
110
191
|
autosuggest.not_duplicates [["straws", "straus"]]
|
111
192
|
autosuggest.block_words ["boom"]
|
112
193
|
|
113
|
-
|
114
|
-
|
115
|
-
|
194
|
+
suggestions = autosuggest.suggestions
|
195
|
+
|
196
|
+
now = Time.now
|
197
|
+
records = suggestions.map { |s| s.slice(:query, :score).merge(updated_at: now) }
|
198
|
+
Autosuggest::Suggestion.transaction do
|
199
|
+
Autosuggest::Suggestion.upsert_all(records, unique_by: :query)
|
200
|
+
Autosuggest::Suggestion.where("updated_at < ?", now).delete_all
|
201
|
+
end
|
202
|
+
```
|
203
|
+
|
204
|
+
## Upgrading
|
205
|
+
|
206
|
+
### 0.2.0
|
207
|
+
|
208
|
+
Suggestions are now filtered by default, and only the query and score are returned. To get all queries and fields, use:
|
209
|
+
|
210
|
+
```ruby
|
211
|
+
autosuggest.suggestions(filter: false)
|
116
212
|
```
|
117
213
|
|
118
214
|
## History
|
data/lib/autosuggest/generator.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
module Autosuggest
|
2
|
+
class Generator
|
3
|
+
def initialize(top_queries, language: "english")
|
4
|
+
@top_queries = top_queries
|
5
|
+
@concepts = {}
|
6
|
+
@words = Set.new
|
7
|
+
@non_duplicates = Set.new
|
8
|
+
@blocked_words = {}
|
9
|
+
@preferred_queries = {}
|
10
|
+
@profane_words = {}
|
11
|
+
@concept_tree = {}
|
12
|
+
begin
|
13
|
+
@stemmer = Lingua::Stemmer.new(language: language)
|
14
|
+
rescue Lingua::StemmerError
|
15
|
+
raise ArgumentError, "Language not available"
|
16
|
+
end
|
17
|
+
# TODO take language into account for profanity
|
18
|
+
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
19
|
+
end
|
20
|
+
|
21
|
+
def add_concept(name, values)
|
22
|
+
values = values.compact.uniq
|
23
|
+
add_nodes(@concept_tree, values)
|
24
|
+
@concepts[name] = Set.new(values.map(&:downcase))
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_words(phrases, options = {})
|
28
|
+
min = options[:min] || 1
|
29
|
+
|
30
|
+
word_counts = Hash.new(0)
|
31
|
+
phrases.each do |phrase|
|
32
|
+
words = tokenize(phrase)
|
33
|
+
words.each do |word|
|
34
|
+
word_counts[word] += 1
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
word_counts.select { |_, c| c >= min }.each do |word, _|
|
39
|
+
@words << word
|
40
|
+
end
|
41
|
+
|
42
|
+
word_counts
|
43
|
+
end
|
44
|
+
|
45
|
+
def not_duplicates(pairs)
|
46
|
+
pairs.each do |pair|
|
47
|
+
@non_duplicates << pair.map(&:downcase).sort
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def block_words(words)
|
52
|
+
add_nodes(@blocked_words, words)
|
53
|
+
words
|
54
|
+
end
|
55
|
+
|
56
|
+
def prefer(queries)
|
57
|
+
queries.each do |query|
|
58
|
+
@preferred_queries[normalize_query(query)] ||= query
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def suggestions(filter: true)
|
63
|
+
stemmed_queries = {}
|
64
|
+
added_queries = Set.new
|
65
|
+
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
66
|
+
query = query.to_s
|
67
|
+
|
68
|
+
# TODO do not ignore silently
|
69
|
+
next if query.length < 2
|
70
|
+
|
71
|
+
stemmed_query = normalize_query(query)
|
72
|
+
|
73
|
+
# get preferred term
|
74
|
+
preferred_query = @preferred_queries[stemmed_query]
|
75
|
+
if preferred_query && preferred_query != query
|
76
|
+
original_query, query = query, preferred_query
|
77
|
+
end
|
78
|
+
|
79
|
+
# exclude duplicates
|
80
|
+
duplicate = stemmed_queries[stemmed_query]
|
81
|
+
stemmed_queries[stemmed_query] ||= query
|
82
|
+
|
83
|
+
# also detect possibly misspelled duplicates
|
84
|
+
# TODO use top query as duplicate
|
85
|
+
if !duplicate && query.length > 4
|
86
|
+
edits(query).each do |edited_query|
|
87
|
+
if added_queries.include?(edited_query)
|
88
|
+
duplicate = edited_query
|
89
|
+
break
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
94
|
+
duplicate = nil
|
95
|
+
end
|
96
|
+
added_queries << query unless duplicate
|
97
|
+
|
98
|
+
# find concepts
|
99
|
+
concepts = []
|
100
|
+
@concepts.each do |name, values|
|
101
|
+
concepts << name if values.include?(query)
|
102
|
+
end
|
103
|
+
|
104
|
+
tokens = tokenize(query)
|
105
|
+
|
106
|
+
# exclude misspellings that are not brands
|
107
|
+
misspelling = @words.any? && misspellings?(tokens)
|
108
|
+
|
109
|
+
profane = blocked?(tokens, @profane_words)
|
110
|
+
blocked = blocked?(tokens, @blocked_words)
|
111
|
+
|
112
|
+
notes = []
|
113
|
+
notes << "duplicate of #{duplicate}" if duplicate
|
114
|
+
notes.concat(concepts)
|
115
|
+
notes << "misspelling" if misspelling
|
116
|
+
notes << "profane" if profane
|
117
|
+
notes << "blocked" if blocked
|
118
|
+
notes << "originally #{original_query}" if original_query
|
119
|
+
|
120
|
+
{
|
121
|
+
query: query,
|
122
|
+
original_query: original_query,
|
123
|
+
score: count,
|
124
|
+
duplicate: duplicate,
|
125
|
+
concepts: concepts,
|
126
|
+
misspelling: misspelling,
|
127
|
+
profane: profane,
|
128
|
+
blocked: blocked,
|
129
|
+
notes: notes
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
results.compact!
|
134
|
+
|
135
|
+
if filter
|
136
|
+
results.filter_map do |s|
|
137
|
+
unless s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked]
|
138
|
+
s.slice(:query, :score)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
else
|
142
|
+
results
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def table
|
147
|
+
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
148
|
+
suggestions(filter: false).each do |suggestion|
|
149
|
+
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
150
|
+
end
|
151
|
+
str
|
152
|
+
end
|
153
|
+
alias_method :pretty_suggestions, :table
|
154
|
+
|
155
|
+
protected
|
156
|
+
|
157
|
+
def misspellings?(tokens)
|
158
|
+
pos = [0]
|
159
|
+
while i = pos.shift
|
160
|
+
return false if i == tokens.size
|
161
|
+
|
162
|
+
if @words.include?(tokens[i])
|
163
|
+
pos << i + 1
|
164
|
+
end
|
165
|
+
|
166
|
+
node = @concept_tree[tokens[i]]
|
167
|
+
j = i
|
168
|
+
while node
|
169
|
+
j += 1
|
170
|
+
pos << j if node[:eos]
|
171
|
+
break if j == tokens.size
|
172
|
+
node = node[tokens[j]]
|
173
|
+
end
|
174
|
+
|
175
|
+
pos.uniq!
|
176
|
+
end
|
177
|
+
true
|
178
|
+
end
|
179
|
+
|
180
|
+
def blocked?(tokens, blocked_words)
|
181
|
+
tokens.each_with_index do |token, i|
|
182
|
+
node = blocked_words[token]
|
183
|
+
j = i
|
184
|
+
while node
|
185
|
+
return true if node[:eos]
|
186
|
+
j += 1
|
187
|
+
break if j == tokens.size
|
188
|
+
node = node[tokens[j]]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
false
|
192
|
+
end
|
193
|
+
|
194
|
+
def tokenize(str)
|
195
|
+
str.to_s.downcase.split(" ")
|
196
|
+
end
|
197
|
+
|
198
|
+
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
199
|
+
LETTERS = ("a".."z").to_a.join + "'"
|
200
|
+
def edits(word)
|
201
|
+
n = word.length
|
202
|
+
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
203
|
+
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
204
|
+
alteration = []
|
205
|
+
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
206
|
+
insertion = []
|
207
|
+
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
208
|
+
deletion + transposition + alteration + insertion
|
209
|
+
end
|
210
|
+
|
211
|
+
def normalize_query(query)
|
212
|
+
tokenize(query.to_s.gsub("&", "and")).map { |q| @stemmer.stem(q) }.sort.join
|
213
|
+
end
|
214
|
+
|
215
|
+
def add_nodes(var, words)
|
216
|
+
words.each do |word|
|
217
|
+
node = var
|
218
|
+
tokenize(word).each do |token|
|
219
|
+
node = (node[token] ||= {})
|
220
|
+
end
|
221
|
+
node[:eos] = true
|
222
|
+
end
|
223
|
+
var
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
data/lib/autosuggest/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
class Autosuggest
|
2
|
-
VERSION = "0.1.2"
|
1
|
+
module Autosuggest
|
2
|
+
VERSION = "0.2.0"
|
3
3
|
end
|
data/lib/autosuggest.rb
CHANGED
@@ -7,226 +7,11 @@ require "lingua/stemmer"
|
|
7
7
|
require "obscenity"
|
8
8
|
|
9
9
|
# modules
|
10
|
-
|
10
|
+
require_relative "autosuggest/generator"
|
11
|
+
require_relative "autosuggest/version"
|
11
12
|
|
12
|
-
|
13
|
-
def initialize(top_queries)
|
14
|
-
|
15
|
-
@concepts = {}
|
16
|
-
@words = Set.new
|
17
|
-
@non_duplicates = Set.new
|
18
|
-
@blocked_words = {}
|
19
|
-
@blacklisted_words = {}
|
20
|
-
@preferred_queries = {}
|
21
|
-
@profane_words = {}
|
22
|
-
@concept_tree = {}
|
23
|
-
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
24
|
-
end
|
25
|
-
|
26
|
-
def add_concept(name, values)
|
27
|
-
values = values.compact.uniq
|
28
|
-
add_nodes(@concept_tree, values)
|
29
|
-
@concepts[name] = Set.new(values.map(&:downcase))
|
30
|
-
end
|
31
|
-
|
32
|
-
def parse_words(phrases, options = {})
|
33
|
-
min = options[:min] || 1
|
34
|
-
|
35
|
-
word_counts = Hash.new(0)
|
36
|
-
phrases.each do |phrase|
|
37
|
-
words = tokenize(phrase)
|
38
|
-
words.each do |word|
|
39
|
-
word_counts[word] += 1
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
word_counts.select { |_, c| c >= min }.each do |word, _|
|
44
|
-
@words << word
|
45
|
-
end
|
46
|
-
|
47
|
-
word_counts
|
48
|
-
end
|
49
|
-
|
50
|
-
def not_duplicates(pairs)
|
51
|
-
pairs.each do |pair|
|
52
|
-
@non_duplicates << pair.map(&:downcase).sort
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def block_words(words)
|
57
|
-
add_nodes(@blocked_words, words)
|
58
|
-
words
|
59
|
-
end
|
60
|
-
|
61
|
-
def blacklist_words(words)
|
62
|
-
warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
|
63
|
-
add_nodes(@blacklisted_words, words)
|
64
|
-
words
|
65
|
-
end
|
66
|
-
|
67
|
-
def prefer(queries)
|
68
|
-
queries.each do |query|
|
69
|
-
@preferred_queries[normalize_query(query)] ||= query
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
|
74
|
-
def suggestions(filter: false)
|
75
|
-
stemmed_queries = {}
|
76
|
-
added_queries = Set.new
|
77
|
-
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
78
|
-
query = query.to_s
|
79
|
-
|
80
|
-
# TODO do not ignore silently
|
81
|
-
next if query.length < 2
|
82
|
-
|
83
|
-
stemmed_query = normalize_query(query)
|
84
|
-
|
85
|
-
# get preferred term
|
86
|
-
preferred_query = @preferred_queries[stemmed_query]
|
87
|
-
if preferred_query && preferred_query != query
|
88
|
-
original_query, query = query, preferred_query
|
89
|
-
end
|
90
|
-
|
91
|
-
# exclude duplicates
|
92
|
-
duplicate = stemmed_queries[stemmed_query]
|
93
|
-
stemmed_queries[stemmed_query] ||= query
|
94
|
-
|
95
|
-
# also detect possibly misspelled duplicates
|
96
|
-
# TODO use top query as duplicate
|
97
|
-
if !duplicate && query.length > 4
|
98
|
-
edits(query).each do |edited_query|
|
99
|
-
if added_queries.include?(edited_query)
|
100
|
-
duplicate = edited_query
|
101
|
-
break
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
106
|
-
duplicate = nil
|
107
|
-
end
|
108
|
-
added_queries << query unless duplicate
|
109
|
-
|
110
|
-
# find concepts
|
111
|
-
concepts = []
|
112
|
-
@concepts.each do |name, values|
|
113
|
-
concepts << name if values.include?(query)
|
114
|
-
end
|
115
|
-
|
116
|
-
tokens = tokenize(query)
|
117
|
-
|
118
|
-
# exclude misspellings that are not brands
|
119
|
-
misspelling = @words.any? && misspellings?(tokens)
|
120
|
-
|
121
|
-
profane = blocked?(tokens, @profane_words)
|
122
|
-
blocked = blocked?(tokens, @blocked_words)
|
123
|
-
blacklisted = blocked?(tokens, @blacklisted_words)
|
124
|
-
|
125
|
-
notes = []
|
126
|
-
notes << "duplicate of #{duplicate}" if duplicate
|
127
|
-
notes.concat(concepts)
|
128
|
-
notes << "misspelling" if misspelling
|
129
|
-
notes << "profane" if profane
|
130
|
-
notes << "blocked" if blocked
|
131
|
-
notes << "blacklisted" if blacklisted
|
132
|
-
notes << "originally #{original_query}" if original_query
|
133
|
-
|
134
|
-
result = {
|
135
|
-
query: query,
|
136
|
-
original_query: original_query,
|
137
|
-
score: count,
|
138
|
-
duplicate: duplicate,
|
139
|
-
concepts: concepts,
|
140
|
-
misspelling: misspelling,
|
141
|
-
profane: profane,
|
142
|
-
blocked: blocked
|
143
|
-
}
|
144
|
-
result[:blacklisted] = blacklisted if @blacklisted_words.any?
|
145
|
-
result[:notes] = notes
|
146
|
-
result
|
147
|
-
end
|
148
|
-
if filter
|
149
|
-
results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
|
150
|
-
end
|
151
|
-
results
|
152
|
-
end
|
153
|
-
|
154
|
-
def pretty_suggestions
|
155
|
-
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
156
|
-
suggestions.each do |suggestion|
|
157
|
-
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
158
|
-
end
|
159
|
-
str
|
160
|
-
end
|
161
|
-
|
162
|
-
protected
|
163
|
-
|
164
|
-
def misspellings?(tokens)
|
165
|
-
pos = [0]
|
166
|
-
while i = pos.shift
|
167
|
-
return false if i == tokens.size
|
168
|
-
|
169
|
-
if @words.include?(tokens[i])
|
170
|
-
pos << i + 1
|
171
|
-
end
|
172
|
-
|
173
|
-
node = @concept_tree[tokens[i]]
|
174
|
-
j = i
|
175
|
-
while node
|
176
|
-
j += 1
|
177
|
-
pos << j if node[:eos]
|
178
|
-
break if j == tokens.size
|
179
|
-
node = node[tokens[j]]
|
180
|
-
end
|
181
|
-
|
182
|
-
pos.uniq!
|
183
|
-
end
|
184
|
-
true
|
185
|
-
end
|
186
|
-
|
187
|
-
def blocked?(tokens, blocked_words)
|
188
|
-
tokens.each_with_index do |token, i|
|
189
|
-
node = blocked_words[token]
|
190
|
-
j = i
|
191
|
-
while node
|
192
|
-
return true if node[:eos]
|
193
|
-
j += 1
|
194
|
-
break if j == tokens.size
|
195
|
-
node = node[tokens[j]]
|
196
|
-
end
|
197
|
-
end
|
198
|
-
false
|
199
|
-
end
|
200
|
-
|
201
|
-
def tokenize(str)
|
202
|
-
str.to_s.downcase.split(" ")
|
203
|
-
end
|
204
|
-
|
205
|
-
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
206
|
-
LETTERS = ("a".."z").to_a.join + "'"
|
207
|
-
def edits(word)
|
208
|
-
n = word.length
|
209
|
-
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
210
|
-
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
211
|
-
alteration = []
|
212
|
-
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
213
|
-
insertion = []
|
214
|
-
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
215
|
-
deletion + transposition + alteration + insertion
|
216
|
-
end
|
217
|
-
|
218
|
-
def normalize_query(query)
|
219
|
-
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
|
220
|
-
end
|
221
|
-
|
222
|
-
def add_nodes(var, words)
|
223
|
-
words.each do |word|
|
224
|
-
node = var
|
225
|
-
tokenize(word).each do |token|
|
226
|
-
node = (node[token] ||= {})
|
227
|
-
end
|
228
|
-
node[:eos] = true
|
229
|
-
end
|
230
|
-
var
|
13
|
+
module Autosuggest
|
14
|
+
def self.new(*args, **options)
|
15
|
+
Generator.new(*args, **options)
|
231
16
|
end
|
232
17
|
end
|
data/lib/generators/autosuggest/suggestions_generator.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require "rails/generators/active_record"
|
2
|
+
|
3
|
+
module Autosuggest
|
4
|
+
module Generators
|
5
|
+
class SuggestionsGenerator < Rails::Generators::Base
|
6
|
+
include ActiveRecord::Generators::Migration
|
7
|
+
source_root File.join(__dir__, "templates")
|
8
|
+
|
9
|
+
def copy_templates
|
10
|
+
template "model.rb", "app/models/autosuggest/suggestion.rb"
|
11
|
+
migration_template "migration.rb", "db/migrate/create_autosuggest_suggestions.rb", migration_version: migration_version
|
12
|
+
end
|
13
|
+
|
14
|
+
def migration_version
|
15
|
+
"[#{ActiveRecord::VERSION::MAJOR}.#{ActiveRecord::VERSION::MINOR}]"
|
16
|
+
end
|
17
|
+
|
18
|
+
def mysql?
|
19
|
+
adapter =~ /mysql/i
|
20
|
+
end
|
21
|
+
|
22
|
+
# use connection_config instead of connection.adapter
|
23
|
+
# so database connection isn't needed
|
24
|
+
def adapter
|
25
|
+
if ActiveRecord::VERSION::STRING.to_f >= 6.1
|
26
|
+
ActiveRecord::Base.connection_db_config.adapter.to_s
|
27
|
+
else
|
28
|
+
ActiveRecord::Base.connection_config[:adapter].to_s
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/generators/autosuggest/templates/migration.rb.tt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
|
2
|
+
def change
|
3
|
+
create_table :autosuggest_suggestions do |t|
|
4
|
+
t.string :query
|
5
|
+
t.float :score
|
6
|
+
t.datetime :updated_at<%= mysql? ? ", precision: 6" : "" %>
|
7
|
+
end
|
8
|
+
|
9
|
+
add_index :autosuggest_suggestions, :query, unique: true
|
10
|
+
end
|
11
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-stemmer
|
@@ -48,7 +48,11 @@ files:
|
|
48
48
|
- LICENSE.txt
|
49
49
|
- README.md
|
50
50
|
- lib/autosuggest.rb
|
51
|
+
- lib/autosuggest/generator.rb
|
51
52
|
- lib/autosuggest/version.rb
|
53
|
+
- lib/generators/autosuggest/suggestions_generator.rb
|
54
|
+
- lib/generators/autosuggest/templates/migration.rb.tt
|
55
|
+
- lib/generators/autosuggest/templates/model.rb.tt
|
52
56
|
homepage: https://github.com/ankane/autosuggest
|
53
57
|
licenses:
|
54
58
|
- MIT
|
@@ -61,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
65
|
requirements:
|
62
66
|
- - ">="
|
63
67
|
- !ruby/object:Gem::Version
|
64
|
-
version: '2.
|
68
|
+
version: '2.7'
|
65
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
69
73
|
version: '0'
|
70
74
|
requirements: []
|
71
|
-
rubygems_version: 3.
|
75
|
+
rubygems_version: 3.4.1
|
72
76
|
signing_key:
|
73
77
|
specification_version: 4
|
74
78
|
summary: Generate autocomplete suggestions based on what your users search
|