autosuggest 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/LICENSE.txt +1 -1
- data/README.md +26 -10
- data/lib/autosuggest/generator.rb +226 -0
- data/lib/autosuggest/version.rb +2 -2
- data/lib/autosuggest.rb +6 -221
- data/lib/generators/autosuggest/suggestions_generator.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0cd2e072b09ebaecd6858e27f87b198e0909c8d70c0f36250c6eefc022600e59
|
4
|
+
data.tar.gz: '02434900d6a69b6c18ad2d5353c7359681f442fbe19a28f1aacfdac3c60dfe42'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22ed0ecf00dae3f077cbd2be2cd024e61f6ceb6cdefca57af9a90d1769499e51c1df301ee37bfbd2344ef69afd6b9da3582aa21714b1b3d5061749eb0d9b190d
|
7
|
+
data.tar.gz: d8b757d31b2c2b9429afc29758f6d70631e9402593dbc127728c47f5553edb2ec04b9007dc79257820a500f0105f93d72254decd1cde3b2123990752f255600e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
## 0.3.0 (2024-05-22)
|
2
|
+
|
3
|
+
- Switched to Mittens for stemming
|
4
|
+
- Dropped support for Ruby < 3.1
|
5
|
+
|
6
|
+
## 0.2.0 (2023-01-29)
|
7
|
+
|
8
|
+
- Added `language` option
|
9
|
+
- Changed `suggestions` method to filter by default
|
10
|
+
- Changed `filter: true` to only return query and score
|
11
|
+
- Removed `blacklist_words` method
|
12
|
+
- Dropped support for Ruby < 2.7
|
13
|
+
|
1
14
|
## 0.1.3 (2021-11-23)
|
2
15
|
|
3
16
|
- Added model generator
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,14 +4,14 @@ Generate autocomplete suggestions based on what your users search
|
|
4
4
|
|
5
5
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
|
6
6
|
|
7
|
-
[](https://github.com/ankane/autosuggest/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
11
|
Add this line to your application’s Gemfile:
|
12
12
|
|
13
13
|
```ruby
|
14
|
-
gem
|
14
|
+
gem "autosuggest"
|
15
15
|
```
|
16
16
|
|
17
17
|
## Getting Started
|
@@ -38,14 +38,20 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
38
38
|
Then pass them to Autosuggest.
|
39
39
|
|
40
40
|
```ruby
|
41
|
-
autosuggest = Autosuggest.new(top_queries)
|
41
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
42
42
|
```
|
43
43
|
|
44
44
|
#### Filter duplicates
|
45
45
|
|
46
46
|
[Stemming](https://en.wikipedia.org/wiki/Stemming) is used to detect duplicates like `apple` and `apples`.
|
47
47
|
|
48
|
-
|
48
|
+
Specify the stemming language (defaults to `english`) with:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
autosuggest = Autosuggest::Generator.new(top_queries, language: "spanish")
|
52
|
+
```
|
53
|
+
|
54
|
+
The most popular query is preferred by default. To override this, use:
|
49
55
|
|
50
56
|
```ruby
|
51
57
|
autosuggest.prefer ["apples"]
|
@@ -90,7 +96,7 @@ autosuggest.block_words ["boom"]
|
|
90
96
|
Generate suggestions with:
|
91
97
|
|
92
98
|
```ruby
|
93
|
-
suggestions = autosuggest.suggestions
|
99
|
+
suggestions = autosuggest.suggestions
|
94
100
|
```
|
95
101
|
|
96
102
|
#### Save suggestions
|
@@ -152,18 +158,18 @@ end
|
|
152
158
|
You may want to have someone manually approve suggestions:
|
153
159
|
|
154
160
|
```ruby
|
155
|
-
Autosuggest::Suggestion.where(
|
161
|
+
Autosuggest::Suggestion.where(status: "approved")
|
156
162
|
```
|
157
163
|
|
158
164
|
Or filter suggestions without results:
|
159
165
|
|
160
166
|
```ruby
|
161
167
|
Autosuggest::Suggestion.find_each do |suggestion|
|
162
|
-
suggestion.
|
168
|
+
suggestion.results_count = Product.search(suggestion.query, load: false).count
|
163
169
|
suggestion.save! if suggestion.changed?
|
164
170
|
end
|
165
171
|
|
166
|
-
Autosuggest::Suggestion.where(
|
172
|
+
Autosuggest::Suggestion.where("results_count > 0")
|
167
173
|
```
|
168
174
|
|
169
175
|
You can add additional fields to your model/data store to accomplish this.
|
@@ -176,14 +182,14 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
176
182
|
product_names = Product.pluck(:name)
|
177
183
|
brand_names = Brand.pluck(:name)
|
178
184
|
|
179
|
-
autosuggest = Autosuggest.new(top_queries)
|
185
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
180
186
|
autosuggest.parse_words product_names
|
181
187
|
autosuggest.add_concept "brand", brand_names
|
182
188
|
autosuggest.prefer brand_names
|
183
189
|
autosuggest.not_duplicates [["straws", "straus"]]
|
184
190
|
autosuggest.block_words ["boom"]
|
185
191
|
|
186
|
-
suggestions = autosuggest.suggestions
|
192
|
+
suggestions = autosuggest.suggestions
|
187
193
|
|
188
194
|
now = Time.now
|
189
195
|
records = suggestions.map { |s| s.slice(:query, :score).merge(updated_at: now) }
|
@@ -193,6 +199,16 @@ Autosuggest::Suggestion.transaction do
|
|
193
199
|
end
|
194
200
|
```
|
195
201
|
|
202
|
+
## Upgrading
|
203
|
+
|
204
|
+
### 0.2.0
|
205
|
+
|
206
|
+
Suggestions are now filtered by default, and only the query and score are returned. Get all queries and fields with:
|
207
|
+
|
208
|
+
```ruby
|
209
|
+
autosuggest.suggestions(filter: false)
|
210
|
+
```
|
211
|
+
|
196
212
|
## History
|
197
213
|
|
198
214
|
View the [changelog](https://github.com/ankane/autosuggest/blob/master/CHANGELOG.md)
|
@@ -0,0 +1,226 @@
|
|
1
|
+
module Autosuggest
|
2
|
+
class Generator
|
3
|
+
def initialize(top_queries, language: "english")
|
4
|
+
@top_queries = top_queries
|
5
|
+
@concepts = {}
|
6
|
+
@words = Set.new
|
7
|
+
@non_duplicates = Set.new
|
8
|
+
@blocked_words = {}
|
9
|
+
@preferred_queries = {}
|
10
|
+
@profane_words = {}
|
11
|
+
@concept_tree = {}
|
12
|
+
begin
|
13
|
+
@stemmer = Mittens::Stemmer.new(language: language)
|
14
|
+
rescue ArgumentError
|
15
|
+
raise ArgumentError, "Language not available"
|
16
|
+
end
|
17
|
+
# TODO take language into account for profanity
|
18
|
+
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
19
|
+
end
|
20
|
+
|
21
|
+
def add_concept(name, values)
|
22
|
+
values = values.compact.uniq
|
23
|
+
add_nodes(@concept_tree, values)
|
24
|
+
@concepts[name] = Set.new(values.map(&:downcase))
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_words(phrases, options = {})
|
28
|
+
min = options[:min] || 1
|
29
|
+
|
30
|
+
word_counts = Hash.new(0)
|
31
|
+
phrases.each do |phrase|
|
32
|
+
words = tokenize(phrase)
|
33
|
+
words.each do |word|
|
34
|
+
word_counts[word] += 1
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
word_counts.select { |_, c| c >= min }.each do |word, _|
|
39
|
+
@words << word
|
40
|
+
end
|
41
|
+
|
42
|
+
word_counts
|
43
|
+
end
|
44
|
+
|
45
|
+
def not_duplicates(pairs)
|
46
|
+
pairs.each do |pair|
|
47
|
+
@non_duplicates << pair.map(&:downcase).sort
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def block_words(words)
|
52
|
+
add_nodes(@blocked_words, words)
|
53
|
+
words
|
54
|
+
end
|
55
|
+
|
56
|
+
def prefer(queries)
|
57
|
+
queries.each do |query|
|
58
|
+
@preferred_queries[normalize_query(query)] ||= query
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def suggestions(filter: true)
|
63
|
+
stemmed_queries = {}
|
64
|
+
added_queries = Set.new
|
65
|
+
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
66
|
+
query = query.to_s
|
67
|
+
|
68
|
+
# TODO do not ignore silently
|
69
|
+
next if query.length < 2
|
70
|
+
|
71
|
+
stemmed_query = normalize_query(query)
|
72
|
+
|
73
|
+
# get preferred term
|
74
|
+
preferred_query = @preferred_queries[stemmed_query]
|
75
|
+
if preferred_query && preferred_query != query
|
76
|
+
original_query, query = query, preferred_query
|
77
|
+
end
|
78
|
+
|
79
|
+
# exclude duplicates
|
80
|
+
duplicate = stemmed_queries[stemmed_query]
|
81
|
+
stemmed_queries[stemmed_query] ||= query
|
82
|
+
|
83
|
+
# also detect possibly misspelled duplicates
|
84
|
+
# TODO use top query as duplicate
|
85
|
+
if !duplicate && query.length > 4
|
86
|
+
edits(query).each do |edited_query|
|
87
|
+
if added_queries.include?(edited_query)
|
88
|
+
duplicate = edited_query
|
89
|
+
break
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
94
|
+
duplicate = nil
|
95
|
+
end
|
96
|
+
added_queries << query unless duplicate
|
97
|
+
|
98
|
+
# find concepts
|
99
|
+
concepts = []
|
100
|
+
@concepts.each do |name, values|
|
101
|
+
concepts << name if values.include?(query)
|
102
|
+
end
|
103
|
+
|
104
|
+
tokens = tokenize(query)
|
105
|
+
|
106
|
+
# exclude misspellings that are not brands
|
107
|
+
misspelling = @words.any? && misspellings?(tokens)
|
108
|
+
|
109
|
+
profane = blocked?(tokens, @profane_words)
|
110
|
+
blocked = blocked?(tokens, @blocked_words)
|
111
|
+
|
112
|
+
notes = []
|
113
|
+
notes << "duplicate of #{duplicate}" if duplicate
|
114
|
+
notes.concat(concepts)
|
115
|
+
notes << "misspelling" if misspelling
|
116
|
+
notes << "profane" if profane
|
117
|
+
notes << "blocked" if blocked
|
118
|
+
notes << "originally #{original_query}" if original_query
|
119
|
+
|
120
|
+
{
|
121
|
+
query: query,
|
122
|
+
original_query: original_query,
|
123
|
+
score: count,
|
124
|
+
duplicate: duplicate,
|
125
|
+
concepts: concepts,
|
126
|
+
misspelling: misspelling,
|
127
|
+
profane: profane,
|
128
|
+
blocked: blocked,
|
129
|
+
notes: notes
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
results.compact!
|
134
|
+
|
135
|
+
if filter
|
136
|
+
results.filter_map do |s|
|
137
|
+
unless s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked]
|
138
|
+
s.slice(:query, :score)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
else
|
142
|
+
results
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def table
|
147
|
+
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
148
|
+
suggestions(filter: false).each do |suggestion|
|
149
|
+
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
150
|
+
end
|
151
|
+
str
|
152
|
+
end
|
153
|
+
alias_method :pretty_suggestions, :table
|
154
|
+
|
155
|
+
protected
|
156
|
+
|
157
|
+
def misspellings?(tokens)
|
158
|
+
pos = [0]
|
159
|
+
while i = pos.shift
|
160
|
+
return false if i == tokens.size
|
161
|
+
|
162
|
+
if @words.include?(tokens[i])
|
163
|
+
pos << i + 1
|
164
|
+
end
|
165
|
+
|
166
|
+
node = @concept_tree[tokens[i]]
|
167
|
+
j = i
|
168
|
+
while node
|
169
|
+
j += 1
|
170
|
+
pos << j if node[:eos]
|
171
|
+
break if j == tokens.size
|
172
|
+
node = node[tokens[j]]
|
173
|
+
end
|
174
|
+
|
175
|
+
pos.uniq!
|
176
|
+
end
|
177
|
+
true
|
178
|
+
end
|
179
|
+
|
180
|
+
def blocked?(tokens, blocked_words)
|
181
|
+
tokens.each_with_index do |token, i|
|
182
|
+
node = blocked_words[token]
|
183
|
+
j = i
|
184
|
+
while node
|
185
|
+
return true if node[:eos]
|
186
|
+
j += 1
|
187
|
+
break if j == tokens.size
|
188
|
+
node = node[tokens[j]]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
false
|
192
|
+
end
|
193
|
+
|
194
|
+
def tokenize(str)
|
195
|
+
str.to_s.downcase.split(" ")
|
196
|
+
end
|
197
|
+
|
198
|
+
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
199
|
+
LETTERS = ("a".."z").to_a.join + "'"
|
200
|
+
def edits(word)
|
201
|
+
n = word.length
|
202
|
+
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
203
|
+
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
204
|
+
alteration = []
|
205
|
+
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
206
|
+
insertion = []
|
207
|
+
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
208
|
+
deletion + transposition + alteration + insertion
|
209
|
+
end
|
210
|
+
|
211
|
+
def normalize_query(query)
|
212
|
+
tokenize(query.to_s.gsub("&", "and")).map { |q| @stemmer.stem(q) }.sort.join
|
213
|
+
end
|
214
|
+
|
215
|
+
def add_nodes(var, words)
|
216
|
+
words.each do |word|
|
217
|
+
node = var
|
218
|
+
tokenize(word).each do |token|
|
219
|
+
node = (node[token] ||= {})
|
220
|
+
end
|
221
|
+
node[:eos] = true
|
222
|
+
end
|
223
|
+
var
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
data/lib/autosuggest/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "0.
|
1
|
+
module Autosuggest
|
2
|
+
VERSION = "0.3.0"
|
3
3
|
end
|
data/lib/autosuggest.rb
CHANGED
@@ -3,230 +3,15 @@ require "set"
|
|
3
3
|
require "yaml" # for obscenity
|
4
4
|
|
5
5
|
# dependencies
|
6
|
-
require "
|
6
|
+
require "mittens"
|
7
7
|
require "obscenity"
|
8
8
|
|
9
9
|
# modules
|
10
|
-
|
10
|
+
require_relative "autosuggest/generator"
|
11
|
+
require_relative "autosuggest/version"
|
11
12
|
|
12
|
-
|
13
|
-
def
|
14
|
-
|
15
|
-
@concepts = {}
|
16
|
-
@words = Set.new
|
17
|
-
@non_duplicates = Set.new
|
18
|
-
@blocked_words = {}
|
19
|
-
@blacklisted_words = {}
|
20
|
-
@preferred_queries = {}
|
21
|
-
@profane_words = {}
|
22
|
-
@concept_tree = {}
|
23
|
-
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
24
|
-
end
|
25
|
-
|
26
|
-
def add_concept(name, values)
|
27
|
-
values = values.compact.uniq
|
28
|
-
add_nodes(@concept_tree, values)
|
29
|
-
@concepts[name] = Set.new(values.map(&:downcase))
|
30
|
-
end
|
31
|
-
|
32
|
-
def parse_words(phrases, options = {})
|
33
|
-
min = options[:min] || 1
|
34
|
-
|
35
|
-
word_counts = Hash.new(0)
|
36
|
-
phrases.each do |phrase|
|
37
|
-
words = tokenize(phrase)
|
38
|
-
words.each do |word|
|
39
|
-
word_counts[word] += 1
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
word_counts.select { |_, c| c >= min }.each do |word, _|
|
44
|
-
@words << word
|
45
|
-
end
|
46
|
-
|
47
|
-
word_counts
|
48
|
-
end
|
49
|
-
|
50
|
-
def not_duplicates(pairs)
|
51
|
-
pairs.each do |pair|
|
52
|
-
@non_duplicates << pair.map(&:downcase).sort
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def block_words(words)
|
57
|
-
add_nodes(@blocked_words, words)
|
58
|
-
words
|
59
|
-
end
|
60
|
-
|
61
|
-
def blacklist_words(words)
|
62
|
-
warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
|
63
|
-
add_nodes(@blacklisted_words, words)
|
64
|
-
words
|
65
|
-
end
|
66
|
-
|
67
|
-
def prefer(queries)
|
68
|
-
queries.each do |query|
|
69
|
-
@preferred_queries[normalize_query(query)] ||= query
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
|
74
|
-
def suggestions(filter: false)
|
75
|
-
stemmed_queries = {}
|
76
|
-
added_queries = Set.new
|
77
|
-
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
78
|
-
query = query.to_s
|
79
|
-
|
80
|
-
# TODO do not ignore silently
|
81
|
-
next if query.length < 2
|
82
|
-
|
83
|
-
stemmed_query = normalize_query(query)
|
84
|
-
|
85
|
-
# get preferred term
|
86
|
-
preferred_query = @preferred_queries[stemmed_query]
|
87
|
-
if preferred_query && preferred_query != query
|
88
|
-
original_query, query = query, preferred_query
|
89
|
-
end
|
90
|
-
|
91
|
-
# exclude duplicates
|
92
|
-
duplicate = stemmed_queries[stemmed_query]
|
93
|
-
stemmed_queries[stemmed_query] ||= query
|
94
|
-
|
95
|
-
# also detect possibly misspelled duplicates
|
96
|
-
# TODO use top query as duplicate
|
97
|
-
if !duplicate && query.length > 4
|
98
|
-
edits(query).each do |edited_query|
|
99
|
-
if added_queries.include?(edited_query)
|
100
|
-
duplicate = edited_query
|
101
|
-
break
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
106
|
-
duplicate = nil
|
107
|
-
end
|
108
|
-
added_queries << query unless duplicate
|
109
|
-
|
110
|
-
# find concepts
|
111
|
-
concepts = []
|
112
|
-
@concepts.each do |name, values|
|
113
|
-
concepts << name if values.include?(query)
|
114
|
-
end
|
115
|
-
|
116
|
-
tokens = tokenize(query)
|
117
|
-
|
118
|
-
# exclude misspellings that are not brands
|
119
|
-
misspelling = @words.any? && misspellings?(tokens)
|
120
|
-
|
121
|
-
profane = blocked?(tokens, @profane_words)
|
122
|
-
blocked = blocked?(tokens, @blocked_words)
|
123
|
-
blacklisted = blocked?(tokens, @blacklisted_words)
|
124
|
-
|
125
|
-
notes = []
|
126
|
-
notes << "duplicate of #{duplicate}" if duplicate
|
127
|
-
notes.concat(concepts)
|
128
|
-
notes << "misspelling" if misspelling
|
129
|
-
notes << "profane" if profane
|
130
|
-
notes << "blocked" if blocked
|
131
|
-
notes << "blacklisted" if blacklisted
|
132
|
-
notes << "originally #{original_query}" if original_query
|
133
|
-
|
134
|
-
result = {
|
135
|
-
query: query,
|
136
|
-
original_query: original_query,
|
137
|
-
score: count,
|
138
|
-
duplicate: duplicate,
|
139
|
-
concepts: concepts,
|
140
|
-
misspelling: misspelling,
|
141
|
-
profane: profane,
|
142
|
-
blocked: blocked
|
143
|
-
}
|
144
|
-
result[:blacklisted] = blacklisted if @blacklisted_words.any?
|
145
|
-
result[:notes] = notes
|
146
|
-
result
|
147
|
-
end
|
148
|
-
if filter
|
149
|
-
results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
|
150
|
-
end
|
151
|
-
results
|
152
|
-
end
|
153
|
-
|
154
|
-
def pretty_suggestions
|
155
|
-
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
156
|
-
suggestions.each do |suggestion|
|
157
|
-
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
158
|
-
end
|
159
|
-
str
|
160
|
-
end
|
161
|
-
|
162
|
-
protected
|
163
|
-
|
164
|
-
def misspellings?(tokens)
|
165
|
-
pos = [0]
|
166
|
-
while i = pos.shift
|
167
|
-
return false if i == tokens.size
|
168
|
-
|
169
|
-
if @words.include?(tokens[i])
|
170
|
-
pos << i + 1
|
171
|
-
end
|
172
|
-
|
173
|
-
node = @concept_tree[tokens[i]]
|
174
|
-
j = i
|
175
|
-
while node
|
176
|
-
j += 1
|
177
|
-
pos << j if node[:eos]
|
178
|
-
break if j == tokens.size
|
179
|
-
node = node[tokens[j]]
|
180
|
-
end
|
181
|
-
|
182
|
-
pos.uniq!
|
183
|
-
end
|
184
|
-
true
|
185
|
-
end
|
186
|
-
|
187
|
-
def blocked?(tokens, blocked_words)
|
188
|
-
tokens.each_with_index do |token, i|
|
189
|
-
node = blocked_words[token]
|
190
|
-
j = i
|
191
|
-
while node
|
192
|
-
return true if node[:eos]
|
193
|
-
j += 1
|
194
|
-
break if j == tokens.size
|
195
|
-
node = node[tokens[j]]
|
196
|
-
end
|
197
|
-
end
|
198
|
-
false
|
199
|
-
end
|
200
|
-
|
201
|
-
def tokenize(str)
|
202
|
-
str.to_s.downcase.split(" ")
|
203
|
-
end
|
204
|
-
|
205
|
-
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
206
|
-
LETTERS = ("a".."z").to_a.join + "'"
|
207
|
-
def edits(word)
|
208
|
-
n = word.length
|
209
|
-
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
210
|
-
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
211
|
-
alteration = []
|
212
|
-
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
213
|
-
insertion = []
|
214
|
-
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
215
|
-
deletion + transposition + alteration + insertion
|
216
|
-
end
|
217
|
-
|
218
|
-
def normalize_query(query)
|
219
|
-
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
|
220
|
-
end
|
221
|
-
|
222
|
-
def add_nodes(var, words)
|
223
|
-
words.each do |word|
|
224
|
-
node = var
|
225
|
-
tokenize(word).each do |token|
|
226
|
-
node = (node[token] ||= {})
|
227
|
-
end
|
228
|
-
node[:eos] = true
|
229
|
-
end
|
230
|
-
var
|
13
|
+
module Autosuggest
|
14
|
+
def self.new(*args, **options)
|
15
|
+
Generator.new(*args, **options)
|
231
16
|
end
|
232
17
|
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: mittens
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- LICENSE.txt
|
49
49
|
- README.md
|
50
50
|
- lib/autosuggest.rb
|
51
|
+
- lib/autosuggest/generator.rb
|
51
52
|
- lib/autosuggest/version.rb
|
52
53
|
- lib/generators/autosuggest/suggestions_generator.rb
|
53
54
|
- lib/generators/autosuggest/templates/migration.rb.tt
|
@@ -64,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
65
|
requirements:
|
65
66
|
- - ">="
|
66
67
|
- !ruby/object:Gem::Version
|
67
|
-
version: '
|
68
|
+
version: '3.1'
|
68
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
70
|
requirements:
|
70
71
|
- - ">="
|
71
72
|
- !ruby/object:Gem::Version
|
72
73
|
version: '0'
|
73
74
|
requirements: []
|
74
|
-
rubygems_version: 3.
|
75
|
+
rubygems_version: 3.5.9
|
75
76
|
signing_key:
|
76
77
|
specification_version: 4
|
77
78
|
summary: Generate autocomplete suggestions based on what your users search
|