autosuggest 0.1.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/LICENSE.txt +1 -1
- data/README.md +26 -10
- data/lib/autosuggest/generator.rb +226 -0
- data/lib/autosuggest/version.rb +2 -2
- data/lib/autosuggest.rb +6 -221
- data/lib/generators/autosuggest/suggestions_generator.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0cd2e072b09ebaecd6858e27f87b198e0909c8d70c0f36250c6eefc022600e59
|
4
|
+
data.tar.gz: '02434900d6a69b6c18ad2d5353c7359681f442fbe19a28f1aacfdac3c60dfe42'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22ed0ecf00dae3f077cbd2be2cd024e61f6ceb6cdefca57af9a90d1769499e51c1df301ee37bfbd2344ef69afd6b9da3582aa21714b1b3d5061749eb0d9b190d
|
7
|
+
data.tar.gz: d8b757d31b2c2b9429afc29758f6d70631e9402593dbc127728c47f5553edb2ec04b9007dc79257820a500f0105f93d72254decd1cde3b2123990752f255600e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
## 0.3.0 (2024-05-22)
|
2
|
+
|
3
|
+
- Switched to Mittens for stemming
|
4
|
+
- Dropped support for Ruby < 3.1
|
5
|
+
|
6
|
+
## 0.2.0 (2023-01-29)
|
7
|
+
|
8
|
+
- Added `language` option
|
9
|
+
- Changed `suggestions` method to filter by default
|
10
|
+
- Changed `filter: true` to only return query and score
|
11
|
+
- Removed `blacklist_words` method
|
12
|
+
- Dropped support for Ruby < 2.7
|
13
|
+
|
1
14
|
## 0.1.3 (2021-11-23)
|
2
15
|
|
3
16
|
- Added model generator
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,14 +4,14 @@ Generate autocomplete suggestions based on what your users search
|
|
4
4
|
|
5
5
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource)
|
6
6
|
|
7
|
-
[![Build Status](https://github.com/ankane/autosuggest/workflows/build/badge.svg
|
7
|
+
[![Build Status](https://github.com/ankane/autosuggest/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/autosuggest/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
11
|
Add this line to your application’s Gemfile:
|
12
12
|
|
13
13
|
```ruby
|
14
|
-
gem 'autosuggest'
|
14
|
+
gem "autosuggest"
|
15
15
|
```
|
16
16
|
|
17
17
|
## Getting Started
|
@@ -38,14 +38,20 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
38
38
|
Then pass them to Autosuggest.
|
39
39
|
|
40
40
|
```ruby
|
41
|
-
autosuggest = Autosuggest.new(top_queries)
|
41
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
42
42
|
```
|
43
43
|
|
44
44
|
#### Filter duplicates
|
45
45
|
|
46
46
|
[Stemming](https://en.wikipedia.org/wiki/Stemming) is used to detect duplicates like `apple` and `apples`.
|
47
47
|
|
48
|
-
|
48
|
+
Specify the stemming language (defaults to `english`) with:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
autosuggest = Autosuggest::Generator.new(top_queries, language: "spanish")
|
52
|
+
```
|
53
|
+
|
54
|
+
The most popular query is preferred by default. To override this, use:
|
49
55
|
|
50
56
|
```ruby
|
51
57
|
autosuggest.prefer ["apples"]
|
@@ -90,7 +96,7 @@ autosuggest.block_words ["boom"]
|
|
90
96
|
Generate suggestions with:
|
91
97
|
|
92
98
|
```ruby
|
93
|
-
suggestions = autosuggest.suggestions(filter: true)
|
99
|
+
suggestions = autosuggest.suggestions
|
94
100
|
```
|
95
101
|
|
96
102
|
#### Save suggestions
|
@@ -152,18 +158,18 @@ end
|
|
152
158
|
You may want to have someone manually approve suggestions:
|
153
159
|
|
154
160
|
```ruby
|
155
|
-
Autosuggest::Suggestion.where(
|
161
|
+
Autosuggest::Suggestion.where(status: "approved")
|
156
162
|
```
|
157
163
|
|
158
164
|
Or filter suggestions without results:
|
159
165
|
|
160
166
|
```ruby
|
161
167
|
Autosuggest::Suggestion.find_each do |suggestion|
|
162
|
-
suggestion.
|
168
|
+
suggestion.results_count = Product.search(suggestion.query, load: false).count
|
163
169
|
suggestion.save! if suggestion.changed?
|
164
170
|
end
|
165
171
|
|
166
|
-
Autosuggest::Suggestion.where(
|
172
|
+
Autosuggest::Suggestion.where("results_count > 0")
|
167
173
|
```
|
168
174
|
|
169
175
|
You can add additional fields to your model/data store to accomplish this.
|
@@ -176,14 +182,14 @@ top_queries = Searchjoy::Search.group(:normalized_query)
|
|
176
182
|
product_names = Product.pluck(:name)
|
177
183
|
brand_names = Brand.pluck(:name)
|
178
184
|
|
179
|
-
autosuggest = Autosuggest.new(top_queries)
|
185
|
+
autosuggest = Autosuggest::Generator.new(top_queries)
|
180
186
|
autosuggest.parse_words product_names
|
181
187
|
autosuggest.add_concept "brand", brand_names
|
182
188
|
autosuggest.prefer brand_names
|
183
189
|
autosuggest.not_duplicates [["straws", "straus"]]
|
184
190
|
autosuggest.block_words ["boom"]
|
185
191
|
|
186
|
-
suggestions = autosuggest.suggestions(filter: true)
|
192
|
+
suggestions = autosuggest.suggestions
|
187
193
|
|
188
194
|
now = Time.now
|
189
195
|
records = suggestions.map { |s| s.slice(:query, :score).merge(updated_at: now) }
|
@@ -193,6 +199,16 @@ Autosuggest::Suggestion.transaction do
|
|
193
199
|
end
|
194
200
|
```
|
195
201
|
|
202
|
+
## Upgrading
|
203
|
+
|
204
|
+
### 0.2.0
|
205
|
+
|
206
|
+
Suggestions are now filtered by default, and only the query and score are returned. Get all queries and fields with:
|
207
|
+
|
208
|
+
```ruby
|
209
|
+
autosuggest.suggestions(filter: false)
|
210
|
+
```
|
211
|
+
|
196
212
|
## History
|
197
213
|
|
198
214
|
View the [changelog](https://github.com/ankane/autosuggest/blob/master/CHANGELOG.md)
|
@@ -0,0 +1,226 @@
|
|
1
|
+
module Autosuggest
|
2
|
+
class Generator
|
3
|
+
def initialize(top_queries, language: "english")
|
4
|
+
@top_queries = top_queries
|
5
|
+
@concepts = {}
|
6
|
+
@words = Set.new
|
7
|
+
@non_duplicates = Set.new
|
8
|
+
@blocked_words = {}
|
9
|
+
@preferred_queries = {}
|
10
|
+
@profane_words = {}
|
11
|
+
@concept_tree = {}
|
12
|
+
begin
|
13
|
+
@stemmer = Mittens::Stemmer.new(language: language)
|
14
|
+
rescue ArgumentError
|
15
|
+
raise ArgumentError, "Language not available"
|
16
|
+
end
|
17
|
+
# TODO take language into account for profanity
|
18
|
+
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
19
|
+
end
|
20
|
+
|
21
|
+
def add_concept(name, values)
|
22
|
+
values = values.compact.uniq
|
23
|
+
add_nodes(@concept_tree, values)
|
24
|
+
@concepts[name] = Set.new(values.map(&:downcase))
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_words(phrases, options = {})
|
28
|
+
min = options[:min] || 1
|
29
|
+
|
30
|
+
word_counts = Hash.new(0)
|
31
|
+
phrases.each do |phrase|
|
32
|
+
words = tokenize(phrase)
|
33
|
+
words.each do |word|
|
34
|
+
word_counts[word] += 1
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
word_counts.select { |_, c| c >= min }.each do |word, _|
|
39
|
+
@words << word
|
40
|
+
end
|
41
|
+
|
42
|
+
word_counts
|
43
|
+
end
|
44
|
+
|
45
|
+
def not_duplicates(pairs)
|
46
|
+
pairs.each do |pair|
|
47
|
+
@non_duplicates << pair.map(&:downcase).sort
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def block_words(words)
|
52
|
+
add_nodes(@blocked_words, words)
|
53
|
+
words
|
54
|
+
end
|
55
|
+
|
56
|
+
def prefer(queries)
|
57
|
+
queries.each do |query|
|
58
|
+
@preferred_queries[normalize_query(query)] ||= query
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def suggestions(filter: true)
|
63
|
+
stemmed_queries = {}
|
64
|
+
added_queries = Set.new
|
65
|
+
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
66
|
+
query = query.to_s
|
67
|
+
|
68
|
+
# TODO do not ignore silently
|
69
|
+
next if query.length < 2
|
70
|
+
|
71
|
+
stemmed_query = normalize_query(query)
|
72
|
+
|
73
|
+
# get preferred term
|
74
|
+
preferred_query = @preferred_queries[stemmed_query]
|
75
|
+
if preferred_query && preferred_query != query
|
76
|
+
original_query, query = query, preferred_query
|
77
|
+
end
|
78
|
+
|
79
|
+
# exclude duplicates
|
80
|
+
duplicate = stemmed_queries[stemmed_query]
|
81
|
+
stemmed_queries[stemmed_query] ||= query
|
82
|
+
|
83
|
+
# also detect possibly misspelled duplicates
|
84
|
+
# TODO use top query as duplicate
|
85
|
+
if !duplicate && query.length > 4
|
86
|
+
edits(query).each do |edited_query|
|
87
|
+
if added_queries.include?(edited_query)
|
88
|
+
duplicate = edited_query
|
89
|
+
break
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
94
|
+
duplicate = nil
|
95
|
+
end
|
96
|
+
added_queries << query unless duplicate
|
97
|
+
|
98
|
+
# find concepts
|
99
|
+
concepts = []
|
100
|
+
@concepts.each do |name, values|
|
101
|
+
concepts << name if values.include?(query)
|
102
|
+
end
|
103
|
+
|
104
|
+
tokens = tokenize(query)
|
105
|
+
|
106
|
+
# exclude misspellings that are not brands
|
107
|
+
misspelling = @words.any? && misspellings?(tokens)
|
108
|
+
|
109
|
+
profane = blocked?(tokens, @profane_words)
|
110
|
+
blocked = blocked?(tokens, @blocked_words)
|
111
|
+
|
112
|
+
notes = []
|
113
|
+
notes << "duplicate of #{duplicate}" if duplicate
|
114
|
+
notes.concat(concepts)
|
115
|
+
notes << "misspelling" if misspelling
|
116
|
+
notes << "profane" if profane
|
117
|
+
notes << "blocked" if blocked
|
118
|
+
notes << "originally #{original_query}" if original_query
|
119
|
+
|
120
|
+
{
|
121
|
+
query: query,
|
122
|
+
original_query: original_query,
|
123
|
+
score: count,
|
124
|
+
duplicate: duplicate,
|
125
|
+
concepts: concepts,
|
126
|
+
misspelling: misspelling,
|
127
|
+
profane: profane,
|
128
|
+
blocked: blocked,
|
129
|
+
notes: notes
|
130
|
+
}
|
131
|
+
end
|
132
|
+
|
133
|
+
results.compact!
|
134
|
+
|
135
|
+
if filter
|
136
|
+
results.filter_map do |s|
|
137
|
+
unless s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked]
|
138
|
+
s.slice(:query, :score)
|
139
|
+
end
|
140
|
+
end
|
141
|
+
else
|
142
|
+
results
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def table
|
147
|
+
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
148
|
+
suggestions(filter: false).each do |suggestion|
|
149
|
+
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
150
|
+
end
|
151
|
+
str
|
152
|
+
end
|
153
|
+
alias_method :pretty_suggestions, :table
|
154
|
+
|
155
|
+
protected
|
156
|
+
|
157
|
+
def misspellings?(tokens)
|
158
|
+
pos = [0]
|
159
|
+
while i = pos.shift
|
160
|
+
return false if i == tokens.size
|
161
|
+
|
162
|
+
if @words.include?(tokens[i])
|
163
|
+
pos << i + 1
|
164
|
+
end
|
165
|
+
|
166
|
+
node = @concept_tree[tokens[i]]
|
167
|
+
j = i
|
168
|
+
while node
|
169
|
+
j += 1
|
170
|
+
pos << j if node[:eos]
|
171
|
+
break if j == tokens.size
|
172
|
+
node = node[tokens[j]]
|
173
|
+
end
|
174
|
+
|
175
|
+
pos.uniq!
|
176
|
+
end
|
177
|
+
true
|
178
|
+
end
|
179
|
+
|
180
|
+
def blocked?(tokens, blocked_words)
|
181
|
+
tokens.each_with_index do |token, i|
|
182
|
+
node = blocked_words[token]
|
183
|
+
j = i
|
184
|
+
while node
|
185
|
+
return true if node[:eos]
|
186
|
+
j += 1
|
187
|
+
break if j == tokens.size
|
188
|
+
node = node[tokens[j]]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
false
|
192
|
+
end
|
193
|
+
|
194
|
+
def tokenize(str)
|
195
|
+
str.to_s.downcase.split(" ")
|
196
|
+
end
|
197
|
+
|
198
|
+
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
199
|
+
LETTERS = ("a".."z").to_a.join + "'"
|
200
|
+
def edits(word)
|
201
|
+
n = word.length
|
202
|
+
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
203
|
+
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
204
|
+
alteration = []
|
205
|
+
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
206
|
+
insertion = []
|
207
|
+
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
208
|
+
deletion + transposition + alteration + insertion
|
209
|
+
end
|
210
|
+
|
211
|
+
def normalize_query(query)
|
212
|
+
tokenize(query.to_s.gsub("&", "and")).map { |q| @stemmer.stem(q) }.sort.join
|
213
|
+
end
|
214
|
+
|
215
|
+
def add_nodes(var, words)
|
216
|
+
words.each do |word|
|
217
|
+
node = var
|
218
|
+
tokenize(word).each do |token|
|
219
|
+
node = (node[token] ||= {})
|
220
|
+
end
|
221
|
+
node[:eos] = true
|
222
|
+
end
|
223
|
+
var
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
data/lib/autosuggest/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "0.1.3"
|
1
|
+
module Autosuggest
|
2
|
+
VERSION = "0.3.0"
|
3
3
|
end
|
data/lib/autosuggest.rb
CHANGED
@@ -3,230 +3,15 @@ require "set"
|
|
3
3
|
require "yaml" # for obscenity
|
4
4
|
|
5
5
|
# dependencies
|
6
|
-
require "lingua/stemmer"
|
6
|
+
require "mittens"
|
7
7
|
require "obscenity"
|
8
8
|
|
9
9
|
# modules
|
10
|
-
|
10
|
+
require_relative "autosuggest/generator"
|
11
|
+
require_relative "autosuggest/version"
|
11
12
|
|
12
|
-
|
13
|
-
def initialize(top_queries)
|
14
|
-
|
15
|
-
@concepts = {}
|
16
|
-
@words = Set.new
|
17
|
-
@non_duplicates = Set.new
|
18
|
-
@blocked_words = {}
|
19
|
-
@blacklisted_words = {}
|
20
|
-
@preferred_queries = {}
|
21
|
-
@profane_words = {}
|
22
|
-
@concept_tree = {}
|
23
|
-
add_nodes(@profane_words, Obscenity::Base.blacklist)
|
24
|
-
end
|
25
|
-
|
26
|
-
def add_concept(name, values)
|
27
|
-
values = values.compact.uniq
|
28
|
-
add_nodes(@concept_tree, values)
|
29
|
-
@concepts[name] = Set.new(values.map(&:downcase))
|
30
|
-
end
|
31
|
-
|
32
|
-
def parse_words(phrases, options = {})
|
33
|
-
min = options[:min] || 1
|
34
|
-
|
35
|
-
word_counts = Hash.new(0)
|
36
|
-
phrases.each do |phrase|
|
37
|
-
words = tokenize(phrase)
|
38
|
-
words.each do |word|
|
39
|
-
word_counts[word] += 1
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
word_counts.select { |_, c| c >= min }.each do |word, _|
|
44
|
-
@words << word
|
45
|
-
end
|
46
|
-
|
47
|
-
word_counts
|
48
|
-
end
|
49
|
-
|
50
|
-
def not_duplicates(pairs)
|
51
|
-
pairs.each do |pair|
|
52
|
-
@non_duplicates << pair.map(&:downcase).sort
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def block_words(words)
|
57
|
-
add_nodes(@blocked_words, words)
|
58
|
-
words
|
59
|
-
end
|
60
|
-
|
61
|
-
def blacklist_words(words)
|
62
|
-
warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
|
63
|
-
add_nodes(@blacklisted_words, words)
|
64
|
-
words
|
65
|
-
end
|
66
|
-
|
67
|
-
def prefer(queries)
|
68
|
-
queries.each do |query|
|
69
|
-
@preferred_queries[normalize_query(query)] ||= query
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# TODO add queries method for filter: false and make suggestions use filter: true in 0.2.0
|
74
|
-
def suggestions(filter: false)
|
75
|
-
stemmed_queries = {}
|
76
|
-
added_queries = Set.new
|
77
|
-
results = @top_queries.sort_by { |_query, count| -count }.map do |query, count|
|
78
|
-
query = query.to_s
|
79
|
-
|
80
|
-
# TODO do not ignore silently
|
81
|
-
next if query.length < 2
|
82
|
-
|
83
|
-
stemmed_query = normalize_query(query)
|
84
|
-
|
85
|
-
# get preferred term
|
86
|
-
preferred_query = @preferred_queries[stemmed_query]
|
87
|
-
if preferred_query && preferred_query != query
|
88
|
-
original_query, query = query, preferred_query
|
89
|
-
end
|
90
|
-
|
91
|
-
# exclude duplicates
|
92
|
-
duplicate = stemmed_queries[stemmed_query]
|
93
|
-
stemmed_queries[stemmed_query] ||= query
|
94
|
-
|
95
|
-
# also detect possibly misspelled duplicates
|
96
|
-
# TODO use top query as duplicate
|
97
|
-
if !duplicate && query.length > 4
|
98
|
-
edits(query).each do |edited_query|
|
99
|
-
if added_queries.include?(edited_query)
|
100
|
-
duplicate = edited_query
|
101
|
-
break
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
if duplicate && @non_duplicates.include?([duplicate, query].sort)
|
106
|
-
duplicate = nil
|
107
|
-
end
|
108
|
-
added_queries << query unless duplicate
|
109
|
-
|
110
|
-
# find concepts
|
111
|
-
concepts = []
|
112
|
-
@concepts.each do |name, values|
|
113
|
-
concepts << name if values.include?(query)
|
114
|
-
end
|
115
|
-
|
116
|
-
tokens = tokenize(query)
|
117
|
-
|
118
|
-
# exclude misspellings that are not brands
|
119
|
-
misspelling = @words.any? && misspellings?(tokens)
|
120
|
-
|
121
|
-
profane = blocked?(tokens, @profane_words)
|
122
|
-
blocked = blocked?(tokens, @blocked_words)
|
123
|
-
blacklisted = blocked?(tokens, @blacklisted_words)
|
124
|
-
|
125
|
-
notes = []
|
126
|
-
notes << "duplicate of #{duplicate}" if duplicate
|
127
|
-
notes.concat(concepts)
|
128
|
-
notes << "misspelling" if misspelling
|
129
|
-
notes << "profane" if profane
|
130
|
-
notes << "blocked" if blocked
|
131
|
-
notes << "blacklisted" if blacklisted
|
132
|
-
notes << "originally #{original_query}" if original_query
|
133
|
-
|
134
|
-
result = {
|
135
|
-
query: query,
|
136
|
-
original_query: original_query,
|
137
|
-
score: count,
|
138
|
-
duplicate: duplicate,
|
139
|
-
concepts: concepts,
|
140
|
-
misspelling: misspelling,
|
141
|
-
profane: profane,
|
142
|
-
blocked: blocked
|
143
|
-
}
|
144
|
-
result[:blacklisted] = blacklisted if @blacklisted_words.any?
|
145
|
-
result[:notes] = notes
|
146
|
-
result
|
147
|
-
end
|
148
|
-
if filter
|
149
|
-
results.reject! { |s| s[:duplicate] || s[:misspelling] || s[:profane] || s[:blocked] }
|
150
|
-
end
|
151
|
-
results
|
152
|
-
end
|
153
|
-
|
154
|
-
def pretty_suggestions
|
155
|
-
str = "%-30s %5s %s\n" % %w(Query Score Notes)
|
156
|
-
suggestions.each do |suggestion|
|
157
|
-
str << "%-30s %5d %s\n" % [suggestion[:query], suggestion[:score], suggestion[:notes].join(", ")]
|
158
|
-
end
|
159
|
-
str
|
160
|
-
end
|
161
|
-
|
162
|
-
protected
|
163
|
-
|
164
|
-
def misspellings?(tokens)
|
165
|
-
pos = [0]
|
166
|
-
while i = pos.shift
|
167
|
-
return false if i == tokens.size
|
168
|
-
|
169
|
-
if @words.include?(tokens[i])
|
170
|
-
pos << i + 1
|
171
|
-
end
|
172
|
-
|
173
|
-
node = @concept_tree[tokens[i]]
|
174
|
-
j = i
|
175
|
-
while node
|
176
|
-
j += 1
|
177
|
-
pos << j if node[:eos]
|
178
|
-
break if j == tokens.size
|
179
|
-
node = node[tokens[j]]
|
180
|
-
end
|
181
|
-
|
182
|
-
pos.uniq!
|
183
|
-
end
|
184
|
-
true
|
185
|
-
end
|
186
|
-
|
187
|
-
def blocked?(tokens, blocked_words)
|
188
|
-
tokens.each_with_index do |token, i|
|
189
|
-
node = blocked_words[token]
|
190
|
-
j = i
|
191
|
-
while node
|
192
|
-
return true if node[:eos]
|
193
|
-
j += 1
|
194
|
-
break if j == tokens.size
|
195
|
-
node = node[tokens[j]]
|
196
|
-
end
|
197
|
-
end
|
198
|
-
false
|
199
|
-
end
|
200
|
-
|
201
|
-
def tokenize(str)
|
202
|
-
str.to_s.downcase.split(" ")
|
203
|
-
end
|
204
|
-
|
205
|
-
# from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
|
206
|
-
LETTERS = ("a".."z").to_a.join + "'"
|
207
|
-
def edits(word)
|
208
|
-
n = word.length
|
209
|
-
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
|
210
|
-
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
|
211
|
-
alteration = []
|
212
|
-
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
|
213
|
-
insertion = []
|
214
|
-
(n + 1).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } }
|
215
|
-
deletion + transposition + alteration + insertion
|
216
|
-
end
|
217
|
-
|
218
|
-
def normalize_query(query)
|
219
|
-
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
|
220
|
-
end
|
221
|
-
|
222
|
-
def add_nodes(var, words)
|
223
|
-
words.each do |word|
|
224
|
-
node = var
|
225
|
-
tokenize(word).each do |token|
|
226
|
-
node = (node[token] ||= {})
|
227
|
-
end
|
228
|
-
node[:eos] = true
|
229
|
-
end
|
230
|
-
var
|
13
|
+
module Autosuggest
|
14
|
+
def self.new(*args, **options)
|
15
|
+
Generator.new(*args, **options)
|
231
16
|
end
|
232
17
|
end
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: autosuggest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: mittens
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- LICENSE.txt
|
49
49
|
- README.md
|
50
50
|
- lib/autosuggest.rb
|
51
|
+
- lib/autosuggest/generator.rb
|
51
52
|
- lib/autosuggest/version.rb
|
52
53
|
- lib/generators/autosuggest/suggestions_generator.rb
|
53
54
|
- lib/generators/autosuggest/templates/migration.rb.tt
|
@@ -64,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
65
|
requirements:
|
65
66
|
- - ">="
|
66
67
|
- !ruby/object:Gem::Version
|
67
|
-
version: '
|
68
|
+
version: '3.1'
|
68
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
70
|
requirements:
|
70
71
|
- - ">="
|
71
72
|
- !ruby/object:Gem::Version
|
72
73
|
version: '0'
|
73
74
|
requirements: []
|
74
|
-
rubygems_version: 3.
|
75
|
+
rubygems_version: 3.5.9
|
75
76
|
signing_key:
|
76
77
|
specification_version: 4
|
77
78
|
summary: Generate autocomplete suggestions based on what your users search
|