runestone 1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.tm_properties +1 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +10 -0
- data/Rakefile +14 -0
- data/db/migrate/20181101150207_create_ts_tables.rb +31 -0
- data/lib/runestone.rb +103 -0
- data/lib/runestone/active_record/base_methods.rb +135 -0
- data/lib/runestone/active_record/relation_methods.rb +83 -0
- data/lib/runestone/corpus.rb +45 -0
- data/lib/runestone/engine.rb +21 -0
- data/lib/runestone/indexing_job.rb +8 -0
- data/lib/runestone/model.rb +92 -0
- data/lib/runestone/settings.rb +106 -0
- data/lib/runestone/version.rb +3 -0
- data/lib/runestone/web_search.rb +203 -0
- data/lib/runestone/web_search/and.rb +17 -0
- data/lib/runestone/web_search/or.rb +11 -0
- data/lib/runestone/web_search/phrase.rb +19 -0
- data/lib/runestone/web_search/token.rb +27 -0
- data/runestone.gemspec +32 -0
- data/test/corpus_test.rb +42 -0
- data/test/database.rb +119 -0
- data/test/delayed_index_test.rb +34 -0
- data/test/helper_test.rb +40 -0
- data/test/highlight_test.rb +26 -0
- data/test/indexing_test.rb +151 -0
- data/test/multi_index_test.rb +177 -0
- data/test/query_test.rb +129 -0
- data/test/synonym_test.rb +128 -0
- data/test/test_helper.rb +185 -0
- metadata +239 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3756ae6e1dcf043dc6d4ca6e1312a0409ceac0fd15a90e6a7859c7d5cfec0ef9
|
4
|
+
data.tar.gz: a2e9bfd44ae564d10ddd6ee9f25179b267b12979dd7bfe2311825ce2bc348d97
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4c6b1cd82ea806fd419ab2b9986b27d2fabb0d0d898decf69068bb9b824c642d9ef333e8c2f6874ca5151d5250ef5f4d81796cce7813b2bfdb66dbb47ed5a01d
|
7
|
+
data.tar.gz: ec0d633c0eaab16dccea6a340f53f6160e6932a530d6d1398ce3462b9366abbc018548e5789cccf2974c8f45495e78949a865a3b270f09ab7d61b9738a5e7db6
|
data/.gitignore
ADDED
data/.tm_properties
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
exclude = '{$exclude,log,bin,tmp,.tm_properties,public/system,coverage}'
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Jon Bracy
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# Runestone
|
2
|
+
|
3
|
+
Runestone provides full text search using PostgreSQL's full text search capabilities.
|
4
|
+
It was inspired by [Postgres full-text search is Good Enough!][1] and
|
5
|
+
[Super Fuzzy Searching on PostgreSQL][2]
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
[1]: http://rachbelaid.com/postgres-full-text-search-is-good-enough/
|
10
|
+
[2]: http://www.www-old.bartlettpublishing.com/site/bartpub/blog/3/entry/350
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
Bundler.require(:development)
|
4
|
+
|
5
|
+
require 'fileutils'
|
6
|
+
require "rake/testtask"
|
7
|
+
|
8
|
+
# Test Task
|
9
|
+
# Registers the default Rake test task: puts lib/ and test/ on the load path
# and runs every file matching test/**/*_test.rb.
Rake::TestTask.new do |task|
  task.libs.push('lib', 'test')
  task.test_files = FileList['test/**/*_test.rb']
  # task.warning = true
  # task.verbose = true
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Creates the storage used by Runestone:
#
# * +runestones+       - one row per indexed record / index name / dictionary,
#                        holding the extracted data and its tsvector.
# * +runestone_corpus+ - the set of known words, trigram-indexed.
# * +simple_unaccent+  - a text search configuration based on +simple+ that
#                        runs tokens through +unaccent+ first.
class CreateRunestoneTables < ActiveRecord::Migration[6.0]

  def change
    enable_extension 'pgcrypto'
    enable_extension 'pg_trgm'        # provides gin_trgm_ops used below
    enable_extension 'fuzzystrmatch'
    # The simple_unaccent configuration below maps onto the `unaccent`
    # dictionary, which only exists once this extension is installed.
    enable_extension 'unaccent'

    create_table :runestones, id: :uuid do |t|
      t.belongs_to :record, type: :uuid, polymorphic: true, null: false
      t.string     :name
      t.string     :dictionary
      t.jsonb      :data, null: false
      t.tsvector   :vector, null: false
    end

    add_index :runestones, [:record_type, :record_id, :name, :dictionary], unique: true, name: 'index_runestones_for_uniqueness'
    add_index :runestones, :vector, using: :gin

    # Raw SQL cannot be reversed automatically, so spell out both directions
    # to keep the migration rollback-able.
    reversible do |dir|
      dir.up do
        execute <<-SQL
          CREATE TABLE runestone_corpus ( word varchar, CONSTRAINT word UNIQUE(word) );

          CREATE INDEX runestone_corpus_trgm_idx ON runestone_corpus USING GIN (word gin_trgm_ops);

          CREATE TEXT SEARCH CONFIGURATION simple_unaccent (COPY = simple);
          ALTER TEXT SEARCH CONFIGURATION simple_unaccent
            ALTER MAPPING FOR hword, hword_part, word
            WITH unaccent, simple;
        SQL
      end

      dir.down do
        execute <<-SQL
          DROP TEXT SEARCH CONFIGURATION simple_unaccent;
          DROP TABLE runestone_corpus;
        SQL
      end
    end
  end

end
|
data/lib/runestone.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'arel/extensions'
|
2
|
+
|
3
|
+
module Runestone
|
4
|
+
autoload :Model, "#{File.dirname(__FILE__)}/runestone/model"
|
5
|
+
autoload :Settings, "#{File.dirname(__FILE__)}/runestone/settings"
|
6
|
+
autoload :WebSearch, "#{File.dirname(__FILE__)}/runestone/web_search"
|
7
|
+
autoload :IndexingJob, "#{File.dirname(__FILE__)}/runestone/indexing_job"
|
8
|
+
|
9
|
+
mattr_accessor :dictionary, default: :simple_unaccent
|
10
|
+
mattr_accessor :runner, default: :inline
|
11
|
+
mattr_accessor :job_queue, default: :runestone_indexing
|
12
|
+
mattr_accessor :typo_tolerances, default: { 1 => 4..7, 2 => 8.. }
|
13
|
+
|
14
|
+
mattr_reader :synonyms do
|
15
|
+
{ }
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns a lower-cased, Unicode-normalized copy of +string+.
#
# When the string's encoding cannot be Unicode-normalized
# (Encoding::CompatibilityError) the lower-cased string is returned as is.
# The argument itself is never modified.
def self.normalize(string)
  result = string
  result = result.downcase
  # In-place normalization of the fresh downcased copy; returns the receiver.
  result.unicode_normalize!
rescue Encoding::CompatibilityError
  result
end
|
25
|
+
|
26
|
+
# Registers several synonyms at once.
#
# +dictionary+ is enumerated as (word, replacements) pairs; each pair is
# forwarded to add_synonym, splatting the replacements so that both a single
# value and an array are accepted.
def self.add_synonyms(dictionary)
  dictionary.each { |word, replacements| add_synonym(word, *replacements) }
end
|
31
|
+
|
32
|
+
# Registers +replacements+ as synonyms of +word+ in the +synonyms+ table.
#
# Both the word and the replacements are passed through normalize. A
# multi-token word (split on whitespace) is stored as a nested structure:
# every leading token maps to an array that contains a Hash for the next
# level, and the final token maps to the (de-duplicated) replacement list.
def self.add_synonym(word, *replacements)
  tokens = normalize(word)
  replacements.map! { |replacement| normalize(replacement) }

  *leading, final = tokens.split(/\s+/)

  # Walk (and create on demand) one nested Hash per leading token.
  node = leading.reduce(synonyms) do |level, token|
    nested = level.has_key?(token) && level[token].find { |entry| entry.is_a?(Hash) }
    unless nested
      nested = {}
      level[token] ||= []
      level[token] << nested
    end
    nested
  end

  node[final] = (node[final] || []) + replacements
  node[final].uniq!
end
|
55
|
+
|
56
|
+
# Builds a ranked full-text search relation for +query+.
#
# Three progressively looser tsqueries are derived from the parsed query
# (exact, with typos, with typos and synonyms). Each distinct one contributes
# a +rankN+ column computed with ts_rank_cd; rows are filtered by the loosest
# query and ordered by rank0, rank1, ... descending.
#
# NOTE(review): this method relies on +select_values+, +klass+, +ts_query+,
# +ts_rank_cd+ and +ts_match+, none of which are defined in this module in
# the visible source. It looks like a copy of RelationMethods#search and is
# presumably only usable where those relation helpers are available.
#
# query      - the search string handed to Runestone::WebSearch.parse.
# dictionary - optional dictionary; also restricts rows via +where+ when set.
# prefix     - forwarded to Runestone::WebSearch.parse.
def search(query, dictionary: nil, prefix: :last)
  exact_search = Runestone::WebSearch.parse(query, prefix: prefix)
  typo_search  = exact_search.typos
  syn_search   = typo_search.synonymize

  tsqueries = [exact_search, typo_search, syn_search].map(&:to_s).uniq.map do |tsq|
    ts_query(tsq, dictionary: dictionary)
  end

  rank_columns = tsqueries.each_with_index.map do |tsq, i|
    Arel::Nodes::As.new(ts_rank_cd(:vector, tsq, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{i}"))
  end

  # Keep an explicit select list if the caller already provided one.
  q = if select_values.empty?
    select(klass.arel_table[Arel.star], *rank_columns)
  else
    select(*rank_columns)
  end

  q = if klass == Runestone::Model
    q.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
  else
    # Was TS::Model, a constant that is not defined anywhere in this gem.
    q.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
  end

  q = q.where(dictionary: dictionary) if dictionary

  q.order(
    *tsqueries.each_index.map { |i| Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{i}")) }
  )
end
|
88
|
+
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
require 'runestone/corpus'
|
93
|
+
require 'runestone/active_record/base_methods'
|
94
|
+
require 'runestone/active_record/relation_methods'
|
95
|
+
|
96
|
+
require 'active_record'
|
97
|
+
require 'active_record/relation'
|
98
|
+
require 'active_record/querying'
|
99
|
+
ActiveRecord::Base.include Runestone::ActiveRecord::BaseMethods
|
100
|
+
ActiveRecord::Relation.include Runestone::ActiveRecord::RelationMethods
|
101
|
+
ActiveRecord::Querying.delegate :search, to: :all
|
102
|
+
|
103
|
+
require 'runestone/engine' if defined?(Rails)
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'active_support/concern'
|
2
|
+
|
3
|
+
module Runestone::ActiveRecord
|
4
|
+
module BaseMethods
|
5
|
+
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
|
8
|
+
included do
|
9
|
+
class_attribute :runestone_settings, instance_accessor: true
|
10
|
+
end
|
11
|
+
|
12
|
+
class_methods do
|
13
|
+
|
14
|
+
# Declares a Runestone index on the model.
#
# On the first declaration for a class the +runestones+ association and the
# indexing callbacks are installed: after_commit hooks that enqueue a job when
# the runner is :active_job, otherwise inline after_create/after_update hooks.
# Every call appends a Runestone::Settings built from the block to
# +runestone_settings[name]+.
#
# name       - index name (default :default).
# dictionary - dictionary name; defaults to Runestone.dictionary.
# runner     - :active_job or anything else for inline; defaults to
#              Runestone.runner.
def runestone(name: :default, dictionary: nil, runner: nil, &block)
  runner ||= Runestone.runner
  dictionary ||= Runestone.dictionary

  if runestone_settings.nil?
    self.runestone_settings = {}

    class_eval do
      has_many :runestones, class_name: 'Runestone::Model', as: :record, dependent: :destroy

      if :active_job === runner
        after_commit :create_runestones, on: :create
        after_commit :update_runestones, on: :update
      else
        after_create :create_runestones!
        after_update :update_runestones!
      end
    end
  end

  bucket = (runestone_settings[name] ||= [])
  bucket << Runestone::Settings.new(base_class.name, name: name, dictionary: dictionary, &block)
end
|
38
|
+
|
39
|
+
# Rebuilds the index rows for this model.
#
# First deletes +runestones+ rows of this model's base class whose record no
# longer exists in the model table, then calls update_runestones! on every
# record via find_each.
def reindex!
  connection   = Runestone::Model.connection
  quoted_table = connection.quote_table_name(table_name)
  record_type  = connection.quote(base_class.name)

  connection.execute(<<-SQL)
    DELETE FROM runestones
    USING runestones AS t2
    LEFT OUTER JOIN #{quoted_table} ON
    t2.record_type = #{record_type}
    AND t2.record_id = #{quoted_table}.id
    WHERE runestones.record_type = #{record_type}
    AND runestones.record_id = t2.record_id
    AND #{quoted_table}.id IS NULL;
  SQL

  find_each(&:update_runestones!)
end
|
56
|
+
|
57
|
+
# Returns the nested highlight tree (see highlight_indexes) for the index
# +name+ and +dictionary+ (defaults to Runestone.dictionary).
#
# The tree is built from the dotted index paths of the matching settings and
# memoized per (name, dictionary) pair. Previously a single memo was shared
# by all arguments, so whichever index was asked for first was returned for
# every later call.
def highlights(name: :default, dictionary: nil)
  dictionary ||= Runestone.dictionary

  @highlights_by_index ||= {}
  @highlights_by_index[[name, dictionary.to_s]] ||= begin
    rsettings = runestone_settings[name].find { |s| s.dictionary.to_s == dictionary.to_s }
    highlight_indexes(rsettings.indexes.values.flatten.map { |i| i.to_s.split('.') })
  end
end
|
63
|
+
|
64
|
+
# Turns a list of index paths (arrays of path segments) into a nested Hash.
#
#   highlight_indexes([["name"], ["address", "city"]])
#   # => { "address" => { "city" => true }, "name" => true }
#
# A segment maps to +true+ when nothing follows it, otherwise to the tree of
# the remaining segments. Unlike the previous implementation, which shifted
# segments off the given arrays, the input is left untouched.
def highlight_indexes(indexes)
  indexes.sort.group_by(&:first).each_with_object({}) do |(key, paths), tree|
    remainders = paths.map { |path| path.drop(1) }.reject(&:empty?)
    tree[key] = remainders.empty? ? true : highlight_indexes(remainders)
  end
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
# Enqueues a Runestone::IndexingJob for this record, passing
# :create_runestones! as the method to run.
def create_runestones
  job_class = Runestone::IndexingJob
  job_class.perform_later(self, :create_runestones!)
end
|
79
|
+
|
80
|
+
# Inserts one +runestones+ row per configured setting for this record and adds
# the setting's corpus words to Runestone::Corpus.
#
# Values are serialized through the connection's type map and quoted; the
# vector column receives the SQL fragments from +setting.vectorize+ joined
# with ' || '. The :default index is stored with a NULL name.
def create_runestones!
  conn = Runestone::Model.connection
  quoted = ->(sql_type, value) { conn.quote(conn.send(:type_map).lookup(sql_type).serialize(value)) }

  runestone_settings.each do |index_name, settings|
    settings.each do |setting|
      rdata = setting.extract_attributes(self)

      columns = %w(record_type record_id name dictionary data vector).map { |column| conn.quote_column_name(column) }
      values = [
        quoted.call('varchar', self.class.base_class.name),
        quoted.call('uuid', id),
        index_name == :default ? 'NULL' : quoted.call('varchar', index_name.to_s),
        quoted.call('varchar', setting.dictionary),
        quoted.call('jsonb', rdata),
        setting.vectorize(rdata).join(' || ')
      ]

      conn.execute(<<-SQL)
        INSERT INTO #{Runestone::Model.quoted_table_name} (#{columns.join(",")})
        VALUES (#{values.join(',')})
      SQL

      Runestone::Corpus.add(*setting.corpus(rdata))
    end
  end
end
|
104
|
+
|
105
|
+
# Enqueues a Runestone::IndexingJob for this record, passing
# :update_runestones! as the method to run.
#
# Fixes the `preform_later` typo, which raised NoMethodError.
def update_runestones
  Runestone::IndexingJob.perform_later(self, :update_runestones!)
end
|
108
|
+
|
109
|
+
# Refreshes the +runestones+ row of every configured setting for this record.
#
# For each setting the stored data and vector are updated in place. If no row
# was affected, create_runestones! is called; otherwise the setting's corpus
# words are added to Runestone::Corpus.
#
# The WHERE clause now serializes its values with the same types that
# create_runestones! uses when inserting (uuid for record_id, varchar for
# name and dictionary). It previously used the integer type for all three,
# so the generated condition could not match the inserted row.
#
# NOTE(review): create_runestones! inserts rows for *all* settings, not only
# the one whose UPDATE matched nothing - confirm this cannot insert
# duplicates when only some of a record's index rows are missing.
def update_runestones!
  conn = Runestone::Model.connection
  runestone_settings.each do |index_name, settings|
    settings.each do |setting|
      rdata = setting.extract_attributes(self)

      if conn.execute(<<-SQL).cmd_tuples == 0
        UPDATE #{Runestone::Model.quoted_table_name}
        SET
          data = #{conn.quote(conn.send(:type_map).lookup('jsonb').serialize(rdata))},
          vector = #{setting.vectorize(rdata).join(' || ')}
        WHERE record_type = #{conn.quote(conn.send(:type_map).lookup('varchar').serialize(self.class.base_class.name))}
        AND record_id = #{conn.quote(conn.send(:type_map).lookup('uuid').serialize(id))}
        AND name #{index_name == :default ? 'IS NULL' : "= " + conn.quote(conn.send(:type_map).lookup('varchar').serialize(index_name.to_s))}
        AND dictionary = #{conn.quote(conn.send(:type_map).lookup('varchar').serialize(setting.dictionary))}
      SQL
        create_runestones!
      else
        Runestone::Corpus.add(*setting.corpus(rdata))
      end
    end
  end
end
|
133
|
+
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Runestone::ActiveRecord
|
2
|
+
module RelationMethods
|
3
|
+
|
4
|
+
# Coerces +query+ into an Arel::Nodes::TSQuery.
#
# An existing TSQuery node is returned unchanged; anything else is wrapped in
# a new node whose language is +dictionary+ (default: Runestone.dictionary).
def ts_query(query, dictionary: nil)
  return query if query.is_a?(Arel::Nodes::TSQuery)

  Arel::Nodes::TSQuery.new(query, language: dictionary || Runestone.dictionary)
end
|
13
|
+
|
14
|
+
# Returns the Arel attribute for the +vector+ column of Runestone::Model.
#
# Both arguments are currently ignored; the per-column lookup below is
# disabled and kept for reference only.
def ts_vector(column_name, dictionary: nil)
  # if column_name.is_a?(String) || column_name.is_a?(Symbol)
  #   column = columns_hash[column_name.to_s]
  #   if column.type == :tsvector
  #     arel_table[column.name]
  #   else
  #     Arel::Nodes::TSVector.new(arel_table[column.name], language)
  #   end
  # else
  #   column_name
  # end
  index_table = Runestone::Model.arel_table
  index_table[:vector]
end
|
27
|
+
|
28
|
+
# Builds an Arel::Nodes::TSMatch from ts_vector(vector) and ts_query(query),
# passing +dictionary+ on to both helpers.
def ts_match(vector, query, dictionary: nil)
  lhs = ts_vector(vector, dictionary: dictionary)
  rhs = ts_query(query, dictionary: dictionary)
  Arel::Nodes::TSMatch.new(lhs, rhs)
end
|
34
|
+
|
35
|
+
# Builds an Arel::Nodes::TSRank from ts_vector(vector) and ts_query(query),
# passing +dictionary+ on to both helpers.
def ts_rank(vector, query, dictionary: nil)
  lhs = ts_vector(vector, dictionary: dictionary)
  rhs = ts_query(query, dictionary: dictionary)
  Arel::Nodes::TSRank.new(lhs, rhs)
end
|
41
|
+
|
42
|
+
# Builds an Arel::Nodes::TSRankCD from ts_vector(vector) and ts_query(query),
# passing +dictionary+ on to both helpers.
def ts_rank_cd(vector, query, dictionary: nil)
  lhs = ts_vector(vector, dictionary: dictionary)
  rhs = ts_query(query, dictionary: dictionary)
  Arel::Nodes::TSRankCD.new(lhs, rhs)
end
|
48
|
+
|
49
|
+
# Returns this relation narrowed to rows matching +query+ and ordered by
# relevance.
#
# The parsed query is expanded into up to three distinct tsqueries (exact,
# with typos, with typos and synonyms). Each contributes a +rankN+ column
# computed with ts_rank_cd; rows are matched against the last (loosest) query
# and ordered by rank0, rank1, ... descending. Relations on models other than
# Runestone::Model are joined to their +runestones+ first.
#
# query      - the search string handed to Runestone::WebSearch.parse.
# dictionary - optional dictionary; also adds where(dictionary: ...) when set.
# prefix     - forwarded to Runestone::WebSearch.parse.
def search(query, dictionary: nil, prefix: nil)
  exact = Runestone::WebSearch.parse(query, prefix: prefix)
  with_typos = exact.typos
  with_synonyms = with_typos.synonymize

  tsqueries = [exact, with_typos, with_synonyms].map(&:to_s).uniq.map do |expression|
    ts_query(expression, dictionary: dictionary)
  end

  rank_columns = tsqueries.each_with_index.map do |tsquery, position|
    Arel::Nodes::As.new(ts_rank_cd(:vector, tsquery, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{position}"))
  end

  # Only add "table.*" when the caller has not chosen columns already.
  projections = select_values.empty? ? [klass.arel_table[Arel.star], *rank_columns] : rank_columns
  scope = select(*projections)

  scope = if klass == Runestone::Model
    scope.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
  else
    scope.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
  end

  scope = scope.where(dictionary: dictionary) if dictionary

  ordering = tsqueries.each_index.map do |position|
    Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{position}"))
  end
  scope.order(*ordering)
end
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Runestone::Corpus
|
2
|
+
|
3
|
+
# Inserts the lower-cased +words+ into runestone_corpus with a single
# multi-row INSERT; rows that conflict with existing ones are ignored.
# Does nothing (returns nil) when no words are given.
def self.add(*words)
  return if words.empty?

  conn = Runestone::Model.connection
  # Joined so that the surrounding "VALUES (...)" yields one tuple per word.
  tuples = words.map { |word| conn.quote(word.downcase) }.join('),(')

  conn.execute(<<-SQL)
    INSERT INTO runestone_corpus ( word )
    VALUES (#{tuples})
    ON CONFLICT DO NOTHING
  SQL
end
|
13
|
+
|
14
|
+
# Looks up corpus words that are close to the given +words+.
#
# Words whose typo_tolerance is 0 are skipped. The remaining ones are matched
# against runestone_corpus with the trigram operator (%) and kept when their
# levenshtein distance to the lower-cased token is within the tolerance.
# tsquery operator characters are stripped from each match; matches equal to
# the token are dropped.
#
# Returns a Hash mapping each token to an Array of similar words (tokens
# without matches are absent).
def self.similar_words(*words)
  tuples = words.each_with_object([]) do |word, rows|
    tolerance = typo_tolerance(word)
    next unless tolerance > 0

    rows << "#{Runestone::Model.connection.quote(word)}, #{Runestone::Model.connection.quote(word.downcase)}, #{tolerance}"
  end

  matches = {}
  return matches if tuples.empty?

  result = Runestone::Model.connection.execute(<<-SQL)
    WITH tokens (token, token_downcased, typo_tolerance) AS (VALUES (#{tuples.join('), (')}))
    SELECT token, word, levenshtein(runestone_corpus.word, tokens.token_downcased)
    FROM tokens
    JOIN runestone_corpus ON runestone_corpus.word % tokens.token_downcased
    WHERE
      runestone_corpus.word != tokens.token_downcased
      AND levenshtein(runestone_corpus.word, tokens.token_downcased) <= tokens.typo_tolerance
  SQL

  result.each_row do |token, candidate, _distance|
    candidate.gsub!(/\(|\)|:|\||!|\&|\*/, '')
    next if candidate == token

    (matches[token] ||= []) << candidate
  end

  matches
end
|
40
|
+
|
41
|
+
# Returns the number of typos tolerated for +word+: the key of the first
# entry in Runestone.typo_tolerances whose value contains the word's length,
# or 0 when no entry matches.
def self.typo_tolerance(word)
  length = word.length
  entry = Runestone.typo_tolerances.find { |_tolerance, lengths| lengths.member?(length) }
  tolerance = entry&.first
  tolerance || 0
end
|
44
|
+
|
45
|
+
end
|