runestone 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3756ae6e1dcf043dc6d4ca6e1312a0409ceac0fd15a90e6a7859c7d5cfec0ef9
4
+ data.tar.gz: a2e9bfd44ae564d10ddd6ee9f25179b267b12979dd7bfe2311825ce2bc348d97
5
+ SHA512:
6
+ metadata.gz: 4c6b1cd82ea806fd419ab2b9986b27d2fabb0d0d898decf69068bb9b824c642d9ef333e8c2f6874ca5151d5250ef5f4d81796cce7813b2bfdb66dbb47ed5a01d
7
+ data.tar.gz: ec0d633c0eaab16dccea6a340f53f6160e6932a530d6d1398ce3462b9366abbc018548e5789cccf2974c8f45495e78949a865a3b270f09ab7d61b9738a5e7db6
@@ -0,0 +1,4 @@
1
+ coverage
2
+ .byebug_history
3
+ *.gem
4
+ Gemfile.lock
@@ -0,0 +1 @@
1
+ exclude = '{$exclude,log,bin,tmp,.tm_properties,public/system,coverage}'
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in runestone.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Jon Bracy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Runestone
2
+
3
+ Runestone provides full text search using PostgreSQL's full text search capabilities.
4
+ It was inspired by [Postgres full-text search is Good Enough!][1] and
5
+ [Super Fuzzy Searching on PostgreSQL][2].
6
+
7
+
8
+
9
+ [1]: http://rachbelaid.com/postgres-full-text-search-is-good-enough/
10
+ [2]: http://www.www-old.bartlettpublishing.com/site/bartpub/blog/3/entry/350
@@ -0,0 +1,14 @@
1
# Rake entry point for the gem: loads Bundler (including the gem packaging
# tasks and the :development group) and registers the test task.
require 'bundler/setup'
require 'bundler/gem_tasks'
Bundler.require(:development)

require 'fileutils'
require 'rake/testtask'

# Test task: puts lib/ and test/ on the load path and runs every file
# matching test/**/*_test.rb.
Rake::TestTask.new do |task|
  task.libs.push('lib', 'test')
  task.test_files = FileList['test/**/*_test.rb']
  # task.warning = true
  # task.verbose = true
end
@@ -0,0 +1,31 @@
1
# Creates the database objects Runestone relies on:
#
# * the +runestones+ table, holding one search document (raw +data+ plus its
#   +tsvector+) per record / index name / dictionary,
# * the +runestone_corpus+ table of known words with a trigram index,
# * the +simple_unaccent+ text search configuration.
class CreateRunestoneTables < ActiveRecord::Migration[6.0]

  def change
    # NOTE(review): pgcrypto is presumably enabled for UUID generation of the
    # primary key - confirm against the target PostgreSQL version.
    enable_extension 'pgcrypto'
    # Trigram operators (gin_trgm_ops, %) used on runestone_corpus.
    enable_extension 'pg_trgm'
    # Provides levenshtein(), used when looking up similar words.
    enable_extension 'fuzzystrmatch'
    # Provides the `unaccent` dictionary that simple_unaccent maps to below;
    # without it the ALTER TEXT SEARCH CONFIGURATION statement fails.
    enable_extension 'unaccent'

    create_table :runestones, id: :uuid do |t|
      t.belongs_to :record, type: :uuid, polymorphic: true, null: false
      t.string     :name
      t.string     :dictionary
      t.jsonb      :data, null: false
      t.tsvector   :vector, null: false
    end

    add_index :runestones, [:record_type, :record_id, :name, :dictionary], unique: true, name: 'index_runestones_for_uniqueness'
    add_index :runestones, :vector, using: :gin

    # Raw SQL cannot be reversed automatically, so both directions are spelled
    # out; this keeps `change` usable with db:rollback.
    reversible do |dir|
      dir.up do
        execute <<-SQL
          CREATE TABLE runestone_corpus ( word varchar, CONSTRAINT word UNIQUE(word) );

          CREATE INDEX runestone_corpus_trgm_idx ON runestone_corpus USING GIN (word gin_trgm_ops);

          CREATE TEXT SEARCH CONFIGURATION simple_unaccent (COPY = simple);
          ALTER TEXT SEARCH CONFIGURATION simple_unaccent
            ALTER MAPPING FOR hword, hword_part, word
            WITH unaccent, simple;
        SQL
      end

      dir.down do
        # Dropping the table also drops runestone_corpus_trgm_idx.
        execute <<-SQL
          DROP TEXT SEARCH CONFIGURATION simple_unaccent;
          DROP TABLE runestone_corpus;
        SQL
      end
    end
  end

end
@@ -0,0 +1,103 @@
1
require 'arel/extensions'

# Namespace and global configuration for Runestone: the default text search
# dictionary, how indexing is run, typo tolerances and the synonym table.
module Runestone
  autoload :Model, "#{File.dirname(__FILE__)}/runestone/model"
  autoload :Settings, "#{File.dirname(__FILE__)}/runestone/settings"
  autoload :WebSearch, "#{File.dirname(__FILE__)}/runestone/web_search"
  autoload :IndexingJob, "#{File.dirname(__FILE__)}/runestone/indexing_job"

  # Text search configuration used when a caller does not pass one.
  mattr_accessor :dictionary, default: :simple_unaccent
  # How records are indexed; :active_job defers to Runestone::IndexingJob,
  # any other value indexes inline in model callbacks.
  mattr_accessor :runner, default: :inline
  # NOTE(review): presumably the queue IndexingJob is enqueued on - the job
  # itself is not visible here.
  mattr_accessor :job_queue, default: :runestone_indexing
  # Maps an allowed edit distance to the range of word lengths it applies to.
  mattr_accessor :typo_tolerances, default: { 1 => 4..7, 2 => 8.. }

  # Synonym table. Each key is a normalized token; each value is an Array
  # whose entries are replacement strings and, for multi-word phrases, a
  # nested Hash keyed by the next token of the phrase.
  mattr_reader :synonyms do
    { }
  end

  # Downcases +string+ and applies Unicode normalization.
  #
  # @param string [String]
  # @return [String] the normalized copy; if the encoding cannot be Unicode
  #   normalized, the downcased string is returned as is.
  def self.normalize(string)
    string = string.downcase
    string.unicode_normalize!
    string
  rescue Encoding::CompatibilityError
    string
  end

  # Registers every entry of +dictionary+ as a synonym.
  #
  # @param dictionary [Hash] phrase => replacement (String) or replacements (Array)
  def self.add_synonyms(dictionary)
    dictionary.each do |word, replacements|
      add_synonym(word, *replacements)
    end
  end

  # Registers +replacements+ as synonyms for +word+. A multi-word phrase is
  # stored as a chain of nested hashes, one level per leading token, with the
  # replacements attached to the final token.
  #
  # @param word [String] a word or whitespace separated phrase
  # @param replacements [Array<String>]
  # @return [Array<String>, nil] the replacements now stored for the phrase,
  #   or nil when +word+ contains no tokens
  def self.add_synonym(word, *replacements)
    replacements = replacements.map { |r| normalize(r) }

    # String#split without arguments ignores leading whitespace, so no empty
    # token is produced for input such as " foo".
    *path, last = normalize(word).split
    return if last.nil? # blank phrase: nothing to register

    node = path.inject(synonyms) do |level, token|
      level[token] ||= []
      nested = level[token].find { |entry| entry.is_a?(Hash) }
      unless nested
        nested = {}
        level[token] << nested
      end
      nested
    end

    node[last] = ((node[last] || []) + replacements).uniq
  end

  # Builds a ranked full text search for +query+.
  #
  # NOTE(review): this instance method relies on relation methods that this
  # module does not define (select_values, klass, select, ts_query, ...) and
  # mirrors Runestone::ActiveRecord::RelationMethods#search; it is only usable
  # when mixed into an ActiveRecord::Relation-like object.
  #
  # @param query [String] the user supplied search string
  # @param dictionary [Symbol, String, nil] restricts the search to one dictionary
  # @param prefix passed through to Runestone::WebSearch.parse
  # @return the relation with rank0..rankN columns selected and ordered descending
  def search(query, dictionary: nil, prefix: :last)
    exact_search = Runestone::WebSearch.parse(query, prefix: prefix)
    typo_search = exact_search.typos
    syn_search = typo_search.synonymize

    tsqueries = [exact_search, typo_search, syn_search].map(&:to_s).uniq.map do |tsq|
      ts_query(tsq, dictionary: dictionary)
    end

    # One rank column per distinct query, aliased rank0, rank1, ...
    ranks = tsqueries.each_with_index.map do |tsq, i|
      Arel::Nodes::As.new(ts_rank_cd(:vector, tsq, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{i}"))
    end

    scope = if select_values.empty?
      select(klass.arel_table[Arel.star], *ranks)
    else
      select(*ranks)
    end

    # The last query is the broadest one, so it decides which rows match.
    scope = if klass == Runestone::Model
      scope.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
    else
      scope.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
    end

    # The dictionary column lives on the runestones table, not on the model
    # being searched, so the condition is built against that table.
    scope = scope.where(Runestone::Model.arel_table[:dictionary].eq(dictionary.to_s)) if dictionary

    scope.order(
      *tsqueries.each_index.map { |i| Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{i}")) }
    )
  end

end

require 'runestone/corpus'
require 'runestone/active_record/base_methods'
require 'runestone/active_record/relation_methods'

require 'active_record'
require 'active_record/relation'
require 'active_record/querying'

# Hook Runestone into ActiveRecord: model-level indexing API, relation-level
# query helpers, and Model.search delegating to Model.all.search.
ActiveRecord::Base.include Runestone::ActiveRecord::BaseMethods
ActiveRecord::Relation.include Runestone::ActiveRecord::RelationMethods
ActiveRecord::Querying.delegate :search, to: :all

require 'runestone/engine' if defined?(Rails)
@@ -0,0 +1,135 @@
1
require 'active_support/concern'

module Runestone::ActiveRecord
  # Model-level API: declares search indexes on a model (+runestone+) and
  # keeps the matching rows of the runestones table in sync with the record.
  module BaseMethods

    extend ActiveSupport::Concern

    included do
      # Hash of index name => Array of Runestone::Settings; nil until the
      # model declares its first index.
      class_attribute :runestone_settings, instance_accessor: true
    end

    class_methods do

      # Declares a search index on the model.
      #
      # The first declaration also adds the +runestones+ association and the
      # indexing callbacks: after-commit jobs when the runner is :active_job,
      # inline after_create / after_update otherwise.
      #
      # @param name [Symbol] index name
      # @param dictionary [Symbol, String, nil] defaults to Runestone.dictionary
      # @param runner [Symbol, nil] defaults to Runestone.runner
      # @yield forwarded to Runestone::Settings.new
      def runestone(name: :default, dictionary: nil, runner: nil, &block)
        runner ||= Runestone.runner
        dictionary ||= Runestone.dictionary

        if self.runestone_settings.nil?
          self.runestone_settings = {}

          class_eval do
            has_many :runestones, class_name: 'Runestone::Model', as: :record, dependent: :destroy

            case runner
            when :active_job
              after_commit :create_runestones, on: :create
              after_commit :update_runestones, on: :update
            else
              after_create :create_runestones!
              after_update :update_runestones!
            end
          end
        end

        self.runestone_settings[name] ||= []
        self.runestone_settings[name] << Runestone::Settings.new(base_class.name, name: name, dictionary: dictionary, &block)
      end

      # Deletes runestones whose record no longer exists in this model's
      # table, then re-indexes every record.
      def reindex!
        conn = Runestone::Model.connection
        model_table = conn.quote_table_name(table_name)

        conn.execute(<<-SQL)
          DELETE FROM runestones
          USING runestones AS t2
            LEFT OUTER JOIN #{model_table} ON
              t2.record_type = #{conn.quote(base_class.name)}
              AND t2.record_id = #{model_table}.id
          WHERE runestones.record_type = #{conn.quote(base_class.name)}
            AND runestones.record_id = t2.record_id
            AND #{model_table}.id IS NULL;
        SQL

        find_each(&:update_runestones!)
      end

      # Nested hash describing the indexed attribute paths of one index.
      #
      # The result is memoized per (name, dictionary) pair, so asking for a
      # different index or dictionary does not return a stale tree.
      #
      # @param name [Symbol] index name
      # @param dictionary [Symbol, String, nil] defaults to Runestone.dictionary
      # @return [Hash] see #highlight_indexes
      def highlights(name: :default, dictionary: nil)
        dictionary ||= Runestone.dictionary

        @highlights ||= {}
        @highlights[[name, dictionary.to_s]] ||= begin
          rsettings = self.runestone_settings[name].find { |s| s.dictionary.to_s == dictionary.to_s }
          highlight_indexes(rsettings.indexes.values.flatten.map { |i| i.to_s.split('.') })
        end
      end

      # Turns a list of attribute paths into a nested hash; a leaf is +true+.
      #
      #   highlight_indexes([%w(name), %w(address city)])
      #   # => { "address" => { "city" => true }, "name" => true }
      #
      # The argument is left unmodified.
      #
      # @param indexes [Array<Array<String>>] attribute paths, already split
      # @return [Hash]
      def highlight_indexes(indexes)
        indexes.sort.group_by(&:first).each_with_object({}) do |(key, paths), tree|
          rest = paths.map { |path| path.drop(1) }.reject(&:empty?)
          tree[key] = rest.empty? ? true : highlight_indexes(rest)
        end
      end

    end

    # Enqueues a job that runs #create_runestones! for this record.
    def create_runestones
      Runestone::IndexingJob.perform_later(self, :create_runestones!)
    end

    # Inserts one runestones row per configured setting and adds the
    # extracted words to the corpus.
    def create_runestones!
      self.runestone_settings.each do |index_name, settings|
        settings.each do |setting|
          insert_runestone!(index_name, setting, setting.extract_attributes(self))
        end
      end
    end

    # Enqueues a job that runs #update_runestones! for this record.
    def update_runestones
      Runestone::IndexingJob.perform_later(self, :update_runestones!)
    end

    # Refreshes the runestones row of every configured setting. A setting
    # that has no row yet gets one inserted; rows of other settings are left
    # alone.
    def update_runestones!
      conn = Runestone::Model.connection
      self.runestone_settings.each do |index_name, settings|
        settings.each do |setting|
          rdata = setting.extract_attributes(self)

          # The default index is stored with a NULL name.
          name_condition = index_name == :default ? 'IS NULL' : "= #{runestone_quote('varchar', index_name.to_s)}"

          # Values are serialized with the same column types used on insert
          # (uuid record id, varchar name and dictionary) so the row matches.
          updated = conn.execute(<<-SQL).cmd_tuples
            UPDATE #{Runestone::Model.quoted_table_name}
            SET
              data = #{runestone_quote('jsonb', rdata)},
              vector = #{setting.vectorize(rdata).join(' || ')}
            WHERE record_type = #{runestone_quote('varchar', self.class.base_class.name)}
              AND record_id = #{runestone_quote('uuid', id)}
              AND name #{name_condition}
              AND dictionary = #{runestone_quote('varchar', setting.dictionary)}
          SQL

          if updated == 0
            insert_runestone!(index_name, setting, rdata)
          else
            Runestone::Corpus.add(*setting.corpus(rdata))
          end

        end
      end
    end

    private

    # Inserts the runestones row for a single setting and adds its words to
    # the corpus.
    def insert_runestone!(index_name, setting, rdata)
      conn = Runestone::Model.connection

      ts_column_names = %w(record_type record_id name dictionary data vector).map { |name| conn.quote_column_name(name) }
      ts_values = [
        runestone_quote('varchar', self.class.base_class.name),
        runestone_quote('uuid', id),
        index_name == :default ? 'NULL' : runestone_quote('varchar', index_name.to_s),
        runestone_quote('varchar', setting.dictionary),
        runestone_quote('jsonb', rdata),
        setting.vectorize(rdata).join(' || ')
      ]
      conn.execute(<<-SQL)
        INSERT INTO #{Runestone::Model.quoted_table_name} (#{ts_column_names.join(",")})
        VALUES (#{ts_values.join(',')})
      SQL

      Runestone::Corpus.add(*setting.corpus(rdata))
    end

    # Serializes +value+ with the connection's type named +type+ and quotes
    # the result for use in a SQL literal.
    def runestone_quote(type, value)
      conn = Runestone::Model.connection
      conn.quote(conn.send(:type_map).lookup(type).serialize(value))
    end

  end
end
@@ -0,0 +1,83 @@
1
module Runestone::ActiveRecord
  # Query helpers mixed into ActiveRecord::Relation: small builders for the
  # text search Arel nodes plus the ranked #search entry point.
  module RelationMethods

    # Wraps +query+ in an Arel::Nodes::TSQuery unless it already is one.
    #
    # @param query [String, Arel::Nodes::TSQuery]
    # @param dictionary [Symbol, String, nil] defaults to Runestone.dictionary
    # @return [Arel::Nodes::TSQuery]
    def ts_query(query, dictionary: nil)
      dictionary ||= Runestone.dictionary

      if query.is_a?(Arel::Nodes::TSQuery)
        query
      else
        Arel::Nodes::TSQuery.new(query, language: dictionary)
      end
    end

    # Returns the +vector+ column of the runestones table. Both arguments are
    # currently ignored; the disabled implementation below is kept for
    # reference.
    def ts_vector(column_name, dictionary: nil)
      # if column_name.is_a?(String) || column_name.is_a?(Symbol)
      #   column = columns_hash[column_name.to_s]
      #   if column.type == :tsvector
      #     arel_table[column.name]
      #   else
      #     Arel::Nodes::TSVector.new(arel_table[column.name], language)
      #   end
      # else
      #   column_name
      # end
      Runestone::Model.arel_table[:vector]
    end

    # Builds the match condition between the runestones vector and +query+.
    #
    # @return [Arel::Nodes::TSMatch]
    def ts_match(vector, query, dictionary: nil)
      Arel::Nodes::TSMatch.new(
        ts_vector(vector, dictionary: dictionary),
        ts_query(query, dictionary: dictionary)
      )
    end

    # Builds a TSRank node for the runestones vector and +query+.
    #
    # @return [Arel::Nodes::TSRank]
    def ts_rank(vector, query, dictionary: nil)
      Arel::Nodes::TSRank.new(
        ts_vector(vector, dictionary: dictionary),
        ts_query(query, dictionary: dictionary)
      )
    end

    # Builds a TSRankCD node for the runestones vector and +query+.
    #
    # @return [Arel::Nodes::TSRankCD]
    def ts_rank_cd(vector, query, dictionary: nil)
      Arel::Nodes::TSRankCD.new(
        ts_vector(vector, dictionary: dictionary),
        ts_query(query, dictionary: dictionary)
      )
    end

    # Builds a ranked full text search for +query+ on this relation.
    #
    # Up to three queries are derived from the input (exact, with typos, with
    # synonyms; duplicates removed). Each contributes a rankN column, rows
    # must match the last (broadest) query, and results are ordered by
    # rank0, rank1, ... descending.
    #
    # @param query [String] the user supplied search string
    # @param dictionary [Symbol, String, nil] restricts the search to one dictionary
    # @param prefix passed through to Runestone::WebSearch.parse
    # @return [ActiveRecord::Relation]
    def search(query, dictionary: nil, prefix: nil)
      exact_search = Runestone::WebSearch.parse(query, prefix: prefix)
      typo_search = exact_search.typos
      syn_search = typo_search.synonymize

      tsqueries = [exact_search, typo_search, syn_search].map(&:to_s).uniq.map do |tsq|
        ts_query(tsq, dictionary: dictionary)
      end

      # One rank column per distinct query, aliased rank0, rank1, ...
      ranks = tsqueries.each_with_index.map do |tsq, i|
        Arel::Nodes::As.new(ts_rank_cd(:vector, tsq, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{i}"))
      end

      scope = if select_values.empty?
        select(klass.arel_table[Arel.star], *ranks)
      else
        select(*ranks)
      end

      scope = if klass == Runestone::Model
        scope.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
      else
        scope.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
      end

      # The dictionary column lives on the runestones table, not on the model
      # being searched, so the condition is built against that table.
      scope = scope.where(Runestone::Model.arel_table[:dictionary].eq(dictionary.to_s)) if dictionary

      scope.order(
        *tsqueries.each_index.map { |i| Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{i}")) }
      )
    end

  end
end
@@ -0,0 +1,45 @@
1
# Access to the runestone_corpus table: the set of known words used to
# suggest corrections for mistyped search tokens.
module Runestone::Corpus

  # Inserts the downcased +words+ into the corpus, skipping ones already
  # present. Does nothing when called without words.
  def self.add(*words)
    return if words.empty?

    connection = Runestone::Model.connection
    quoted_words = words.map { |word| connection.quote(word.downcase) }

    connection.execute(<<-SQL)
      INSERT INTO runestone_corpus ( word )
      VALUES (#{quoted_words.join('),(')})
      ON CONFLICT DO NOTHING
    SQL
  end

  # Looks up corpus words that are close to each of +words+.
  #
  # Only words with a non-zero typo tolerance are queried. A corpus word is a
  # candidate when it is trigram-similar to the downcased token, differs from
  # it, and is within the token's tolerated Levenshtein distance.
  #
  # @return [Hash{String => Array<String>}] token => similar corpus words,
  #   with tsquery operator characters stripped from each word
  def self.similar_words(*words)
    quote = ->(value) { Runestone::Model.connection.quote(value) }

    rows = words.each_with_object([]) do |word, acc|
      tolerance = typo_tolerance(word)
      next unless tolerance > 0

      acc << "#{quote.call(word)}, #{quote.call(word.downcase)}, #{tolerance}"
    end

    similar = {}
    return similar if rows.empty?

    result = Runestone::Model.connection.execute(<<-SQL)
      WITH tokens (token, token_downcased, typo_tolerance) AS (VALUES (#{rows.join('), (')}))
      SELECT token, word, levenshtein(runestone_corpus.word, tokens.token_downcased)
      FROM tokens
      JOIN runestone_corpus ON runestone_corpus.word % tokens.token_downcased
      WHERE
        runestone_corpus.word != tokens.token_downcased
        AND levenshtein(runestone_corpus.word, tokens.token_downcased) <= tokens.typo_tolerance
    SQL

    result.each_row do |token, candidate, _distance|
      candidate.gsub!(/\(|\)|:|\||!|\&|\*/, '')
      next if candidate == token

      (similar[token] ||= []) << candidate
    end

    similar
  end

  # Allowed edit distance for +word+: the key of the first entry in
  # Runestone.typo_tolerances whose length range includes the word's length,
  # or 0 when none does.
  def self.typo_tolerance(word)
    length = word.length
    entry = Runestone.typo_tolerances.find { |_distance, lengths| lengths.member?(length) }
    (entry && entry.first) || 0
  end

end