runestone 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3756ae6e1dcf043dc6d4ca6e1312a0409ceac0fd15a90e6a7859c7d5cfec0ef9
4
+ data.tar.gz: a2e9bfd44ae564d10ddd6ee9f25179b267b12979dd7bfe2311825ce2bc348d97
5
+ SHA512:
6
+ metadata.gz: 4c6b1cd82ea806fd419ab2b9986b27d2fabb0d0d898decf69068bb9b824c642d9ef333e8c2f6874ca5151d5250ef5f4d81796cce7813b2bfdb66dbb47ed5a01d
7
+ data.tar.gz: ec0d633c0eaab16dccea6a340f53f6160e6932a530d6d1398ce3462b9366abbc018548e5789cccf2974c8f45495e78949a865a3b270f09ab7d61b9738a5e7db6
@@ -0,0 +1,4 @@
1
+ coverage
2
+ .byebug_history
3
+ *.gem
4
+ Gemfile.lock
@@ -0,0 +1 @@
1
+ exclude = '{$exclude,log,bin,tmp,.tm_properties,public/system,coverage}'
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in runestone.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Jon Bracy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Runestone
2
+
3
+ Runestone provides full text search using PostgreSQL's full text search capabilities.
4
+ It was inspired by [Postgres full-text search is Good Enough!][1] and
5
+ [Super Fuzzy Searching on PostgreSQL][2]
6
+
7
+
8
+
9
+ [1]: http://rachbelaid.com/postgres-full-text-search-is-good-enough/
10
+ [2]: http://www.www-old.bartlettpublishing.com/site/bartpub/blog/3/entry/350
@@ -0,0 +1,14 @@
1
+ require 'bundler/setup'
2
+ require "bundler/gem_tasks"
3
+ Bundler.require(:development)
4
+
5
+ require 'fileutils'
6
+ require "rake/testtask"
7
+
8
+ # Test Task
9
+ Rake::TestTask.new do |t|
10
+ t.libs << 'lib' << 'test'
11
+ t.test_files = FileList['test/**/*_test.rb']
12
+ # t.warning = true
13
+ # t.verbose = true
14
+ end
@@ -0,0 +1,31 @@
1
# Creates the database objects used by Runestone:
#
# * the `runestones` table (one row per indexed record / index name /
#   dictionary, holding the extracted `data` and its `vector`),
# * the `runestone_corpus` table of known words with a trigram index,
# * the `simple_unaccent` text search configuration.
class CreateRunestoneTables < ActiveRecord::Migration[6.0]

  def change
    enable_extension 'pgcrypto'
    enable_extension 'pg_trgm'
    enable_extension 'fuzzystrmatch'
    # The `unaccent` dictionary referenced by the text search configuration
    # below is provided by this extension.
    enable_extension 'unaccent'

    create_table :runestones, id: :uuid do |t|
      t.belongs_to :record, type: :uuid, polymorphic: true, null: false
      t.string :name
      t.string :dictionary
      t.jsonb :data, null: false
      t.tsvector :vector, null: false
    end

    add_index :runestones, [:record_type, :record_id, :name, :dictionary], unique: true, name: 'index_runestones_for_uniqueness'
    add_index :runestones, :vector, using: :gin

    # Raw SQL cannot be reversed automatically, so both directions are
    # spelled out to keep the migration rollback-safe.
    reversible do |dir|
      dir.up do
        execute <<-SQL
          CREATE TABLE runestone_corpus ( word varchar, CONSTRAINT word UNIQUE(word) );

          CREATE INDEX runestone_corpus_trgm_idx ON runestone_corpus USING GIN (word gin_trgm_ops);

          CREATE TEXT SEARCH CONFIGURATION simple_unaccent (COPY = simple);
          ALTER TEXT SEARCH CONFIGURATION simple_unaccent
            ALTER MAPPING FOR hword, hword_part, word
            WITH unaccent, simple;
        SQL
      end

      dir.down do
        execute <<-SQL
          DROP TEXT SEARCH CONFIGURATION simple_unaccent;

          DROP TABLE runestone_corpus;
        SQL
      end
    end
  end

end
@@ -0,0 +1,103 @@
1
+ require 'arel/extensions'
2
+
3
+ module Runestone
4
+ autoload :Model, "#{File.dirname(__FILE__)}/runestone/model"
5
+ autoload :Settings, "#{File.dirname(__FILE__)}/runestone/settings"
6
+ autoload :WebSearch, "#{File.dirname(__FILE__)}/runestone/web_search"
7
+ autoload :IndexingJob, "#{File.dirname(__FILE__)}/runestone/indexing_job"
8
+
9
+ mattr_accessor :dictionary, default: :simple_unaccent
10
+ mattr_accessor :runner, default: :inline
11
+ mattr_accessor :job_queue, default: :runestone_indexing
12
+ mattr_accessor :typo_tolerances, default: { 1 => 4..7, 2 => 8.. }
13
+
14
+ mattr_reader :synonyms do
15
+ { }
16
+ end
17
+
18
# Returns a lower-cased, Unicode-normalized copy of +string+.
#
# When the string's encoding does not support the operation
# (Encoding::CompatibilityError), whatever form was reached before the
# error is returned instead of raising.
def self.normalize(string)
  begin
    string = string.downcase
    string.unicode_normalize!
  rescue Encoding::CompatibilityError
    # Fall through with the value reached so far.
  end

  string
end
25
+
26
# Registers every entry of +dictionary+ through +add_synonym+.
#
# Each entry is a word paired with either a single replacement or a list
# of replacements; the value is splatted into the call.
def self.add_synonyms(dictionary)
  dictionary.each { |term, substitutes| add_synonym(term, *substitutes) }
end
31
+
32
# Registers +replacements+ as synonyms for +word+.
#
# +word+ and every replacement are passed through +normalize+. The word is
# then split on whitespace: every token except the last selects (or creates)
# a nested Hash stored inside that token's array in +synonyms+, and the
# replacements are appended, de-duplicated, under the last token.
#
# Returns the result of the final +uniq!+ call (nil when nothing was removed).
def self.add_synonym(word, *replacements)
  *leading, final = normalize(word).split(/\s+/)
  normalized = replacements.map { |replacement| normalize(replacement) }

  node = leading.reduce(synonyms) do |level, token|
    nested = level[token].find { |entry| entry.is_a?(Hash) } if level.has_key?(token)

    unless nested
      nested = {}
      level[token] ||= []
      level[token] << nested
    end

    nested
  end

  node[final] = (node[final] || []) + normalized
  node[final].uniq!
end
55
+
56
# Builds a ranked full text search relation for +query+.
#
# Up to three tsqueries are derived from the parsed query: the query as
# parsed, its +typos+ variant, and that variant's +synonymize+ result.
# They are de-duplicated by their string form. Each one contributes a
# `rankN` select column (via +ts_rank_cd+); rows are matched against the
# last tsquery and ordered by every rank column, descending.
#
# NOTE(review): this relies on +select_values+, +klass+, +ts_query+,
# +ts_rank_cd+ and +ts_match+ being available on the receiver, so it is
# presumably meant to run in a relation context — confirm where this module
# is mixed in.
#
# query      - the search string handed to Runestone::WebSearch.parse
# dictionary - optional dictionary; when given, results are also filtered
#              by it
# prefix     - forwarded to Runestone::WebSearch.parse
def search(query, dictionary: nil, prefix: :last)
  exact_search = Runestone::WebSearch.parse(query, prefix: prefix)
  typo_search = exact_search.typos
  syn_search = typo_search.synonymize

  tsqueries = [exact_search, typo_search, syn_search].map(&:to_s).uniq.map do |tsquery|
    ts_query(tsquery, dictionary: dictionary)
  end

  rank_columns = tsqueries.each_with_index.map do |tsquery, i|
    Arel::Nodes::As.new(ts_rank_cd(:vector, tsquery, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{i}"))
  end

  # Keep the model's own columns unless the caller already chose a select list.
  q = if select_values.empty?
    select(klass.arel_table[Arel.star], *rank_columns)
  else
    select(*rank_columns)
  end

  q = if klass == Runestone::Model
    q.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
  else
    # Was `TS::Model`, a constant that is not defined anywhere in this gem.
    q.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
  end

  q = q.where(dictionary: dictionary) if dictionary

  q.order(
    *tsqueries.each_index.map { |i| Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{i}")) }
  )
end
88
+
89
+
90
+ end
91
+
92
+ require 'runestone/corpus'
93
+ require 'runestone/active_record/base_methods'
94
+ require 'runestone/active_record/relation_methods'
95
+
96
+ require 'active_record'
97
+ require 'active_record/relation'
98
+ require 'active_record/querying'
99
+ ActiveRecord::Base.include Runestone::ActiveRecord::BaseMethods
100
+ ActiveRecord::Relation.include Runestone::ActiveRecord::RelationMethods
101
+ ActiveRecord::Querying.delegate :search, to: :all
102
+
103
+ require 'runestone/engine' if defined?(Rails)
@@ -0,0 +1,135 @@
1
+ require 'active_support/concern'
2
+
3
+ module Runestone::ActiveRecord
4
+ module BaseMethods
5
+
6
+ extend ActiveSupport::Concern
7
+
8
+ included do
9
+ class_attribute :runestone_settings, instance_accessor: true
10
+ end
11
+
12
+ class_methods do
13
+
14
# Declares a Runestone index on the model.
#
# On the first call for a class this also defines the +runestones+
# association and installs the indexing callbacks: +after_commit+ hooks that
# enqueue a job when the runner is +:active_job+, otherwise inline
# +after_create+ / +after_update+ hooks.
#
# name       - index name (defaults to :default)
# dictionary - dictionary for this index (defaults to Runestone.dictionary)
# runner     - :active_job or anything else for inline indexing
#              (defaults to Runestone.runner)
# block      - forwarded to Runestone::Settings.new
def runestone(name: :default, dictionary: nil, runner: nil, &block)
  runner = runner || Runestone.runner
  dictionary = dictionary || Runestone.dictionary

  if runestone_settings.nil?
    self.runestone_settings = {}

    has_many :runestones, class_name: 'Runestone::Model', as: :record, dependent: :destroy

    if :active_job === runner
      after_commit :create_runestones, on: :create
      after_commit :update_runestones, on: :update
    else
      after_create :create_runestones!
      after_update :update_runestones!
    end
  end

  bucket = (runestone_settings[name] ||= [])
  bucket << Runestone::Settings.new(base_class.name, name: name, dictionary: dictionary, &block)
end
38
+
39
# Rebuilds the index rows for this model.
#
# First deletes `runestones` rows of this model's base class whose record no
# longer exists in the model's table, then calls +update_runestones!+ on
# every record in batches.
def reindex!
  conn = Runestone::Model.connection
  model_table = conn.quote_table_name(table_name)
  record_type = conn.quote(base_class.name)

  conn.execute(<<-SQL)
    DELETE FROM runestones
    USING runestones AS t2
    LEFT OUTER JOIN #{model_table} ON
    t2.record_type = #{record_type}
    AND t2.record_id = #{model_table}.id
    WHERE runestones.record_type = #{record_type}
    AND runestones.record_id = t2.record_id
    AND #{model_table}.id IS NULL;
  SQL

  find_each { |record| record.update_runestones! }
end
56
+
57
# Returns the nested tree of indexed attribute paths (see
# +highlight_indexes+) for the index +name+ and +dictionary+.
#
# The result is memoised per (name, dictionary) pair. Previously a single
# cached value was returned for every combination, so a second call with
# different arguments got the first call's tree.
#
# name       - index name (defaults to :default)
# dictionary - dictionary name (defaults to Runestone.dictionary)
def highlights(name: :default, dictionary: nil)
  dictionary ||= Runestone.dictionary

  @highlights ||= {}
  @highlights[[name, dictionary.to_s]] ||= begin
    rsettings = self.runestone_settings[name].find { |s| s.dictionary.to_s == dictionary.to_s }
    highlight_indexes(rsettings.indexes.values.flatten.map { |i| i.to_s.split('.') })
  end
end
63
+
64
# Converts a list of attribute paths into a nested Hash.
#
# Each path is an Array of segments, e.g. ["address", "city"]. Paths are
# sorted and grouped by their first segment; a segment with no remaining
# segments below it maps to +true+, otherwise to the tree built recursively
# from the remaining segments.
#
#   highlight_indexes([["name"], ["address", "city"]])
#   # => { "address" => { "city" => true }, "name" => true }
#
# The paths passed in are left untouched (earlier versions shifted segments
# off the caller's arrays).
def highlight_indexes(indexes)
  indexes.sort.group_by { |path| path[0] }.each_with_object({}) do |(key, paths), tree|
    remainders = paths.map { |path| path.drop(1) }.reject(&:empty?)
    tree[key] = remainders.empty? ? true : highlight_indexes(remainders)
  end
end
73
+
74
+ end
75
+
76
# Enqueues a Runestone::IndexingJob that is handed this record and the
# :create_runestones! method name.
def create_runestones
  indexing_job = Runestone::IndexingJob
  indexing_job.perform_later(self, :create_runestones!)
end
79
+
80
# Inserts one `runestones` row per configured setting for this record and
# adds the setting's corpus words to Runestone::Corpus.
#
# Values are serialized through the connection's type map and quoted before
# being interpolated. The `vector` value is the SQL produced by
# +setting.vectorize+, joined with `||`. The :default index is stored with a
# NULL name.
def create_runestones!
  conn = Runestone::Model.connection
  quoted = ->(type, value) { conn.quote(conn.send(:type_map).lookup(type).serialize(value)) }
  ts_column_names = %w(record_type record_id name dictionary data vector).map { |column| conn.quote_column_name(column) }

  self.runestone_settings.each do |index_name, settings|
    settings.each do |setting|
      rdata = setting.extract_attributes(self)

      ts_values = [
        quoted.call('varchar', self.class.base_class.name),
        quoted.call('uuid', id),
        index_name == :default ? 'NULL' : quoted.call('varchar', index_name.to_s),
        quoted.call('varchar', setting.dictionary),
        quoted.call('jsonb', rdata),
        setting.vectorize(rdata).join(' || ')
      ]

      conn.execute(<<-SQL)
        INSERT INTO #{Runestone::Model.quoted_table_name} (#{ts_column_names.join(",")})
        VALUES (#{ts_values.join(',')})
      SQL

      Runestone::Corpus.add(*setting.corpus(rdata))
    end
  end
end
104
+
105
+ def update_runestones
106
+ Runestone::IndexingJob.preform_later(self, :update_runestones!)
107
+ end
108
+
109
# Refreshes the `runestones` row of every configured setting for this
# record.
#
# For each setting the row matching (record_type, record_id, name,
# dictionary) is updated with freshly extracted data and vector. When the
# UPDATE touches no rows, +create_runestones!+ is called instead; otherwise
# the setting's corpus words are added to Runestone::Corpus.
#
# Values are serialized with the same column types that +create_runestones!+
# uses ('uuid' for record_id, 'varchar' for name and dictionary) so both
# methods produce identical literals for the same row.
#
# NOTE(review): the fallback calls +create_runestones!+, which inserts rows
# for every setting, not only the one whose UPDATE matched nothing — confirm
# this is intended when a model declares several indexes.
def update_runestones!
  conn = Runestone::Model.connection
  quoted = ->(type, value) { conn.quote(conn.send(:type_map).lookup(type).serialize(value)) }

  self.runestone_settings.each do |index_name, settings|
    settings.each do |setting|
      rdata = setting.extract_attributes(self)

      # The :default index is stored with a NULL name.
      name_clause = index_name == :default ? 'IS NULL' : "= #{quoted.call('varchar', index_name.to_s)}"

      result = conn.execute(<<-SQL)
        UPDATE #{Runestone::Model.quoted_table_name}
        SET
          data = #{quoted.call('jsonb', rdata)},
          vector = #{setting.vectorize(rdata).join(' || ')}
        WHERE record_type = #{quoted.call('varchar', self.class.base_class.name)}
          AND record_id = #{quoted.call('uuid', id)}
          AND name #{name_clause}
          AND dictionary = #{quoted.call('varchar', setting.dictionary)}
      SQL

      if result.cmd_tuples == 0
        create_runestones!
      else
        Runestone::Corpus.add(*setting.corpus(rdata))
      end
    end
  end
end
133
+
134
+ end
135
+ end
@@ -0,0 +1,83 @@
1
module Runestone::ActiveRecord
  # Full text search helpers mixed into relations: builders for the Arel
  # tsquery / match / rank nodes plus the +search+ entry point.
  module RelationMethods

    # Returns +query+ unchanged when it already is a TSQuery node, otherwise
    # wraps it in one using +dictionary+ (default: Runestone.dictionary) as
    # the language.
    def ts_query(query, dictionary: nil)
      return query if query.is_a?(Arel::Nodes::TSQuery)

      Arel::Nodes::TSQuery.new(query, language: dictionary || Runestone.dictionary)
    end

    # Always returns the `vector` attribute of Runestone::Model's table; both
    # arguments are currently ignored.
    def ts_vector(column_name, dictionary: nil)
      Runestone::Model.arel_table[:vector]
    end

    # Builds a TSMatch node between the vector and the tsquery.
    def ts_match(vector, query, dictionary: nil)
      lhs = ts_vector(vector, dictionary: dictionary)
      rhs = ts_query(query, dictionary: dictionary)
      Arel::Nodes::TSMatch.new(lhs, rhs)
    end

    # Builds a TSRank node for the vector and the tsquery.
    def ts_rank(vector, query, dictionary: nil)
      lhs = ts_vector(vector, dictionary: dictionary)
      rhs = ts_query(query, dictionary: dictionary)
      Arel::Nodes::TSRank.new(lhs, rhs)
    end

    # Builds a TSRankCD node for the vector and the tsquery.
    def ts_rank_cd(vector, query, dictionary: nil)
      lhs = ts_vector(vector, dictionary: dictionary)
      rhs = ts_query(query, dictionary: dictionary)
      Arel::Nodes::TSRankCD.new(lhs, rhs)
    end

    # Builds a ranked search relation for +query+.
    #
    # The parsed query, its +typos+ variant and that variant's +synonymize+
    # result are de-duplicated by string form and turned into tsqueries. Each
    # one adds a `rankN` select column; rows are matched against the last
    # tsquery and ordered by every rank column, descending.
    def search(query, dictionary: nil, prefix: nil)
      exact_search = Runestone::WebSearch.parse(query, prefix: prefix)
      typo_search = exact_search.typos
      syn_search = typo_search.synonymize

      tsqueries = [exact_search, typo_search, syn_search].map(&:to_s).uniq.map do |tsquery|
        ts_query(tsquery, dictionary: dictionary)
      end

      rank_columns = tsqueries.each_with_index.map do |tsquery, i|
        Arel::Nodes::As.new(ts_rank_cd(:vector, tsquery, dictionary: dictionary), Arel::Nodes::SqlLiteral.new("rank#{i}"))
      end

      # Keep the model's own columns unless a select list was already chosen.
      scope = if select_values.empty?
        select(klass.arel_table[Arel.star], *rank_columns)
      else
        select(*rank_columns)
      end

      scope = if klass == Runestone::Model
        scope.where(ts_match(:vector, tsqueries.last, dictionary: dictionary))
      else
        scope.joins(:runestones).where(ts_match(Runestone::Model.arel_table['vector'], tsqueries.last, dictionary: dictionary))
      end

      scope = scope.where(dictionary: dictionary) if dictionary

      rank_order = tsqueries.each_index.map do |i|
        Arel::Nodes::Descending.new(Arel::Nodes::SqlLiteral.new("rank#{i}"))
      end
      scope.order(*rank_order)
    end

  end
end
@@ -0,0 +1,45 @@
1
# Maintains the `runestone_corpus` table of known words and looks up words
# in it that are close to a given token.
module Runestone::Corpus

  # Inserts the down-cased +words+ into `runestone_corpus`, skipping
  # conflicting rows. Does nothing when called without words.
  def self.add(*words)
    return if words.empty?

    conn = Runestone::Model.connection
    rows = words.map { |word| "(#{conn.quote(word.downcase)})" }.join(',')

    conn.execute(<<-SQL)
      INSERT INTO runestone_corpus ( word )
      VALUES #{rows}
      ON CONFLICT DO NOTHING
    SQL
  end

  # Returns a Hash mapping each given word to the corpus words that are
  # trigram-similar to it and within its typo tolerance (levenshtein
  # distance). Words whose tolerance is 0 are not looked up.
  #
  # tsquery operator characters are stripped from every returned corpus
  # word; a word equal to the token after stripping is skipped.
  def self.similar_words(*words)
    tuples = words.each_with_object([]) do |word, rows|
      tolerance = typo_tolerance(word)
      next unless tolerance > 0

      rows << "#{Runestone::Model.connection.quote(word)}, #{Runestone::Model.connection.quote(word.downcase)}, #{tolerance}"
    end

    lut = {}
    return lut if tuples.empty?

    result = Runestone::Model.connection.execute(<<-SQL)
      WITH tokens (token, token_downcased, typo_tolerance) AS (VALUES (#{tuples.join('), (')}))
      SELECT token, word, levenshtein(runestone_corpus.word, tokens.token_downcased)
      FROM tokens
      JOIN runestone_corpus ON runestone_corpus.word % tokens.token_downcased
      WHERE
        runestone_corpus.word != tokens.token_downcased
        AND levenshtein(runestone_corpus.word, tokens.token_downcased) <= tokens.typo_tolerance
    SQL

    result.each_row do |token, candidate, _distance|
      candidate.gsub!(/\(|\)|:|\||!|\&|\*/, '')
      next if candidate == token

      (lut[token] ||= []) << candidate
    end

    lut
  end

  # Returns the key of the first Runestone.typo_tolerances entry whose value
  # includes the word's length, or 0 when none does.
  def self.typo_tolerance(word)
    entry = Runestone.typo_tolerances.find { |_distance, lengths| lengths.member?(word.length) }
    (entry && entry.first) || 0
  end

end