runestone 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
# Rails engine for Runestone. It registers a `config.runestone` options object,
# appends the engine's migration paths to the host application, and copies any
# configured options onto the Runestone module during initialization.
class Runestone::Engine < Rails::Engine
  config.runestone = ActiveSupport::OrderedOptions.new

  # Add every expanded "db/migrate" path of this engine to the application's
  # "db/migrate" paths, unless the application's root matches the engine's root.
  initializer :append_migrations do |app|
    next if app.root.to_s.match(root.to_s)

    config.paths["db/migrate"].expanded.each do |migration_dir|
      app.config.paths["db/migrate"] << migration_dir
    end
  end

  # Copy each option that has a truthy value from `app.config.runestone` to the
  # writer of the same name on Runestone; unset options are left untouched.
  initializer "runestone.set_configs" do |app|
    settings = app.config.runestone

    %i[runner dictionary job_queue typo_tolerances].each do |option|
      Runestone.public_send("#{option}=", settings.public_send(option)) if settings.public_send(option)
    end
  end

end
@@ -0,0 +1,8 @@
1
# Active Job that invokes an indexing method on a record.
class Runestone::IndexingJob < ActiveJob::Base
  # The queue is supplied as a block that reads Runestone.job_queue.
  queue_as do
    Runestone.job_queue
  end

  # Calls the public method named by +indexing_method+ on +record+.
  #
  # record          - object to index.
  # indexing_method - name of the public method to invoke on the record.
  def perform(record, indexing_method)
    record.public_send(indexing_method)
  end

end
@@ -0,0 +1,92 @@
1
# ActiveRecord model stored in the `runestones` table. Each row belongs to a
# polymorphic `record`. The class methods below build ts_headline highlights
# for the `data` of a set of rows.
class Runestone::Model < ActiveRecord::Base

  self.table_name = :runestones

  # In-memory highlighted copy of `data`; assigned by .highlight.
  attr_accessor :highlights

  belongs_to :record, polymorphic: true

  # Computes highlights for every record and stores them in record.highlights.
  #
  # records - collection of rows; the dictionary of the first row is used for
  #           all of them.
  # query   - search string, parsed by Runestone::WebSearch.
  # prefix  - forwarded to Runestone::WebSearch.parse.
  #
  # Returns [] when records is empty, otherwise the result of `records.each`.
  def self.highlight(records, query, prefix: nil)
    return [] if records.empty?

    dictionary = records.first.dictionary
    # Both passes below must walk the exact same layout, otherwise the queue of
    # headlines gets out of step with the values it was built from.
    layout_for = ->(record) { record.record_type.constantize.highlights(dictionary: dictionary) }

    binds = []
    records.each do |record|
      binds.concat(get_binds(record.data, layout_for.call(record)))
    end

    # Highlight each distinct string once, then fan the results back out.
    words = binds.uniq
    headline_for = words.zip(get_highlights(words, query, prefix: prefix, dictionary: dictionary)).to_h
    binds.map! { |word| headline_for[word] }

    # highlight_data consumes `binds` from the front, record after record.
    records.each do |record|
      record.highlights = highlight_data(record.data, binds, layout_for.call(record))
    end
  end

  # Rebuilds the structure of +data+ (restricted to the keys in +indexes+) with
  # every leaf replaced by the next entry taken from the front of +hlights+.
  #
  # data    - Hash of values.
  # hlights - Array of headlines in get_binds order; it is mutated (shifted).
  # indexes - Hash describing which keys to visit, nested like +data+.
  #
  # Returns a Hash.
  def self.highlight_data(data, hlights, indexes)
    str = {}
    indexes.each do |key, _|
      value = data[key]
      next unless value

      str[key] =
        case value
        when Hash
          highlight_data(value, hlights, indexes[key])
        when Array
          value.map do |item|
            case item
            when Hash then highlight_data(item, hlights, indexes[key])
            # get_binds emitted one bind per element of a nested array
            when Array then hlights.shift(item.size)
            else hlights.shift
            end
          end
        else
          hlights.shift
        end
    end
    str
  end

  # Runs ts_headline over +words+ for the parsed +query+.
  #
  # words      - Array of strings to highlight.
  # query      - search string, parsed by Runestone::WebSearch.
  # prefix     - forwarded to Runestone::WebSearch.parse.
  # dictionary - text search configuration; defaults to Runestone.dictionary.
  #
  # Returns the cast values of the query result, one row per word.
  def self.get_highlights(words, query, prefix: nil, dictionary: nil)
    # unnest of an empty array yields no rows, so skip the round trip
    return [] if words.empty?

    dictionary ||= Runestone.dictionary

    query = Arel::Nodes::TSQuery.new(Runestone::WebSearch.parse(query, prefix: prefix).typos.synonymize.to_s, language: dictionary).to_sql
    connection.exec_query(<<-SQL).cast_values
      SELECT ts_headline(#{connection.quote(dictionary)}, words, #{query}, 'ShortWord=2')
      FROM unnest(ARRAY[ #{words.map{ |t| connection.quote(t) }.join(', ')} ]::varchar[]) AS words
    SQL
  end

  # Flattens the leaves of +hash+ selected by +highlight+ into an Array of
  # strings, in the same traversal order that highlight_data uses.
  #
  # hash      - Hash of values.
  # highlight - Hash describing which keys to visit, nested like +hash+.
  #
  # Returns an Array of Strings.
  def self.get_binds(hash, highlight)
    rt = []
    highlight.each do |key, _|
      value = hash[key]
      next unless value

      case value
      when Hash
        rt.concat(get_binds(value, highlight[key]))
      when Array
        value.each do |item|
          if item.is_a?(Hash)
            rt.concat(get_binds(item, highlight[key]))
          else
            # stringify like the scalar branch so every bind is quoted as text
            rt.concat((item.is_a?(Array) ? item : [item]).map(&:to_s))
          end
        end
      else
        rt << value.to_s
      end
    end
    rt
  end

end
@@ -0,0 +1,106 @@
1
module Runestone
  # Per-model indexing configuration, built by evaluating a DSL block that
  # calls #index and #attribute.
  class Settings

    # tsvector weight label used for each numeric weight given to #index.
    WEIGHT_LABELS = {4 => 'D', 3 => 'C', 2 => 'B', 1 => 'A'}.freeze
    private_constant :WEIGHT_LABELS

    attr_reader :indexes, :dictionary

    # model      - accepted for the caller's convenience; not used here.
    # name       - name of this settings group.
    # dictionary - text search configuration used for longer values.
    # block      - optional DSL block, evaluated against the new instance.
    def initialize(model, name: , dictionary: , &block)
      @name = name
      @dictionary = dictionary
      @indexes = {}
      @attributes = {}
      instance_exec(&block) if block
    end

    # Declares dotted attribute paths to index under +weight+. Repeated calls
    # with the same weight add to the earlier paths instead of replacing them.
    #
    # Returns the Array of paths registered for +weight+.
    def index(*args, weight: 1)
      @indexes[weight] = (@indexes[weight] || []) | args.map(&:to_s)
    end

    # Declares attributes to extract from a record. With a block, the block is
    # later evaluated against the record to produce the value; otherwise the
    # method of the same name is called on the record.
    #
    # Raises ArgumentError when a block is given together with several names.
    def attribute(*names, &block)
      raise ArgumentError, 'Cannot pass multiple attribute names if block given' if block && names.length > 1

      names.each do |name|
        @attributes[name.to_sym] = block
      end
    end
    alias :attributes :attribute

    # Builds the attribute Hash for +record+ and strips nil/empty entries.
    #
    # Returns a Hash keyed by attribute name (Symbol).
    def extract_attributes(record)
      extracted = {}

      @attributes.each do |name, extractor|
        extracted[name] = extractor.is_a?(Proc) ? record.instance_exec(&extractor) : record.send(name)
      end

      remove_nulls(extracted)
    end

    # Builds one `setweight(to_tsvector(...))` SQL fragment per indexed value
    # found in +data+. Values of five characters or fewer use the 'simple'
    # configuration, longer ones use the configured dictionary.
    #
    # Returns an Array of SQL strings; ["to_tsvector('')"] when nothing matched.
    def vectorize(data)
      conn = Runestone::Model.connection
      tsvector = []

      @indexes.each do |weight, paths|
        tsweight = WEIGHT_LABELS[weight]
        paths.each do |path|
          dig(data, path.to_s.split('.')).each do |value|
            next unless value

            text = value.to_s
            language = text.size <= 5 ? 'simple' : @dictionary
            tsvector << "setweight(to_tsvector(#{conn.quote(language)}, #{conn.quote(text.downcase)}), #{conn.quote(tsweight)})"
          end
        end
      end
      tsvector.empty? ? ["to_tsvector('')"] : tsvector
    end

    # Collects the lower-cased words of every indexed value in +data+, with one
    # leading and one trailing non-word character removed from each word.
    #
    # Returns an Array of Strings.
    def corpus(data)
      words = []

      @indexes.each do |_weight, paths|
        paths.each do |path|
          dig(data, path.to_s.split('.')).each do |value|
            next unless value

            value.to_s.split(/\s+/).each do |word|
              words << word.downcase.gsub(/\A\W/, '').gsub(/\W\Z/, '')
            end
          end
        end
      end

      words
    end

    # Recursively drops nil values and empty Hashes from Hashes, drops nil and
    # empty elements from Arrays, and NFC-normalizes every String it meets.
    # Array elements that do not respond to `empty?` (numbers, booleans) are
    # kept as they are.
    def remove_nulls(value)
      case value
      when Hash
        value.each_with_object({}) do |(key, item), cleaned|
          kept = remove_nulls(item)
          next if kept.nil? || (kept.is_a?(Hash) && kept.empty?)

          cleaned[key] = kept
        end
      when Array
        value
          .reject { |item| item.nil? || (item.respond_to?(:empty?) && item.empty?) }
          .map { |item| remove_nulls(item) }
      when String
        value.unicode_normalize(:nfc)
      else
        value
      end
    end

    # Follows +keys+ through nested Hashes (Symbol key first, then String) and
    # fans out over Arrays. +keys+ is consumed (shifted) while descending.
    #
    # Returns an Array of the values found; a missing key yields [nil].
    def dig(data, keys)
      case data
      when Hash
        key = keys.shift
        dig(data[key.to_sym] || data[key.to_s], keys)
      when Array
        data.map { |d| dig(d, keys.dup) }.flatten.compact
      else
        [data]
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module Runestone
  # Released version of the gem; frozen so the constant cannot be mutated.
  VERSION = '1.0'.freeze
end
@@ -0,0 +1,203 @@
1
module Runestone
  # Parses a free-text search string into a list of tokens and phrases, and
  # expands that list with typo alternatives and synonyms before it is rendered
  # as a tsquery string.
  class WebSearch
    autoload :Or, "#{File.dirname(__FILE__)}/web_search/or"
    autoload :And, "#{File.dirname(__FILE__)}/web_search/and"
    autoload :Token, "#{File.dirname(__FILE__)}/web_search/token"
    autoload :Phrase, "#{File.dirname(__FILE__)}/web_search/phrase"

    # A finished synonym match. +index+ is an Integer (single token) or a Range
    # of token positions; +substitution+ is the node to offer as alternative.
    class Match
      attr_accessor :index, :substitution
      def initialize(index, substitution)
        @index = index
        @substitution = substitution
      end
    end

    # A multi-word synonym that has matched so far from +start_index+ through
    # +end_index+; +substitution+ holds the remaining lookup table.
    class PartialMatch
      attr_accessor :start_index, :end_index, :substitution
      def initialize(start_index, end_index, substitution)
        @start_index = start_index
        @end_index = end_index
        @substitution = substitution
      end
    end

    attr_accessor :values

    # Parses +query+ into a WebSearch. The caller's string is left untouched.
    #
    # prefix options: :all, :last, :none (default: :last)
    def self.parse(query, prefix: :last)
      prefix ||= :last

      # Normalize into a copy rather than in place, so frozen strings work and
      # the caller's value is not modified behind its back.
      normalized =
        begin
          query.unicode_normalize
        rescue Encoding::CompatibilityError
          query
        end

      q = []
      stack = []
      tokens = normalized.downcase.gsub(/\"\s+\"/, '""').split(' ')
      tokens.each_with_index do |token, i|
        token.gsub!(/\(|\)|:|\||!|\&|\*/, '')
        knot = token.start_with?('-')
        token.delete_prefix!('-') if knot

        next if token.empty? || token == '""' || %w(' ").include?(token)

        if token.start_with?('"') && token.end_with?('"')
          token.delete_prefix!('"')
          token.delete_suffix!('"')

          q << Phrase.new([token], negative: knot)
        elsif token.start_with?('"')
          token.delete_prefix!('"')
          stack.push(:phrase)
          q << Phrase.new([Token.new(token)], negative: knot)
        elsif token.end_with?('"') && stack.last == :phrase
          token.delete_suffix!('"')
          q.last.values << Token.new(token)
          stack.pop
        else
          # A closing quote with no open phrase is treated as a plain token.
          token.delete_suffix!('"')

          token = Token.new(token, negative: knot)
          if !knot && (prefix == :all || (prefix == :last && tokens.size - 1 == i))
            token.prefix = true
          end

          if stack.last == :phrase
            q.last.values << token
          else
            q << token
          end
        end
      end

      new(q)
    end

    def initialize(values)
      @values = values
    end

    # Returns a new WebSearch in which every Token that has similar words in
    # Runestone::Corpus is replaced by a Token carrying them as +alts+. Only
    # non-negative tokens are looked up.
    def typos
      tokens = @values.select { |t| t.is_a?(Token) && !t.negative }
      sw = Runestone::Corpus.similar_words(*tokens.map(&:value))
      q = @values.map do |t|
        if t.is_a?(Token) && sw.has_key?(t.value)
          Token.new(t.value, prefix: t.prefix, negative: t.negative, alts: sw[t.value])
        else
          t
        end
      end

      Runestone::WebSearch.new(q)
    end

    # Returns a new WebSearch where each run of consecutive plain tokens has
    # been passed through #synonymize_part. Phrases and negative tokens are
    # kept as they are and split the runs.
    def synonymize
      parts = []
      @values.each do |token|
        if token.is_a?(Phrase) || token.negative
          parts << token
        else
          # start a new run unless the previous part already is one
          parts << [] unless parts.last.is_a?(Array)
          parts.last << token
        end
      end

      parts.map! { |part| part.is_a?(Array) ? synonymize_part(part) : part }

      Runestone::WebSearch.new(parts)
    end

    # Expands one run of tokens with the synonyms from Runestone.synonyms.
    #
    # part - Array of tokens; single-token synonyms are merged into it in place.
    #
    # Returns an And of the run when no multi-token synonym matched, otherwise
    # an Or with one And-chain per group of non-overlapping matches.
    def synonymize_part(part)
      pending_matches = []
      matches = []

      part.each_with_index do |token, i|
        # Advance or finish the multi-token synonyms that are still open.
        pending_matches.select! do |match|
          continuations = match.end_index + 1 == i && match.substitution[token.value]
          next false unless continuations

          still_open = false
          continuations.each do |nm|
            if nm.is_a?(Hash)
              match.end_index = i
              match.substitution = nm
              still_open = true
            else
              matches << Match.new(match.start_index..i, Phrase.new(synonym_words(nm), distance: 1))
            end
          end
          still_open
        end

        (Runestone.synonyms[token.value] || []).each do |m|
          if m.is_a?(Hash)
            pending_matches << PartialMatch.new(i, i, m)
          else
            matches << Match.new(i, Phrase.new(m.split(/\s+/), distance: 1))
          end
        end
      end

      # Single-token synonyms are folded straight into the part; only the
      # range (multi-token) matches remain in +matches+ afterwards.
      matches.select! do |match|
        next true unless match.index.is_a?(Integer)

        current = part[match.index]
        if current.is_a?(Or)
          current.values << match.substitution
        else
          part[match.index] = Or.new([current, match.substitution])
        end
        false
      end

      # Put each range match into the first group it does not overlap with.
      groups = []
      matches.each do |match|
        group = groups.find { |g| g.none? { |m| m.index.overlaps?(match.index) } }
        group ? group << match : groups << [match]
      end

      return And.new(part) if groups.empty?

      orrs = Or.new([])
      groups.each do |group|
        chain = []
        cursor = 0
        group.sort_by { |m| m.index.begin }.each do |m|
          # keep the tokens that sit before this match (and between matches)
          chain << And.new(part[cursor...m.index.begin]) if m.index.begin > cursor
          chain << Or.new([And.new(part[m.index]), m.substitution])
          cursor = m.index.end + 1
        end
        # only add a tail when tokens actually remain after the last match
        chain << And.new(part[cursor..-1]) if cursor < part.size
        orrs.values << And.new(chain)
      end
      orrs
    end

    # Joins the values with ' & '. The keyword arguments are accepted but not
    # used by this method.
    def to_s(use_synonyms: true, allow_typos: true)
      values.join(' & ')
    end

    private

    # Splits a String synonym into its words; anything else is wrapped with
    # Array().
    def synonym_words(synonym)
      synonym.respond_to?(:split) ? synonym.split(/\s+/) : Array(synonym)
    end
  end
end
@@ -0,0 +1,17 @@
1
module Runestone
  class WebSearch
    # Conjunction of query nodes, rendered with the tsquery `&` operator.
    class And
      attr_accessor :values, :negative

      # values   - Array of nodes (anything responding to to_s).
      # negative - when true the rendered conjunction is negated.
      def initialize(values, negative: false)
        @values = values
        @negative = negative
      end

      # Renders the values joined by ' & '. A negated conjunction of several
      # values is wrapped in parentheses: `!` binds tighter than `&`, so
      # without them only the first value would be negated.
      def to_s
        joined = values.map(&:to_s).join(' & ')
        return joined unless negative

        values.size > 1 ? "!(#{joined})" : "!#{joined}"
      end
    end
  end
end