runestone 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
# Rails engine for Runestone. It declares the `config.runestone` option bag,
# shares the engine's migration paths with the host application, and copies
# any options the application set onto the Runestone module.
class Runestone::Engine < Rails::Engine
  config.runestone = ActiveSupport::OrderedOptions.new

  # Appends every expanded "db/migrate" path of this engine to the host
  # application's migration paths. Skipped when the application's root path
  # contains the engine's root path.
  initializer :append_migrations do |app|
    # Literal substring test. A regex match would interpret characters such as
    # ".", "+" or "(" in the engine path as pattern syntax.
    unless app.root.to_s.include?(root.to_s)
      config.paths["db/migrate"].expanded.each do |expanded_path|
        app.config.paths["db/migrate"] << expanded_path
      end
    end
  end

  # Copies each option that has a truthy value from `app.config.runestone`
  # onto the matching Runestone writer; unset options are left untouched.
  initializer "runestone.set_configs" do |app|
    options = app.config.runestone

    Runestone.runner = options.runner if options.runner
    Runestone.dictionary = options.dictionary if options.dictionary
    Runestone.job_queue = options.job_queue if options.job_queue
    Runestone.typo_tolerances = options.typo_tolerances if options.typo_tolerances
  end
end
@@ -0,0 +1,8 @@
1
# Background job that invokes an indexing method on a record.
class Runestone::IndexingJob < ActiveJob::Base
  # The queue is supplied as a block that reads Runestone.job_queue, rather
  # than as a fixed name.
  queue_as do
    Runestone.job_queue
  end

  # Calls the public method named by +indexing_method+ on +record+.
  def perform(record, indexing_method)
    record.public_send(indexing_method)
  end
end
@@ -0,0 +1,92 @@
1
# ActiveRecord model stored in the `runestones` table. Each row belongs to a
# polymorphic `record`; the class methods below compute highlighted copies of
# the rows' `data` for a search query.
class Runestone::Model < ActiveRecord::Base

  self.table_name = :runestones

  # Highlighted version of `data`, assigned by .highlight.
  attr_accessor :highlights

  belongs_to :record, polymorphic: true

  # Computes and assigns `highlights` on every record in +records+.
  #
  # The values selected by each record type's highlight spec are collected,
  # de-duplicated, sent to the database in a single ts_headline query, and the
  # results are then written back in the shape of each record's data.
  #
  # records - rows of this model (must respond to data, record_type, dictionary)
  # query   - the user's search string
  # prefix  - prefix mode forwarded to Runestone::WebSearch.parse
  #
  # Returns [] for an empty collection, otherwise +records+.
  def self.highlight(records, query, prefix: nil)
    return [] if records.empty?

    dictionary = records.first.dictionary

    # Resolve one spec per record type and use that same spec for both passes
    # below. Collecting with one spec and distributing with another would
    # shift every later highlight onto the wrong attribute.
    specs = {}
    binds = []
    records.each do |record|
      specs[record.record_type] ||= record.record_type.constantize.highlights(dictionary: dictionary)
      binds.concat(get_binds(record.data, specs[record.record_type]))
    end

    # Only distinct values are sent to the database; the hash maps every
    # original value back to its headline.
    unique = binds.uniq
    headline_for = unique.zip(get_highlights(unique, query, prefix: prefix, dictionary: dictionary)).to_h
    queue = binds.map { |text| headline_for[text] }

    records.each do |record|
      record.highlights = highlight_data(record.data, queue, specs[record.record_type])
    end
  end

  # Rebuilds the part of +data+ selected by +indexes+, replacing each leaf
  # with the next entry of +hlights+.
  #
  # +hlights+ is consumed destructively (via shift) and shared across calls,
  # so this must visit leaves in exactly the order get_binds produced them.
  def self.highlight_data(data, hlights, indexes)
    str = {}
    indexes.each do |key, _sub|
      value = data[key]
      next unless value

      str[key] =
        if value.is_a?(Hash)
          highlight_data(value, hlights, indexes[key])
        elsif value.is_a?(Array)
          value.map do |item|
            if item.is_a?(Hash)
              highlight_data(item, hlights, indexes[key])
            elsif item.is_a?(Array)
              # get_binds emits one bind per element of a nested array, so
              # consume the same number here to stay aligned.
              item.map { hlights.shift }
            else
              hlights.shift
            end
          end
        else
          hlights.shift
        end
    end
    str
  end

  # Runs ts_headline over +words+ for +query+ and returns the resulting
  # column values (one per word).
  #
  # dictionary defaults to Runestone.dictionary. Every interpolated value is
  # passed through connection.quote or rendered by Arel.
  def self.get_highlights(words, query, prefix: nil, dictionary: nil)
    # Nothing to highlight: skip the database round trip.
    return [] if words.empty?

    dictionary ||= Runestone.dictionary

    query = Arel::Nodes::TSQuery.new(Runestone::WebSearch.parse(query, prefix: prefix).typos.synonymize.to_s, language: dictionary).to_sql
    connection.exec_query(<<-SQL).cast_values
      SELECT ts_headline(#{connection.quote(dictionary)}, words, #{query}, 'ShortWord=2')
      FROM unnest(ARRAY[ #{words.map{ |t| connection.quote(t) }.join(', ')} ]::varchar[]) AS words
    SQL
  end

  # Flattens the leaves of +hash+ selected by +highlight+ into a list, in
  # traversal order. Scalar leaves are converted with to_s; array elements
  # are kept as they are, and a nested array contributes each of its elements.
  def self.get_binds(hash, highlight)
    rt = []
    highlight.each do |k, _sub|
      value = hash[k]
      next unless value

      if value.is_a?(Hash)
        rt.concat(get_binds(value, highlight[k]))
      elsif value.is_a?(Array)
        value.each do |item|
          if item.is_a?(Hash)
            rt.concat(get_binds(item, highlight[k]))
          else
            rt.concat(item.is_a?(Array) ? item : [item])
          end
        end
      else
        rt << value.to_s
      end
    end
    rt
  end

end
@@ -0,0 +1,106 @@
1
# Defined in nested form so the file loads whether or not the Runestone
# namespace has been defined yet.
module Runestone
  # Index configuration built from a DSL block: which attributes are extracted
  # from a record, and which paths inside the extracted data are indexed at
  # which weight.
  class Settings
    # Maps the numeric weight given to #index onto the label passed to
    # setweight().
    TS_WEIGHTS = { 4 => 'D', 3 => 'C', 2 => 'B', 1 => 'A' }.freeze

    attr_reader :indexes, :dictionary

    # model      - accepted for interface compatibility; not used here
    # name       - name of this index
    # dictionary - text search dictionary used for longer values
    # block      - optional DSL block evaluated in the context of the settings
    def initialize(model, name: , dictionary: , &block)
      @name = name
      @dictionary = dictionary
      @indexes = {}
      # Initialised up front so #extract_attributes works when no attribute
      # was declared.
      @attributes = {}
      instance_exec(&block) if block
    end

    # Declares dotted paths to index at +weight+. Repeated calls with the same
    # weight add to the existing paths instead of replacing them; duplicates
    # are ignored.
    def index(*args, weight: 1)
      @indexes[weight] = (@indexes[weight] || []) | args.map(&:to_s)
    end

    # Declares attributes to extract. Without a block each name is read by
    # calling the method of that name on the record; with a block (single name
    # only) the block is evaluated in the context of the record.
    #
    # Raises ArgumentError when a block is combined with several names.
    def attribute(*names, &block)
      raise ArgumentError, 'Cannot pass multiple attribute names if block given' if block && names.length > 1

      names.each do |name|
        @attributes[name.to_sym] = block
      end
    end
    alias :attributes :attribute

    # Returns a Hash of the declared attributes read from +record+, with nil
    # and empty entries removed (see #remove_nulls).
    def extract_attributes(record)
      extracted = {}

      @attributes.each do |name, extractor|
        extracted[name] = extractor ? record.instance_exec(&extractor) : record.send(name)
      end

      remove_nulls(extracted)
    end

    # Builds one `setweight(to_tsvector(...))` SQL fragment per indexed value
    # found in +data+. Values of five characters or fewer use the 'simple'
    # configuration; longer ones use this index's dictionary.
    #
    # Returns ["to_tsvector('')"] when nothing is indexed.
    def vectorize(data)
      conn = Runestone::Model.connection
      tsvector = []

      @indexes.each do |weight, paths|
        tsweight = conn.quote(TS_WEIGHTS[weight])
        paths.each do |path|
          dig(data, path.to_s.split('.')).each do |value|
            next unless value

            text = value.to_s
            language = text.size <= 5 ? 'simple' : @dictionary
            tsvector << "setweight(to_tsvector(#{conn.quote(language)}, #{conn.quote(text.downcase)}), #{tsweight})"
          end
        end
      end

      tsvector.empty? ? ["to_tsvector('')"] : tsvector
    end

    # Returns the downcased words found in the indexed values of +data+, with
    # surrounding punctuation stripped. Words that consist only of
    # punctuation are omitted.
    def corpus(data)
      words = []

      @indexes.each_value do |paths|
        paths.each do |path|
          dig(data, path.to_s.split('.')).each do |value|
            next unless value

            value.to_s.split(/\s+/).each do |raw|
              # [[:word:]] is Unicode-aware, whereas \W is ASCII-only in Ruby
              # and would strip accented or non-Latin letters.
              word = raw.downcase.sub(/\A[^[:word:]]+/, '').sub(/[^[:word:]]+\z/, '')
              words << word unless word.empty?
            end
          end
        end
      end

      words
    end

    # Recursively cleans +value+: drops nil hash values, empty nested hashes,
    # and nil or empty array elements, and normalises strings to NFC.
    def remove_nulls(value)
      case value
      when Hash
        value.each_with_object({}) do |(k, v), cleaned|
          v = remove_nulls(v)
          next if v.nil? || (v.is_a?(Hash) && v.empty?)

          cleaned[k] = v
        end
      when Array
        # Not every element responds to empty? (numbers, booleans, ...).
        kept = value.reject { |i| i.nil? || (i.respond_to?(:empty?) && i.empty?) }
        kept.map { |i| remove_nulls(i) }
      when String
        value.unicode_normalize(:nfc)
      else
        value
      end
    end

    # Follows +keys+ through nested hashes (symbol or string keys) and arrays
    # and returns a flat array of the values reached. +keys+ is not modified.
    # A path that ends on a Hash yields no values.
    def dig(data, keys)
      case data
      when Hash
        return [] if keys.empty?

        key, *rest = keys
        dig(data[key.to_sym] || data[key.to_s], rest)
      when Array
        data.flat_map { |d| dig(d, keys) }.compact
      else
        [data]
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Runestone
  # Gem version. Frozen so the constant's string cannot be mutated in place.
  VERSION = '1.0'.freeze
end
@@ -0,0 +1,203 @@
1
# Parses a web-style search string (quoted phrases, "-" negation) into a list
# of Token / Phrase nodes, and can expand that list with typo alternatives and
# synonyms before rendering it with #to_s.
class Runestone::WebSearch
  autoload :Or, "#{File.dirname(__FILE__)}/web_search/or"
  autoload :And, "#{File.dirname(__FILE__)}/web_search/and"
  autoload :Token, "#{File.dirname(__FILE__)}/web_search/token"
  autoload :Phrase, "#{File.dirname(__FILE__)}/web_search/phrase"

  # A completed synonym match. +index+ is an Integer for a single token or a
  # Range of token positions for a multi-token match.
  class Match
    attr_accessor :index, :substitution
    def initialize(index, substitution)
      @index = index
      @substitution = substitution
    end
  end

  # A multi-token synonym that has matched tokens start_index..end_index so
  # far. +substitution+ is the Hash of possible continuations keyed by the
  # next token's value.
  class PartialMatch
    attr_accessor :start_index, :end_index, :substitution
    def initialize(start_index, end_index, substitution)
      @start_index = start_index
      @end_index = end_index
      @substitution = substitution
    end
  end

  attr_accessor :values

  # Parses +query+ into a WebSearch. The caller's string is not modified.
  #
  # prefix options: :all, :last, :none (default: :last)
  def self.parse(query, prefix: :last)
    prefix ||= :last

    # Work on copies so the caller's string is left untouched and frozen
    # strings are accepted.
    normalized =
      begin
        query.unicode_normalize
      rescue Encoding::CompatibilityError
        query
      end
    normalized = normalized.downcase

    q = []
    stack = []
    tokens = normalized.gsub(/\"\s+\"/, '""').split(' ')
    tokens.each_with_index do |raw, i|
      token = raw.gsub(/\(|\)|:|\||!|\&|\*/, '')
      negative = token.start_with?('-')
      token = token.delete_prefix('-') if negative

      next if token.empty? || token == '""' || %w(' ").include?(token)

      if token.start_with?('"') && token.end_with?('"')
        q << Phrase.new([token.delete_prefix('"').delete_suffix('"')], negative: negative)
      elsif token.start_with?('"')
        stack.push(:phrase)
        q << Phrase.new([Token.new(token.delete_prefix('"'))], negative: negative)
      elsif token.end_with?('"') && stack.last == :phrase
        q.last.values << Token.new(token.delete_suffix('"'))
        stack.pop
      else
        # A closing quote with no open phrase also lands here; the stray
        # quote is dropped and the word is treated as an ordinary token.
        word = Token.new(token.delete_suffix('"'), negative: negative)
        if !negative && (prefix == :all || (prefix == :last && tokens.size - 1 == i))
          word.prefix = true
        end

        if stack.last == :phrase
          q.last.values << word
        else
          q << word
        end
      end
    end

    new(q)
  end

  def initialize(values)
    @values = values
  end

  # Returns a new WebSearch in which every Token that has similar words in
  # Runestone::Corpus is replaced by a Token carrying them as +alts+. Only
  # the values of non-negative tokens are looked up.
  def typos
    lookups = @values.select { |t| t.is_a?(Token) && !t.negative }
    similar = Runestone::Corpus.similar_words(*lookups.map(&:value))

    expanded = @values.map do |t|
      if t.is_a?(Token) && similar.has_key?(t.value)
        Token.new(t.value, prefix: t.prefix, negative: t.negative, alts: similar[t.value])
      else
        t
      end
    end

    Runestone::WebSearch.new(expanded)
  end

  # Returns a new WebSearch in which each run of consecutive non-negative,
  # non-phrase values has been expanded with synonyms. Phrases and negative
  # values are passed through unchanged and act as run boundaries.
  def synonymize
    parts = []
    @values.each do |token|
      if token.is_a?(Phrase) || token.negative
        parts << token
      else
        # Start a new run unless one is already open at the end.
        parts << [] unless parts.last.is_a?(Array)
        parts.last << token
      end
    end

    parts.map! { |part| part.is_a?(Array) ? synonymize_part(part) : part }

    Runestone::WebSearch.new(parts)
  end

  # Expands one run of tokens (+part+) with entries from Runestone.synonyms.
  #
  # Single-token synonyms are merged into an Or at the token's position.
  # Multi-token synonyms are grouped so that the matches within a group do
  # not overlap; each group becomes one alternative. Returns an And when
  # there are no multi-token matches, otherwise an Or of the alternatives.
  def synonymize_part(part)
    pending_matches = []
    matches = []

    part.each_with_index do |token, i|
      # Advance or finish multi-token matches that continue at this token.
      pending_matches.select! do |match|
        continuations = match.end_index + 1 == i && match.substitution[token.value]
        next false unless continuations

        still_open = false
        continuations.each do |nm|
          if nm.is_a?(Hash)
            # More tokens are required: remember the next level of
            # continuations and keep the match pending.
            match.end_index = i
            match.substitution = nm
            still_open = true
          else
            matches << Match.new(match.start_index..i, Phrase.new(Array(nm), distance: 1))
          end
        end
        still_open
      end

      if (synonyms = Runestone.synonyms[token.value])
        synonyms.each do |m|
          if m.is_a?(Hash)
            pending_matches << PartialMatch.new(i, i, m)
          else
            matches << Match.new(i, Phrase.new(m.split(/\s+/), distance: 1))
          end
        end
      end
    end

    # Apply single-token matches in place; keep only the ranged ones.
    matches.select! do |match|
      next true unless match.index.is_a?(Integer)

      current = part[match.index]
      if current.is_a?(Or)
        current.values << match.substitution
      else
        part[match.index] = Or.new([current, match.substitution])
      end
      false
    end

    # Put each ranged match into the first group it does not overlap with.
    groups = matches.each_with_object([]) do |match, memo|
      slot = memo.find { |group| group.none? { |other| other.index.overlaps?(match.index) } }
      if slot
        slot << match
      else
        memo << [match]
      end
    end

    return And.new(part) if groups.empty?

    alternatives = groups.map do |group|
      clause = []
      cursor = 0
      group.sort_by { |m| m.index.begin }.each do |m|
        # Keep the tokens between the previous match and this one.
        clause << And.new(part[cursor...m.index.begin]) if m.index.begin > cursor
        clause << Or.new([And.new(part[m.index]), m.substitution])
        cursor = m.index.end + 1
      end
      # Only add a tail when tokens remain; an empty And would render as a
      # dangling "&".
      clause << And.new(part[cursor..-1]) if cursor < part.size
      And.new(clause)
    end

    Or.new(alternatives)
  end

  # Renders the values joined by " & ". The keyword arguments are accepted
  # but not used by this method.
  def to_s(use_synonyms: true, allow_typos: true)
    values.join(' & ')
  end
end
@@ -0,0 +1,17 @@
1
# Defined in nested form so the file loads whether or not the enclosing
# namespaces have been defined yet.
module Runestone
  class WebSearch
    # Conjunction node: renders its values joined by " & ", optionally negated.
    class And
      attr_accessor :values, :negative

      # values   - objects rendered with to_s
      # negative - when true the whole conjunction is negated
      def initialize(values, negative: false)
        @values = values
        @negative = negative
      end

      # Returns the conjunction as a string. A negated conjunction of several
      # values is parenthesised, because "!" binds tighter than "&" in tsquery
      # syntax and would otherwise negate only the first value.
      def to_s
        expr = values.map(&:to_s).join(' & ')
        return expr unless negative

        values.size > 1 ? "!(#{expr})" : "!#{expr}"
      end
    end
  end
end