knitsearch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +11 -0
- data/README.md +652 -0
- data/lib/generators/knitsearch/install/install_generator.rb +208 -0
- data/lib/generators/knitsearch/install/templates/migration.rb.tt +7 -0
- data/lib/generators/knitsearch/multisearch_install/multisearch_install_generator.rb +89 -0
- data/lib/knitsearch/document.rb +12 -0
- data/lib/knitsearch/engine.rb +22 -0
- data/lib/knitsearch/fuzzy_corrector.rb +79 -0
- data/lib/knitsearch/has_many_dependent.rb +62 -0
- data/lib/knitsearch/has_many_through_join_dependent.rb +47 -0
- data/lib/knitsearch/has_many_through_target_dependent.rb +54 -0
- data/lib/knitsearch/highlighter.rb +36 -0
- data/lib/knitsearch/levenshtein.rb +35 -0
- data/lib/knitsearch/migration.rb +235 -0
- data/lib/knitsearch/model.rb +613 -0
- data/lib/knitsearch/multisearchable.rb +24 -0
- data/lib/knitsearch/multisearchable_sync.rb +38 -0
- data/lib/knitsearch/query.rb +57 -0
- data/lib/knitsearch/version.rb +5 -0
- data/lib/knitsearch.rb +129 -0
- data/lib/tasks/knitsearch.rake +33 -0
- metadata +125 -0
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
5
|
+
module Knitsearch
|
|
6
|
+
# The user-facing concern. Include in an ActiveRecord model and call
|
|
7
|
+
# `searchable_by` with the columns you want indexed:
|
|
8
|
+
#
|
|
9
|
+
# class Article < ApplicationRecord
|
|
10
|
+
# include Knitsearch::Model
|
|
11
|
+
# searchable_by against: { title: 'A', body: 'B' }
|
|
12
|
+
# end
|
|
13
|
+
#
|
|
14
|
+
# FTS index sync happens via SQLite triggers; ActiveRecord callbacks are only used to fill rich-text and associated shadow columns.
|
|
15
|
+
# Triggers are created in the migration and fire atomically inside
|
|
16
|
+
# the source transaction.
|
|
17
|
+
module Model
|
|
18
|
+
extend ActiveSupport::Concern
|
|
19
|
+
|
|
20
|
+
class_methods do
|
|
21
|
+
# Declares which columns (and which associated records' columns) are searchable.
#
# kwargs are handed to parse_searchable_args: `against:` is required,
# `associated_against:` and `using:` are optional.
#
# Raises ArgumentError when a resolved weight is not a positive Numeric.
# Raises Knitsearch::SchemaMismatchError when the `<table>_fts` table can be
# queried but lacks a column this declaration expects.
def searchable_by(**kwargs)
  # Reset both mappings so a repeated declaration starts from scratch.
  @rich_text_mapping = {}
  @associated_mapping = {}

  columns, associated, options = parse_searchable_args(kwargs)

  require_positive = lambda do |label, weight|
    next if weight.is_a?(Numeric) && weight > 0

    raise ArgumentError, "Weight for #{label} must be a positive number, got #{weight.inspect}"
  end

  columns.each { |col, weight| require_positive.call(col, weight) }
  associated.each do |assoc_name, assoc_columns|
    assoc_columns.each { |col, weight| require_positive.call("#{assoc_name}.#{col}", weight) }
  end

  columns.freeze
  associated.freeze

  fts_table = "#{table_name}_fts"
  if knitsearch_fts_table_available?(fts_table)
    present = connection.columns(fts_table).map(&:name)

    # Rich-text fields are indexed through their shadow column, associated
    # fields through "<assoc>_<col>_plain_text".
    expected = columns.keys.map { |col| rich_text_mapping[col]&.to_s || col }
    expected += associated.flat_map { |assoc_name, assoc_cols|
      assoc_cols.keys.map { |col| "#{assoc_name}_#{col}_plain_text" }
    }
    missing = expected.uniq - present

    unless missing.empty?
      raise Knitsearch::SchemaMismatchError,
        "#{name} declares searchable_by(#{columns.keys.inspect}) but FTS table " \
        "`#{fts_table}` is missing columns: #{missing.inspect}. " \
        "Update the migration or the searchable_by declaration."
    end
  end

  # Define the class attributes only once per class hierarchy.
  unless respond_to?(:searchable_columns)
    class_attribute :searchable_columns
    class_attribute :searchable_options
    class_attribute :searchable_dictionary, default: "simple"
    class_attribute :knitsearch_callbacks_installed, default: false
  end

  dictionary = parse_dictionary(options)
  self.searchable_columns = columns
  self.searchable_options = options.freeze
  self.searchable_dictionary = dictionary

  # Keep the associated mapping around for later lookups.
  @associated_mapping = associated

  needs_callbacks = rich_text_mapping.any? || associated.any?
  return unless needs_callbacks && !knitsearch_callbacks_installed

  install_rich_text_sync if rich_text_mapping.any?
  install_associated_sync(associated) if associated.any?
  self.knitsearch_callbacks_installed = true
end
|
|
88
|
+
|
|
89
|
+
# Runs a full-text search against this model's FTS table.
#
# query           - the search string.
# limit:          - optional maximum number of results.
# highlight:      - column(s) to highlight; validated against searchable_by.
# snippet:        - Array of columns, or Hash of column => token count.
# operator:       - :and (default) or :or.
# match:          - :word (default) or :phrase.
# prefix:         - passed to the query builder; nil defers to the declared option.
# fallback_below: - when positive and operator is :and, a relaxed :or search is
#                   merged in if the strict search returns fewer rows than this.
# fuzzy:          - when positive, the query is first run through
#                   Knitsearch::FuzzyCorrector with this threshold.
#
# Returns the relation from build_search_relation, or an Array of records when
# the fallback path runs.
# Raises ArgumentError for an unknown operator or match mode.
def search(query, limit: nil, highlight: nil, snippet: nil, operator: :and, match: :word, prefix: nil, fallback_below: nil, fuzzy: nil)
  unless %i[and or].include?(operator)
    raise ArgumentError, "operator must be :and or :or, got: #{operator.inspect}"
  end
  unless %i[word phrase].include?(match)
    raise ArgumentError, "match must be :word or :phrase, got: #{match.inspect}"
  end

  effective_query = query
  if fuzzy && fuzzy > 0
    effective_query = Knitsearch::FuzzyCorrector.correct(
      query,
      vocab_table: vocab_table_name,
      connection: connection,
      threshold: fuzzy,
      # With an explicit prefix search the last token is left uncorrected.
      skip_last: (prefix == true)
    )
  end

  shared = { limit: limit, highlight: highlight, snippet: snippet, match: match, prefix: prefix }
  strict = build_search_relation(effective_query, operator: operator, **shared)
  return strict unless fallback_eligible?(fallback_below, operator)

  apply_fallback(effective_query, strict, fallback_below, **shared)
end
|
|
111
|
+
|
|
112
|
+
# Prefix search convenience wrapper: delegates to `search` with
# `prefix: true` and a default limit of 10.
def suggest(query, limit: 10, fallback_below: nil, fuzzy: nil)
  forwarded = { prefix: true, limit: limit, fallback_below: fallback_below, fuzzy: fuzzy }
  search(query, **forwarded)
end
|
|
115
|
+
|
|
116
|
+
# Proposes a corrected spelling of `query` using the FTS vocabulary table.
#
# query      - the user's search input; normalized with `to_s`.
# threshold: - passed to Knitsearch::FuzzyCorrector as its threshold (default 1).
#
# Returns the corrected string, or nil when the input is blank or the
# corrector leaves it unchanged.
def suggest_correction(query, threshold: 1)
  return nil if query.blank?

  str = query.to_s
  return nil if str.strip.empty?

  # Send the normalized string to the corrector so the "unchanged?" check
  # below compares the same value that was actually corrected.
  corrected = Knitsearch::FuzzyCorrector.correct(
    str,
    vocab_table: vocab_table_name,
    connection: connection,
    threshold: threshold,
    skip_last: false
  )

  corrected == str ? nil : corrected
end
|
|
132
|
+
|
|
133
|
+
# Backfills search data for existing rows: rewrites the rich-text shadow
# columns when any are mapped, otherwise asks the FTS table to rebuild.
def knitsearch_backfill!
  return reindex! unless rich_text_mapping.any?

  backfill_shadow_columns
end
|
|
140
|
+
|
|
141
|
+
# Issues the FTS5 'rebuild' command against this model's `<table>_fts` table.
def reindex!
  fts = connection.quote_table_name("#{table_name}_fts")
  connection.execute("INSERT INTO #{fts}(#{fts}) VALUES('rebuild')")
end
|
|
146
|
+
|
|
147
|
+
# Name of the vocabulary table: the model's table name plus "_fts_vocab".
def vocab_table_name
  format("%s_fts_vocab", table_name)
end
|
|
150
|
+
|
|
151
|
+
# Maps declared rich-text field name (String) to its shadow column (Symbol).
# Lazily initialized to an empty Hash.
def rich_text_mapping
  @rich_text_mapping = {} unless @rich_text_mapping
  @rich_text_mapping
end
|
|
154
|
+
|
|
155
|
+
# Maps association name to its { column => weight } Hash, as stored by
# searchable_by. Lazily initialized to an empty Hash.
def associated_mapping
  @associated_mapping = {} unless @associated_mapping
  @associated_mapping
end
|
|
158
|
+
|
|
159
|
+
# Flattens associated_mapping into { "<assoc>_<col>_plain_text" => weight },
# keeping declaration order.
def associated_shadow_columns
  associated_mapping.each_with_object({}) do |(assoc_name, assoc_columns), shadows|
    assoc_columns.each do |col, weight|
      shadows["#{assoc_name}_#{col}_plain_text"] = weight
    end
  end
end
|
|
169
|
+
|
|
170
|
+
# Ordered list of FTS column names: declared columns first (rich-text fields
# replaced by their shadow column), then the associated shadow columns.
def fts_column_order
  own_columns = searchable_columns.keys.map do |col|
    rich_text_mapping[col]&.to_s || col
  end
  own_columns + associated_shadow_columns.keys
end
|
|
175
|
+
|
|
176
|
+
private
|
|
177
|
+
|
|
178
|
+
# Builds the relation that joins the source table to its FTS table, filters
# with MATCH and orders by bm25 relevance.
#
# query      - raw search string; escaped through Knitsearch::Query.escape.
# limit:     - optional row cap.
# highlight: - column(s) to add `searchable_highlight_<col>` selects for.
# snippet:   - Array/Hash of columns to add `searchable_snippet_<col>` selects for.
# operator:  - :and / :or, forwarded to the query escaper.
# match:     - :word / :phrase, forwarded to the query escaper.
# prefix:    - nil falls back to the declared `using: { fts5: { prefix: } }` option.
#
# Returns `none` when the escaped query is nil, otherwise the relation.
# Raises Knitsearch::ColumnError for highlight/snippet columns that were not declared.
def build_search_relation(query, limit:, highlight:, snippet:, operator:, match:, prefix: nil)
  prefix = prefix.nil? ? searchable_options.dig(:using, :fts5, :prefix) : prefix
  match_string = Knitsearch::Query.escape(query, operator: operator, prefix: prefix, match: match)
  return none if match_string.nil?

  validate_highlight_columns(highlight) if highlight
  validate_snippet_columns(snippet) if snippet

  fts_table = "#{table_name}_fts"
  quoted_fts = connection.quote_table_name(fts_table)
  quoted_source = connection.quote_table_name(table_name)

  # bm25 takes one weight per FTS column, in column order; columns without a
  # weight default to 1.0. Append the associated shadow-column weights so the
  # weights declared via associated_against take effect.
  # NOTE(review): assumes the FTS table's columns follow fts_column_order
  # (declared columns, then associated shadow columns) - confirm against the migration.
  weights = searchable_columns.values + associated_shadow_columns.values
  bm25_args = ([quoted_fts] + weights.map(&:to_s)).join(", ")

  relation = joins("INNER JOIN #{quoted_fts} ON #{quoted_fts}.rowid = #{quoted_source}.id")
    .where("#{quoted_fts} MATCH ?", match_string)
    .order(Arel.sql("bm25(#{bm25_args})"))

  # Only add a custom SELECT (with the score) when highlight or snippet is
  # requested, to avoid breaking .count / .exists? on plain searches.
  if highlight || snippet
    selects = ["#{quoted_source}.*", "bm25(#{bm25_args}) AS searchable_score"]
    selects.concat(highlight_selects(highlight, fts_table)) if highlight
    selects.concat(snippet_selects(snippet, fts_table)) if snippet
    relation = relation.select(selects.join(", "))
  end

  relation = relation.limit(limit) if limit
  relation
end
|
|
208
|
+
|
|
209
|
+
# Truthy only when a positive fallback threshold is given and the search
# used the :and operator.
def fallback_eligible?(fallback_below, operator)
  return fallback_below unless fallback_below
  return false unless fallback_below > 0

  operator == :and
end
|
|
212
|
+
|
|
213
|
+
# Loads the strict results; when there are fewer than `threshold`, runs a
# relaxed search (operator :or, match :word) and appends its extra rows.
#
# Always returns an Array of records. The `match:` argument is accepted
# but the relaxed search always uses :word.
def apply_fallback(query, strict, threshold, limit:, highlight:, snippet:, match:, prefix: nil)
  strict_hits = strict.to_a

  if strict_hits.size < threshold
    relaxed = build_search_relation(
      query,
      limit: limit,
      highlight: highlight,
      snippet: snippet,
      operator: :or,
      match: :word,
      prefix: prefix
    )
    merge_search_results(strict_hits, relaxed.to_a, limit: limit)
  else
    strict_hits
  end
end
|
|
229
|
+
|
|
230
|
+
# Appends to `primary` every record of `secondary` whose id is not already
# in `primary`, then truncates to `limit` when one is given.
def merge_search_results(primary, secondary, limit:)
  known_ids = primary.each_with_object({}) { |record, ids| ids[record.id] = true }
  combined = primary + secondary.select { |record| !known_ids.key?(record.id) }
  return combined unless limit

  combined.first(limit)
end
|
|
236
|
+
|
|
237
|
+
# Splits the searchable_by keyword arguments into three parts.
#
# Returns [columns, associated, options]:
#   columns    - { "column" => resolved weight } built from `against:`
#   associated - normalized `associated_against:` mapping
#   options    - the `using:` slice of kwargs
#
# As a side effect, records rich-text fields in rich_text_mapping.
# Raises ArgumentError when `against:` is missing.
def parse_searchable_args(kwargs)
  if !kwargs.key?(:against)
    message = "searchable_by requires `against:` keyword. " \
              "Example: searchable_by against: { title: 'A', bio: 'B' }"
    raise ArgumentError, message
  end

  weighted = kwargs[:against].transform_values { |raw| resolve_weight(raw) }
  using_options = kwargs.slice(:using)
  associated = normalize_associated_against(kwargs[:associated_against])

  # Register ActionText rich-text fields against their shadow columns.
  detect_rich_text_fields(weighted)

  [weighted.transform_keys(&:to_s), associated, using_options]
end
|
|
253
|
+
|
|
254
|
+
# Validates and normalizes the `associated_against:` option.
#
# associated_against - nil, or a Hash of association name => Array of
#                      columns (each weighted "C") or Hash of column => weight.
#
# Returns { "association" => { column => resolved weight } }.
# Raises Knitsearch::ConfigurationError for an unknown association or an
# unsupported association kind (anything other than non-polymorphic
# belongs_to / has_many, including has_many :through with a polymorphic source).
def normalize_associated_against(associated_against)
  return {} unless associated_against

  associated_against.each_with_object({}) do |(assoc_name, columns_spec), normalized|
    reflection = reflect_on_association(assoc_name)

    if reflection.nil?
      raise Knitsearch::ConfigurationError,
        "Associated field #{assoc_name.inspect} is not a declared association on #{name}"
    end

    # Work out why the association is unsupported, if it is.
    rejection =
      case reflection.macro
      when :belongs_to
        if reflection.options[:polymorphic]
          "Polymorphic belongs_to #{assoc_name.inspect} is not supported in this release."
        end
      when :has_many
        if reflection.through_reflection?
          # has_many :through is allowed unless its source is polymorphic.
          if reflection.source_reflection.polymorphic?
            "Polymorphic source on has_many :through #{assoc_name.inspect} is not supported."
          end
        elsif reflection.options[:polymorphic]
          "Polymorphic has_many #{assoc_name.inspect} is not supported in this release."
        end
      when :has_one
        "has_one associations are not yet supported. Only belongs_to and has_many are available."
      else
        "Associated field #{assoc_name.inspect} is a #{reflection.macro}, but only belongs_to and has_many are supported."
      end
    raise Knitsearch::ConfigurationError, rejection if rejection

    # An Array of columns means "every column gets the C bucket".
    spec = columns_spec.is_a?(Array) ? columns_spec.index_with { "C" } : columns_spec
    normalized[assoc_name.to_s] = spec.transform_values { |raw| resolve_weight(raw) }
  end
end
|
|
304
|
+
|
|
305
|
+
# Reads and validates the `using: { fts5: { dictionary: } }` option.
#
# Returns the dictionary name, defaulting to "simple".
# Raises ArgumentError for a Symbol dictionary or for "trigram" combined with
# a prefix option, and Knitsearch::UnknownDictionaryError for a name outside
# Knitsearch::SUPPORTED_DICTIONARIES.
def parse_dictionary(options)
  chosen = options.dig(:using, :fts5, :dictionary) || "simple"

  if chosen.is_a?(Symbol)
    message = "dictionary must be a string (e.g., dictionary: \"english\"), not a symbol. " \
              "Remove the colon: dictionary: \"#{chosen}\""
    raise ArgumentError, message
  end

  supported = Knitsearch::SUPPORTED_DICTIONARIES
  unless supported.include?(chosen)
    raise Knitsearch::UnknownDictionaryError,
      "Unknown dictionary: #{chosen.inspect}. Supported: #{supported.inspect}"
  end

  if chosen == "trigram" && options.dig(:using, :fts5, :prefix)
    message = "dictionary: \"trigram\" cannot be combined with prefix: — the trigram tokenizer " \
              "already supports substring matching. Pick one."
    raise ArgumentError, message
  end

  chosen
end
|
|
327
|
+
|
|
328
|
+
# Records, in rich_text_mapping, every declared column that is an ActionText
# rich-text field, mapping "field" => :field_plain_text.
#
# The list of rich-text fields comes from whichever of these the class
# responds to, checked in this order: rich_text_association_names,
# rich_text_class_attributes, rich_text_attributes.
def detect_rich_text_fields(columns)
  known_rich_text =
    if respond_to?(:rich_text_association_names)
      # Rails 8.1+: names look like :rich_text_body, so drop the prefix.
      rich_text_association_names.map { |assoc| assoc.to_s.sub(/^rich_text_/, "").to_sym }
    elsif respond_to?(:rich_text_class_attributes)
      # Rails 7.x - 8.0
      rich_text_class_attributes.keys
    elsif respond_to?(:rich_text_attributes)
      # legacy
      rich_text_attributes
    else
      []
    end

  columns.each_key do |field|
    if known_rich_text.include?(field.to_sym)
      rich_text_mapping[field.to_s] = "#{field}_plain_text".to_sym
    end
  end
end
|
|
351
|
+
|
|
352
|
+
# Registers the before_save callback that copies rich-text content into the
# shadow columns.
def install_rich_text_sync
  before_save(:sync_rich_text_to_shadow_columns, if: :should_sync_rich_text?)
end
|
|
355
|
+
|
|
356
|
+
# Registers the before_save callback for associated shadow columns and
# records each association in the matching Knitsearch dependent registry.
#
# associated - { association name => { column => weight } }.
def install_associated_sync(associated)
  before_save :sync_associated_to_shadow_columns, if: :should_sync_associated?

  associated.each do |assoc_name, columns|
    reflection = reflect_on_association(assoc_name)

    # { shadow column (Symbol) => source column }
    shadow_map = columns.keys.to_h { |col| ["#{assoc_name}_#{col}_plain_text".to_sym, col] }

    case reflection.macro
    when :belongs_to
      Knitsearch.register_belongs_to_dependent(
        reflection.klass, self, reflection.foreign_key.to_sym, shadow_map
      )
    when :has_many
      if reflection.through_reflection?
        through = reflection.through_reflection
        Knitsearch.register_has_many_through_dependent(
          join_class: through.klass,
          target_class: reflection.klass,
          parent_class: self,
          parent_fk: through.foreign_key.to_sym,
          target_fk: reflection.source_reflection.foreign_key.to_sym,
          parent_assoc: assoc_name.to_sym,
          shadow_map: shadow_map
        )
      else
        Knitsearch.register_has_many_dependent(
          reflection.klass, self, reflection.foreign_key.to_sym, shadow_map, assoc_name.to_sym
        )
      end
    end
  end
end
|
|
399
|
+
|
|
400
|
+
# Walks every record and writes the plain-text version of each rich-text
# field into its shadow column, using update_columns.
def backfill_shadow_columns
  find_each do |record|
    shadow_values = {}

    rich_text_mapping.each do |declared_field, shadow_column|
      body = record.send(declared_field)
      plain = record.send(:extract_plain_text_from_action_text, body) unless body.nil?
      shadow_values[shadow_column] = plain
    end

    record.update_columns(shadow_values) unless shadow_values.empty?
  end
end
|
|
416
|
+
|
|
417
|
+
# Turns a declared weight into a number.
#
# value - a bucket letter found in Knitsearch::WEIGHT_BUCKETS (String or
#         Symbol, any case), or anything that responds to `to_f`.
#
# Returns the bucket's weight, or `value.to_f`. A value that is neither a
# bucket nor convertible (e.g. an unknown Symbol) is returned unchanged, so
# the caller's "must be a positive number" validation rejects it with an
# ArgumentError instead of this method raising NoMethodError.
def resolve_weight(value)
  if value.is_a?(String) || value.is_a?(Symbol)
    bucket = value.to_s.upcase
    return Knitsearch::WEIGHT_BUCKETS[bucket] if Knitsearch::WEIGHT_BUCKETS.key?(bucket)
  end

  value.respond_to?(:to_f) ? value.to_f : value
end
|
|
424
|
+
|
|
425
|
+
# Probes the FTS table with a zero-row SELECT rather than table_exists?,
# because virtual FTS5 tables don't appear in connection.tables.
#
# Returns true when the query runs, false when it raises any StandardError.
def knitsearch_fts_table_available?(fts_table)
  begin
    connection.execute("SELECT 1 FROM #{connection.quote_table_name(fts_table)} LIMIT 0")
  rescue StandardError
    return false
  end

  true
end
|
|
433
|
+
|
|
434
|
+
# Ensures every requested highlight column was declared in searchable_by.
#
# Raises Knitsearch::ColumnError listing the undeclared columns.
def validate_highlight_columns(columns)
  undeclared = Array(columns).map(&:to_s).reject { |col| searchable_columns.key?(col) }

  unless undeclared.empty?
    raise Knitsearch::ColumnError,
      "highlight: contains columns not in searchable_by: #{undeclared.inspect}"
  end
end
|
|
442
|
+
|
|
443
|
+
# Ensures every requested snippet column was declared in searchable_by.
# Accepts an Array of columns or a Hash keyed by column; any other type is
# left unvalidated here.
#
# Raises Knitsearch::ColumnError listing the undeclared columns.
def validate_snippet_columns(snippets)
  requested =
    if snippets.is_a?(Array)
      snippets.map(&:to_s)
    elsif snippets.is_a?(Hash)
      snippets.keys.map(&:to_s)
    end
  return if requested.nil?

  undeclared = requested.reject { |col| searchable_columns.key?(col) }

  unless undeclared.empty?
    raise Knitsearch::ColumnError,
      "snippet: contains columns not in searchable_by: #{undeclared.inspect}"
  end
end
|
|
455
|
+
|
|
456
|
+
# Builds one highlight() SELECT fragment per requested column.
def highlight_selects(columns, fts_table)
  Array(columns).map do |col|
    highlight_select(fts_table, col)
  end
end
|
|
460
|
+
|
|
461
|
+
# Builds one snippet() SELECT fragment per requested column. An Array uses
# 20 tokens per column; a Hash supplies column => token count.
#
# Raises ArgumentError when `snippets` is neither an Array nor a Hash.
def snippet_selects(snippets, fts_table)
  if snippets.is_a?(Array)
    snippets.map { |col| snippet_select(fts_table, col, 20) }
  elsif snippets.is_a?(Hash)
    snippets.map { |col, tokens| snippet_select(fts_table, col, tokens) }
  else
    raise ArgumentError, "snippet: must be Array or Hash"
  end
end
|
|
470
|
+
|
|
471
|
+
# Builds the SQL fragment `highlight(<fts>, <index>, <open>, <close>) AS
# searchable_highlight_<column>`, where the index is the column's position
# in searchable_columns.
#
# Raises Knitsearch::ColumnError when the column was not declared.
def highlight_select(fts_table, column)
  position = searchable_columns.keys.index(column.to_s)
  raise Knitsearch::ColumnError, "#{column} is not in searchable_by columns" unless position

  open_mark = connection.quote(Knitsearch::Highlighter.opening_mark)
  close_mark = connection.quote(Knitsearch::Highlighter.closing_mark)
  fts = connection.quote_table_name(fts_table)

  "highlight(#{fts}, #{position}, #{open_mark}, #{close_mark}) AS searchable_highlight_#{column}"
end
|
|
482
|
+
|
|
483
|
+
# Builds the SQL fragment `snippet(<fts>, <index>, <open>, <close>, '...',
# <tokens>) AS searchable_snippet_<column>`.
#
# Raises Knitsearch::ColumnError when the column was not declared, and
# ArgumentError when the token count is not a positive integer.
def snippet_select(fts_table, column, tokens)
  position = searchable_columns.keys.index(column.to_s)
  raise Knitsearch::ColumnError, "#{column} is not in searchable_by columns" unless position

  token_count = Integer(tokens)
  unless token_count > 0
    raise ArgumentError, "snippet token count must be positive, got: #{tokens.inspect}"
  end

  open_mark = connection.quote(Knitsearch::Highlighter.opening_mark)
  close_mark = connection.quote(Knitsearch::Highlighter.closing_mark)
  fts = connection.quote_table_name(fts_table)

  "snippet(#{fts}, #{position}, #{open_mark}, #{close_mark}, '...', #{token_count}) AS searchable_snippet_#{column}"
end
|
|
497
|
+
end
|
|
498
|
+
|
|
499
|
+
# True when the model class has at least one rich-text field mapped to a
# shadow column.
def should_sync_rich_text?
  !self.class.rich_text_mapping.empty?
end
|
|
502
|
+
|
|
503
|
+
# Copies the plain-text form of each mapped rich-text field into its shadow
# column attribute (nil when the rich-text value is nil).
def sync_rich_text_to_shadow_columns
  self.class.rich_text_mapping.each do |declared_field, shadow_column|
    body = send(declared_field)
    plain = extract_plain_text_from_action_text(body) unless body.nil?
    send("#{shadow_column}=", plain)
  end
end
|
|
514
|
+
|
|
515
|
+
# True when the model class declared at least one associated_against entry.
def should_sync_associated?
  !self.class.associated_mapping.empty?
end
|
|
518
|
+
|
|
519
|
+
# Fills each "<assoc>_<col>_plain_text" attribute from the association:
# - belongs_to: the parent's column value as a string (nil without a parent);
# - has_many (plain or :through): the plucked, non-nil values joined by a
#   space (nil when there are none).
def sync_associated_to_shadow_columns
  self.class.associated_mapping.each do |assoc_name, columns|
    reflection = self.class.reflect_on_association(assoc_name)

    columns.each do |col, _weight|
      setter = "#{assoc_name}_#{col}_plain_text="

      case reflection.macro
      when :belongs_to
        parent = send(assoc_name)
        send(setter, parent.nil? ? nil : parent.send(col)&.to_s)
      when :has_many
        # Read from the live association; later changes are expected to be
        # pushed from the child / join / target side.
        texts = send(assoc_name).pluck(col).compact.map(&:to_s)
        send(setter, (texts.join(" ") unless texts.empty?))
      end
    end
  end
end
|
|
545
|
+
|
|
546
|
+
# Rendered highlight for `column`, read from the
# `searchable_highlight_<column>` attribute; nil when that attribute is nil.
def search_highlight(column)
  raw = self["searchable_highlight_#{column}"]
  Knitsearch::Highlighter.render(raw) unless raw.nil?
end
|
|
551
|
+
|
|
552
|
+
# Rendered snippet for `column`, read from the `searchable_snippet_<column>`
# attribute; nil when that attribute is nil.
def search_snippet(column)
  raw = self["searchable_snippet_#{column}"]
  Knitsearch::Highlighter.render(raw) unless raw.nil?
end
|
|
557
|
+
|
|
558
|
+
# The value of the `searchable_score` attribute, which build_search_relation
# selects only when highlight or snippet is requested.
def searchable_score
  score_attribute = "searchable_score"
  self[score_attribute]
end
|
|
561
|
+
|
|
562
|
+
# Pushes this record's current column values into the shadow columns of
# every child model registered under this class in
# Knitsearch.belongs_to_dependents, using update_all on the rows whose
# foreign key equals this record's id. Does nothing when no dependents are
# registered.
def knitsearch_cascade_to_children
  registered = Knitsearch.belongs_to_dependents[self.class]
  return unless registered

  registered.each do |entry|
    # { shadow column => this record's current value as a string }
    fresh_values = {}
    entry[:columns].each do |shadow_col, source_col|
      fresh_values[shadow_col] = send(source_col)&.to_s
    end

    entry[:model].where(entry[:foreign_key] => id).update_all(fresh_values)
  end
end
|
|
581
|
+
|
|
582
|
+
private
|
|
583
|
+
|
|
584
|
+
# Converts an ActionText body into plain text for FTS indexing.
#
# Output is written to a shadow column for FTS indexing only — never
# rendered to a view. The regex stripper is not safe HTML sanitization.
#
# rich_text_body - an object responding to `to_html`.
#
# Returns "" for blank HTML, otherwise the text with tags removed, entities
# decoded and whitespace collapsed.
def extract_plain_text_from_action_text(rich_text_body)
  html = rich_text_body.to_html
  return "" if html.blank?

  # Drop <action-text-attachment> elements together with their content.
  text = html.gsub(%r{<action-text-attachment[^>]*>.*?</action-text-attachment>}m, "")

  # Put a space where a line break or a closing block tag separates words, so
  # "foo<br>bar" and "<p>foo</p><p>bar</p>" do not collapse into "foobar".
  text = text.gsub(%r{<br\s*/?>|</(?:p|div|blockquote|pre|li|tr|td|th|h[1-6])>}i, " ")

  # Remove all remaining tags.
  text = text.gsub(/<[^>]*>/, "")

  # CGI.unescape_html does not decode &nbsp;, so treat it as a space here.
  text = text.gsub(/&nbsp;/i, " ")

  # Decode entities only after the tags are gone, so escaped markup such as
  # "&lt;b&gt;" survives as literal text.
  text = CGI.unescape_html(text)

  # Collapse whitespace last, so whitespace produced by decoding (including
  # U+00A0) is normalized too.
  text.gsub(/[[:space:]]+/, " ").strip
end
|
|
612
|
+
end
|
|
613
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/concern"
|
|
4
|
+
|
|
5
|
+
module Knitsearch
  # Concern that opts a model into the shared multisearch documents.
  module Multisearchable
    extend ActiveSupport::Concern

    class_methods do
      # Declares the columns whose text makes up this model's search
      # document, and mixes in Knitsearch::MultisearchableSync once.
      #
      # against: - a column name or list of column names.
      def multisearchable(against:)
        @atomic_multisearchable_columns = Array(against).map(&:to_sym)
        return if include?(Knitsearch::MultisearchableSync)

        include Knitsearch::MultisearchableSync
      end

      # Columns declared via `multisearchable`, as Symbols; [] when none
      # were declared on this class.
      def atomic_multisearchable_columns
        Array(@atomic_multisearchable_columns)
      end

      # Delegates to Knitsearch::Document.backfill! for this class.
      def knitsearch_multisearch_backfill!
        Knitsearch::Document.backfill!(self)
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/concern"
|
|
4
|
+
|
|
5
|
+
module Knitsearch
  # Keeps a Knitsearch::Document row in step with each record of the
  # including model, via commit callbacks.
  module MultisearchableSync
    extend ActiveSupport::Concern

    included do
      # Proc form, not symbol form. Symbol-form after_commit callbacks silently no-op when
      # the target method is defined on a module included into the class — the callback registers
      # but dispatch never reaches the method. Procs work.
      after_save_commit { |record| record.knitsearch_sync_document }
      after_destroy_commit { |record| record.knitsearch_destroy_document }
    end

    # Creates or updates this record's document. The content is the
    # non-empty string values of the declared columns, joined by spaces.
    def knitsearch_sync_document
      pieces = self.class.atomic_multisearchable_columns.filter_map do |col|
        text = send(col).to_s
        text unless text.empty?
      end

      document = Knitsearch::Document.find_or_initialize_by(
        searchable_type: self.class.name,
        searchable_id: id
      )
      document.content = pieces.join(" ")
      document.save!
    end

    # Deletes the document rows that point at this record.
    def knitsearch_destroy_document
      owned = Knitsearch::Document.where(searchable_type: self.class.name, searchable_id: id)
      owned.delete_all
    end
  end
end
|