knitsearch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +11 -0
- data/README.md +652 -0
- data/lib/generators/knitsearch/install/install_generator.rb +208 -0
- data/lib/generators/knitsearch/install/templates/migration.rb.tt +7 -0
- data/lib/generators/knitsearch/multisearch_install/multisearch_install_generator.rb +89 -0
- data/lib/knitsearch/document.rb +12 -0
- data/lib/knitsearch/engine.rb +22 -0
- data/lib/knitsearch/fuzzy_corrector.rb +79 -0
- data/lib/knitsearch/has_many_dependent.rb +62 -0
- data/lib/knitsearch/has_many_through_join_dependent.rb +47 -0
- data/lib/knitsearch/has_many_through_target_dependent.rb +54 -0
- data/lib/knitsearch/highlighter.rb +36 -0
- data/lib/knitsearch/levenshtein.rb +35 -0
- data/lib/knitsearch/migration.rb +235 -0
- data/lib/knitsearch/model.rb +613 -0
- data/lib/knitsearch/multisearchable.rb +24 -0
- data/lib/knitsearch/multisearchable_sync.rb +38 -0
- data/lib/knitsearch/query.rb +57 -0
- data/lib/knitsearch/version.rb +5 -0
- data/lib/knitsearch.rb +129 -0
- data/lib/tasks/knitsearch.rake +33 -0
- metadata +125 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Knitsearch
|
|
4
|
+
module Migration
|
|
5
|
+
extend self
|
|
6
|
+
|
|
7
|
+
# Returns the Active Record connection that every statement in this module
# is executed on.
#
# Uses ActiveRecord::Base.lease_connection when the loaded Active Record
# responds to it, and falls back to ActiveRecord::Base.connection otherwise,
# so the same code runs on Active Record versions with and without
# lease_connection.
def connection
  base = ActiveRecord::Base
  base.respond_to?(:lease_connection) ? base.lease_connection : base.connection
end
|
|
10
|
+
|
|
11
|
+
# Creates the FTS5 external-content index for +table_name+ together with its
# vocab table and the three triggers that keep the index in sync.
#
# Objects created (in this order):
#   * "<table_name>_fts"        - FTS5 virtual table (content=<table_name>, content_rowid='id')
#   * "<table_name>_fts_vocab"  - fts5vocab 'row' table over the FTS table
#   * "<table_name>_ai/_ad/_au" - AFTER INSERT / DELETE / UPDATE triggers on <table_name>
#
# Parameters:
#   table_name         - source table name.
#   columns:           - Array of column names, or a Hash whose keys are the column names.
#   tokenizer:         - deprecated; any present value raises ArgumentError.
#   dictionary:        - checked by validate_dictionary, then mapped to a tokenizer string.
#   prefix:            - true for prefix sizes [2, 3], or one size / a list of sizes.
#   rich_text_columns: - columns indexed through their "<col>_plain_text" shadow column.
#   associated_against: - association name => columns (Array, or Hash keyed by column);
#                        each pair is indexed through a shadow column on the source table.
#
# Raises ArgumentError when +columns+ is empty, when +tokenizer+ is given, or
# when a prefix size is not a positive integer. Whatever validate_dictionary /
# dictionary_to_tokenizer raise is propagated unchanged.
def create_searchable_table(table_name, columns:, tokenizer: nil, dictionary: "simple", prefix: nil, rich_text_columns: [], associated_against: nil)
  raise ArgumentError, "columns must not be empty" if columns.empty?

  if tokenizer.present?
    raise ArgumentError, "tokenizer: is deprecated. Use dictionary: instead (e.g., dictionary: 'english')"
  end

  validate_dictionary(dictionary)
  tokenizer_string = dictionary_to_tokenizer(dictionary)
  fts_table = "#{table_name}_fts"

  # Resolve and check prefix sizes before any schema change, so a bad value
  # cannot leave freshly added shadow columns behind.
  prefix_sizes = nil
  if prefix
    prefix_sizes = prefix == true ? [2, 3] : Array(prefix).map(&:to_i)
    if prefix_sizes.any? { |size| size < 1 }
      raise ArgumentError, "prefix sizes must be positive integers, got #{prefix.inspect}"
    end
  end

  # Shadow columns live on the source table; the FTS index reads from them.
  create_rich_text_shadow_columns(table_name, rich_text_columns)
  associated_shadow_columns =
    associated_against.present? ? create_associated_shadow_columns(table_name, associated_against) : {}

  # Indexed column names as strings: rich text fields are swapped for their
  # shadow column, associated shadow columns are appended at the end.
  rich_text_names = rich_text_columns.map(&:to_s)
  base_columns = (columns.is_a?(Hash) ? columns.keys : columns).map(&:to_s)
  fts_column_names = base_columns.map do |col|
    rich_text_names.include?(col) ? "#{col}_plain_text" : col
  end
  fts_column_names.concat(associated_shadow_columns.keys.map(&:to_s))

  quoted_columns = fts_column_names.map { |col| connection.quote_column_name(col) }
  column_list = quoted_columns.join(", ")
  new_values = quoted_columns.map { |col| "new.#{col}" }.join(", ")
  old_values = quoted_columns.map { |col| "old.#{col}" }.join(", ")

  quoted_source = connection.quote_table_name(table_name)
  quoted_fts = connection.quote_table_name(fts_table)
  # FTS5 commands are issued through a column named after the FTS table itself.
  # It is quoted like every other identifier so unusual table names stay valid.
  fts_command_column = connection.quote_column_name(fts_table)

  # Each prefix size listed adds a sub-index to the FTS5 data file.
  fts_options = [
    "content=#{connection.quote(table_name)}",
    "content_rowid='id'",
    "tokenize=#{connection.quote(tokenizer_string)}"
  ]
  fts_options << "prefix=#{connection.quote(prefix_sizes.join(' '))}" if prefix_sizes

  # FTS5 virtual table with external content.
  connection.execute(
    "CREATE VIRTUAL TABLE #{quoted_fts} USING fts5(#{column_list}, #{fts_options.join(', ')})"
  )

  # Vocab table: read-only virtual table exposing the FTS5 dictionary for fuzzy correction.
  connection.execute(
    "CREATE VIRTUAL TABLE #{connection.quote_table_name("#{fts_table}_vocab")} " \
    "USING fts5vocab(#{connection.quote(fts_table)}, 'row')"
  )

  index_row = "INSERT INTO #{quoted_fts}(rowid, #{column_list}) VALUES (new.id, #{new_values}); "
  unindex_row = "INSERT INTO #{quoted_fts}(#{fts_command_column}, rowid, #{column_list}) " \
                "VALUES('delete', old.id, #{old_values}); "

  # After insert: add the new row to the index.
  connection.execute(
    "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_ai")} AFTER INSERT ON #{quoted_source} BEGIN " \
    "#{index_row}END"
  )

  # After delete: remove the old row from the index.
  connection.execute(
    "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_ad")} AFTER DELETE ON #{quoted_source} BEGIN " \
    "#{unindex_row}END"
  )

  # After update: remove the old row, then index the new one.
  connection.execute(
    "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_au")} AFTER UPDATE ON #{quoted_source} BEGIN " \
    "#{unindex_row}#{index_row}END"
  )
end
|
|
108
|
+
|
|
109
|
+
# Removes everything create_searchable_table set up for +table_name+ except
# the shadow columns on the source table: the three sync triggers, the vocab
# table and the FTS5 virtual table, followed by a best-effort removal of the
# FTS5 backing tables ("<table_name>_fts_data", "_idx", "_docsize", "_config").
#
# Every statement uses IF EXISTS. Only the backing-table drops tolerate a
# failing statement (ActiveRecord::StatementInvalid); any other error,
# and any error from the earlier statements, is raised to the caller.
def drop_searchable_table(table_name)
  fts_table = "#{table_name}_fts"

  %w[ai ad au].each do |suffix|
    connection.execute("DROP TRIGGER IF EXISTS #{connection.quote_table_name("#{table_name}_#{suffix}")}")
  end
  connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name("#{fts_table}_vocab")}")
  connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(fts_table)}")

  %w[data idx docsize config].each do |suffix|
    begin
      connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name("#{fts_table}_#{suffix}")}")
    rescue ActiveRecord::StatementInvalid
      # Best-effort cleanup of leftovers: a rejected DROP on a backing table
      # must not abort the teardown, but non-SQL errors are not hidden.
    end
  end
end
|
|
122
|
+
|
|
123
|
+
# (Re)creates the multisearch storage: the +knitsearches+ table, its two
# indexes, the +knitsearches_fts+ FTS5 external-content index and the three
# triggers that keep the index in sync with +knitsearches+.
#
# WARNING: any existing +knitsearches+ table, +knitsearches_fts+ table and
# knitsearches_ai/_ad/_au triggers are dropped first, so rows already stored
# in +knitsearches+ are lost.
#
# The initial drops are best-effort: a failing DROP statement
# (ActiveRecord::StatementInvalid) is ignored, and a leftover object then
# surfaces through the CREATE statement that follows. Other errors are raised.
def create_multisearch_table
  [
    "DROP TABLE IF EXISTS knitsearches_fts",
    "DROP TRIGGER IF EXISTS knitsearches_ai",
    "DROP TRIGGER IF EXISTS knitsearches_ad",
    "DROP TRIGGER IF EXISTS knitsearches_au",
    "DROP TABLE IF EXISTS knitsearches"
  ].each do |statement|
    begin
      connection.execute(statement)
    rescue ActiveRecord::StatementInvalid
      # Best-effort cleanup only; see the method comment.
    end
  end

  connection.execute(<<~SQL)
    CREATE TABLE knitsearches (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      searchable_type VARCHAR(255) NOT NULL,
      searchable_id INTEGER NOT NULL,
      content TEXT,
      created_at DATETIME,
      updated_at DATETIME
    )
  SQL

  # One row per searchable record, plus a lookup index by type alone.
  connection.execute("CREATE UNIQUE INDEX idx_knitsearches_poly ON knitsearches (searchable_type, searchable_id)")
  connection.execute("CREATE INDEX idx_knitsearches_type ON knitsearches (searchable_type)")

  connection.execute(<<~SQL)
    CREATE VIRTUAL TABLE knitsearches_fts USING fts5(
      content,
      content='knitsearches',
      content_rowid='id',
      tokenize='unicode61 remove_diacritics 2'
    )
  SQL

  # After insert: add the new row to the index.
  connection.execute(<<~SQL)
    CREATE TRIGGER knitsearches_ai AFTER INSERT ON knitsearches BEGIN
      INSERT INTO knitsearches_fts(rowid, content) VALUES (new.id, new.content);
    END
  SQL

  # After delete: remove the old row from the index.
  connection.execute(<<~SQL)
    CREATE TRIGGER knitsearches_ad AFTER DELETE ON knitsearches BEGIN
      INSERT INTO knitsearches_fts(knitsearches_fts, rowid, content) VALUES('delete', old.id, old.content);
    END
  SQL

  # After update: remove the old row, then index the new one.
  connection.execute(<<~SQL)
    CREATE TRIGGER knitsearches_au AFTER UPDATE ON knitsearches BEGIN
      INSERT INTO knitsearches_fts(knitsearches_fts, rowid, content) VALUES('delete', old.id, old.content);
      INSERT INTO knitsearches_fts(rowid, content) VALUES (new.id, new.content);
    END
  SQL
end
|
|
172
|
+
|
|
173
|
+
# Tears down the multisearch storage. Statements run in this order, each
# with IF EXISTS: the three sync triggers (au, ad, ai), the knitsearches_fts
# virtual table, its FTS5 backing tables (data, idx, docsize, config) and
# finally the knitsearches table itself.
def drop_multisearch_table
  drop = lambda do |kind, name|
    connection.execute("DROP #{kind} IF EXISTS #{connection.quote_table_name(name)}")
  end

  %w[knitsearches_au knitsearches_ad knitsearches_ai].each { |trigger| drop.call("TRIGGER", trigger) }
  drop.call("TABLE", "knitsearches_fts")

  # Explicitly drop the FTS5 backing tables of knitsearches_fts.
  %w[data idx docsize config].each { |suffix| drop.call("TABLE", "knitsearches_fts_#{suffix}") }

  drop.call("TABLE", "knitsearches")
end
|
|
186
|
+
|
|
187
|
+
private
|
|
188
|
+
|
|
189
|
+
# Adds a "<col>_plain_text" text column to +table_name+ for every entry of
# +rich_text_columns+, skipping columns that already exist.
def create_rich_text_shadow_columns(table_name, rich_text_columns)
  rich_text_columns.each do |rich_col|
    plain_col = "#{rich_col}_plain_text"
    next if connection.column_exists?(table_name, plain_col)

    connection.add_column(table_name, plain_col, :text)
  end
end
|
|
198
|
+
|
|
199
|
+
# Adds an "<association>_<column>_plain_text" text column to +table_name+ for
# every association/column pair in +associated_against+ (columns given as an
# Array, or as the keys of a Hash), skipping columns that already exist.
#
# Returns a Hash keyed by every shadow column name (value +true+), in the
# order the pairs were visited, whether or not the column had to be created.
def create_associated_shadow_columns(table_name, associated_against)
  associated_against.each_with_object({}) do |(assoc_name, columns_spec), shadow_columns|
    assoc_columns = columns_spec.is_a?(Array) ? columns_spec : columns_spec.keys

    assoc_columns.each do |assoc_col|
      plain_col = "#{assoc_name}_#{assoc_col}_plain_text"
      connection.add_column(table_name, plain_col, :text) unless connection.column_exists?(table_name, plain_col)
      shadow_columns[plain_col] = true
    end
  end
end
|
|
214
|
+
|
|
215
|
+
# Raises Knitsearch::UnknownDictionaryError unless +dictionary+ is listed in
# Knitsearch::SUPPORTED_DICTIONARIES; returns nil otherwise.
def validate_dictionary(dictionary)
  return if Knitsearch::SUPPORTED_DICTIONARIES.include?(dictionary)

  supported = Knitsearch::SUPPORTED_DICTIONARIES.inspect
  raise Knitsearch::UnknownDictionaryError, "Unknown dictionary: #{dictionary.inspect}. Supported: #{supported}"
end
|
|
221
|
+
|
|
222
|
+
# Maps a dictionary name to its entry in Knitsearch::TOKENIZER_PRESETS:
# "simple" -> :unicode, "english" -> :porter, "trigram" -> :trigram.
# Any other value raises Knitsearch::UnknownDictionaryError.
def dictionary_to_tokenizer(dictionary)
  preset_keys = [
    ["simple", :unicode],
    ["english", :porter],
    ["trigram", :trigram]
  ]
  _name, preset_key = preset_keys.find { |name, _key| name == dictionary }

  unless preset_key
    raise Knitsearch::UnknownDictionaryError, "Unsupported dictionary: #{dictionary.inspect}"
  end

  Knitsearch::TOKENIZER_PRESETS[preset_key]
end
|
|
234
|
+
end
|
|
235
|
+
end
|