knitsearch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,235 @@
# frozen_string_literal: true

module Knitsearch
  # Schema helpers that create and remove SQLite FTS5 search indexes through
  # the ActiveRecord connection.
  #
  # Two kinds of index are managed here:
  #
  # * a per-table index ("<table>_fts") kept in sync with its source table by
  #   three triggers ("<table>_ai", "<table>_ad", "<table>_au"), plus a
  #   "<table>_fts_vocab" fts5vocab table;
  # * a single "knitsearches" table with its own "knitsearches_fts" index.
  module Migration
    extend self

    # @return [Object] the connection returned by ActiveRecord::Base.connection
    def connection
      ActiveRecord::Base.connection
    end

    # Creates the FTS5 index, vocab table and sync triggers for +table_name+.
    #
    # Shadow text columns are added to the source table (when missing) for each
    # rich text column ("<col>_plain_text") and for each associated column
    # ("<assoc>_<col>_plain_text"); the index reads those shadow columns.
    #
    # @param table_name [String, Symbol] source table; its rowid column is assumed to be "id"
    # @param columns [Array, Hash] columns to index (for a Hash only the keys are used)
    # @param tokenizer [Object, nil] deprecated; any present value raises
    # @param dictionary [String] one of Knitsearch::SUPPORTED_DICTIONARIES
    # @param prefix [true, Integer, Array<Integer>, nil] prefix index sizes;
    #   +true+ means [2, 3]. Each size adds a sub-index to the FTS5 data file.
    # @param rich_text_columns [Array, String, Symbol, nil] names from +columns+
    #   that are indexed through their "<col>_plain_text" shadow column
    # @param associated_against [Hash, nil] association name => column names
    #   (Array, Hash keyed by column, or a single name)
    # @raise [ArgumentError] if +columns+ is empty or +tokenizer+ is given
    # @raise [Knitsearch::UnknownDictionaryError] if +dictionary+ is not supported
    def create_searchable_table(table_name, columns:, tokenizer: nil, dictionary: "simple", prefix: nil, rich_text_columns: [], associated_against: nil)
      raise ArgumentError, "columns must not be empty" if columns.empty?

      if tokenizer.present?
        raise ArgumentError, "tokenizer: is deprecated. Use dictionary: instead (e.g., dictionary: 'english')"
      end

      validate_dictionary(dictionary)
      tokenizer_string = dictionary_to_tokenizer(dictionary)
      fts_table = "#{table_name}_fts"

      # Normalise to strings once so the comparisons below are consistent.
      rich_text_columns = Array(rich_text_columns).map(&:to_s)
      create_rich_text_shadow_columns(table_name, rich_text_columns)

      associated_shadow_columns = {}
      if associated_against.present?
        associated_shadow_columns = create_associated_shadow_columns(table_name, associated_against)
      end

      # FTS column list: shadow column names stand in for rich text fields,
      # followed by the associated shadow columns.
      column_names = (columns.is_a?(Hash) ? columns.keys : columns).map(&:to_s)
      fts_column_names = column_names.map do |col|
        rich_text_columns.include?(col) ? "#{col}_plain_text" : col
      end
      fts_column_names.concat(associated_shadow_columns.keys)
      column_list = fts_column_names.map { |c| connection.quote_column_name(c.to_s) }.join(", ")

      fts_options = build_fts_options(table_name, tokenizer_string, prefix)

      # FTS5 virtual table with external content (rows live in the source table).
      connection.execute(
        "CREATE VIRTUAL TABLE #{connection.quote_table_name(fts_table)} USING fts5(" \
        "#{column_list}, " \
        "#{fts_options.join(', ')}" \
        ")"
      )

      # Vocab table — read-only virtual table exposing the FTS5 dictionary
      # for fuzzy correction. Standard SQLite feature, no extension.
      connection.execute(
        "CREATE VIRTUAL TABLE #{connection.quote_table_name("#{fts_table}_vocab")} " \
        "USING fts5vocab(#{connection.quote(fts_table)}, 'row')"
      )

      create_sync_triggers(table_name, fts_table, fts_column_names, column_list)
    end

    # Drops the triggers, vocab table and FTS5 index created by
    # #create_searchable_table. Shadow columns added to the source table are
    # not removed here.
    #
    # @param table_name [String, Symbol] source table name
    def drop_searchable_table(table_name)
      fts_table = "#{table_name}_fts"

      %w[ai ad au].each do |suffix|
        connection.execute("DROP TRIGGER IF EXISTS #{connection.quote_table_name("#{table_name}_#{suffix}")}")
      end
      connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name("#{fts_table}_vocab")}")
      connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name(fts_table)}")

      # Best effort: clean up any FTS5 backing tables that are still around.
      %w[data idx docsize config].each do |suffix|
        execute_best_effort("DROP TABLE IF EXISTS #{connection.quote_table_name("#{fts_table}_#{suffix}")}")
      end
    end

    # (Re)creates the "knitsearches" table, its indexes, its FTS5 index and
    # the three sync triggers.
    #
    # NOTE: any existing "knitsearches" table is dropped first, so its rows
    # are lost.
    def create_multisearch_table
      [
        "DROP TABLE IF EXISTS knitsearches_fts",
        "DROP TRIGGER IF EXISTS knitsearches_ai",
        "DROP TRIGGER IF EXISTS knitsearches_ad",
        "DROP TRIGGER IF EXISTS knitsearches_au",
        "DROP TABLE IF EXISTS knitsearches"
      ].each { |statement| execute_best_effort(statement) }

      connection.execute(<<~SQL)
        CREATE TABLE knitsearches (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          searchable_type VARCHAR(255) NOT NULL,
          searchable_id INTEGER NOT NULL,
          content TEXT,
          created_at DATETIME,
          updated_at DATETIME
        )
      SQL

      connection.execute("CREATE UNIQUE INDEX idx_knitsearches_poly ON knitsearches (searchable_type, searchable_id)")
      connection.execute("CREATE INDEX idx_knitsearches_type ON knitsearches (searchable_type)")

      connection.execute(<<~SQL)
        CREATE VIRTUAL TABLE knitsearches_fts USING fts5(
          content,
          content='knitsearches',
          content_rowid='id',
          tokenize='unicode61 remove_diacritics 2'
        )
      SQL

      connection.execute(<<~SQL)
        CREATE TRIGGER knitsearches_ai AFTER INSERT ON knitsearches BEGIN
          INSERT INTO knitsearches_fts(rowid, content) VALUES (new.id, new.content);
        END
      SQL

      connection.execute(<<~SQL)
        CREATE TRIGGER knitsearches_ad AFTER DELETE ON knitsearches BEGIN
          INSERT INTO knitsearches_fts(knitsearches_fts, rowid, content) VALUES('delete', old.id, old.content);
        END
      SQL

      connection.execute(<<~SQL)
        CREATE TRIGGER knitsearches_au AFTER UPDATE ON knitsearches BEGIN
          INSERT INTO knitsearches_fts(knitsearches_fts, rowid, content) VALUES('delete', old.id, old.content);
          INSERT INTO knitsearches_fts(rowid, content) VALUES (new.id, new.content);
        END
      SQL
    end

    # Drops the "knitsearches" triggers, FTS5 index, backing tables and the
    # "knitsearches" table itself.
    def drop_multisearch_table
      connection.execute("DROP TRIGGER IF EXISTS #{connection.quote_table_name('knitsearches_au')}")
      connection.execute("DROP TRIGGER IF EXISTS #{connection.quote_table_name('knitsearches_ad')}")
      connection.execute("DROP TRIGGER IF EXISTS #{connection.quote_table_name('knitsearches_ai')}")
      connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name('knitsearches_fts')}")

      # Explicitly drop FTS5 shadow tables for knitsearches_fts
      %w[data idx docsize config].each do |suffix|
        connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name("knitsearches_fts_#{suffix}")}")
      end

      connection.execute("DROP TABLE IF EXISTS #{connection.quote_table_name('knitsearches')}")
    end

    private

    # Runs +sql+ and ignores a database-level statement failure. Only
    # ActiveRecord::StatementInvalid is rescued so unrelated errors still surface.
    def execute_best_effort(sql)
      connection.execute(sql)
    rescue ActiveRecord::StatementInvalid
      nil
    end

    # Builds the FTS5 option list (content table, rowid column, tokenizer and
    # optional prefix sizes) as SQL fragments.
    # prefix: true uses [2, 3] (safe default, ~2× index size). prefix: [2, 3, 4] customizes.
    def build_fts_options(table_name, tokenizer_string, prefix)
      options = [
        "content=#{connection.quote(table_name)}",
        "content_rowid='id'",
        "tokenize=#{connection.quote(tokenizer_string)}"
      ]

      if prefix
        sizes = prefix == true ? [ 2, 3 ] : Array(prefix).map(&:to_i)
        options << "prefix=#{connection.quote(sizes.join(' '))}"
      end

      options
    end

    # Creates the insert/delete/update triggers that mirror source-table rows
    # into the FTS index.
    def create_sync_triggers(table_name, fts_table, fts_column_names, column_list)
      source = connection.quote_table_name(table_name)
      index = connection.quote_table_name(fts_table)
      # The FTS5 'delete' command is addressed through a column named after the
      # index table; quote it like every other identifier.
      command_column = connection.quote_column_name(fts_table)

      add_row = "INSERT INTO #{index}(rowid, #{column_list}) " \
                "VALUES (new.id, #{trigger_row_values('new', fts_column_names)}); "
      remove_row = "INSERT INTO #{index}(#{command_column}, rowid, #{column_list}) " \
                   "VALUES('delete', old.id, #{trigger_row_values('old', fts_column_names)}); "

      # After insert trigger: add new row to index
      connection.execute(
        "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_ai")} AFTER INSERT ON #{source} BEGIN " \
        "#{add_row}END"
      )

      # After delete trigger: remove row from index
      connection.execute(
        "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_ad")} AFTER DELETE ON #{source} BEGIN " \
        "#{remove_row}END"
      )

      # After update trigger: delete old, insert new
      connection.execute(
        "CREATE TRIGGER #{connection.quote_table_name("#{table_name}_au")} AFTER UPDATE ON #{source} BEGIN " \
        "#{remove_row}#{add_row}END"
      )
    end

    # Returns "new."/"old."-qualified, quoted column references joined by ", ".
    def trigger_row_values(row_alias, fts_column_names)
      fts_column_names.map { |col| "#{row_alias}.#{connection.quote_column_name(col.to_s)}" }.join(", ")
    end

    # Adds a "<col>_plain_text" text column to +table_name+ for each rich text
    # column, unless the column already exists.
    def create_rich_text_shadow_columns(table_name, rich_text_columns)
      Array(rich_text_columns).each do |col|
        shadow_col = "#{col}_plain_text"
        next if connection.column_exists?(table_name, shadow_col)

        connection.add_column(table_name, shadow_col, :text)
      end
    end

    # Adds a "<assoc>_<col>_plain_text" text column to +table_name+ for each
    # associated column, unless it already exists.
    #
    # @return [Hash{String => true}] the shadow column names, in creation order
    def create_associated_shadow_columns(table_name, associated_against)
      associated_against.each_with_object({}) do |(assoc_name, columns_spec), result|
        columns_list = columns_spec.respond_to?(:keys) ? columns_spec.keys : Array(columns_spec)
        columns_list.each do |col|
          shadow_col = "#{assoc_name}_#{col}_plain_text"
          unless connection.column_exists?(table_name, shadow_col)
            connection.add_column(table_name, shadow_col, :text)
          end
          result[shadow_col] = true
        end
      end
    end

    # @raise [Knitsearch::UnknownDictionaryError] unless +dictionary+ is listed
    #   in Knitsearch::SUPPORTED_DICTIONARIES
    def validate_dictionary(dictionary)
      return if Knitsearch::SUPPORTED_DICTIONARIES.include?(dictionary)

      raise Knitsearch::UnknownDictionaryError,
        "Unknown dictionary: #{dictionary.inspect}. Supported: #{Knitsearch::SUPPORTED_DICTIONARIES.inspect}"
    end

    # Maps a dictionary name to the tokenizer string stored in
    # Knitsearch::TOKENIZER_PRESETS.
    #
    # @raise [Knitsearch::UnknownDictionaryError] for any other dictionary
    def dictionary_to_tokenizer(dictionary)
      case dictionary
      when "simple"
        Knitsearch::TOKENIZER_PRESETS[:unicode]
      when "english"
        Knitsearch::TOKENIZER_PRESETS[:porter]
      when "trigram"
        Knitsearch::TOKENIZER_PRESETS[:trigram]
      else
        raise Knitsearch::UnknownDictionaryError, "Unsupported dictionary: #{dictionary.inspect}"
      end
    end
  end
end