rails-paradedb 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -1
- data/lib/generators/parade_db/index/templates/migration.rb.tt +1 -1
- data/lib/parade_db/index.rb +20 -9
- data/lib/parade_db/migration_helpers.rb +81 -18
- data/lib/parade_db/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1bcba07642cf33fe747ed6f810374adee776ee014533e64f5b38d490335dd672
|
|
4
|
+
data.tar.gz: a470ac63132444ae93ee893eee869d6dac4641774fe965f6519592a7e5c50d35
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aa7193be8a40ed3714b3d17cf1c1b53980b63245a70c878e6aaa164c272df072217eb9c71a808cc29c0dc2611056f7d323f3d6aa9e507b77648beea83a734b6e
|
|
7
|
+
data.tar.gz: 6870426a910699fd8b1b12f7a03402d8157138d2b2aa4c210339356a2ff60299b29301a1a5cc28f64656f4e80479719e0324270dd0f5a9f7668e09ee9654a262
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,22 @@ All notable changes to this project will be documented in this file. The format
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.6.0] - 2026-04-14
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- Support concurrent BM25 index creation via `concurrently:` in `create_paradedb_index` and `add_bm25_index`
|
|
12
|
+
|
|
13
|
+
## [0.5.0] - 2026-04-14
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- Support partial indexes via `where:` in `add_bm25_index` and `ParadeDB::Index`
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
|
|
21
|
+
- Allow aliased indexed expressions like `"(rating + 1)" => { alias: "rating" }`
|
|
22
|
+
|
|
7
23
|
## [0.4.0] - 2026-04-09
|
|
8
24
|
|
|
9
25
|
### Changed
|
|
@@ -110,7 +126,9 @@ All notable changes to this project will be documented in this file. The format
|
|
|
110
126
|
- Schema dump/load round-trip for tokenizer configuration and index options
|
|
111
127
|
(including `target_segment_count`)
|
|
112
128
|
|
|
113
|
-
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.
|
|
129
|
+
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.6.0...HEAD
|
|
130
|
+
[0.6.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.6.0
|
|
131
|
+
[0.5.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.5.0
|
|
114
132
|
[0.4.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.4.0
|
|
115
133
|
[0.3.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.3.0
|
|
116
134
|
[0.2.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.2.0
|
|
@@ -6,7 +6,7 @@ class <%= migration_class_name %> < ActiveRecord::Migration[<%= ActiveRecord::Mi
|
|
|
6
6
|
|
|
7
7
|
<% end -%>
|
|
8
8
|
def up
|
|
9
|
-
create_paradedb_index(<%= class_name %>Index, if_not_exists: true)
|
|
9
|
+
create_paradedb_index(<%= class_name %>Index, if_not_exists: true<% if options[:concurrent] %>, concurrently: true<% end %>)
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
def down
|
data/lib/parade_db/index.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module ParadeDB
|
|
4
4
|
class Index
|
|
5
5
|
class << self
|
|
6
|
-
attr_writer :table_name, :key_field, :index_name, :fields, :index_options
|
|
6
|
+
attr_writer :table_name, :key_field, :index_name, :fields, :index_options, :where
|
|
7
7
|
|
|
8
8
|
def table_name
|
|
9
9
|
@table_name
|
|
@@ -25,6 +25,10 @@ module ParadeDB
|
|
|
25
25
|
@index_options || {}
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
def where
|
|
29
|
+
@where
|
|
30
|
+
end
|
|
31
|
+
|
|
28
32
|
def default_index_name
|
|
29
33
|
return nil if table_name.nil?
|
|
30
34
|
|
|
@@ -166,15 +170,16 @@ module ParadeDB
|
|
|
166
170
|
FIELD_OPTION_KEYS = %i[fast record normalizer expand_dots].freeze
|
|
167
171
|
|
|
168
172
|
class Compiled
|
|
169
|
-
attr_reader :table_name, :key_field, :index_name, :entries, :index_options, :field_options
|
|
173
|
+
attr_reader :table_name, :key_field, :index_name, :entries, :index_options, :field_options, :where
|
|
170
174
|
|
|
171
|
-
def initialize(table_name:, key_field:, index_name:, entries:, index_options:, field_options:)
|
|
175
|
+
def initialize(table_name:, key_field:, index_name:, entries:, index_options:, field_options:, where:)
|
|
172
176
|
@table_name = table_name
|
|
173
177
|
@key_field = key_field
|
|
174
178
|
@index_name = index_name
|
|
175
179
|
@entries = entries
|
|
176
180
|
@index_options = index_options
|
|
177
181
|
@field_options = field_options
|
|
182
|
+
@where = where
|
|
178
183
|
end
|
|
179
184
|
end
|
|
180
185
|
Entry = Struct.new(:source, :expression, :tokenizer, :options, :query_key, keyword_init: true)
|
|
@@ -203,7 +208,8 @@ module ParadeDB
|
|
|
203
208
|
index_name: index_name,
|
|
204
209
|
entries: entries,
|
|
205
210
|
index_options: index_options,
|
|
206
|
-
field_options: field_options
|
|
211
|
+
field_options: field_options,
|
|
212
|
+
where: klass.where
|
|
207
213
|
)
|
|
208
214
|
end
|
|
209
215
|
|
|
@@ -223,10 +229,6 @@ module ParadeDB
|
|
|
223
229
|
raise InvalidIndexDefinition, "fields must be a Hash"
|
|
224
230
|
end
|
|
225
231
|
|
|
226
|
-
build_entries_from_structured_fields(raw_fields)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
def build_entries_from_structured_fields(raw_fields)
|
|
230
232
|
entries = []
|
|
231
233
|
field_options = {}
|
|
232
234
|
|
|
@@ -246,7 +248,16 @@ module ParadeDB
|
|
|
246
248
|
tokenizers = normalized[:tokenizers]
|
|
247
249
|
single_tokenizer_keys_present = TokenizerParser::TOKENIZER_SINGLE_KEYS.any? { |key| normalized.key?(key) }
|
|
248
250
|
|
|
249
|
-
|
|
251
|
+
is_alias = normalized[:alias] && normalized.length == 1
|
|
252
|
+
if is_alias
|
|
253
|
+
entries << Entry.new(
|
|
254
|
+
source: source_name,
|
|
255
|
+
expression: expression?(source_name),
|
|
256
|
+
tokenizer: nil,
|
|
257
|
+
options: {},
|
|
258
|
+
query_key: normalized[:alias]
|
|
259
|
+
)
|
|
260
|
+
elsif tokenizers
|
|
250
261
|
if single_tokenizer_keys_present
|
|
251
262
|
raise InvalidIndexDefinition,
|
|
252
263
|
"field #{source_name.inspect} cannot mix :tokenizers with :tokenizer/:args/:named_args/:filters/:stemmer/:alias"
|
|
@@ -4,11 +4,15 @@ require_relative "tokenizer_sql"
|
|
|
4
4
|
|
|
5
5
|
module ParadeDB
|
|
6
6
|
module MigrationHelpers
|
|
7
|
-
def create_paradedb_index(index_klass, if_not_exists: false)
|
|
7
|
+
def create_paradedb_index(index_klass, if_not_exists: false, concurrently: false)
|
|
8
8
|
ensure_postgresql_adapter!
|
|
9
|
+
if concurrently && transaction_open_for_paradedb?
|
|
10
|
+
raise ArgumentError, "create_paradedb_index concurrently: true cannot run inside a transaction"
|
|
11
|
+
end
|
|
12
|
+
|
|
9
13
|
resolved = resolve_index_klass(index_klass)
|
|
10
14
|
compiled = resolved.compiled_definition
|
|
11
|
-
execute(build_create_sql(compiled, if_not_exists: if_not_exists))
|
|
15
|
+
execute(build_create_sql(compiled, if_not_exists: if_not_exists, concurrently: concurrently))
|
|
12
16
|
remember_schema_index_reference(resolved)
|
|
13
17
|
end
|
|
14
18
|
|
|
@@ -21,7 +25,7 @@ module ParadeDB
|
|
|
21
25
|
remember_schema_index_reference(resolved)
|
|
22
26
|
end
|
|
23
27
|
|
|
24
|
-
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, if_not_exists: false)
|
|
28
|
+
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, where: nil, if_not_exists: false, concurrently: false)
|
|
25
29
|
ensure_postgresql_adapter!
|
|
26
30
|
anonymous = Class.new(ParadeDB::Index)
|
|
27
31
|
anonymous.table_name = table
|
|
@@ -29,8 +33,9 @@ module ParadeDB
|
|
|
29
33
|
anonymous.index_name = name unless name.nil?
|
|
30
34
|
anonymous.fields = fields
|
|
31
35
|
anonymous.index_options = index_options unless index_options.nil?
|
|
36
|
+
anonymous.where = where unless where.nil?
|
|
32
37
|
|
|
33
|
-
create_paradedb_index(anonymous, if_not_exists: if_not_exists)
|
|
38
|
+
create_paradedb_index(anonymous, if_not_exists: if_not_exists, concurrently: concurrently)
|
|
34
39
|
end
|
|
35
40
|
|
|
36
41
|
def remove_bm25_index(table, name: nil, if_exists: false)
|
|
@@ -76,15 +81,17 @@ module ParadeDB
|
|
|
76
81
|
ParadeDB.ensure_postgresql_adapter!(self, context: "ParadeDB migration helper")
|
|
77
82
|
end
|
|
78
83
|
|
|
79
|
-
def build_create_sql(compiled, if_not_exists:)
|
|
84
|
+
def build_create_sql(compiled, if_not_exists:, concurrently: false)
|
|
85
|
+
modifier = concurrently ? " CONCURRENTLY" : ""
|
|
80
86
|
prefix = if_not_exists ? "IF NOT EXISTS " : ""
|
|
81
87
|
fields_sql = compiled.entries.map { |entry| bm25_entry_sql(entry) }.join(", ")
|
|
82
88
|
with_options_sql = bm25_with_options_sql(compiled)
|
|
89
|
+
where_sql = compiled.where ? "\nWHERE #{compiled.where}" : ""
|
|
83
90
|
|
|
84
91
|
<<~SQL.strip.gsub(/\s+/, " ")
|
|
85
|
-
CREATE INDEX #{prefix}#{quote_table_name(compiled.index_name)} ON #{quote_table_name(compiled.table_name)}
|
|
92
|
+
CREATE INDEX#{modifier} #{prefix}#{quote_table_name(compiled.index_name)} ON #{quote_table_name(compiled.table_name)}
|
|
86
93
|
USING bm25 (#{fields_sql})
|
|
87
|
-
WITH (#{with_options_sql})
|
|
94
|
+
WITH (#{with_options_sql})#{where_sql}
|
|
88
95
|
SQL
|
|
89
96
|
end
|
|
90
97
|
|
|
@@ -177,6 +184,11 @@ module ParadeDB
|
|
|
177
184
|
|
|
178
185
|
def bm25_entry_sql(entry)
|
|
179
186
|
source_sql = bm25_source_sql(entry)
|
|
187
|
+
|
|
188
|
+
if entry.tokenizer.nil? && entry.query_key != entry.source
|
|
189
|
+
return "(#{source_sql}::pdb.alias(#{quote(entry.query_key)}))"
|
|
190
|
+
end
|
|
191
|
+
|
|
180
192
|
return source_sql if entry.tokenizer.nil?
|
|
181
193
|
|
|
182
194
|
"(#{source_sql}::#{tokenizer_sql(entry.tokenizer, entry.options)})"
|
|
@@ -282,7 +294,8 @@ module ParadeDB
|
|
|
282
294
|
SELECT
|
|
283
295
|
c.relname AS index_name,
|
|
284
296
|
t.relname AS table_name,
|
|
285
|
-
pg_get_indexdef(c.oid) AS indexdef
|
|
297
|
+
pg_get_indexdef(c.oid) AS indexdef,
|
|
298
|
+
pg_get_expr(i.indpred, i.indrelid) AS where_clause
|
|
286
299
|
FROM pg_class c
|
|
287
300
|
JOIN pg_namespace n ON n.oid = c.relnamespace
|
|
288
301
|
JOIN pg_index i ON i.indexrelid = c.oid
|
|
@@ -306,6 +319,7 @@ module ParadeDB
|
|
|
306
319
|
key_field = extract_bm25_key_field(indexdef)
|
|
307
320
|
index_options = extract_bm25_index_options(indexdef)
|
|
308
321
|
fields_sql = extract_bm25_fields_sql(indexdef)
|
|
322
|
+
where = normalize_bm25_where_clause(row["where_clause"])
|
|
309
323
|
|
|
310
324
|
if key_field && fields_sql
|
|
311
325
|
field_sqls = split_bm25_top_level(fields_sql).map(&:strip)
|
|
@@ -334,6 +348,7 @@ module ParadeDB
|
|
|
334
348
|
unless index_options.empty?
|
|
335
349
|
statement += ", index_options: #{ruby_hash_literal(index_options)}"
|
|
336
350
|
end
|
|
351
|
+
statement += ", where: #{where.inspect}" if where
|
|
337
352
|
statement
|
|
338
353
|
else
|
|
339
354
|
"execute #{indexdef.inspect}"
|
|
@@ -351,10 +366,7 @@ module ParadeDB
|
|
|
351
366
|
end
|
|
352
367
|
|
|
353
368
|
def extract_bm25_index_options(indexdef)
|
|
354
|
-
|
|
355
|
-
return {} unless with_match
|
|
356
|
-
|
|
357
|
-
with_sql = with_match[1]
|
|
369
|
+
with_sql, = extract_bm25_with_components(indexdef)
|
|
358
370
|
options = {}
|
|
359
371
|
split_sql_arguments(with_sql).each do |argument|
|
|
360
372
|
key, value_sql = split_assignment(argument)
|
|
@@ -378,7 +390,6 @@ module ParadeDB
|
|
|
378
390
|
|
|
379
391
|
def extract_bm25_fields_sql(indexdef)
|
|
380
392
|
match = indexdef.match(/USING\s+bm25\s*\(/im)
|
|
381
|
-
return nil unless match
|
|
382
393
|
|
|
383
394
|
start = match.end(0)
|
|
384
395
|
depth = 1
|
|
@@ -390,11 +401,61 @@ module ParadeDB
|
|
|
390
401
|
end
|
|
391
402
|
pos += 1
|
|
392
403
|
end
|
|
393
|
-
|
|
404
|
+
raise "Found invalid index definition `#{indexdef}`" if depth != 0
|
|
394
405
|
|
|
395
406
|
indexdef[start..pos - 2]
|
|
396
407
|
end
|
|
397
408
|
|
|
409
|
+
def extract_bm25_with_components(indexdef)
|
|
410
|
+
match = indexdef.match(/WITH\s*\(/im)
|
|
411
|
+
start = match.end(0)
|
|
412
|
+
depth = 1
|
|
413
|
+
pos = start
|
|
414
|
+
while pos < indexdef.length && depth > 0
|
|
415
|
+
case indexdef[pos]
|
|
416
|
+
when "(" then depth += 1
|
|
417
|
+
when ")" then depth -= 1
|
|
418
|
+
end
|
|
419
|
+
pos += 1
|
|
420
|
+
end
|
|
421
|
+
raise "Found invalid index definition `#{indexdef}`" if depth != 0
|
|
422
|
+
|
|
423
|
+
with_sql = indexdef[start..pos - 2]
|
|
424
|
+
trailing_sql = indexdef[pos..]&.strip
|
|
425
|
+
trailing_sql = nil if trailing_sql&.empty?
|
|
426
|
+
|
|
427
|
+
[with_sql, trailing_sql]
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
def normalize_bm25_where_clause(where)
|
|
431
|
+
return nil if where.nil?
|
|
432
|
+
|
|
433
|
+
normalized = where.to_s.strip
|
|
434
|
+
return nil if normalized.empty?
|
|
435
|
+
|
|
436
|
+
while bm25_wrapped_in_parentheses?(normalized)
|
|
437
|
+
normalized = normalized[1...-1].strip
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
normalized.empty? ? nil : normalized
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
def bm25_wrapped_in_parentheses?(sql)
|
|
444
|
+
return false unless sql.start_with?("(") && sql.end_with?(")")
|
|
445
|
+
|
|
446
|
+
depth = 0
|
|
447
|
+
sql.each_char.with_index do |char, idx|
|
|
448
|
+
case char
|
|
449
|
+
when "(" then depth += 1
|
|
450
|
+
when ")"
|
|
451
|
+
depth -= 1
|
|
452
|
+
return false if depth.zero? && idx < sql.length - 1
|
|
453
|
+
end
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
depth.zero?
|
|
457
|
+
end
|
|
458
|
+
|
|
398
459
|
def split_bm25_top_level(str)
|
|
399
460
|
parts = []
|
|
400
461
|
current = +""
|
|
@@ -790,22 +851,24 @@ end
|
|
|
790
851
|
if defined?(ActiveRecord::Migration)
|
|
791
852
|
module ParadeDB
|
|
792
853
|
module MigrationDSL
|
|
793
|
-
def create_paradedb_index(index_klass, if_not_exists: false)
|
|
794
|
-
connection.create_paradedb_index(index_klass, if_not_exists: if_not_exists)
|
|
854
|
+
def create_paradedb_index(index_klass, if_not_exists: false, concurrently: false)
|
|
855
|
+
connection.create_paradedb_index(index_klass, if_not_exists: if_not_exists, concurrently: concurrently)
|
|
795
856
|
end
|
|
796
857
|
|
|
797
858
|
def replace_paradedb_index(index_klass)
|
|
798
859
|
connection.replace_paradedb_index(index_klass)
|
|
799
860
|
end
|
|
800
861
|
|
|
801
|
-
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, if_not_exists: false)
|
|
862
|
+
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, where: nil, if_not_exists: false, concurrently: false)
|
|
802
863
|
connection.add_bm25_index(
|
|
803
864
|
table,
|
|
804
865
|
fields: fields,
|
|
805
866
|
key_field: key_field,
|
|
806
867
|
name: name,
|
|
807
868
|
index_options: index_options,
|
|
808
|
-
|
|
869
|
+
where: where,
|
|
870
|
+
if_not_exists: if_not_exists,
|
|
871
|
+
concurrently: concurrently
|
|
809
872
|
)
|
|
810
873
|
end
|
|
811
874
|
|
data/lib/parade_db/version.rb
CHANGED