rails-paradedb 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/lib/parade_db/index.rb +20 -9
- data/lib/parade_db/migration_helpers.rb +177 -9
- data/lib/parade_db/search_methods.rb +0 -4
- data/lib/parade_db/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 04a77503859e70b3362f1a5679a2d1b229c62c5c49a0383fbe1164b317d902f2
|
|
4
|
+
data.tar.gz: 1ec3bc0ae18c1232ddfc1b67aec6eb3a92dde0285297fa4a45afa7b789ae9d7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e81872356be536dd22423f39c7c8a899630dd5a601d5c54c6a8cbfc2c165ba3aa1c8edbe016a88ff9bc3492fcb0925c35cf31055dbca39de59f05f1b49ce2cec
|
|
7
|
+
data.tar.gz: 0c3781ba0d93f5aa4067545354f87e186da71c43fab1a257c9715ce821dfde474549c200d22f9814f75747a687a7e21626148de2b474759bc9317395f413902f
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file. The format
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.5.0] - 2026-04-14
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- Support partial indexes via `where:` in `add_bm25_index` and `ParadeDB::Index`
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
- Allow aliased indexed expressions like `"(rating + 1)" => { alias: "rating" }`
|
|
16
|
+
|
|
17
|
+
## [0.4.0] - 2026-04-09
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Removed unnecessary validation from non-exact aggregate queries without `over()`
|
|
22
|
+
- `change` migrations now auto-reverse `create_paradedb_index` and `add_bm25_index`, while irreversible ParadeDB migration helpers raise explicit rollback errors
|
|
23
|
+
|
|
7
24
|
## [0.3.0] - 2026-03-23
|
|
8
25
|
|
|
9
26
|
### Removed
|
|
@@ -103,7 +120,9 @@ All notable changes to this project will be documented in this file. The format
|
|
|
103
120
|
- Schema dump/load round-trip for tokenizer configuration and index options
|
|
104
121
|
(including `target_segment_count`)
|
|
105
122
|
|
|
106
|
-
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.3.0...HEAD
|
|
123
|
+
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.5.0...HEAD
|
|
124
|
+
[0.5.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.5.0
|
|
125
|
+
[0.4.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.4.0
|
|
107
126
|
[0.3.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.3.0
|
|
108
127
|
[0.2.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.2.0
|
|
109
128
|
[0.1.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.1.0
|
data/lib/parade_db/index.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module ParadeDB
|
|
4
4
|
class Index
|
|
5
5
|
class << self
|
|
6
|
-
attr_writer :table_name, :key_field, :index_name, :fields, :index_options
|
|
6
|
+
attr_writer :table_name, :key_field, :index_name, :fields, :index_options, :where
|
|
7
7
|
|
|
8
8
|
def table_name
|
|
9
9
|
@table_name
|
|
@@ -25,6 +25,10 @@ module ParadeDB
|
|
|
25
25
|
@index_options || {}
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
def where
|
|
29
|
+
@where
|
|
30
|
+
end
|
|
31
|
+
|
|
28
32
|
def default_index_name
|
|
29
33
|
return nil if table_name.nil?
|
|
30
34
|
|
|
@@ -166,15 +170,16 @@ module ParadeDB
|
|
|
166
170
|
FIELD_OPTION_KEYS = %i[fast record normalizer expand_dots].freeze
|
|
167
171
|
|
|
168
172
|
class Compiled
|
|
169
|
-
attr_reader :table_name, :key_field, :index_name, :entries, :index_options, :field_options
|
|
173
|
+
attr_reader :table_name, :key_field, :index_name, :entries, :index_options, :field_options, :where
|
|
170
174
|
|
|
171
|
-
def initialize(table_name:, key_field:, index_name:, entries:, index_options:, field_options:)
|
|
175
|
+
def initialize(table_name:, key_field:, index_name:, entries:, index_options:, field_options:, where:)
|
|
172
176
|
@table_name = table_name
|
|
173
177
|
@key_field = key_field
|
|
174
178
|
@index_name = index_name
|
|
175
179
|
@entries = entries
|
|
176
180
|
@index_options = index_options
|
|
177
181
|
@field_options = field_options
|
|
182
|
+
@where = where
|
|
178
183
|
end
|
|
179
184
|
end
|
|
180
185
|
Entry = Struct.new(:source, :expression, :tokenizer, :options, :query_key, keyword_init: true)
|
|
@@ -203,7 +208,8 @@ module ParadeDB
|
|
|
203
208
|
index_name: index_name,
|
|
204
209
|
entries: entries,
|
|
205
210
|
index_options: index_options,
|
|
206
|
-
field_options: field_options
|
|
211
|
+
field_options: field_options,
|
|
212
|
+
where: klass.where
|
|
207
213
|
)
|
|
208
214
|
end
|
|
209
215
|
|
|
@@ -223,10 +229,6 @@ module ParadeDB
|
|
|
223
229
|
raise InvalidIndexDefinition, "fields must be a Hash"
|
|
224
230
|
end
|
|
225
231
|
|
|
226
|
-
build_entries_from_structured_fields(raw_fields)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
def build_entries_from_structured_fields(raw_fields)
|
|
230
232
|
entries = []
|
|
231
233
|
field_options = {}
|
|
232
234
|
|
|
@@ -246,7 +248,16 @@ module ParadeDB
|
|
|
246
248
|
tokenizers = normalized[:tokenizers]
|
|
247
249
|
single_tokenizer_keys_present = TokenizerParser::TOKENIZER_SINGLE_KEYS.any? { |key| normalized.key?(key) }
|
|
248
250
|
|
|
249
|
-
|
|
251
|
+
is_alias = normalized[:alias] && normalized.length == 1
|
|
252
|
+
if is_alias
|
|
253
|
+
entries << Entry.new(
|
|
254
|
+
source: source_name,
|
|
255
|
+
expression: expression?(source_name),
|
|
256
|
+
tokenizer: nil,
|
|
257
|
+
options: {},
|
|
258
|
+
query_key: normalized[:alias]
|
|
259
|
+
)
|
|
260
|
+
elsif tokenizers
|
|
250
261
|
if single_tokenizer_keys_present
|
|
251
262
|
raise InvalidIndexDefinition,
|
|
252
263
|
"field #{source_name.inspect} cannot mix :tokenizers with :tokenizer/:args/:named_args/:filters/:stemmer/:alias"
|
|
@@ -21,7 +21,7 @@ module ParadeDB
|
|
|
21
21
|
remember_schema_index_reference(resolved)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, if_not_exists: false)
|
|
24
|
+
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, where: nil, if_not_exists: false)
|
|
25
25
|
ensure_postgresql_adapter!
|
|
26
26
|
anonymous = Class.new(ParadeDB::Index)
|
|
27
27
|
anonymous.table_name = table
|
|
@@ -29,6 +29,7 @@ module ParadeDB
|
|
|
29
29
|
anonymous.index_name = name unless name.nil?
|
|
30
30
|
anonymous.fields = fields
|
|
31
31
|
anonymous.index_options = index_options unless index_options.nil?
|
|
32
|
+
anonymous.where = where unless where.nil?
|
|
32
33
|
|
|
33
34
|
create_paradedb_index(anonymous, if_not_exists: if_not_exists)
|
|
34
35
|
end
|
|
@@ -80,11 +81,12 @@ module ParadeDB
|
|
|
80
81
|
prefix = if_not_exists ? "IF NOT EXISTS " : ""
|
|
81
82
|
fields_sql = compiled.entries.map { |entry| bm25_entry_sql(entry) }.join(", ")
|
|
82
83
|
with_options_sql = bm25_with_options_sql(compiled)
|
|
84
|
+
where_sql = compiled.where ? "\nWHERE #{compiled.where}" : ""
|
|
83
85
|
|
|
84
86
|
<<~SQL.strip.gsub(/\s+/, " ")
|
|
85
87
|
CREATE INDEX #{prefix}#{quote_table_name(compiled.index_name)} ON #{quote_table_name(compiled.table_name)}
|
|
86
88
|
USING bm25 (#{fields_sql})
|
|
87
|
-
WITH (#{with_options_sql})
|
|
89
|
+
WITH (#{with_options_sql})#{where_sql}
|
|
88
90
|
SQL
|
|
89
91
|
end
|
|
90
92
|
|
|
@@ -177,6 +179,11 @@ module ParadeDB
|
|
|
177
179
|
|
|
178
180
|
def bm25_entry_sql(entry)
|
|
179
181
|
source_sql = bm25_source_sql(entry)
|
|
182
|
+
|
|
183
|
+
if entry.tokenizer.nil? && entry.query_key != entry.source
|
|
184
|
+
return "(#{source_sql}::pdb.alias(#{quote(entry.query_key)}))"
|
|
185
|
+
end
|
|
186
|
+
|
|
180
187
|
return source_sql if entry.tokenizer.nil?
|
|
181
188
|
|
|
182
189
|
"(#{source_sql}::#{tokenizer_sql(entry.tokenizer, entry.options)})"
|
|
@@ -282,7 +289,8 @@ module ParadeDB
|
|
|
282
289
|
SELECT
|
|
283
290
|
c.relname AS index_name,
|
|
284
291
|
t.relname AS table_name,
|
|
285
|
-
pg_get_indexdef(c.oid) AS indexdef
|
|
292
|
+
pg_get_indexdef(c.oid) AS indexdef,
|
|
293
|
+
pg_get_expr(i.indpred, i.indrelid) AS where_clause
|
|
286
294
|
FROM pg_class c
|
|
287
295
|
JOIN pg_namespace n ON n.oid = c.relnamespace
|
|
288
296
|
JOIN pg_index i ON i.indexrelid = c.oid
|
|
@@ -306,6 +314,7 @@ module ParadeDB
|
|
|
306
314
|
key_field = extract_bm25_key_field(indexdef)
|
|
307
315
|
index_options = extract_bm25_index_options(indexdef)
|
|
308
316
|
fields_sql = extract_bm25_fields_sql(indexdef)
|
|
317
|
+
where = normalize_bm25_where_clause(row["where_clause"])
|
|
309
318
|
|
|
310
319
|
if key_field && fields_sql
|
|
311
320
|
field_sqls = split_bm25_top_level(fields_sql).map(&:strip)
|
|
@@ -334,6 +343,7 @@ module ParadeDB
|
|
|
334
343
|
unless index_options.empty?
|
|
335
344
|
statement += ", index_options: #{ruby_hash_literal(index_options)}"
|
|
336
345
|
end
|
|
346
|
+
statement += ", where: #{where.inspect}" if where
|
|
337
347
|
statement
|
|
338
348
|
else
|
|
339
349
|
"execute #{indexdef.inspect}"
|
|
@@ -351,10 +361,7 @@ module ParadeDB
|
|
|
351
361
|
end
|
|
352
362
|
|
|
353
363
|
def extract_bm25_index_options(indexdef)
|
|
354
|
-
|
|
355
|
-
return {} unless with_match
|
|
356
|
-
|
|
357
|
-
with_sql = with_match[1]
|
|
364
|
+
with_sql, = extract_bm25_with_components(indexdef)
|
|
358
365
|
options = {}
|
|
359
366
|
split_sql_arguments(with_sql).each do |argument|
|
|
360
367
|
key, value_sql = split_assignment(argument)
|
|
@@ -378,7 +385,6 @@ module ParadeDB
|
|
|
378
385
|
|
|
379
386
|
def extract_bm25_fields_sql(indexdef)
|
|
380
387
|
match = indexdef.match(/USING\s+bm25\s*\(/im)
|
|
381
|
-
return nil unless match
|
|
382
388
|
|
|
383
389
|
start = match.end(0)
|
|
384
390
|
depth = 1
|
|
@@ -390,11 +396,61 @@ module ParadeDB
|
|
|
390
396
|
end
|
|
391
397
|
pos += 1
|
|
392
398
|
end
|
|
393
|
-
|
|
399
|
+
raise "Found invalid index definition `#{indexdef}`" if depth != 0
|
|
394
400
|
|
|
395
401
|
indexdef[start..pos - 2]
|
|
396
402
|
end
|
|
397
403
|
|
|
404
|
+
def extract_bm25_with_components(indexdef)
|
|
405
|
+
match = indexdef.match(/WITH\s*\(/im)
|
|
406
|
+
start = match.end(0)
|
|
407
|
+
depth = 1
|
|
408
|
+
pos = start
|
|
409
|
+
while pos < indexdef.length && depth > 0
|
|
410
|
+
case indexdef[pos]
|
|
411
|
+
when "(" then depth += 1
|
|
412
|
+
when ")" then depth -= 1
|
|
413
|
+
end
|
|
414
|
+
pos += 1
|
|
415
|
+
end
|
|
416
|
+
raise "Found invalid index definition `#{indexdef}`" if depth != 0
|
|
417
|
+
|
|
418
|
+
with_sql = indexdef[start..pos - 2]
|
|
419
|
+
trailing_sql = indexdef[pos..]&.strip
|
|
420
|
+
trailing_sql = nil if trailing_sql&.empty?
|
|
421
|
+
|
|
422
|
+
[with_sql, trailing_sql]
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
def normalize_bm25_where_clause(where)
|
|
426
|
+
return nil if where.nil?
|
|
427
|
+
|
|
428
|
+
normalized = where.to_s.strip
|
|
429
|
+
return nil if normalized.empty?
|
|
430
|
+
|
|
431
|
+
while bm25_wrapped_in_parentheses?(normalized)
|
|
432
|
+
normalized = normalized[1...-1].strip
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
normalized.empty? ? nil : normalized
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
def bm25_wrapped_in_parentheses?(sql)
|
|
439
|
+
return false unless sql.start_with?("(") && sql.end_with?(")")
|
|
440
|
+
|
|
441
|
+
depth = 0
|
|
442
|
+
sql.each_char.with_index do |char, idx|
|
|
443
|
+
case char
|
|
444
|
+
when "(" then depth += 1
|
|
445
|
+
when ")"
|
|
446
|
+
depth -= 1
|
|
447
|
+
return false if depth.zero? && idx < sql.length - 1
|
|
448
|
+
end
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
depth.zero?
|
|
452
|
+
end
|
|
453
|
+
|
|
398
454
|
def split_bm25_top_level(str)
|
|
399
455
|
parts = []
|
|
400
456
|
current = +""
|
|
@@ -787,6 +843,42 @@ if defined?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
|
|
787
843
|
ActiveRecord::ConnectionAdapters::AbstractAdapter.include(ParadeDB::MigrationHelpers)
|
|
788
844
|
end
|
|
789
845
|
|
|
846
|
+
if defined?(ActiveRecord::Migration)
|
|
847
|
+
module ParadeDB
|
|
848
|
+
module MigrationDSL
|
|
849
|
+
def create_paradedb_index(index_klass, if_not_exists: false)
|
|
850
|
+
connection.create_paradedb_index(index_klass, if_not_exists: if_not_exists)
|
|
851
|
+
end
|
|
852
|
+
|
|
853
|
+
def replace_paradedb_index(index_klass)
|
|
854
|
+
connection.replace_paradedb_index(index_klass)
|
|
855
|
+
end
|
|
856
|
+
|
|
857
|
+
def add_bm25_index(table, fields:, key_field:, name: nil, index_options: nil, where: nil, if_not_exists: false)
|
|
858
|
+
connection.add_bm25_index(
|
|
859
|
+
table,
|
|
860
|
+
fields: fields,
|
|
861
|
+
key_field: key_field,
|
|
862
|
+
name: name,
|
|
863
|
+
index_options: index_options,
|
|
864
|
+
where: where,
|
|
865
|
+
if_not_exists: if_not_exists
|
|
866
|
+
)
|
|
867
|
+
end
|
|
868
|
+
|
|
869
|
+
def remove_bm25_index(table, name: nil, if_exists: false)
|
|
870
|
+
connection.remove_bm25_index(table, name: name, if_exists: if_exists)
|
|
871
|
+
end
|
|
872
|
+
|
|
873
|
+
def reindex_bm25(table, name: nil, concurrently: false)
|
|
874
|
+
connection.reindex_bm25(table, name: name, concurrently: concurrently)
|
|
875
|
+
end
|
|
876
|
+
end
|
|
877
|
+
end
|
|
878
|
+
|
|
879
|
+
ActiveRecord::Migration.include(ParadeDB::MigrationDSL)
|
|
880
|
+
end
|
|
881
|
+
|
|
790
882
|
if defined?(ActiveRecord::SchemaDumper)
|
|
791
883
|
module ParadeDB
|
|
792
884
|
module SchemaDumperPatch
|
|
@@ -829,3 +921,79 @@ if defined?(ActiveRecord::SchemaDumper)
|
|
|
829
921
|
|
|
830
922
|
ActiveRecord::SchemaDumper.prepend(ParadeDB::SchemaDumperPatch)
|
|
831
923
|
end
|
|
924
|
+
|
|
925
|
+
if defined?(ActiveRecord::Migration::CommandRecorder)
|
|
926
|
+
module ParadeDB
|
|
927
|
+
module CommandRecorderPatch
|
|
928
|
+
%i[
|
|
929
|
+
create_paradedb_index
|
|
930
|
+
add_bm25_index
|
|
931
|
+
remove_bm25_index
|
|
932
|
+
replace_paradedb_index
|
|
933
|
+
reindex_bm25
|
|
934
|
+
].each do |method_name|
|
|
935
|
+
define_method(method_name) do |*args, &block|
|
|
936
|
+
record(method_name, args, &block)
|
|
937
|
+
end
|
|
938
|
+
ruby2_keywords(method_name)
|
|
939
|
+
end
|
|
940
|
+
|
|
941
|
+
private
|
|
942
|
+
|
|
943
|
+
def invert_create_paradedb_index(args)
|
|
944
|
+
index_klass, = args
|
|
945
|
+
compiled = resolve_paradedb_index_klass(index_klass).compiled_definition
|
|
946
|
+
remove_options = Hash.ruby2_keywords_hash(name: compiled.index_name, if_exists: true)
|
|
947
|
+
|
|
948
|
+
[:remove_bm25_index, [compiled.table_name, remove_options]]
|
|
949
|
+
end
|
|
950
|
+
|
|
951
|
+
def invert_add_bm25_index(args)
|
|
952
|
+
table, options = args
|
|
953
|
+
options = symbolize_options_hash(options)
|
|
954
|
+
|
|
955
|
+
remove_options = { if_exists: true }
|
|
956
|
+
remove_options[:name] = options[:name] if options[:name]
|
|
957
|
+
remove_options = Hash.ruby2_keywords_hash(remove_options)
|
|
958
|
+
|
|
959
|
+
[:remove_bm25_index, [table, remove_options]]
|
|
960
|
+
end
|
|
961
|
+
|
|
962
|
+
def invert_remove_bm25_index(_args)
|
|
963
|
+
raise ActiveRecord::IrreversibleMigration,
|
|
964
|
+
"remove_bm25_index is not automatically reversible. Use #up/#down or #reversible."
|
|
965
|
+
end
|
|
966
|
+
|
|
967
|
+
def invert_replace_paradedb_index(_args)
|
|
968
|
+
raise ActiveRecord::IrreversibleMigration,
|
|
969
|
+
"replace_paradedb_index is not automatically reversible. Use #up/#down or #reversible."
|
|
970
|
+
end
|
|
971
|
+
|
|
972
|
+
def invert_reindex_bm25(_args)
|
|
973
|
+
raise ActiveRecord::IrreversibleMigration,
|
|
974
|
+
"reindex_bm25 is not automatically reversible. Use #up/#down or #reversible."
|
|
975
|
+
end
|
|
976
|
+
|
|
977
|
+
def resolve_paradedb_index_klass(index_klass)
|
|
978
|
+
case index_klass
|
|
979
|
+
when String
|
|
980
|
+
index_klass.to_s.split("::").inject(Object) { |ctx, const_name| ctx.const_get(const_name) }
|
|
981
|
+
else
|
|
982
|
+
index_klass
|
|
983
|
+
end
|
|
984
|
+
rescue NameError
|
|
985
|
+
raise ParadeDB::InvalidIndexDefinition, "Unknown index class #{index_klass.inspect}"
|
|
986
|
+
end
|
|
987
|
+
|
|
988
|
+
def symbolize_options_hash(options)
|
|
989
|
+
return {} unless options.is_a?(Hash)
|
|
990
|
+
|
|
991
|
+
options.each_with_object({}) do |(key, value), memo|
|
|
992
|
+
memo[key.to_sym] = value
|
|
993
|
+
end
|
|
994
|
+
end
|
|
995
|
+
end
|
|
996
|
+
end
|
|
997
|
+
|
|
998
|
+
ActiveRecord::Migration::CommandRecorder.prepend(ParadeDB::CommandRecorderPatch)
|
|
999
|
+
end
|
|
@@ -397,10 +397,6 @@ module ParadeDB
|
|
|
397
397
|
|
|
398
398
|
def facets_agg(exact: nil, **named_aggregations)
|
|
399
399
|
validate_exact_option!(exact)
|
|
400
|
-
if exact == false
|
|
401
|
-
raise ArgumentError, "facets_agg(exact: false) requires with_agg so aggregation runs as a window function"
|
|
402
|
-
end
|
|
403
|
-
|
|
404
400
|
agg_specs = normalize_named_aggregation_specs(named_aggregations)
|
|
405
401
|
build_aggregation_query(agg_specs, exact: exact).execute
|
|
406
402
|
end
|
data/lib/parade_db/version.rb
CHANGED