rails-paradedb 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/lib/parade_db/arel/builder.rb +1 -1
- data/lib/parade_db/index.rb +1 -1
- data/lib/parade_db/migration_helpers.rb +3 -3
- data/lib/parade_db/tokenizer.rb +74 -72
- data/lib/parade_db/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5ac93df076da5e2c2ae979b925233b7f11195c1f3a1695d92980c64941b307a4
|
|
4
|
+
data.tar.gz: 410859f0b81ea85841c558f85b5b5d7cedcb9612d7036bb08587e9f94f00b89b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 19bd553913503c9a588f43a0793ad735844de0104b0e45c926affaa3763f7b7e3c9417b04a1c3d494bb9938a7984b3cb3d7a300a6c5a0192802acb8428325f1b
|
|
7
|
+
data.tar.gz: d999e0166ef6cd962d658b8bedf527a70a367caa9a82070846d4f4285adb55dc45f6cfcef717e818ad37c59b0b6afcf8079be90d57bc92c3c91d48dc86765cc6
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.8.0] - 2026-06-15
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- **BREAKING**: The `Tokenizer` class is now namespaced as `ParadeDB::Tokenizer`. Update references from `Tokenizer.simple(...)` to `ParadeDB::Tokenizer.simple(...)`. Schema dumps (`schema.rb`) now emit the fully-qualified constant.
|
|
12
|
+
|
|
7
13
|
## [0.7.0] - 2026-04-21
|
|
8
14
|
|
|
9
15
|
### Changed
|
|
@@ -132,7 +138,8 @@ All notable changes to this project will be documented in this file. The format
|
|
|
132
138
|
- Schema dump/load round-trip for tokenizer configuration and index options
|
|
133
139
|
(including `target_segment_count`)
|
|
134
140
|
|
|
135
|
-
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.7.0...HEAD
|
|
141
|
+
[Unreleased]: https://github.com/paradedb/rails-paradedb/compare/v0.8.0...HEAD
|
|
142
|
+
[0.8.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.8.0
|
|
136
143
|
[0.7.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.7.0
|
|
137
144
|
[0.6.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.6.0
|
|
138
145
|
[0.5.0]: https://github.com/paradedb/rails-paradedb/releases/tag/v0.5.0
|
data/lib/parade_db/index.rb
CHANGED
|
@@ -53,7 +53,7 @@ module ParadeDB
|
|
|
53
53
|
|
|
54
54
|
class << self
|
|
55
55
|
def parse(source_name, tokenizer, context:)
|
|
56
|
-
unless tokenizer.is_a?(Tokenizer)
|
|
56
|
+
unless tokenizer.is_a?(ParadeDB::Tokenizer)
|
|
57
57
|
raise InvalidIndexDefinition, "#{context} for #{source_name.inspect} must be a Tokenizer"
|
|
58
58
|
end
|
|
59
59
|
|
|
@@ -695,13 +695,13 @@ module ParadeDB
|
|
|
695
695
|
end
|
|
696
696
|
|
|
697
697
|
def bm25_tokenizer_ruby(name, positional_args, options)
|
|
698
|
-
if name.match?(/\A[a-z_][a-z0-9_]*\z/) && Tokenizer.respond_to?(name)
|
|
698
|
+
if name.match?(/\A[a-z_][a-z0-9_]*\z/) && ParadeDB::Tokenizer.respond_to?(name)
|
|
699
699
|
args = positional_args.map { |arg| ruby_literal(arg) }
|
|
700
700
|
args << "options: #{ruby_hash_literal(options)}" unless options.empty?
|
|
701
|
-
return "Tokenizer.#{name}(#{args.join(', ')})"
|
|
701
|
+
return "ParadeDB::Tokenizer.#{name}(#{args.join(', ')})"
|
|
702
702
|
end
|
|
703
703
|
|
|
704
|
-
"Tokenizer.new(#{name.inspect}, #{ruby_literal(positional_args.empty? ? nil : positional_args)}, #{ruby_literal(options.empty? ? nil : options)})"
|
|
704
|
+
"ParadeDB::Tokenizer.new(#{name.inspect}, #{ruby_literal(positional_args.empty? ? nil : positional_args)}, #{ruby_literal(options.empty? ? nil : options)})"
|
|
705
705
|
end
|
|
706
706
|
|
|
707
707
|
def split_sql_arguments(args_sql)
|
data/lib/parade_db/tokenizer.rb
CHANGED
|
@@ -1,95 +1,97 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
module ParadeDB
|
|
2
|
+
class Tokenizer
|
|
3
|
+
attr_reader :name, :positional_args, :options
|
|
4
|
+
|
|
5
|
+
def initialize(name, positional_args, options)
|
|
6
|
+
@name = name
|
|
7
|
+
@positional_args = positional_args
|
|
8
|
+
@options = options
|
|
9
|
+
end
|
|
3
10
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
end
|
|
11
|
+
def render()
|
|
12
|
+
if options.nil? && positional_args.nil?
|
|
13
|
+
return "pdb.#{name}"
|
|
14
|
+
end
|
|
9
15
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
16
|
+
args = []
|
|
17
|
+
if !positional_args.nil?
|
|
18
|
+
args.concat(positional_args.map { |x| render_positional_arg(x) })
|
|
19
|
+
end
|
|
20
|
+
if !options.nil?
|
|
21
|
+
args.concat(options.map {|k, v| quote_term("#{k}=#{v}")})
|
|
22
|
+
end
|
|
14
23
|
|
|
15
|
-
|
|
16
|
-
if !positional_args.nil?
|
|
17
|
-
args.concat(positional_args.map { |x| render_positional_arg(x) })
|
|
18
|
-
end
|
|
19
|
-
if !options.nil?
|
|
20
|
-
args.concat(options.map {|k, v| quote_term("#{k}=#{v}")})
|
|
24
|
+
return "pdb.#{name}(#{args.join(",")})"
|
|
21
25
|
end
|
|
22
26
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def self.whitespace(options: nil)
|
|
27
|
-
new("whitespace", nil, options)
|
|
28
|
-
end
|
|
27
|
+
def self.whitespace(options: nil)
|
|
28
|
+
new("whitespace", nil, options)
|
|
29
|
+
end
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
def self.unicode_words(options: nil)
|
|
32
|
+
new("unicode_words", nil, options)
|
|
33
|
+
end
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
def self.ngram(min_gram, max_gram, options: nil)
|
|
36
|
+
new("ngram", [min_gram, max_gram], options)
|
|
37
|
+
end
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
def self.simple(options: nil)
|
|
40
|
+
new("simple", nil, options)
|
|
41
|
+
end
|
|
41
42
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
def self.literal(options: nil)
|
|
44
|
+
new("literal", nil, options)
|
|
45
|
+
end
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
def self.literal_normalized(options: nil)
|
|
48
|
+
new("literal_normalized", nil, options)
|
|
49
|
+
end
|
|
49
50
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
def self.edge_ngram(min_gram, max_gram, options: nil)
|
|
52
|
+
new("edge_ngram", [min_gram, max_gram], options)
|
|
53
|
+
end
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
def self.regex_pattern(pattern, options: nil)
|
|
56
|
+
new("regex_pattern", [pattern], options)
|
|
57
|
+
end
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
def self.chinese_compatible(options: nil)
|
|
60
|
+
new("chinese_compatible", nil, options)
|
|
61
|
+
end
|
|
61
62
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
63
|
+
def self.lindera(dictionary, options: nil)
|
|
64
|
+
new("lindera", [dictionary], options)
|
|
65
|
+
end
|
|
65
66
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
def self.icu(options: nil)
|
|
68
|
+
new("icu", nil, options)
|
|
69
|
+
end
|
|
69
70
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
def self.jieba(options: nil)
|
|
72
|
+
new("jieba", nil, options)
|
|
73
|
+
end
|
|
73
74
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
def self.source_code(options: nil)
|
|
76
|
+
new("source_code", nil, options)
|
|
77
|
+
end
|
|
77
78
|
|
|
78
|
-
|
|
79
|
+
private
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
def quote_term(value)
|
|
82
|
+
escaped = value.gsub("'", "''")
|
|
83
|
+
"'#{escaped}'"
|
|
84
|
+
end
|
|
84
85
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
86
|
+
def render_positional_arg(value)
|
|
87
|
+
case value
|
|
88
|
+
when true, false, Numeric
|
|
89
|
+
value.to_s
|
|
90
|
+
when String
|
|
91
|
+
quote_term(value)
|
|
92
|
+
else
|
|
93
|
+
raise ArgumentError, "Unsupported tokenizer arg type: #{value.class}"
|
|
94
|
+
end
|
|
93
95
|
end
|
|
94
96
|
end
|
|
95
97
|
end
|
data/lib/parade_db/version.rb
CHANGED