search-engine-for-typesense 30.1.6.16 → 30.1.6.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/search_engine/search_engine/index_partition_job.rb +3 -1
- data/lib/search_engine/cascade.rb +50 -10
- data/lib/search_engine/cli/doctor.rb +4 -1
- data/lib/search_engine/dsl/parser.rb +17 -3
- data/lib/search_engine/indexer/retry_policy.rb +5 -1
- data/lib/search_engine/relation/dsl/filters.rb +33 -2
- data/lib/search_engine/schema.rb +7 -2
- data/lib/search_engine/sources/lambda_source.rb +38 -11
- data/lib/search_engine/sources.rb +6 -1
- data/lib/search_engine/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1bd17994c46590322f8d32265bf5f1a5ae91be3b911a120c48730ba6339fb9e3
|
|
4
|
+
data.tar.gz: e7d9785f65c500ddffaef1898b8620c13142ade4f737d6b93f7f1b88a31ee582
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 744c09d6abd7bce828543b0cd11bb354d94cdff2c17a89d690efd25948993901341c0177890e07256e0b1a9e376925f6ed790c1b5ad64f629e68a99dca9b6ad9
|
|
7
|
+
data.tar.gz: aee5a996ae1f5075533786e58347e0771737ecc8278cd25b380df7b8c21b6ea917e052a9ad34216eab7f6a2b8a80c90e6701241843eb9698572f1adc9fd201c1
|
|
@@ -33,6 +33,7 @@ module SearchEngine
|
|
|
33
33
|
# @param metadata [Hash]
|
|
34
34
|
# @return [void]
|
|
35
35
|
def perform(collection_class_name, partition, into: nil, metadata: {})
|
|
36
|
+
payload = nil
|
|
36
37
|
klass = constantize_collection!(collection_class_name)
|
|
37
38
|
payload = base_payload(klass, partition: partition, into: into)
|
|
38
39
|
instrument('search_engine.dispatcher.job_started',
|
|
@@ -54,7 +55,8 @@ module SearchEngine
|
|
|
54
55
|
)
|
|
55
56
|
nil
|
|
56
57
|
rescue StandardError => error
|
|
57
|
-
|
|
58
|
+
safe_payload = payload || error_payload(error)
|
|
59
|
+
instrument_error(error, payload: safe_payload.merge(metadata: metadata || {}))
|
|
58
60
|
raise
|
|
59
61
|
end
|
|
60
62
|
|
|
@@ -18,7 +18,7 @@ module SearchEngine
|
|
|
18
18
|
# @param context [Symbol] :update or :full
|
|
19
19
|
# @param client [SearchEngine::Client, nil]
|
|
20
20
|
# @return [Hash]
|
|
21
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
|
|
21
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting, Metrics/PerceivedComplexity, Metrics/BlockLength
|
|
22
22
|
def cascade_reindex!(source:, ids:, context: :update, client: nil)
|
|
23
23
|
raise ArgumentError, 'context must be :update or :full' unless %i[update full].include?(context.to_sym)
|
|
24
24
|
|
|
@@ -28,6 +28,7 @@ module SearchEngine
|
|
|
28
28
|
|
|
29
29
|
graph = build_reverse_graph(client: ts_client)
|
|
30
30
|
referencers = Array(graph[src_collection])
|
|
31
|
+
target_collections = referencers.map { |edge| edge[:referrer] }.uniq
|
|
31
32
|
|
|
32
33
|
# Detect immediate cycles (A <-> B) and skip those pairs
|
|
33
34
|
cycle_pairs = detect_immediate_cycles(graph)
|
|
@@ -43,7 +44,7 @@ module SearchEngine
|
|
|
43
44
|
alias_cache: alias_cache
|
|
44
45
|
)
|
|
45
46
|
|
|
46
|
-
|
|
47
|
+
outcomes_by_collection = {}
|
|
47
48
|
partial_count = 0
|
|
48
49
|
full_count = 0
|
|
49
50
|
skipped_unregistered = 0
|
|
@@ -57,18 +58,19 @@ module SearchEngine
|
|
|
57
58
|
# Skip cycle pairs deterministically (avoid ping-pong)
|
|
58
59
|
if cycle_pairs.include?([src_collection, referrer_coll])
|
|
59
60
|
skipped_cycles << { pair: [src_collection, referrer_coll] }
|
|
60
|
-
|
|
61
|
+
merge_cascade_outcome!(outcomes_by_collection, { collection: referrer_coll, mode: :skipped_cycle })
|
|
61
62
|
next
|
|
62
63
|
end
|
|
63
64
|
|
|
64
65
|
ref_klass = safe_collection_class(referrer_coll)
|
|
65
66
|
unless ref_klass
|
|
66
67
|
skipped_unregistered += 1
|
|
67
|
-
|
|
68
|
+
merge_cascade_outcome!(outcomes_by_collection, { collection: referrer_coll, mode: :skipped_unregistered })
|
|
68
69
|
next
|
|
69
70
|
end
|
|
70
71
|
|
|
71
72
|
mode = :full
|
|
73
|
+
partial_failure = nil
|
|
72
74
|
if context.to_sym == :update && can_partial_reindex?(ref_klass)
|
|
73
75
|
begin
|
|
74
76
|
SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: { local_key.to_sym => Array(ids) },
|
|
@@ -90,9 +92,8 @@ into: nil
|
|
|
90
92
|
mode = :skipped_no_partitions
|
|
91
93
|
end
|
|
92
94
|
end
|
|
93
|
-
#
|
|
94
|
-
|
|
95
|
-
message: error.message.to_s[0, 200] }
|
|
95
|
+
# Preserve the partial failure details on the final single outcome row.
|
|
96
|
+
partial_failure = { error_class: error.class.name, message: error.message.to_s[0, 200] }
|
|
96
97
|
end
|
|
97
98
|
elsif seen_full[referrer_coll]
|
|
98
99
|
mode = :skipped_duplicate
|
|
@@ -107,14 +108,17 @@ into: nil
|
|
|
107
108
|
end
|
|
108
109
|
end
|
|
109
110
|
|
|
110
|
-
|
|
111
|
+
outcome = { collection: referrer_coll, mode: mode }
|
|
112
|
+
outcome.merge!(partial_failure) if partial_failure
|
|
113
|
+
merge_cascade_outcome!(outcomes_by_collection, outcome)
|
|
111
114
|
end
|
|
115
|
+
outcomes = outcomes_by_collection.values
|
|
112
116
|
|
|
113
117
|
payload = {
|
|
114
118
|
source_collection: src_collection,
|
|
115
119
|
ids_count: Array(ids).size,
|
|
116
120
|
context: context.to_sym,
|
|
117
|
-
targets_total:
|
|
121
|
+
targets_total: target_collections.size,
|
|
118
122
|
partial_count: partial_count,
|
|
119
123
|
full_count: full_count,
|
|
120
124
|
skipped_unregistered: skipped_unregistered,
|
|
@@ -124,7 +128,7 @@ into: nil
|
|
|
124
128
|
|
|
125
129
|
payload.merge(outcomes: outcomes)
|
|
126
130
|
end
|
|
127
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
|
|
131
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting, Metrics/PerceivedComplexity, Metrics/BlockLength
|
|
128
132
|
|
|
129
133
|
# Build a reverse graph from Typesense live schemas when possible, falling
|
|
130
134
|
# back to compiled local schemas for registered models.
|
|
@@ -469,6 +473,42 @@ foreign_key: fk }
|
|
|
469
473
|
pairs.uniq
|
|
470
474
|
end
|
|
471
475
|
|
|
476
|
+
# Fold +outcome+ into +outcomes_by_collection+ so that each collection ends
# up with exactly one outcome row.
#
# The first outcome seen for a collection is stored as-is. When a row already
# exists, the row whose mode has the strictly higher
# +cascade_outcome_priority+ wins (ties keep the existing row), and error
# details recorded on either row are carried over to the winner.
#
# @param outcomes_by_collection [Hash] accumulator keyed by collection name; mutated in place
# @param outcome [Hash] row with +:collection+ and +:mode+, optionally +:error_class+ and +:message+
# @return [Hash, nil] the merged row, or nil when +outcome+ was the first row for its collection
def merge_cascade_outcome!(outcomes_by_collection, outcome)
  key = outcome[:collection]
  existing = outcomes_by_collection[key]
  if existing.nil?
    outcomes_by_collection[key] = outcome
    return
  end

  outranks = cascade_outcome_priority(outcome[:mode]) > cascade_outcome_priority(existing[:mode])
  # Work on a copy so neither input row is mutated by the merge.
  winner = (outranks ? outcome : existing).dup

  # Take error_class and message from the same row (the earlier one wins) so
  # the merged row never pairs one failure's class with another's message.
  failed = [existing, outcome].find { |row| row[:error_class] }
  if failed
    winner[:error_class] = failed[:error_class]
    winner[:message] = failed[:message]
  end

  outcomes_by_collection[key] = winner
end
|
|
499
|
+
|
|
500
|
+
# Rank a cascade outcome mode; when several outcomes land on the same
# collection the one with the higher rank is kept.
#
# @param mode [Symbol, String, nil] outcome mode, e.g. :full or :partial
# @return [Integer] 60 for :full down to 10 for :skipped_unregistered, and 0
#   for unknown modes or values that cannot be converted to a Symbol
def cascade_outcome_priority(mode)
  # nil (and anything else without #to_sym) ranks like an unknown mode
  # instead of raising NoMethodError.
  return 0 unless mode.respond_to?(:to_sym)

  case mode.to_sym
  when :full then 60
  when :partial then 50
  when :skipped_no_partitions then 40
  when :skipped_duplicate then 30
  when :skipped_cycle then 20
  when :skipped_unregistered then 10
  else 0
  end
end
|
|
511
|
+
|
|
472
512
|
# Look up the model for a logical collection name by delegating to
# SearchEngine::CollectionResolver.model_for_logical.
#
# @param name [Object] logical collection name, passed through unchanged
# @return [Object] whatever the resolver returns for +name+
def safe_collection_class(name)
  resolver = SearchEngine::CollectionResolver
  resolver.model_for_logical(name)
end
|
|
@@ -166,7 +166,10 @@ module SearchEngine
|
|
|
166
166
|
started = monotonic_ms
|
|
167
167
|
client = client_with_overrides
|
|
168
168
|
health = client.health
|
|
169
|
-
|
|
169
|
+
ok_value = if health.is_a?(Hash)
|
|
170
|
+
health.key?(:ok) ? health[:ok] : health['ok']
|
|
171
|
+
end
|
|
172
|
+
ok = (ok_value == true)
|
|
170
173
|
details = { response: redacted_value(health) }
|
|
171
174
|
hint = ok ? nil : 'Verify host/port/protocol and network reachability to Typesense.'
|
|
172
175
|
|
|
@@ -66,7 +66,9 @@ module SearchEngine
|
|
|
66
66
|
# @param joins [Array<Symbol>, nil]
|
|
67
67
|
# @return [Array<SearchEngine::AST::Node>]
|
|
68
68
|
def parse_list(list, klass:, joins: nil)
|
|
69
|
-
|
|
69
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
70
|
+
# parse_list(['brand_id IN ?', [1, 2]]) should keep [1, 2] as one bind arg.
|
|
71
|
+
items = Array(list).compact
|
|
70
72
|
return [] if items.empty?
|
|
71
73
|
|
|
72
74
|
nodes = []
|
|
@@ -92,7 +94,7 @@ module SearchEngine
|
|
|
92
94
|
nodes << parse_raw(entry.to_s)
|
|
93
95
|
i += 1
|
|
94
96
|
when Array
|
|
95
|
-
nodes
|
|
97
|
+
nodes.concat(Array(parse_array_entry(entry, klass: klass, joins: joins)))
|
|
96
98
|
i += 1
|
|
97
99
|
else
|
|
98
100
|
raise ArgumentError, "Parser: unsupported where argument #{entry.class}"
|
|
@@ -104,7 +106,19 @@ module SearchEngine
|
|
|
104
106
|
# --- Internals -------------------------------------------------------
|
|
105
107
|
|
|
106
108
|
def parse_array_entry(entry, klass:, joins: nil)
|
|
107
|
-
|
|
109
|
+
raise ArgumentError, 'Unsupported argument type: [] (Array)' if entry.empty?
|
|
110
|
+
|
|
111
|
+
first = entry.first
|
|
112
|
+
unless first.is_a?(String)
|
|
113
|
+
# Support wrapped nested where-lists while preserving template bind arrays:
|
|
114
|
+
# where([['a > ?', 1], ['b < ?', 2]]) should parse as two predicates.
|
|
115
|
+
if first.is_a?(Array) || first.is_a?(Hash) || first.is_a?(Symbol)
|
|
116
|
+
return parse_list(entry, klass: klass, joins: joins)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
return parse_raw(entry.to_s)
|
|
120
|
+
end
|
|
121
|
+
|
|
108
122
|
return parse_list(entry, klass: klass, joins: joins) unless placeholders?(entry.first)
|
|
109
123
|
|
|
110
124
|
template = entry.first
|
|
@@ -103,8 +103,12 @@ module SearchEngine
|
|
|
103
103
|
rng = (Thread.current[:__se_retry_rng__] ||= Random.new)
|
|
104
104
|
min = range.begin.to_f
|
|
105
105
|
max = range.end.to_f
|
|
106
|
-
#
|
|
106
|
+
# Honor exclusive-end ranges by shifting the upper bound down one
|
|
107
|
+
# representable float step. Inclusive ranges preserve the original end.
|
|
108
|
+
max = max.prev_float if range.exclude_end?
|
|
107
109
|
span = max - min
|
|
110
|
+
return min if span <= 0.0
|
|
111
|
+
|
|
108
112
|
min + (rng.rand * span)
|
|
109
113
|
end
|
|
110
114
|
end
|
|
@@ -17,6 +17,7 @@ module SearchEngine
|
|
|
17
17
|
# @param args [Array<Object>]
|
|
18
18
|
# @return [SearchEngine::Relation]
|
|
19
19
|
def not(*args)
|
|
20
|
+
@relation.send(:validate_not_args!, args)
|
|
20
21
|
nodes = Array(@relation.send(:build_ast_with_empty_array_rewrites, args, negated: true))
|
|
21
22
|
|
|
22
23
|
# Invert non-hidden predicates (Eq, In) returned by the builder
|
|
@@ -44,6 +45,7 @@ module SearchEngine
|
|
|
44
45
|
# @return [SearchEngine::Relation, WhereChain]
|
|
45
46
|
def where(*args)
|
|
46
47
|
return self if args.nil? || args.empty?
|
|
48
|
+
return self if blank_where_args?(args)
|
|
47
49
|
|
|
48
50
|
ast_nodes = build_ast_with_empty_array_rewrites(args, negated: false)
|
|
49
51
|
fragments = normalize_where(args)
|
|
@@ -58,6 +60,7 @@ module SearchEngine
|
|
|
58
60
|
# @param args [Array<Object>]
|
|
59
61
|
# @return [SearchEngine::Relation]
|
|
60
62
|
def not(*args)
|
|
63
|
+
validate_not_args!(args)
|
|
61
64
|
nodes = Array(build_ast_with_empty_array_rewrites(args, negated: true))
|
|
62
65
|
|
|
63
66
|
negated = nodes.map do |node|
|
|
@@ -122,6 +125,30 @@ module SearchEngine
|
|
|
122
125
|
|
|
123
126
|
private
|
|
124
127
|
|
|
128
|
+
# Tell whether +args+ consists of exactly one argument and that argument is
# blank according to +blank_where_arg?+.
#
# @param args [Array<Object>]
# @return [Boolean] false unless +args+ has exactly one element
def blank_where_args?(args)
  args.length == 1 && blank_where_arg?(args.first)
end
|
|
133
|
+
|
|
134
|
+
# Tell whether a single where-argument is blank: nil, anything that reports
# itself as empty, or a String containing only whitespace.
#
# @param arg [Object]
# @return [Boolean]
def blank_where_arg?(arg)
  if arg.nil? || (arg.respond_to?(:empty?) && arg.empty?)
    true
  else
    arg.is_a?(String) && arg.strip.empty?
  end
end
|
|
140
|
+
|
|
141
|
+
# Reject argument lists that cannot be negated: no arguments at all, or a
# single argument that is nil or an empty Array.
#
# @param args [Array<Object>, nil] arguments passed to +not+
# @return [nil] when the arguments are acceptable
# @raise [ArgumentError] when +args+ is nil/empty, or is exactly one nil or empty Array
def validate_not_args!(args)
  raise ArgumentError, 'wrong number of arguments (given 0, expected 1+)' if args.nil? || args.empty?

  # Only a lone argument is checked here; longer lists are left to the parser.
  return unless args.length == 1

  value = args.first
  raise ArgumentError, 'Unsupported argument type: (NilClass)' if value.nil?
  # Name the actual offending value instead of mislabelling an empty Array as NilClass.
  raise ArgumentError, 'Unsupported argument type: [] (Array)' if value.is_a?(Array) && value.empty?

  nil
end
|
|
151
|
+
|
|
125
152
|
def merge_relation(other, assoc: nil)
|
|
126
153
|
return self if other.nil?
|
|
127
154
|
|
|
@@ -227,7 +254,9 @@ module SearchEngine
|
|
|
227
254
|
# - nil predicates to hidden *_blank flags when `optional` is enabled (new behavior)
|
|
228
255
|
# Delegates other inputs to the DSL parser.
|
|
229
256
|
def build_ast_with_empty_array_rewrites(args, negated: false)
|
|
230
|
-
|
|
257
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
258
|
+
# where('field IN ?', [1, 2]) => ['field IN ?', [1, 2]]
|
|
259
|
+
items = Array(args).compact
|
|
231
260
|
return [] if items.empty?
|
|
232
261
|
|
|
233
262
|
out_nodes = []
|
|
@@ -560,7 +589,9 @@ module SearchEngine
|
|
|
560
589
|
|
|
561
590
|
# Normalize where arguments into an array of string fragments safe for Typesense.
|
|
562
591
|
def normalize_where(args)
|
|
563
|
-
|
|
592
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
593
|
+
# where('field IN ?', [1, 2]) => ['field IN ?', [1, 2]]
|
|
594
|
+
list = Array(args).compact
|
|
564
595
|
return [] if list.empty?
|
|
565
596
|
|
|
566
597
|
fragments = []
|
data/lib/search_engine/schema.rb
CHANGED
|
@@ -436,8 +436,13 @@ module SearchEngine
|
|
|
436
436
|
existing = list_physicals_starting_with(prefix, client: client)
|
|
437
437
|
used_sequences = existing.map { |name| name.split('_').last.to_i }
|
|
438
438
|
|
|
439
|
-
seq = 1
|
|
440
|
-
|
|
439
|
+
seq = (1..999).find { |candidate| !used_sequences.include?(candidate) }
|
|
440
|
+
unless seq
|
|
441
|
+
raise SearchEngine::Errors::InvalidParams,
|
|
442
|
+
"Schema physical name sequence space exhausted for prefix '#{prefix}' (001..999 all occupied). " \
|
|
443
|
+
'Wait one second and retry.'
|
|
444
|
+
end
|
|
445
|
+
|
|
441
446
|
format('%<prefix>s%<seq>03d', prefix: prefix, seq: seq)
|
|
442
447
|
end
|
|
443
448
|
|
|
@@ -4,8 +4,13 @@ module SearchEngine
|
|
|
4
4
|
module Sources
|
|
5
5
|
# Adapter that delegates batch enumeration to a provided callable.
|
|
6
6
|
#
|
|
7
|
-
# The callable is expected to implement `call(cursor:, partition:)` and
|
|
8
|
-
#
|
|
7
|
+
# The callable is expected to implement `call(cursor:, partition:)` and use one mode:
|
|
8
|
+
# - return mode: return an Enumerable of batches
|
|
9
|
+
# - yield mode: yield batches via the provided block argument
|
|
10
|
+
#
|
|
11
|
+
# Returning batch-like data (Array/Enumerator) while also yielding is treated as
|
|
12
|
+
# an ambiguous mixed mode and raises an error.
|
|
13
|
+
# Shapes are application-defined.
|
|
9
14
|
#
|
|
10
15
|
# @example
|
|
11
16
|
# src = SearchEngine::Sources::LambdaSource.new(->(cursor:, partition:) { [[row1, row2]] })
|
|
@@ -32,16 +37,32 @@ module SearchEngine
|
|
|
32
37
|
return enum_for(:each_batch, partition: partition, cursor: cursor) unless block_given?
|
|
33
38
|
|
|
34
39
|
started = monotonic_ms
|
|
40
|
+
consume_rows = lambda do |rows|
|
|
41
|
+
duration = monotonic_ms - started
|
|
42
|
+
instrument_batch_fetched(source: 'lambda', batch_index: nil, rows_count: Array(rows).size,
|
|
43
|
+
duration_ms: duration, partition: partition, cursor: cursor,
|
|
44
|
+
adapter_options: { callable: @callable.class.name }
|
|
45
|
+
)
|
|
46
|
+
yield(rows)
|
|
47
|
+
started = monotonic_ms
|
|
48
|
+
nil
|
|
49
|
+
end
|
|
50
|
+
|
|
35
51
|
begin
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
52
|
+
yielded = false
|
|
53
|
+
returned = @callable.call(cursor: cursor, partition: partition) do |rows|
|
|
54
|
+
yielded = true
|
|
55
|
+
consume_rows.call(rows)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
if yielded
|
|
59
|
+
if mixed_mode_batch_return?(returned)
|
|
60
|
+
raise SearchEngine::Errors::InvalidParams,
|
|
61
|
+
'lambda source callable must either yield batches or return an Enumerable of batches, not both'
|
|
62
|
+
end
|
|
63
|
+
else
|
|
64
|
+
to_iterate = returned.respond_to?(:each) ? returned : Array(returned)
|
|
65
|
+
to_iterate.each { |rows| consume_rows.call(rows) }
|
|
45
66
|
end
|
|
46
67
|
rescue StandardError => error
|
|
47
68
|
instrument_error(source: 'lambda', error: error, partition: partition, cursor: cursor,
|
|
@@ -50,6 +71,12 @@ module SearchEngine
|
|
|
50
71
|
raise
|
|
51
72
|
end
|
|
52
73
|
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Tell whether the callable's return value is an Array or an Enumerator.
#
# @param returned [Object] value returned by the callable
# @return [Boolean]
def mixed_mode_batch_return?(returned)
  case returned
  when Array, Enumerator then true
  else false
  end
end
|
|
53
80
|
end
|
|
54
81
|
end
|
|
55
82
|
end
|
|
@@ -12,6 +12,11 @@ module SearchEngine
|
|
|
12
12
|
# Enumerator.new { |y| external_api.each_page(cursor) { |rows| y << rows } }
|
|
13
13
|
# end
|
|
14
14
|
#
|
|
15
|
+
# Lambda source contract:
|
|
16
|
+
# - return mode: return an Enumerable of batches
|
|
17
|
+
# - yield mode: call the provided block with each batch
|
|
18
|
+
# Returning batch-like values while also yielding is treated as mixed mode and raises.
|
|
19
|
+
#
|
|
15
20
|
# All adapters implement `each_batch(partition:, cursor:)` and return an Enumerator
|
|
16
21
|
# when no block is provided.
|
|
17
22
|
module Sources
|
|
@@ -19,7 +24,7 @@ module SearchEngine
|
|
|
19
24
|
#
|
|
20
25
|
# @param type [Symbol] :active_record, :sql, or :lambda
|
|
21
26
|
# @param options [Hash] adapter-specific options
|
|
22
|
-
# @yield for :lambda sources,
|
|
27
|
+
# @yield for :lambda sources, block taking (cursor:, partition:) and either yielding or returning batches
|
|
23
28
|
# @return [Object] adapter responding to `each_batch(partition:, cursor:)`
|
|
24
29
|
def self.build(type, **options, &block)
|
|
25
30
|
case type.to_sym
|