search-engine-for-typesense 30.1.6.16 → 30.1.6.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/search_engine/search_engine/index_partition_job.rb +3 -1
- data/lib/search_engine/base/index_maintenance/cleanup.rb +8 -5
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +46 -30
- data/lib/search_engine/base/index_maintenance/schema.rb +12 -6
- data/lib/search_engine/base/index_maintenance.rb +31 -15
- data/lib/search_engine/bulk.rb +17 -10
- data/lib/search_engine/cascade.rb +67 -17
- data/lib/search_engine/cli/doctor.rb +4 -1
- data/lib/search_engine/dsl/parser.rb +17 -3
- data/lib/search_engine/indexer/bulk_import.rb +8 -3
- data/lib/search_engine/indexer/retry_policy.rb +5 -1
- data/lib/search_engine/indexer.rb +1 -1
- data/lib/search_engine/logging/output.rb +35 -0
- data/lib/search_engine/relation/dsl/filters.rb +33 -2
- data/lib/search_engine/schema.rb +7 -2
- data/lib/search_engine/sources/lambda_source.rb +38 -11
- data/lib/search_engine/sources.rb +6 -1
- data/lib/search_engine/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 01b787168b6ef3694f27552da317fa390eaee171bf0734d94725b4527a883968
|
|
4
|
+
data.tar.gz: 28b30b42639215b992ce6b152bd9c076dc5dc221f6c9a18be5f2355aa7c63865
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f5229d42405c66d0811102f31654631d8654bd52bdd55605f12f6baf446c2d61428d95a3f6a0c56a05b6dc4b404a68d3772ee13d4a1ca6a1739efd3de2f93390
|
|
7
|
+
data.tar.gz: faf42770cec8639077c6efae5e0f1599d1d9eabba0b3fdab23149a5a334ba3a3f50cc1156940bbb8b373f95b695f3e70457370687a774e39c0d2934b9f4cc07a
|
|
@@ -33,6 +33,7 @@ module SearchEngine
|
|
|
33
33
|
# @param metadata [Hash]
|
|
34
34
|
# @return [void]
|
|
35
35
|
def perform(collection_class_name, partition, into: nil, metadata: {})
|
|
36
|
+
payload = nil
|
|
36
37
|
klass = constantize_collection!(collection_class_name)
|
|
37
38
|
payload = base_payload(klass, partition: partition, into: into)
|
|
38
39
|
instrument('search_engine.dispatcher.job_started',
|
|
@@ -54,7 +55,8 @@ module SearchEngine
|
|
|
54
55
|
)
|
|
55
56
|
nil
|
|
56
57
|
rescue StandardError => error
|
|
57
|
-
|
|
58
|
+
safe_payload = payload || error_payload(error)
|
|
59
|
+
instrument_error(error, payload: safe_payload.merge(metadata: metadata || {}))
|
|
58
60
|
raise
|
|
59
61
|
end
|
|
60
62
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'active_support/concern'
|
|
4
|
+
require 'search_engine/logging/output'
|
|
4
5
|
|
|
5
6
|
module SearchEngine
|
|
6
7
|
class Base
|
|
@@ -44,13 +45,15 @@ module SearchEngine
|
|
|
44
45
|
# @return [Integer] number of deleted documents
|
|
45
46
|
def cleanup(into: nil, partition: nil, clear_cache: false)
|
|
46
47
|
logical = respond_to?(:collection) ? collection.to_s : name.to_s
|
|
47
|
-
puts
|
|
48
|
-
|
|
48
|
+
SearchEngine::Logging::Output.puts
|
|
49
|
+
SearchEngine::Logging::Output.puts(
|
|
50
|
+
SearchEngine::Logging::Color.header(%(>>>>>> Cleanup Collection "#{logical}"))
|
|
51
|
+
)
|
|
49
52
|
|
|
50
53
|
filters = SearchEngine::StaleRules.compile_filters(self, partition: partition)
|
|
51
54
|
filters.compact!
|
|
52
55
|
filters.reject! { |f| f.to_s.strip.empty? }
|
|
53
|
-
step = SearchEngine::Logging::StepLine.new('Cleanup')
|
|
56
|
+
step = SearchEngine::Logging::StepLine.new('Cleanup', io: SearchEngine::Logging::Output.io)
|
|
54
57
|
if filters.empty?
|
|
55
58
|
step.skip('no stale configuration')
|
|
56
59
|
return 0
|
|
@@ -76,14 +79,14 @@ module SearchEngine
|
|
|
76
79
|
step&.close
|
|
77
80
|
if clear_cache
|
|
78
81
|
begin
|
|
79
|
-
puts("Cleanup — #{SearchEngine::Logging::Color.bold('cache clear')}")
|
|
82
|
+
SearchEngine::Logging::Output.puts("Cleanup — #{SearchEngine::Logging::Color.bold('cache clear')}")
|
|
80
83
|
SearchEngine::Cache.clear
|
|
81
84
|
rescue StandardError => error
|
|
82
85
|
err_msg = "Cleanup — cache clear error=#{error.class}: #{error.message.to_s[0, 200]}"
|
|
83
86
|
warn(SearchEngine::Logging::Color.apply(err_msg, :red))
|
|
84
87
|
end
|
|
85
88
|
end
|
|
86
|
-
puts(SearchEngine::Logging::Color.header(%(>>>>>> Cleanup Done)))
|
|
89
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::Color.header(%(>>>>>> Cleanup Done)))
|
|
87
90
|
end
|
|
88
91
|
|
|
89
92
|
private
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'search_engine/logging/output'
|
|
4
|
+
|
|
3
5
|
module SearchEngine
|
|
4
6
|
class Base
|
|
5
7
|
module IndexMaintenance
|
|
@@ -16,8 +18,10 @@ module SearchEngine
|
|
|
16
18
|
# @return [Hash, nil] result hash with :status, :docs_total, :success_total, :failed_total, :sample_error
|
|
17
19
|
def index_collection(partition: nil, client: nil, pre: nil, force_rebuild: false)
|
|
18
20
|
logical = respond_to?(:collection) ? collection.to_s : name.to_s
|
|
19
|
-
puts
|
|
20
|
-
|
|
21
|
+
SearchEngine::Logging::Output.puts
|
|
22
|
+
SearchEngine::Logging::Output.puts(
|
|
23
|
+
SearchEngine::Logging::Color.header(%(>>>>>> Indexing Collection "#{logical}"))
|
|
24
|
+
)
|
|
21
25
|
client_obj = client || SearchEngine.client
|
|
22
26
|
|
|
23
27
|
result = if partition.nil?
|
|
@@ -56,7 +60,7 @@ module SearchEngine
|
|
|
56
60
|
|
|
57
61
|
diff = SearchEngine::Schema.diff(self, client: client)[:diff] || {}
|
|
58
62
|
missing = __se_schema_missing?(diff)
|
|
59
|
-
step = SearchEngine::Logging::StepLine.new('Presence')
|
|
63
|
+
step = SearchEngine::Logging::StepLine.new('Presence', io: SearchEngine::Logging::Output.io)
|
|
60
64
|
missing ? step.finish_warn('missing') : step.finish('present')
|
|
61
65
|
|
|
62
66
|
applied, indexed_inside_apply = __se_full_apply_if_missing(client, missing)
|
|
@@ -76,7 +80,7 @@ module SearchEngine
|
|
|
76
80
|
def __se_full_apply_if_missing(client, missing)
|
|
77
81
|
applied = false
|
|
78
82
|
indexed_inside_apply = false
|
|
79
|
-
step = SearchEngine::Logging::StepLine.new('Schema')
|
|
83
|
+
step = SearchEngine::Logging::StepLine.new('Schema', io: SearchEngine::Logging::Output.io)
|
|
80
84
|
if missing
|
|
81
85
|
step.update('creating')
|
|
82
86
|
begin
|
|
@@ -100,7 +104,7 @@ module SearchEngine
|
|
|
100
104
|
end
|
|
101
105
|
|
|
102
106
|
def __se_full_check_drift(diff, missing, force_rebuild)
|
|
103
|
-
step = SearchEngine::Logging::StepLine.new('Schema Status')
|
|
107
|
+
step = SearchEngine::Logging::StepLine.new('Schema Status', io: SearchEngine::Logging::Output.io)
|
|
104
108
|
unless missing
|
|
105
109
|
step.update('checking')
|
|
106
110
|
drift = __se_schema_drift?(diff)
|
|
@@ -118,7 +122,7 @@ module SearchEngine
|
|
|
118
122
|
end
|
|
119
123
|
|
|
120
124
|
def __se_full_apply_if_drift(client, drift, applied, indexed_inside_apply, force_rebuild)
|
|
121
|
-
step = SearchEngine::Logging::StepLine.new('Schema Apply')
|
|
125
|
+
step = SearchEngine::Logging::StepLine.new('Schema Apply', io: SearchEngine::Logging::Output.io)
|
|
122
126
|
if drift
|
|
123
127
|
step.update('applying')
|
|
124
128
|
begin
|
|
@@ -143,7 +147,7 @@ module SearchEngine
|
|
|
143
147
|
|
|
144
148
|
def __se_full_indexation(applied, indexed_inside_apply)
|
|
145
149
|
result = nil
|
|
146
|
-
step = SearchEngine::Logging::StepLine.new('Indexing')
|
|
150
|
+
step = SearchEngine::Logging::StepLine.new('Indexing', io: SearchEngine::Logging::Output.io)
|
|
147
151
|
if applied && indexed_inside_apply
|
|
148
152
|
result = indexed_inside_apply if indexed_inside_apply.is_a?(Hash)
|
|
149
153
|
if __se_result_status(result) == :ok
|
|
@@ -177,7 +181,7 @@ module SearchEngine
|
|
|
177
181
|
end
|
|
178
182
|
|
|
179
183
|
def __se_full_retention(applied, logical, client)
|
|
180
|
-
step = SearchEngine::Logging::StepLine.new('Retention')
|
|
184
|
+
step = SearchEngine::Logging::StepLine.new('Retention', io: SearchEngine::Logging::Output.io)
|
|
181
185
|
if applied
|
|
182
186
|
step.skip('handled by schema apply')
|
|
183
187
|
else
|
|
@@ -195,7 +199,7 @@ module SearchEngine
|
|
|
195
199
|
diff = diff_res[:diff] || {}
|
|
196
200
|
|
|
197
201
|
missing = __se_schema_missing?(diff)
|
|
198
|
-
step = SearchEngine::Logging::StepLine.new('Presence')
|
|
202
|
+
step = SearchEngine::Logging::StepLine.new('Presence', io: SearchEngine::Logging::Output.io)
|
|
199
203
|
if missing
|
|
200
204
|
step.finish_warn('missing — collection not present, exit early')
|
|
201
205
|
return { status: :failed, docs_total: 0, success_total: 0, failed_total: 0,
|
|
@@ -203,7 +207,7 @@ module SearchEngine
|
|
|
203
207
|
end
|
|
204
208
|
step.finish('present')
|
|
205
209
|
|
|
206
|
-
step = SearchEngine::Logging::StepLine.new('Schema Status')
|
|
210
|
+
step = SearchEngine::Logging::StepLine.new('Schema Status', io: SearchEngine::Logging::Output.io)
|
|
207
211
|
step.update('checking')
|
|
208
212
|
drift = __se_schema_drift?(diff)
|
|
209
213
|
if drift
|
|
@@ -215,12 +219,13 @@ module SearchEngine
|
|
|
215
219
|
|
|
216
220
|
__se_preflight_dependencies!(mode: pre, client: client) if pre
|
|
217
221
|
|
|
218
|
-
step = SearchEngine::Logging::StepLine.new('Partial Indexing')
|
|
222
|
+
step = SearchEngine::Logging::StepLine.new('Partial Indexing', io: SearchEngine::Logging::Output.io)
|
|
219
223
|
step.update('indexing')
|
|
220
224
|
step.yield_line!
|
|
221
225
|
|
|
222
226
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
223
|
-
labels: partitions.map(&:inspect), partitions: partitions
|
|
227
|
+
labels: partitions.map(&:inspect), partitions: partitions,
|
|
228
|
+
io: SearchEngine::Logging::Output.io
|
|
224
229
|
)
|
|
225
230
|
renderer.start
|
|
226
231
|
summaries = []
|
|
@@ -255,35 +260,30 @@ module SearchEngine
|
|
|
255
260
|
# rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
|
|
256
261
|
def __se_cascade_after_indexation!(context: :full)
|
|
257
262
|
if SearchEngine::Instrumentation.context&.[](:bulk_suppress_cascade)
|
|
258
|
-
puts
|
|
259
|
-
|
|
263
|
+
SearchEngine::Logging::Output.puts
|
|
264
|
+
SearchEngine::Logging::Output.puts(
|
|
265
|
+
SearchEngine::Logging::Color.dim('>>>>>> Cascade Referencers — suppressed (bulk)')
|
|
266
|
+
)
|
|
260
267
|
return
|
|
261
268
|
end
|
|
262
|
-
puts
|
|
263
|
-
|
|
269
|
+
SearchEngine::Logging::Output.puts
|
|
270
|
+
SearchEngine::Logging::Output.puts(
|
|
271
|
+
SearchEngine::Logging::Color.header(%(>>>>>> Cascade Referencers))
|
|
272
|
+
)
|
|
264
273
|
results = SearchEngine::Cascade.cascade_reindex!(source: self, ids: nil, context: context)
|
|
265
274
|
outcomes = Array(results[:outcomes])
|
|
266
275
|
if outcomes.empty?
|
|
267
|
-
puts(SearchEngine::Logging::Color.dim(' none'))
|
|
276
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::Color.dim(' none'))
|
|
268
277
|
else
|
|
269
278
|
outcomes.each do |o|
|
|
270
279
|
coll = o[:collection] || o['collection']
|
|
271
280
|
mode = (o[:mode] || o['mode']).to_s
|
|
272
|
-
|
|
273
|
-
when 'partial'
|
|
274
|
-
puts(%( Referencer "#{coll}" → #{SearchEngine::Logging::Color.apply('partial reindex', :green)}))
|
|
275
|
-
when 'full'
|
|
276
|
-
puts(%( Referencer "#{coll}" → #{SearchEngine::Logging::Color.apply('full reindex', :green)}))
|
|
277
|
-
when 'skipped_unregistered'
|
|
278
|
-
puts(SearchEngine::Logging::Color.dim(%( Referencer "#{coll}" → skipped (unregistered))))
|
|
279
|
-
when 'skipped_cycle'
|
|
280
|
-
puts(SearchEngine::Logging::Color.dim(%( Referencer "#{coll}" → skipped (cycle))))
|
|
281
|
-
else
|
|
282
|
-
puts(%( Referencer "#{coll}" → #{mode}))
|
|
283
|
-
end
|
|
281
|
+
__se_log_cascade_outcome(coll, mode)
|
|
284
282
|
end
|
|
285
283
|
end
|
|
286
|
-
|
|
284
|
+
SearchEngine::Logging::Output.puts(
|
|
285
|
+
SearchEngine::Logging::Color.header('>>>>>> Cascade Done')
|
|
286
|
+
)
|
|
287
287
|
rescue StandardError => error
|
|
288
288
|
base = "Cascade — error=#{error.class}: #{error.message.to_s[0, 200]}"
|
|
289
289
|
if error.respond_to?(:status) || error.respond_to?(:body)
|
|
@@ -314,6 +314,22 @@ module SearchEngine
|
|
|
314
314
|
end
|
|
315
315
|
# rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
|
|
316
316
|
|
|
317
|
+
def __se_log_cascade_outcome(coll, mode)
|
|
318
|
+
msg = case mode
|
|
319
|
+
when 'partial'
|
|
320
|
+
%( Referencer "#{coll}" → #{SearchEngine::Logging::Color.apply('partial reindex', :green)})
|
|
321
|
+
when 'full'
|
|
322
|
+
%( Referencer "#{coll}" → #{SearchEngine::Logging::Color.apply('full reindex', :green)})
|
|
323
|
+
when 'skipped_unregistered'
|
|
324
|
+
SearchEngine::Logging::Color.dim(%( Referencer "#{coll}" → skipped (unregistered)))
|
|
325
|
+
when 'skipped_cycle'
|
|
326
|
+
SearchEngine::Logging::Color.dim(%( Referencer "#{coll}" → skipped (cycle)))
|
|
327
|
+
else
|
|
328
|
+
%( Referencer "#{coll}" → #{mode})
|
|
329
|
+
end
|
|
330
|
+
SearchEngine::Logging::Output.puts(msg)
|
|
331
|
+
end
|
|
332
|
+
|
|
317
333
|
# Raise {SearchEngine::Errors::IndexationAborted} when the result
|
|
318
334
|
# from {__se_index_partitions!} indicates a non-ok status. Called
|
|
319
335
|
# inside a {Schema.apply!} block to prevent the alias swap.
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'search_engine/logging/output'
|
|
4
|
+
|
|
3
5
|
module SearchEngine
|
|
4
6
|
class Base
|
|
5
7
|
module IndexMaintenance
|
|
@@ -27,7 +29,7 @@ module SearchEngine
|
|
|
27
29
|
|
|
28
30
|
def update_collection!
|
|
29
31
|
client = SearchEngine.client
|
|
30
|
-
step = SearchEngine::Logging::StepLine.new('Update Collection')
|
|
32
|
+
step = SearchEngine::Logging::StepLine.new('Update Collection', io: SearchEngine::Logging::Output.io)
|
|
31
33
|
step.update('analyzing diff')
|
|
32
34
|
updated = SearchEngine::Schema.update!(self, client: client)
|
|
33
35
|
|
|
@@ -51,14 +53,16 @@ module SearchEngine
|
|
|
51
53
|
physicals = __se_list_all_physicals(logical, client)
|
|
52
54
|
bare_schema = client.retrieve_collection_schema(logical, timeout_ms: 10_000)
|
|
53
55
|
|
|
54
|
-
step = SearchEngine::Logging::StepLine.new('Drop Collection')
|
|
56
|
+
step = SearchEngine::Logging::StepLine.new('Drop Collection', io: SearchEngine::Logging::Output.io)
|
|
55
57
|
if !has_alias && physicals.empty? && bare_schema.nil?
|
|
56
58
|
step.skip('not present')
|
|
57
59
|
return
|
|
58
60
|
end
|
|
59
61
|
|
|
60
|
-
puts
|
|
61
|
-
|
|
62
|
+
SearchEngine::Logging::Output.puts
|
|
63
|
+
SearchEngine::Logging::Output.puts(
|
|
64
|
+
SearchEngine::Logging::Color.header(%(>>>>>> Dropping Collection "#{logical}"))
|
|
65
|
+
)
|
|
62
66
|
|
|
63
67
|
physicals.each do |name|
|
|
64
68
|
step.update("dropping physical #{name}")
|
|
@@ -76,7 +80,9 @@ module SearchEngine
|
|
|
76
80
|
end
|
|
77
81
|
|
|
78
82
|
step.finish("done (physicals=#{physicals.size})")
|
|
79
|
-
|
|
83
|
+
SearchEngine::Logging::Output.puts(
|
|
84
|
+
SearchEngine::Logging::Color.header(%(>>>>>> Dropped Collection "#{logical}"))
|
|
85
|
+
)
|
|
80
86
|
nil
|
|
81
87
|
ensure
|
|
82
88
|
step&.close
|
|
@@ -110,7 +116,7 @@ module SearchEngine
|
|
|
110
116
|
physicals = __se_list_all_physicals(logical, client)
|
|
111
117
|
bare_schema = client.retrieve_collection_schema(logical)
|
|
112
118
|
|
|
113
|
-
step = SearchEngine::Logging::StepLine.new('Recreate Collection')
|
|
119
|
+
step = SearchEngine::Logging::StepLine.new('Recreate Collection', io: SearchEngine::Logging::Output.io)
|
|
114
120
|
if has_alias || physicals.any? || bare_schema
|
|
115
121
|
step.update("dropping existing (logical=#{logical})")
|
|
116
122
|
physicals.each { |name| client.delete_collection(name) }
|
|
@@ -8,6 +8,7 @@ require 'search_engine/logging/color'
|
|
|
8
8
|
require 'search_engine/logging/batch_line'
|
|
9
9
|
require 'search_engine/logging/step_line'
|
|
10
10
|
require 'search_engine/logging/live_renderer'
|
|
11
|
+
require 'search_engine/logging/output'
|
|
11
12
|
|
|
12
13
|
module SearchEngine
|
|
13
14
|
class Base
|
|
@@ -43,11 +44,11 @@ module SearchEngine
|
|
|
43
44
|
return if deps.empty?
|
|
44
45
|
|
|
45
46
|
indent = ' ' * depth
|
|
46
|
-
puts if depth.zero?
|
|
47
|
+
SearchEngine::Logging::Output.puts if depth.zero?
|
|
47
48
|
header = SearchEngine::Logging::Color.header(
|
|
48
49
|
%(#{indent}>>>>>> Preflight Dependencies (mode: #{mode}, collection: "#{current}"))
|
|
49
50
|
)
|
|
50
|
-
puts(header)
|
|
51
|
+
SearchEngine::Logging::Output.puts(header)
|
|
51
52
|
|
|
52
53
|
deps.each do |cfg|
|
|
53
54
|
dep_coll = (cfg[:collection] || cfg['collection']).to_s
|
|
@@ -56,7 +57,9 @@ module SearchEngine
|
|
|
56
57
|
dep_klass = __se_resolve_dep_class(dep_coll)
|
|
57
58
|
|
|
58
59
|
if dep_klass.nil?
|
|
59
|
-
|
|
60
|
+
SearchEngine::Logging::Output.puts(
|
|
61
|
+
SearchEngine::Logging::Color.dim(%(#{indent} "#{dep_coll}" → skipped (unregistered)))
|
|
62
|
+
)
|
|
60
63
|
visited.add(dep_coll)
|
|
61
64
|
next
|
|
62
65
|
end
|
|
@@ -78,7 +81,9 @@ module SearchEngine
|
|
|
78
81
|
visited.add(dep_coll)
|
|
79
82
|
end
|
|
80
83
|
|
|
81
|
-
|
|
84
|
+
SearchEngine::Logging::Output.puts(
|
|
85
|
+
SearchEngine::Logging::Color.header(%(#{indent}>>>>>> Preflight Done (collection: "#{current}")))
|
|
86
|
+
)
|
|
82
87
|
end
|
|
83
88
|
|
|
84
89
|
# @return [String] current collection logical name; empty string when unavailable
|
|
@@ -172,28 +177,32 @@ module SearchEngine
|
|
|
172
177
|
when 'ensure'
|
|
173
178
|
if missing
|
|
174
179
|
status_word = SearchEngine::Logging::Color.apply('ensure (missing)', :yellow)
|
|
175
|
-
puts(%(#{indent}"#{dep_coll}" → #{status_word} → index_collection))
|
|
176
|
-
# Avoid nested preflight to prevent redundant recursion cycles
|
|
180
|
+
SearchEngine::Logging::Output.puts(%(#{indent}"#{dep_coll}" → #{status_word} → index_collection))
|
|
177
181
|
SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
|
|
178
182
|
dep_klass.index_collection(client: client)
|
|
179
183
|
end
|
|
180
184
|
else
|
|
181
|
-
|
|
185
|
+
SearchEngine::Logging::Output.puts(
|
|
186
|
+
SearchEngine::Logging::Color.dim(%(#{indent}"#{dep_coll}" → present (skip)))
|
|
187
|
+
)
|
|
182
188
|
end
|
|
183
189
|
when 'index'
|
|
184
190
|
if missing || drift
|
|
185
191
|
reason = missing ? 'missing' : 'drift'
|
|
186
192
|
status_word = SearchEngine::Logging::Color.apply("index (#{reason})", :yellow)
|
|
187
|
-
puts(%(#{indent}"#{dep_coll}" → #{status_word} → index_collection))
|
|
188
|
-
# Avoid nested preflight to prevent redundant recursion cycles
|
|
193
|
+
SearchEngine::Logging::Output.puts(%(#{indent}"#{dep_coll}" → #{status_word} → index_collection))
|
|
189
194
|
SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
|
|
190
195
|
dep_klass.index_collection(client: client)
|
|
191
196
|
end
|
|
192
197
|
else
|
|
193
|
-
|
|
198
|
+
SearchEngine::Logging::Output.puts(
|
|
199
|
+
SearchEngine::Logging::Color.dim(%(#{indent}"#{dep_coll}" → in_sync (skip)))
|
|
200
|
+
)
|
|
194
201
|
end
|
|
195
202
|
else
|
|
196
|
-
|
|
203
|
+
SearchEngine::Logging::Output.puts(
|
|
204
|
+
SearchEngine::Logging::Color.dim(%(#{indent}"#{dep_coll}" → skipped (unknown mode: #{mode})))
|
|
205
|
+
)
|
|
197
206
|
end
|
|
198
207
|
end
|
|
199
208
|
|
|
@@ -201,7 +210,9 @@ module SearchEngine
|
|
|
201
210
|
return unless batches.is_a?(Array)
|
|
202
211
|
|
|
203
212
|
batches.each_with_index do |batch_stats, idx|
|
|
204
|
-
|
|
213
|
+
SearchEngine::Logging::Output.puts(
|
|
214
|
+
SearchEngine::Logging::BatchLine.format(batch_stats, idx + 1, indifferent: true)
|
|
215
|
+
)
|
|
205
216
|
end
|
|
206
217
|
end
|
|
207
218
|
|
|
@@ -285,7 +296,8 @@ module SearchEngine
|
|
|
285
296
|
docs_estimate = __se_heuristic_docs_estimate(1)
|
|
286
297
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
287
298
|
labels: ['single'], partitions: [nil],
|
|
288
|
-
per_partition_docs_estimates: [docs_estimate]
|
|
299
|
+
per_partition_docs_estimates: [docs_estimate],
|
|
300
|
+
io: SearchEngine::Logging::Output.io
|
|
289
301
|
)
|
|
290
302
|
renderer.start
|
|
291
303
|
|
|
@@ -348,7 +360,9 @@ module SearchEngine
|
|
|
348
360
|
def __se_index_partitions_seq!(parts, into, compiled)
|
|
349
361
|
docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
|
|
350
362
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
351
|
-
labels: parts.map(&:inspect), partitions: parts,
|
|
363
|
+
labels: parts.map(&:inspect), partitions: parts,
|
|
364
|
+
per_partition_docs_estimates: docs_estimates,
|
|
365
|
+
io: SearchEngine::Logging::Output.io
|
|
352
366
|
)
|
|
353
367
|
renderer.start
|
|
354
368
|
|
|
@@ -387,7 +401,9 @@ module SearchEngine
|
|
|
387
401
|
|
|
388
402
|
docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
|
|
389
403
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
390
|
-
labels: parts.map(&:inspect), partitions: parts,
|
|
404
|
+
labels: parts.map(&:inspect), partitions: parts,
|
|
405
|
+
per_partition_docs_estimates: docs_estimates,
|
|
406
|
+
io: SearchEngine::Logging::Output.io
|
|
391
407
|
)
|
|
392
408
|
renderer.start
|
|
393
409
|
|
data/lib/search_engine/bulk.rb
CHANGED
|
@@ -18,9 +18,10 @@ module SearchEngine
|
|
|
18
18
|
# When no targets are provided, all declared/registered collections are indexed
|
|
19
19
|
# (models are eagerly loaded from the configured `search_engine_models` path).
|
|
20
20
|
# @param targets [Array<Symbol, String, Class>] collections or model classes
|
|
21
|
+
# @param silent [Boolean] suppress progress output to stdout (errors still go to stderr)
|
|
21
22
|
# @return [Hash] summary (includes :failed_collections_total for unresolved targets)
|
|
22
|
-
def index_collections(*targets, client: nil)
|
|
23
|
-
run!(mode: :index, targets: targets, client: client)
|
|
23
|
+
def index_collections(*targets, client: nil, silent: false)
|
|
24
|
+
run!(mode: :index, targets: targets, client: client, silent: silent)
|
|
24
25
|
end
|
|
25
26
|
|
|
26
27
|
# Index all registered/declared collections.
|
|
@@ -30,20 +31,22 @@ module SearchEngine
|
|
|
30
31
|
# and runs indexing as if they were passed to {.index_collections}.
|
|
31
32
|
#
|
|
32
33
|
# @param client [SearchEngine::Client, nil]
|
|
34
|
+
# @param silent [Boolean] suppress progress output to stdout (errors still go to stderr)
|
|
33
35
|
# @return [Hash] summary (includes :failed_collections_total for unresolved targets)
|
|
34
|
-
def index_all(client: nil)
|
|
36
|
+
def index_all(client: nil, silent: false)
|
|
35
37
|
ensure_models_loaded_from_configured_path!
|
|
36
38
|
names = SearchEngine::CollectionResolver.models_map.keys
|
|
37
|
-
run!(mode: :index, targets: names, client: client)
|
|
39
|
+
run!(mode: :index, targets: names, client: client, silent: silent)
|
|
38
40
|
end
|
|
39
41
|
|
|
40
42
|
# Drop+index (destructive), mirroring {SearchEngine::Base.reindex_collection!}.
|
|
41
43
|
# When no targets are provided, all declared/registered collections are reindexed
|
|
42
44
|
# (models are eagerly loaded from the configured `search_engine_models` path).
|
|
43
45
|
# @param targets [Array<Symbol, String, Class>] collections or model classes
|
|
46
|
+
# @param silent [Boolean] suppress progress output to stdout (errors still go to stderr)
|
|
44
47
|
# @return [Hash] summary (includes :failed_collections_total for unresolved targets)
|
|
45
|
-
def reindex_collections!(*targets, client: nil)
|
|
46
|
-
run!(mode: :reindex, targets: targets, client: client)
|
|
48
|
+
def reindex_collections!(*targets, client: nil, silent: false)
|
|
49
|
+
run!(mode: :reindex, targets: targets, client: client, silent: silent)
|
|
47
50
|
end
|
|
48
51
|
|
|
49
52
|
# Reindex all registered/declared collections.
|
|
@@ -53,11 +56,12 @@ module SearchEngine
|
|
|
53
56
|
# and runs reindexing as if they were passed to {.reindex_collections!}.
|
|
54
57
|
#
|
|
55
58
|
# @param client [SearchEngine::Client, nil]
|
|
59
|
+
# @param silent [Boolean] suppress progress output to stdout (errors still go to stderr)
|
|
56
60
|
# @return [Hash] summary (includes :failed_collections_total for unresolved targets)
|
|
57
|
-
def reindex_all!(client: nil)
|
|
61
|
+
def reindex_all!(client: nil, silent: false)
|
|
58
62
|
ensure_models_loaded_from_configured_path!
|
|
59
63
|
names = SearchEngine::CollectionResolver.models_map.keys
|
|
60
|
-
run!(mode: :reindex, targets: names, client: client)
|
|
64
|
+
run!(mode: :reindex, targets: names, client: client, silent: silent)
|
|
61
65
|
end
|
|
62
66
|
|
|
63
67
|
# Drop orphaned physical collections across all logical collections.
|
|
@@ -74,8 +78,9 @@ module SearchEngine
|
|
|
74
78
|
# @param mode [Symbol] :index | :reindex
|
|
75
79
|
# @param targets [Array]
|
|
76
80
|
# @param client [SearchEngine::Client, nil]
|
|
81
|
+
# @param silent [Boolean]
|
|
77
82
|
# @return [Hash]
|
|
78
|
-
def run!(mode:, targets:, client: nil)
|
|
83
|
+
def run!(mode:, targets:, client: nil, silent: false)
|
|
79
84
|
raise ArgumentError, 'mode must be :index or :reindex' unless %i[index reindex].include?(mode.to_sym)
|
|
80
85
|
|
|
81
86
|
ts_client = client || SearchEngine.client
|
|
@@ -119,7 +124,9 @@ module SearchEngine
|
|
|
119
124
|
collection_results = []
|
|
120
125
|
failed_collections_total = 0
|
|
121
126
|
|
|
122
|
-
|
|
127
|
+
ctx = { bulk: true, bulk_suppress_cascade: true, bulk_mode: mode.to_sym }
|
|
128
|
+
ctx[:bulk_silent] = true if silent
|
|
129
|
+
SearchEngine::Instrumentation.with_context(ctx) do
|
|
123
130
|
SearchEngine::Instrumentation.instrument('search_engine.bulk.run', payload.merge(stats)) do |ctx|
|
|
124
131
|
run_stage!(mode, stage1_list, :input, collection_results)
|
|
125
132
|
run_stage!(mode, cascade_order, :cascade, collection_results)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'search_engine/logging/output'
|
|
4
|
+
|
|
3
5
|
module SearchEngine
|
|
4
6
|
# Cascade reindexing for collections that reference other collections via
|
|
5
7
|
# Typesense field-level references.
|
|
@@ -18,7 +20,7 @@ module SearchEngine
|
|
|
18
20
|
# @param context [Symbol] :update or :full
|
|
19
21
|
# @param client [SearchEngine::Client, nil]
|
|
20
22
|
# @return [Hash]
|
|
21
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
|
|
23
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting, Metrics/PerceivedComplexity, Metrics/BlockLength
|
|
22
24
|
def cascade_reindex!(source:, ids:, context: :update, client: nil)
|
|
23
25
|
raise ArgumentError, 'context must be :update or :full' unless %i[update full].include?(context.to_sym)
|
|
24
26
|
|
|
@@ -28,6 +30,7 @@ module SearchEngine
|
|
|
28
30
|
|
|
29
31
|
graph = build_reverse_graph(client: ts_client)
|
|
30
32
|
referencers = Array(graph[src_collection])
|
|
33
|
+
target_collections = referencers.map { |edge| edge[:referrer] }.uniq
|
|
31
34
|
|
|
32
35
|
# Detect immediate cycles (A <-> B) and skip those pairs
|
|
33
36
|
cycle_pairs = detect_immediate_cycles(graph)
|
|
@@ -43,7 +46,7 @@ module SearchEngine
|
|
|
43
46
|
alias_cache: alias_cache
|
|
44
47
|
)
|
|
45
48
|
|
|
46
|
-
|
|
49
|
+
outcomes_by_collection = {}
|
|
47
50
|
partial_count = 0
|
|
48
51
|
full_count = 0
|
|
49
52
|
skipped_unregistered = 0
|
|
@@ -57,18 +60,19 @@ module SearchEngine
|
|
|
57
60
|
# Skip cycle pairs deterministically (avoid ping-pong)
|
|
58
61
|
if cycle_pairs.include?([src_collection, referrer_coll])
|
|
59
62
|
skipped_cycles << { pair: [src_collection, referrer_coll] }
|
|
60
|
-
|
|
63
|
+
merge_cascade_outcome!(outcomes_by_collection, { collection: referrer_coll, mode: :skipped_cycle })
|
|
61
64
|
next
|
|
62
65
|
end
|
|
63
66
|
|
|
64
67
|
ref_klass = safe_collection_class(referrer_coll)
|
|
65
68
|
unless ref_klass
|
|
66
69
|
skipped_unregistered += 1
|
|
67
|
-
|
|
70
|
+
merge_cascade_outcome!(outcomes_by_collection, { collection: referrer_coll, mode: :skipped_unregistered })
|
|
68
71
|
next
|
|
69
72
|
end
|
|
70
73
|
|
|
71
74
|
mode = :full
|
|
75
|
+
partial_failure = nil
|
|
72
76
|
if context.to_sym == :update && can_partial_reindex?(ref_klass)
|
|
73
77
|
begin
|
|
74
78
|
SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: { local_key.to_sym => Array(ids) },
|
|
@@ -90,9 +94,8 @@ into: nil
|
|
|
90
94
|
mode = :skipped_no_partitions
|
|
91
95
|
end
|
|
92
96
|
end
|
|
93
|
-
#
|
|
94
|
-
|
|
95
|
-
message: error.message.to_s[0, 200] }
|
|
97
|
+
# Preserve the partial failure details on the final single outcome row.
|
|
98
|
+
partial_failure = { error_class: error.class.name, message: error.message.to_s[0, 200] }
|
|
96
99
|
end
|
|
97
100
|
elsif seen_full[referrer_coll]
|
|
98
101
|
mode = :skipped_duplicate
|
|
@@ -107,14 +110,17 @@ into: nil
|
|
|
107
110
|
end
|
|
108
111
|
end
|
|
109
112
|
|
|
110
|
-
|
|
113
|
+
outcome = { collection: referrer_coll, mode: mode }
|
|
114
|
+
outcome.merge!(partial_failure) if partial_failure
|
|
115
|
+
merge_cascade_outcome!(outcomes_by_collection, outcome)
|
|
111
116
|
end
|
|
117
|
+
outcomes = outcomes_by_collection.values
|
|
112
118
|
|
|
113
119
|
payload = {
|
|
114
120
|
source_collection: src_collection,
|
|
115
121
|
ids_count: Array(ids).size,
|
|
116
122
|
context: context.to_sym,
|
|
117
|
-
targets_total:
|
|
123
|
+
targets_total: target_collections.size,
|
|
118
124
|
partial_count: partial_count,
|
|
119
125
|
full_count: full_count,
|
|
120
126
|
skipped_unregistered: skipped_unregistered,
|
|
@@ -124,7 +130,7 @@ into: nil
|
|
|
124
130
|
|
|
125
131
|
payload.merge(outcomes: outcomes)
|
|
126
132
|
end
|
|
127
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting
|
|
133
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockNesting, Metrics/PerceivedComplexity, Metrics/BlockLength
|
|
128
134
|
|
|
129
135
|
# Build a reverse graph from Typesense live schemas when possible, falling
|
|
130
136
|
# back to compiled local schemas for registered models.
|
|
@@ -188,12 +194,16 @@ into: nil
|
|
|
188
194
|
parts = parts.reject { |p| p.nil? || p.to_s.strip.empty? }
|
|
189
195
|
|
|
190
196
|
if parts.empty?
|
|
191
|
-
|
|
197
|
+
SearchEngine::Logging::Output.puts(
|
|
198
|
+
SearchEngine::Logging::Color.dim(%( Referencer "#{coll_display}" — partitions=0 → skip))
|
|
199
|
+
)
|
|
192
200
|
return false
|
|
193
201
|
end
|
|
194
202
|
|
|
195
203
|
parts_str = SearchEngine::Logging::Color.bold("partitions=#{parts.size}")
|
|
196
|
-
puts(
|
|
204
|
+
SearchEngine::Logging::Output.puts(
|
|
205
|
+
%( Referencer "#{coll_display}" — #{parts_str} parallel=#{compiled.max_parallel})
|
|
206
|
+
)
|
|
197
207
|
mp = compiled.max_parallel.to_i
|
|
198
208
|
if mp > 1 && parts.size > 1
|
|
199
209
|
require 'concurrent-ruby'
|
|
@@ -210,7 +220,9 @@ into: nil
|
|
|
210
220
|
end
|
|
211
221
|
|
|
212
222
|
else
|
|
213
|
-
|
|
223
|
+
SearchEngine::Logging::Output.puts(
|
|
224
|
+
%( Referencer "#{coll_display}" — #{SearchEngine::Logging::Color.bold('single')})
|
|
225
|
+
)
|
|
214
226
|
SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: nil, into: nil)
|
|
215
227
|
executed = true
|
|
216
228
|
end
|
|
@@ -332,7 +344,7 @@ into: nil
|
|
|
332
344
|
coll_display = physical && physical != logical ? "#{logical} (physical: #{physical})" : logical
|
|
333
345
|
action = force_rebuild ? 'force_rebuild index_collection' : 'index_collection'
|
|
334
346
|
status_word = SearchEngine::Logging::Color.apply("schema rebuild required, running #{action}", :yellow)
|
|
335
|
-
puts(%( Referencer "#{coll_display}" — #{status_word}))
|
|
347
|
+
SearchEngine::Logging::Output.puts(%( Referencer "#{coll_display}" — #{status_word}))
|
|
336
348
|
|
|
337
349
|
SearchEngine::Instrumentation.with_context(bulk_suppress_cascade: true) do
|
|
338
350
|
ref_klass.index_collection(client: client, pre: :ensure, force_rebuild: force_rebuild)
|
|
@@ -340,7 +352,7 @@ into: nil
|
|
|
340
352
|
true
|
|
341
353
|
rescue StandardError => error
|
|
342
354
|
err_line = %( Referencer "#{logical}" — schema rebuild failed: #{error.message})
|
|
343
|
-
|
|
355
|
+
warn(SearchEngine::Logging::Color.apply(err_line, :red))
|
|
344
356
|
false
|
|
345
357
|
end
|
|
346
358
|
|
|
@@ -349,7 +361,9 @@ into: nil
|
|
|
349
361
|
pool.post do
|
|
350
362
|
SearchEngine::Instrumentation.with_context(ctx) do
|
|
351
363
|
summary = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: p, into: nil)
|
|
352
|
-
mtx.synchronize
|
|
364
|
+
mtx.synchronize do
|
|
365
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::PartitionProgress.line(p, summary))
|
|
366
|
+
end
|
|
353
367
|
end
|
|
354
368
|
end
|
|
355
369
|
end
|
|
@@ -359,7 +373,7 @@ into: nil
|
|
|
359
373
|
executed = false
|
|
360
374
|
parts.each do |p|
|
|
361
375
|
summary = SearchEngine::Indexer.rebuild_partition!(ref_klass, partition: p, into: nil)
|
|
362
|
-
puts(SearchEngine::Logging::PartitionProgress.line(p, summary))
|
|
376
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::PartitionProgress.line(p, summary))
|
|
363
377
|
executed = true
|
|
364
378
|
end
|
|
365
379
|
executed
|
|
@@ -469,6 +483,42 @@ foreign_key: fk }
|
|
|
469
483
|
pairs.uniq
|
|
470
484
|
end
|
|
471
485
|
|
|
486
|
+
# Merge one cascade outcome into the per-collection accumulator.
#
# The first outcome seen for a collection is stored as-is. When an outcome
# for the same collection already exists, the one whose mode has the
# strictly higher priority wins (ties keep the existing outcome), and a
# copy of it is stored so neither input hash is mutated.
#
# Error details survive the merge even when the erroring outcome loses:
# `:error_class` and `:message` are copied onto the winner from the first
# outcome (existing, then incoming) that carries an `:error_class`. Both
# fields are taken from that same outcome so the stored message always
# belongs to the stored error class.
#
# @param outcomes_by_collection [Hash] accumulator keyed by `outcome[:collection]`
# @param outcome [Hash] outcome with at least `:collection` and `:mode`;
#   may carry `:error_class` and `:message`
# @return [Hash, nil] the stored winner, or nil when the outcome was the first for its key
def merge_cascade_outcome!(outcomes_by_collection, outcome)
  key = outcome[:collection]
  existing = outcomes_by_collection[key]
  if existing.nil?
    outcomes_by_collection[key] = outcome
    return
  end

  incoming_wins = cascade_outcome_priority(outcome[:mode]) > cascade_outcome_priority(existing[:mode])
  winner = (incoming_wins ? outcome : existing).dup

  # Pick a single source for the error pair; resolving the two fields
  # independently could attach one outcome's message to the other's error.
  error_source = [existing, outcome].find { |candidate| candidate[:error_class] }
  if error_source
    winner[:error_class] = error_source[:error_class]
    winner[:message] = error_source[:message]
  end

  outcomes_by_collection[key] = winner
end
|
|
509
|
+
|
|
510
|
+
# Rank a cascade outcome mode so competing outcomes for one collection
# can be compared; a larger number wins.
#
# @param mode [Symbol, String, nil] outcome mode; anything responding to
#   `to_sym` is accepted
# @return [Integer] 60 for :full down to 10 for :skipped_unregistered, and
#   0 for unknown modes or values that cannot be converted to a Symbol
def cascade_outcome_priority(mode)
  # nil (an outcome without a mode) and other non-symbolizable values rank
  # lowest instead of raising NoMethodError on `to_sym`.
  return 0 unless mode.respond_to?(:to_sym)

  case mode.to_sym
  when :full then 60
  when :partial then 50
  when :skipped_no_partitions then 40
  when :skipped_duplicate then 30
  when :skipped_cycle then 20
  when :skipped_unregistered then 10
  else 0
  end
end
|
|
521
|
+
|
|
472
522
|
def safe_collection_class(name)
|
|
473
523
|
SearchEngine::CollectionResolver.model_for_logical(name)
|
|
474
524
|
end
|
|
@@ -166,7 +166,10 @@ module SearchEngine
|
|
|
166
166
|
started = monotonic_ms
|
|
167
167
|
client = client_with_overrides
|
|
168
168
|
health = client.health
|
|
169
|
-
|
|
169
|
+
ok_value = if health.is_a?(Hash)
|
|
170
|
+
health.key?(:ok) ? health[:ok] : health['ok']
|
|
171
|
+
end
|
|
172
|
+
ok = (ok_value == true)
|
|
170
173
|
details = { response: redacted_value(health) }
|
|
171
174
|
hint = ok ? nil : 'Verify host/port/protocol and network reachability to Typesense.'
|
|
172
175
|
|
|
@@ -66,7 +66,9 @@ module SearchEngine
|
|
|
66
66
|
# @param joins [Array<Symbol>, nil]
|
|
67
67
|
# @return [Array<SearchEngine::AST::Node>]
|
|
68
68
|
def parse_list(list, klass:, joins: nil)
|
|
69
|
-
|
|
69
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
70
|
+
# parse_list(['brand_id IN ?', [1, 2]]) should keep [1, 2] as one bind arg.
|
|
71
|
+
items = Array(list).compact
|
|
70
72
|
return [] if items.empty?
|
|
71
73
|
|
|
72
74
|
nodes = []
|
|
@@ -92,7 +94,7 @@ module SearchEngine
|
|
|
92
94
|
nodes << parse_raw(entry.to_s)
|
|
93
95
|
i += 1
|
|
94
96
|
when Array
|
|
95
|
-
nodes
|
|
97
|
+
nodes.concat(Array(parse_array_entry(entry, klass: klass, joins: joins)))
|
|
96
98
|
i += 1
|
|
97
99
|
else
|
|
98
100
|
raise ArgumentError, "Parser: unsupported where argument #{entry.class}"
|
|
@@ -104,7 +106,19 @@ module SearchEngine
|
|
|
104
106
|
# --- Internals -------------------------------------------------------
|
|
105
107
|
|
|
106
108
|
def parse_array_entry(entry, klass:, joins: nil)
|
|
107
|
-
|
|
109
|
+
raise ArgumentError, 'Unsupported argument type: [] (Array)' if entry.empty?
|
|
110
|
+
|
|
111
|
+
first = entry.first
|
|
112
|
+
unless first.is_a?(String)
|
|
113
|
+
# Support wrapped nested where-lists while preserving template bind arrays:
|
|
114
|
+
# where([['a > ?', 1], ['b < ?', 2]]) should parse as two predicates.
|
|
115
|
+
if first.is_a?(Array) || first.is_a?(Hash) || first.is_a?(Symbol)
|
|
116
|
+
return parse_list(entry, klass: klass, joins: joins)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
return parse_raw(entry.to_s)
|
|
120
|
+
end
|
|
121
|
+
|
|
108
122
|
return parse_list(entry, klass: klass, joins: joins) unless placeholders?(entry.first)
|
|
109
123
|
|
|
110
124
|
template = entry.first
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'search_engine/logging/color'
|
|
4
4
|
require 'search_engine/logging/batch_line'
|
|
5
|
+
require 'search_engine/logging/output'
|
|
5
6
|
|
|
6
7
|
module SearchEngine
|
|
7
8
|
class Indexer
|
|
@@ -151,7 +152,11 @@ module SearchEngine
|
|
|
151
152
|
shared_state[:on_batch] = on_batch
|
|
152
153
|
producer_error = nil
|
|
153
154
|
|
|
154
|
-
|
|
155
|
+
if log_batches
|
|
156
|
+
SearchEngine::Logging::Output.puts(
|
|
157
|
+
SearchEngine::Logging::Color.dim(' Starting parallel batch processing...')
|
|
158
|
+
)
|
|
159
|
+
end
|
|
155
160
|
started_at = monotonic_ms
|
|
156
161
|
|
|
157
162
|
producer_thread = start_producer_thread(
|
|
@@ -234,7 +239,7 @@ module SearchEngine
|
|
|
234
239
|
else
|
|
235
240
|
" Processed #{batch_count} batches... (#{elapsed}ms)"
|
|
236
241
|
end
|
|
237
|
-
puts(SearchEngine::Logging::Color.dim(progress))
|
|
242
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::Color.dim(progress))
|
|
238
243
|
end
|
|
239
244
|
rescue StandardError => error
|
|
240
245
|
yield error if block_given?
|
|
@@ -680,7 +685,7 @@ module SearchEngine
|
|
|
680
685
|
end
|
|
681
686
|
|
|
682
687
|
def log_batch(stats, batch_number)
|
|
683
|
-
puts(SearchEngine::Logging::BatchLine.format(stats, batch_number))
|
|
688
|
+
SearchEngine::Logging::Output.puts(SearchEngine::Logging::BatchLine.format(stats, batch_number))
|
|
684
689
|
end
|
|
685
690
|
end
|
|
686
691
|
end
|
|
@@ -103,8 +103,12 @@ module SearchEngine
|
|
|
103
103
|
rng = (Thread.current[:__se_retry_rng__] ||= Random.new)
|
|
104
104
|
min = range.begin.to_f
|
|
105
105
|
max = range.end.to_f
|
|
106
|
-
#
|
|
106
|
+
# Honor exclusive-end ranges by shifting the upper bound down one
|
|
107
|
+
# representable float step. Inclusive ranges preserve the original end.
|
|
108
|
+
max = max.prev_float if range.exclude_end?
|
|
107
109
|
span = max - min
|
|
110
|
+
return min if span <= 0.0
|
|
111
|
+
|
|
108
112
|
min + (rng.rand * span)
|
|
109
113
|
end
|
|
110
114
|
end
|
|
@@ -83,7 +83,7 @@ module SearchEngine
|
|
|
83
83
|
enum: docs_enum,
|
|
84
84
|
batch_size: nil,
|
|
85
85
|
action: :upsert,
|
|
86
|
-
log_batches: partition.nil? && on_batch.nil?,
|
|
86
|
+
log_batches: !SearchEngine::Instrumentation.context[:bulk_silent] && partition.nil? && on_batch.nil?,
|
|
87
87
|
max_parallel: max_parallel,
|
|
88
88
|
on_batch: on_batch
|
|
89
89
|
)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  module Logging
    # Output routing for indexation progress logging.
    #
    # When the instrumentation context includes `bulk_silent: true`,
    # progress output is suppressed: {.io} reports a handle on {File::NULL}
    # and {.puts} writes nothing. Otherwise, output goes to `$stdout`.
    # This module never touches `$stderr`, so error output emitted via
    # `warn()` is unaffected.
    #
    # @since M10
    module Output
      # Write-only handle on the null device, returned by {.io} in silent mode.
      NULL_IO = File.open(File::NULL, 'w')

      module_function

      # @return [Boolean] true only when the instrumentation context holds
      #   exactly `bulk_silent: true` (other truthy values do not silence output)
      def silent?
        SearchEngine::Instrumentation.context[:bulk_silent] == true
      end

      # @return [IO] the appropriate output stream for progress logging
      def io
        silent? ? NULL_IO : $stdout
      end

      # Write a line to the progress output (suppressed in silent mode).
      #
      # In silent mode this returns immediately instead of formatting the
      # arguments and writing them to the null device.
      #
      # @param args [Array] arguments forwarded to `IO#puts`
      # @return [nil]
      def puts(*args)
        return nil if silent?

        $stdout.puts(*args)
      end
    end
  end
end
|
|
@@ -17,6 +17,7 @@ module SearchEngine
|
|
|
17
17
|
# @param args [Array<Object>]
|
|
18
18
|
# @return [SearchEngine::Relation]
|
|
19
19
|
def not(*args)
|
|
20
|
+
@relation.send(:validate_not_args!, args)
|
|
20
21
|
nodes = Array(@relation.send(:build_ast_with_empty_array_rewrites, args, negated: true))
|
|
21
22
|
|
|
22
23
|
# Invert non-hidden predicates (Eq, In) returned by the builder
|
|
@@ -44,6 +45,7 @@ module SearchEngine
|
|
|
44
45
|
# @return [SearchEngine::Relation, WhereChain]
|
|
45
46
|
def where(*args)
|
|
46
47
|
return self if args.nil? || args.empty?
|
|
48
|
+
return self if blank_where_args?(args)
|
|
47
49
|
|
|
48
50
|
ast_nodes = build_ast_with_empty_array_rewrites(args, negated: false)
|
|
49
51
|
fragments = normalize_where(args)
|
|
@@ -58,6 +60,7 @@ module SearchEngine
|
|
|
58
60
|
# @param args [Array<Object>]
|
|
59
61
|
# @return [SearchEngine::Relation]
|
|
60
62
|
def not(*args)
|
|
63
|
+
validate_not_args!(args)
|
|
61
64
|
nodes = Array(build_ast_with_empty_array_rewrites(args, negated: true))
|
|
62
65
|
|
|
63
66
|
negated = nodes.map do |node|
|
|
@@ -122,6 +125,30 @@ module SearchEngine
|
|
|
122
125
|
|
|
123
126
|
private
|
|
124
127
|
|
|
128
|
+
# Tell whether a `where` call was given exactly one argument and that
# argument is blank according to `blank_where_arg?`.
#
# @param args [Array<Object>] the splatted `where` arguments
# @return [Boolean] false for any argument count other than one
def blank_where_args?(args)
  args.length == 1 && blank_where_arg?(args.first)
end
|
|
133
|
+
|
|
134
|
+
# Tell whether a single `where` argument carries no predicate at all.
#
# Blank means: nil, anything that responds to `empty?` and is empty
# (e.g. "", [], {}), or a String made up only of whitespace.
#
# @param arg [Object]
# @return [Boolean]
def blank_where_arg?(arg)
  case arg
  when nil
    true
  when String
    # Empty strings are caught first; otherwise whitespace-only counts as blank.
    arg.empty? || arg.strip.empty?
  else
    (arg.respond_to?(:empty?) && arg.empty?) ? true : false
  end
end
|
|
140
|
+
|
|
141
|
+
# Reject `not` calls that carry no usable predicate.
#
# Raises when no arguments were given, or when the single argument is nil
# or an empty Array. Calls with two or more arguments are not inspected
# here.
#
# @param args [Array<Object>, nil] the splatted `not` arguments
# @return [nil]
# @raise [ArgumentError] for zero arguments, a lone nil, or a lone empty Array
def validate_not_args!(args)
  raise ArgumentError, 'wrong number of arguments (given 0, expected 1+)' if args.nil? || args.empty?
  return unless args.length == 1

  value = args.first
  raise ArgumentError, 'Unsupported argument type: (NilClass)' if value.nil?

  # Report an empty Array as what it is rather than as NilClass; the wording
  # matches the one the DSL parser uses for an empty array entry.
  raise ArgumentError, 'Unsupported argument type: [] (Array)' if value.is_a?(Array) && value.empty?
end
|
|
151
|
+
|
|
125
152
|
def merge_relation(other, assoc: nil)
|
|
126
153
|
return self if other.nil?
|
|
127
154
|
|
|
@@ -227,7 +254,9 @@ module SearchEngine
|
|
|
227
254
|
# - nil predicates to hidden *_blank flags when `optional` is enabled (new behavior)
|
|
228
255
|
# Delegates other inputs to the DSL parser.
|
|
229
256
|
def build_ast_with_empty_array_rewrites(args, negated: false)
|
|
230
|
-
|
|
257
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
258
|
+
# where('field IN ?', [1, 2]) => ['field IN ?', [1, 2]]
|
|
259
|
+
items = Array(args).compact
|
|
231
260
|
return [] if items.empty?
|
|
232
261
|
|
|
233
262
|
out_nodes = []
|
|
@@ -560,7 +589,9 @@ module SearchEngine
|
|
|
560
589
|
|
|
561
590
|
# Normalize where arguments into an array of string fragments safe for Typesense.
|
|
562
591
|
def normalize_where(args)
|
|
563
|
-
|
|
592
|
+
# Preserve input shape so template bind arrays stay intact:
|
|
593
|
+
# where('field IN ?', [1, 2]) => ['field IN ?', [1, 2]]
|
|
594
|
+
list = Array(args).compact
|
|
564
595
|
return [] if list.empty?
|
|
565
596
|
|
|
566
597
|
fragments = []
|
data/lib/search_engine/schema.rb
CHANGED
|
@@ -436,8 +436,13 @@ module SearchEngine
|
|
|
436
436
|
existing = list_physicals_starting_with(prefix, client: client)
|
|
437
437
|
used_sequences = existing.map { |name| name.split('_').last.to_i }
|
|
438
438
|
|
|
439
|
-
seq = 1
|
|
440
|
-
|
|
439
|
+
seq = (1..999).find { |candidate| !used_sequences.include?(candidate) }
|
|
440
|
+
unless seq
|
|
441
|
+
raise SearchEngine::Errors::InvalidParams,
|
|
442
|
+
"Schema physical name sequence space exhausted for prefix '#{prefix}' (001..999 all occupied). " \
|
|
443
|
+
'Wait one second and retry.'
|
|
444
|
+
end
|
|
445
|
+
|
|
441
446
|
format('%<prefix>s%<seq>03d', prefix: prefix, seq: seq)
|
|
442
447
|
end
|
|
443
448
|
|
|
@@ -4,8 +4,13 @@ module SearchEngine
|
|
|
4
4
|
module Sources
|
|
5
5
|
# Adapter that delegates batch enumeration to a provided callable.
|
|
6
6
|
#
|
|
7
|
-
# The callable is expected to implement `call(cursor:, partition:)` and
|
|
8
|
-
#
|
|
7
|
+
# The callable is expected to implement `call(cursor:, partition:)` and use one mode:
|
|
8
|
+
# - return mode: return an Enumerable of batches
|
|
9
|
+
# - yield mode: yield batches via the provided block argument
|
|
10
|
+
#
|
|
11
|
+
# Returning batch-like data (Array/Enumerator) while also yielding is treated as
|
|
12
|
+
# an ambiguous mixed mode and raises an error.
|
|
13
|
+
# Batch shapes are application-defined.
|
|
9
14
|
#
|
|
10
15
|
# @example
|
|
11
16
|
# src = SearchEngine::Sources::LambdaSource.new(->(cursor:, partition:) { [[row1, row2]] })
|
|
@@ -32,16 +37,32 @@ module SearchEngine
|
|
|
32
37
|
return enum_for(:each_batch, partition: partition, cursor: cursor) unless block_given?
|
|
33
38
|
|
|
34
39
|
started = monotonic_ms
|
|
40
|
+
consume_rows = lambda do |rows|
|
|
41
|
+
duration = monotonic_ms - started
|
|
42
|
+
instrument_batch_fetched(source: 'lambda', batch_index: nil, rows_count: Array(rows).size,
|
|
43
|
+
duration_ms: duration, partition: partition, cursor: cursor,
|
|
44
|
+
adapter_options: { callable: @callable.class.name }
|
|
45
|
+
)
|
|
46
|
+
yield(rows)
|
|
47
|
+
started = monotonic_ms
|
|
48
|
+
nil
|
|
49
|
+
end
|
|
50
|
+
|
|
35
51
|
begin
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
52
|
+
yielded = false
|
|
53
|
+
returned = @callable.call(cursor: cursor, partition: partition) do |rows|
|
|
54
|
+
yielded = true
|
|
55
|
+
consume_rows.call(rows)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
if yielded
|
|
59
|
+
if mixed_mode_batch_return?(returned)
|
|
60
|
+
raise SearchEngine::Errors::InvalidParams,
|
|
61
|
+
'lambda source callable must either yield batches or return an Enumerable of batches, not both'
|
|
62
|
+
end
|
|
63
|
+
else
|
|
64
|
+
to_iterate = returned.respond_to?(:each) ? returned : Array(returned)
|
|
65
|
+
to_iterate.each { |rows| consume_rows.call(rows) }
|
|
45
66
|
end
|
|
46
67
|
rescue StandardError => error
|
|
47
68
|
instrument_error(source: 'lambda', error: error, partition: partition, cursor: cursor,
|
|
@@ -50,6 +71,12 @@ module SearchEngine
|
|
|
50
71
|
raise
|
|
51
72
|
end
|
|
52
73
|
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Tell whether a callable's return value looks like batch data.
#
# Only Array and Enumerator instances (including subclasses) count; any
# other return value is not treated as batch-like.
#
# @param returned [Object] value returned by the source callable
# @return [Boolean]
def mixed_mode_batch_return?(returned)
  case returned
  when Array, Enumerator then true
  else false
  end
end
|
|
53
80
|
end
|
|
54
81
|
end
|
|
55
82
|
end
|
|
@@ -12,6 +12,11 @@ module SearchEngine
|
|
|
12
12
|
# Enumerator.new { |y| external_api.each_page(cursor) { |rows| y << rows } }
|
|
13
13
|
# end
|
|
14
14
|
#
|
|
15
|
+
# Lambda source contract:
|
|
16
|
+
# - return mode: return an Enumerable of batches
|
|
17
|
+
# - yield mode: call the provided block with each batch
|
|
18
|
+
# Returning batch-like values while also yielding is treated as mixed mode and raises.
|
|
19
|
+
#
|
|
15
20
|
# All adapters implement `each_batch(partition:, cursor:)` and return an Enumerator
|
|
16
21
|
# when no block is provided.
|
|
17
22
|
module Sources
|
|
@@ -19,7 +24,7 @@ module SearchEngine
|
|
|
19
24
|
#
|
|
20
25
|
# @param type [Symbol] :active_record, :sql, or :lambda
|
|
21
26
|
# @param options [Hash] adapter-specific options
|
|
22
|
-
# @yield for :lambda sources,
|
|
27
|
+
# @yield for :lambda sources, block taking (cursor:, partition:) and either yielding or returning batches
|
|
23
28
|
# @return [Object] adapter responding to `each_batch(partition:, cursor:)`
|
|
24
29
|
def self.build(type, **options, &block)
|
|
25
30
|
case type.to_sym
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: search-engine-for-typesense
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 30.1.6.
|
|
4
|
+
version: 30.1.6.18
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nikita Shkoda
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: concurrent-ruby
|
|
@@ -159,6 +159,7 @@ files:
|
|
|
159
159
|
- lib/search_engine/logging/cursor_guard.rb
|
|
160
160
|
- lib/search_engine/logging/format_helpers.rb
|
|
161
161
|
- lib/search_engine/logging/live_renderer.rb
|
|
162
|
+
- lib/search_engine/logging/output.rb
|
|
162
163
|
- lib/search_engine/logging/partition_progress.rb
|
|
163
164
|
- lib/search_engine/logging/spinner.rb
|
|
164
165
|
- lib/search_engine/logging/step_line.rb
|