search-engine-for-typesense 30.1.6.6 → 30.1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +19 -4
- data/lib/search_engine/base/index_maintenance.rb +65 -13
- data/lib/search_engine/config.rb +4 -0
- data/lib/search_engine/errors.rb +7 -0
- data/lib/search_engine/indexer/bulk_import.rb +72 -34
- data/lib/search_engine/interruptible_pool.rb +14 -5
- data/lib/search_engine/logging/live_renderer.rb +20 -11
- data/lib/search_engine/partitioner.rb +42 -0
- data/lib/search_engine/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ed93c9aa019a0f633ad5c7cc47ec08ea303f5e51491cfc3130e677271b000e1f
|
|
4
|
+
data.tar.gz: 6446746c790fc6b1c0fa084a5ba9bebd555f5e298738f7b8800fd2717257659f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 99aa8f0be6a9b43d0de16e402fe22cb60614861c73f26f122f0c155e3d35de33053a7822170c440bb407bcac73d7a5ba42f57ba0eda825a49fa210e369ba7ea3
|
|
7
|
+
data.tar.gz: a6526cc8752cdc5a8f61c03a55519e5cf18da1ac538e1028a881b8e5a2d147d5c44cb26b3f4fbbb46907589985564baa895166320b3c1eee230d6557f1890976
|
|
data/lib/search_engine/base/index_maintenance/lifecycle.rb
CHANGED
|
@@ -133,22 +133,37 @@ module SearchEngine
|
|
|
133
133
|
result = nil
|
|
134
134
|
step = SearchEngine::Logging::StepLine.new('Indexing')
|
|
135
135
|
if applied && indexed_inside_apply
|
|
136
|
-
step.skip('performed during schema apply')
|
|
137
136
|
result = indexed_inside_apply if indexed_inside_apply.is_a?(Hash)
|
|
137
|
+
if __se_result_status(result) == :ok
|
|
138
|
+
step.skip('performed during schema apply')
|
|
139
|
+
else
|
|
140
|
+
__se_finish_indexation_step(step, result)
|
|
141
|
+
end
|
|
138
142
|
else
|
|
139
143
|
step.update('indexing')
|
|
140
144
|
step.yield_line!
|
|
141
145
|
result = __se_index_partitions!(into: nil)
|
|
142
|
-
step
|
|
146
|
+
__se_finish_indexation_step(step, result)
|
|
143
147
|
end
|
|
144
148
|
|
|
145
|
-
|
|
146
|
-
__se_cascade_after_indexation!(context: :full) if cascade_ok
|
|
149
|
+
__se_cascade_after_indexation!(context: :full) if __se_result_status(result) == :ok
|
|
147
150
|
result
|
|
148
151
|
ensure
|
|
149
152
|
step&.close
|
|
150
153
|
end
|
|
151
154
|
|
|
155
|
+
def __se_result_status(result)
|
|
156
|
+
result.is_a?(Hash) ? result[:status] : :ok
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def __se_finish_indexation_step(step, result)
|
|
160
|
+
case __se_result_status(result)
|
|
161
|
+
when :ok then step.finish('done')
|
|
162
|
+
when :partial then step.finish_warn('partial')
|
|
163
|
+
else step.finish_warn('failed')
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
|
|
152
167
|
def __se_full_retention(applied, logical, client)
|
|
153
168
|
step = SearchEngine::Logging::StepLine.new('Retention')
|
|
154
169
|
if applied
|
|
data/lib/search_engine/base/index_maintenance.rb
CHANGED
|
@@ -270,9 +270,9 @@ module SearchEngine
|
|
|
270
270
|
if compiled
|
|
271
271
|
parts = Array(compiled.partitions)
|
|
272
272
|
max_p = compiled.max_parallel.to_i
|
|
273
|
-
return __se_index_partitions_seq!(parts, into) if max_p <= 1 || parts.size <= 1
|
|
273
|
+
return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
|
|
274
274
|
|
|
275
|
-
__se_index_partitions_parallel!(parts, into, max_p)
|
|
275
|
+
__se_index_partitions_parallel!(parts, into, max_p, compiled)
|
|
276
276
|
else
|
|
277
277
|
summary = SearchEngine::Indexer.rebuild_partition!(self, partition: nil, into: into)
|
|
278
278
|
__se_build_index_result([summary])
|
|
@@ -314,10 +314,10 @@ module SearchEngine
|
|
|
314
314
|
|
|
315
315
|
class_methods do
|
|
316
316
|
# Sequential processing of partition list with live progress rendering.
|
|
317
|
-
def __se_index_partitions_seq!(parts, into)
|
|
318
|
-
|
|
317
|
+
def __se_index_partitions_seq!(parts, into, compiled)
|
|
318
|
+
docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
|
|
319
319
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
320
|
-
labels: parts.map(&:inspect), partitions: parts,
|
|
320
|
+
labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
|
|
321
321
|
)
|
|
322
322
|
renderer.start
|
|
323
323
|
|
|
@@ -351,12 +351,12 @@ module SearchEngine
|
|
|
351
351
|
|
|
352
352
|
class_methods do
|
|
353
353
|
# Parallel processing via bounded thread pool with live progress rendering.
|
|
354
|
-
def __se_index_partitions_parallel!(parts, into, max_p)
|
|
354
|
+
def __se_index_partitions_parallel!(parts, into, max_p, compiled)
|
|
355
355
|
require 'concurrent-ruby'
|
|
356
356
|
|
|
357
|
-
|
|
357
|
+
docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
|
|
358
358
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
|
359
|
-
labels: parts.map(&:inspect), partitions: parts,
|
|
359
|
+
labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
|
|
360
360
|
)
|
|
361
361
|
renderer.start
|
|
362
362
|
|
|
@@ -373,7 +373,15 @@ module SearchEngine
|
|
|
373
373
|
warn("\n Interrupted \u2014 stopping parallel partition workers\u2026")
|
|
374
374
|
end
|
|
375
375
|
|
|
376
|
-
|
|
376
|
+
pool_timeout = begin
|
|
377
|
+
SearchEngine.config.indexer.pool_timeout
|
|
378
|
+
rescue StandardError
|
|
379
|
+
nil
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
pool_status = SearchEngine::InterruptiblePool.run(
|
|
383
|
+
pool, on_interrupt: on_interrupt, timeout: pool_timeout
|
|
384
|
+
) do
|
|
377
385
|
parts.each_with_index do |part, idx|
|
|
378
386
|
break if cancelled.true?
|
|
379
387
|
|
|
@@ -400,6 +408,8 @@ module SearchEngine
|
|
|
400
408
|
end
|
|
401
409
|
end
|
|
402
410
|
|
|
411
|
+
__se_flag_incomplete_slots!(renderer, parts, partition_errors, mtx, pool_timeout) if pool_status == :timed_out
|
|
412
|
+
|
|
403
413
|
begin
|
|
404
414
|
renderer.stop
|
|
405
415
|
rescue StandardError
|
|
@@ -414,11 +424,27 @@ module SearchEngine
|
|
|
414
424
|
end
|
|
415
425
|
|
|
416
426
|
class_methods do
|
|
417
|
-
#
|
|
427
|
+
# Build an array of per-partition doc estimates.
|
|
428
|
+
# Tries per-partition counting (auto-detected from AR fetch result),
|
|
429
|
+
# fills any nils with the equal-split heuristic fallback.
|
|
430
|
+
#
|
|
431
|
+
# @param parts [Array] partition keys
|
|
432
|
+
# @param compiled [SearchEngine::Partitioner::Compiled, nil]
|
|
433
|
+
# @return [Array<Integer, nil>]
|
|
434
|
+
def __se_per_partition_docs_estimates(parts, compiled)
|
|
435
|
+
estimates = parts.map { |part| compiled&.partition_doc_count(part) }
|
|
436
|
+
|
|
437
|
+
heuristic = __se_heuristic_docs_estimate(parts.size)
|
|
438
|
+
parts.each_index.map { |i| estimates[i] || heuristic }
|
|
439
|
+
rescue StandardError
|
|
440
|
+
Array.new(parts.size, nil)
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
# Equal-split heuristic: total_docs / partition_count.
|
|
418
444
|
# @param partition_count [Integer]
|
|
419
445
|
# @return [Integer, nil]
|
|
420
|
-
def
|
|
421
|
-
total = SearchEngine::Indexer::BulkImport.
|
|
446
|
+
def __se_heuristic_docs_estimate(partition_count)
|
|
447
|
+
total = SearchEngine::Indexer::BulkImport.estimate_total_docs(self)
|
|
422
448
|
return nil unless total
|
|
423
449
|
|
|
424
450
|
(total.to_f / partition_count).ceil
|
|
@@ -426,7 +452,33 @@ module SearchEngine
|
|
|
426
452
|
nil
|
|
427
453
|
end
|
|
428
454
|
|
|
429
|
-
|
|
455
|
+
# Flag renderer slots that are still pending/in-progress after the pool
|
|
456
|
+
# timed out. Marks each as errored and appends to partition_errors so the
|
|
457
|
+
# caller raises and reports failure.
|
|
458
|
+
#
|
|
459
|
+
# @param renderer [SearchEngine::Logging::LiveRenderer]
|
|
460
|
+
# @param parts [Array] partition keys
|
|
461
|
+
# @param partition_errors [Array<StandardError>]
|
|
462
|
+
# @param mtx [Mutex]
|
|
463
|
+
# @param pool_timeout [Integer, nil] resolved timeout (seconds) from caller
|
|
464
|
+
# @return [void]
|
|
465
|
+
def __se_flag_incomplete_slots!(renderer, parts, partition_errors, mtx, pool_timeout)
|
|
466
|
+
effective_timeout = pool_timeout || SearchEngine::InterruptiblePool::GRACEFUL_TIMEOUT
|
|
467
|
+
|
|
468
|
+
parts.each_index do |idx|
|
|
469
|
+
slot = renderer[idx]
|
|
470
|
+
next if %i[done error].include?(slot.state)
|
|
471
|
+
|
|
472
|
+
error = SearchEngine::Errors::PartitionTimeout.new(
|
|
473
|
+
"partition #{parts[idx].inspect} was not processed — " \
|
|
474
|
+
"parallel pool timed out after #{effective_timeout}s"
|
|
475
|
+
)
|
|
476
|
+
slot.finish_error(error)
|
|
477
|
+
mtx.synchronize { partition_errors << error }
|
|
478
|
+
end
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
private :__se_per_partition_docs_estimates, :__se_heuristic_docs_estimate, :__se_flag_incomplete_slots!
|
|
430
482
|
end
|
|
431
483
|
|
|
432
484
|
class_methods do
|
data/lib/search_engine/config.rb
CHANGED
|
@@ -105,6 +105,9 @@ module SearchEngine
|
|
|
105
105
|
attr_accessor :queue_name
|
|
106
106
|
# @return [Boolean] whether to run model.count for progress bar estimates (default true)
|
|
107
107
|
attr_accessor :estimate_progress
|
|
108
|
+
# @return [Integer, nil] graceful-shutdown timeout (seconds) for the parallel
|
|
109
|
+
# partition pool. When nil, falls back to InterruptiblePool::GRACEFUL_TIMEOUT (3600s).
|
|
110
|
+
attr_accessor :pool_timeout
|
|
108
111
|
|
|
109
112
|
def initialize
|
|
110
113
|
@batch_size = 2000
|
|
@@ -114,6 +117,7 @@ module SearchEngine
|
|
|
114
117
|
@dispatch = active_job_available? ? :active_job : :inline
|
|
115
118
|
@queue_name = 'search_index'
|
|
116
119
|
@estimate_progress = true
|
|
120
|
+
@pool_timeout = nil
|
|
117
121
|
end
|
|
118
122
|
|
|
119
123
|
private
|
data/lib/search_engine/errors.rb
CHANGED
|
@@ -120,6 +120,13 @@ module SearchEngine
|
|
|
120
120
|
# the underlying HTTP client used by the official Typesense gem.
|
|
121
121
|
class Timeout < Error; end
|
|
122
122
|
|
|
123
|
+
# Raised when one or more partitions were not processed because the
|
|
124
|
+
# parallel pool's graceful-shutdown timeout was exceeded.
|
|
125
|
+
#
|
|
126
|
+
# The pool kills remaining queued/running tasks after the timeout,
|
|
127
|
+
# leaving those partitions unindexed.
|
|
128
|
+
class PartitionTimeout < Error; end
|
|
129
|
+
|
|
123
130
|
# Raised for network-level connectivity issues prior to receiving a response.
|
|
124
131
|
#
|
|
125
132
|
# Examples: DNS resolution failures, refused TCP connections, TLS handshake
|
|
data/lib/search_engine/indexer/bulk_import.rb
CHANGED
|
@@ -72,7 +72,7 @@ module SearchEngine
|
|
|
72
72
|
failed_total = 0
|
|
73
73
|
failed_batches_total = 0
|
|
74
74
|
batches_total = 0
|
|
75
|
-
|
|
75
|
+
source_batches_done = 0
|
|
76
76
|
started_at = monotonic_ms
|
|
77
77
|
|
|
78
78
|
docs_enum.each do |raw_batch|
|
|
@@ -95,11 +95,13 @@ module SearchEngine
|
|
|
95
95
|
batches << stats
|
|
96
96
|
validate_soft_batch_size!(batch_size, stats[:docs_count])
|
|
97
97
|
log_batch(stats, batches_total) if log_batches
|
|
98
|
-
on_batch&.call(
|
|
99
|
-
batches_done: batches_total, docs_total: docs_total,
|
|
100
|
-
success_total: success_total, failed_total: failed_total
|
|
101
|
-
)
|
|
102
98
|
end
|
|
99
|
+
|
|
100
|
+
source_batches_done += 1
|
|
101
|
+
on_batch&.call(
|
|
102
|
+
batches_done: source_batches_done, docs_total: docs_total,
|
|
103
|
+
success_total: success_total, failed_total: failed_total
|
|
104
|
+
)
|
|
103
105
|
end
|
|
104
106
|
|
|
105
107
|
# Calculate total duration as wall-clock time from start to finish (not sum of batch durations)
|
|
@@ -195,6 +197,7 @@ module SearchEngine
|
|
|
195
197
|
failed_total: 0,
|
|
196
198
|
failed_batches_total: 0,
|
|
197
199
|
batches_total: 0,
|
|
200
|
+
source_batches_done: 0,
|
|
198
201
|
idx_counter: -1,
|
|
199
202
|
started_at: monotonic_ms,
|
|
200
203
|
mtx: Mutex.new,
|
|
@@ -314,12 +317,11 @@ module SearchEngine
|
|
|
314
317
|
# @return [void]
|
|
315
318
|
def process_single_batch_parallel(raw_batch:, into:, action:, retry_policy:, batch_size:, log_batches:,
|
|
316
319
|
shared_state:)
|
|
317
|
-
# Each thread gets its own resources
|
|
318
320
|
thread_client = SearchEngine.client
|
|
319
321
|
thread_buffer = +''
|
|
320
322
|
thread_idx = shared_state[:mtx].synchronize { shared_state[:idx_counter] += 1 }
|
|
321
323
|
|
|
322
|
-
|
|
324
|
+
snapshot = begin
|
|
323
325
|
stats_list = import_batch_with_handling(
|
|
324
326
|
client: thread_client,
|
|
325
327
|
collection: into,
|
|
@@ -332,6 +334,8 @@ module SearchEngine
|
|
|
332
334
|
|
|
333
335
|
shared_state[:mtx].synchronize do
|
|
334
336
|
aggregate_stats(stats_list, shared_state, batch_size, log_batches)
|
|
337
|
+
shared_state[:source_batches_done] += 1
|
|
338
|
+
progress_snapshot(shared_state)
|
|
335
339
|
end
|
|
336
340
|
rescue StandardError => error
|
|
337
341
|
docs_count = begin
|
|
@@ -346,27 +350,26 @@ module SearchEngine
|
|
|
346
350
|
err_msg = " batch_index=#{thread_idx} → error=#{error.class}: #{error.message.to_s[0, 200]}"
|
|
347
351
|
warn(SearchEngine::Logging::Color.apply(err_msg, :red))
|
|
348
352
|
aggregate_stats([failure_stat], shared_state, batch_size, log_batches)
|
|
353
|
+
shared_state[:source_batches_done] += 1
|
|
354
|
+
progress_snapshot(shared_state)
|
|
349
355
|
end
|
|
350
356
|
end
|
|
351
357
|
|
|
352
|
-
|
|
353
|
-
snapshots&.each { |snap| on_batch&.call(**snap) }
|
|
358
|
+
shared_state[:on_batch]&.call(**snapshot) if snapshot
|
|
354
359
|
end
|
|
355
360
|
|
|
356
361
|
# Aggregate batch statistics thread-safely into shared state.
|
|
357
362
|
#
|
|
358
363
|
# Must be called within a mutex synchronization block. Updates counters,
|
|
359
364
|
# appends to batches array, validates batch size, and optionally logs.
|
|
360
|
-
# Returns counter snapshots (one per stats entry) for firing callbacks
|
|
361
|
-
# outside the lock.
|
|
362
365
|
#
|
|
363
366
|
# @param stats_list [Array<Hash>] array of stats hashes from batch processing
|
|
364
367
|
# @param shared_state [Hash] shared state hash to update (must be mutex-protected)
|
|
365
368
|
# @param batch_size [Integer, nil] soft guard for logging when exceeded
|
|
366
369
|
# @param log_batches [Boolean] whether to log each batch as it completes
|
|
367
|
-
# @return [
|
|
370
|
+
# @return [void]
|
|
368
371
|
def aggregate_stats(stats_list, shared_state, batch_size, log_batches)
|
|
369
|
-
stats_list.
|
|
372
|
+
stats_list.each do |stats|
|
|
370
373
|
shared_state[:docs_total] += stats[:docs_count].to_i
|
|
371
374
|
shared_state[:success_total] += stats[:success_count].to_i
|
|
372
375
|
shared_state[:failed_total] += stats[:failure_count].to_i
|
|
@@ -375,13 +378,23 @@ module SearchEngine
|
|
|
375
378
|
shared_state[:batches] << stats
|
|
376
379
|
validate_soft_batch_size!(batch_size, stats[:docs_count])
|
|
377
380
|
log_batch(stats, shared_state[:batches_total]) if log_batches
|
|
378
|
-
{
|
|
379
|
-
batches_done: shared_state[:batches_total], docs_total: shared_state[:docs_total],
|
|
380
|
-
success_total: shared_state[:success_total], failed_total: shared_state[:failed_total]
|
|
381
|
-
}
|
|
382
381
|
end
|
|
383
382
|
end
|
|
384
383
|
|
|
384
|
+
# Build a progress snapshot from shared state for the on_batch callback.
|
|
385
|
+
# Must be called within a mutex synchronization block.
|
|
386
|
+
#
|
|
387
|
+
# @param shared_state [Hash] shared state hash (must be mutex-protected)
|
|
388
|
+
# @return [Hash] progress counters keyed by :batches_done, :docs_total, etc.
|
|
389
|
+
def progress_snapshot(shared_state)
|
|
390
|
+
{
|
|
391
|
+
batches_done: shared_state[:source_batches_done],
|
|
392
|
+
docs_total: shared_state[:docs_total],
|
|
393
|
+
success_total: shared_state[:success_total],
|
|
394
|
+
failed_total: shared_state[:failed_total]
|
|
395
|
+
}
|
|
396
|
+
end
|
|
397
|
+
|
|
385
398
|
# Build a Summary object from aggregated shared state.
|
|
386
399
|
#
|
|
387
400
|
# Calculates total duration and constructs a Summary with all aggregated
|
|
@@ -426,14 +439,14 @@ module SearchEngine
|
|
|
426
439
|
enum.is_a?(Enumerator) ? enum : enum.each
|
|
427
440
|
end
|
|
428
441
|
|
|
429
|
-
# Estimate total
|
|
442
|
+
# Estimate total source record count for the given model class.
|
|
430
443
|
#
|
|
431
|
-
#
|
|
432
|
-
#
|
|
444
|
+
# Shared foundation for batch and doc estimates. Performs a model.count
|
|
445
|
+
# with a soft timeout to avoid blocking on slow tables.
|
|
433
446
|
#
|
|
434
447
|
# @param klass [Class] a {SearchEngine::Base} subclass
|
|
435
|
-
# @return [Integer, nil]
|
|
436
|
-
def
|
|
448
|
+
# @return [Integer, nil] record count or nil if not estimable
|
|
449
|
+
def estimate_source_record_count(klass)
|
|
437
450
|
return nil if SearchEngine.config.indexer.estimate_progress == false
|
|
438
451
|
return nil unless klass.is_a?(Class)
|
|
439
452
|
|
|
@@ -447,22 +460,39 @@ module SearchEngine
|
|
|
447
460
|
model = source_def.dig(:options, :model)
|
|
448
461
|
return nil unless model.respond_to?(:count)
|
|
449
462
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
463
|
+
count_with_timeout(model, 10)
|
|
464
|
+
rescue StandardError
|
|
465
|
+
nil
|
|
466
|
+
end
|
|
454
467
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
468
|
+
# Estimate total batch count for progress logging.
|
|
469
|
+
#
|
|
470
|
+
# @param klass [Class] a {SearchEngine::Base} subclass
|
|
471
|
+
# @return [Integer, nil] estimated total batch count or nil if not estimable
|
|
472
|
+
def estimate_total_batches(klass)
|
|
473
|
+
total_records = estimate_source_record_count(klass)
|
|
474
|
+
return nil unless total_records&.positive?
|
|
458
475
|
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
476
|
+
batch_size = batch_size_for_klass(klass)
|
|
477
|
+
return nil unless batch_size&.positive?
|
|
478
|
+
|
|
479
|
+
(total_records.to_f / batch_size).ceil
|
|
480
|
+
rescue StandardError
|
|
481
|
+
nil
|
|
463
482
|
end
|
|
464
483
|
|
|
465
|
-
|
|
484
|
+
# Estimate total document count for doc-based progress tracking.
|
|
485
|
+
#
|
|
486
|
+
# @param klass [Class] a {SearchEngine::Base} subclass
|
|
487
|
+
# @return [Integer, nil] estimated total docs or nil if not estimable
|
|
488
|
+
def estimate_total_docs(klass)
|
|
489
|
+
count = estimate_source_record_count(klass)
|
|
490
|
+
count&.positive? ? count : nil
|
|
491
|
+
rescue StandardError
|
|
492
|
+
nil
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
public :estimate_total_batches, :estimate_total_docs
|
|
466
496
|
|
|
467
497
|
# Thread-based soft timeout for model.count, avoiding Timeout.timeout
|
|
468
498
|
# which can corrupt ActiveRecord connection state.
|
|
@@ -497,6 +527,14 @@ module SearchEngine
|
|
|
497
527
|
klass.instance_variable_get(:@__mapper_dsl__)
|
|
498
528
|
end
|
|
499
529
|
|
|
530
|
+
def batch_size_for_klass(klass)
|
|
531
|
+
dsl = mapper_dsl_for_klass(klass)
|
|
532
|
+
source_def = dsl&.dig(:source)
|
|
533
|
+
batch_size = source_def&.dig(:options, :batch_size)
|
|
534
|
+
batch_size ||= SearchEngine.config.sources.active_record.batch_size
|
|
535
|
+
batch_size.to_i
|
|
536
|
+
end
|
|
537
|
+
|
|
500
538
|
# Import a single batch with error handling and recursive 413 splitting.
|
|
501
539
|
#
|
|
502
540
|
# Public wrapper that delegates to the internal method with batch_index set to nil,
|
|
data/lib/search_engine/interruptible_pool.rb
CHANGED
|
@@ -11,19 +11,28 @@ module SearchEngine
|
|
|
11
11
|
|
|
12
12
|
# Execute a block that posts work to a thread pool, then wait for completion.
|
|
13
13
|
#
|
|
14
|
-
# Normal path: graceful shutdown → long wait.
|
|
14
|
+
# Normal path: graceful shutdown → long wait → :ok.
|
|
15
|
+
# Timeout: graceful wait expires → pool.kill → :timed_out.
|
|
15
16
|
# Interrupt: on_interrupt callback → pool.kill → short wait → re-raise.
|
|
16
17
|
# Other error: ensure kill → short wait.
|
|
17
18
|
#
|
|
18
19
|
# @param pool [Concurrent::FixedThreadPool]
|
|
19
20
|
# @param on_interrupt [Proc, nil] callback invoked before killing the pool
|
|
21
|
+
# @param timeout [Integer, nil] override for graceful-shutdown timeout (seconds);
|
|
22
|
+
# defaults to {GRACEFUL_TIMEOUT} when nil
|
|
20
23
|
# @yield block that posts work to the pool
|
|
21
|
-
# @return [
|
|
22
|
-
def self.run(pool, on_interrupt: nil)
|
|
24
|
+
# @return [Symbol] :ok on clean completion, :timed_out when the graceful timeout was exceeded
|
|
25
|
+
def self.run(pool, on_interrupt: nil, timeout: nil)
|
|
23
26
|
yield
|
|
24
27
|
pool.shutdown
|
|
25
|
-
|
|
26
|
-
pool.wait_for_termination(
|
|
28
|
+
effective_timeout = timeout || GRACEFUL_TIMEOUT
|
|
29
|
+
completed = pool.wait_for_termination(effective_timeout)
|
|
30
|
+
unless completed
|
|
31
|
+
pool.kill
|
|
32
|
+
pool.wait_for_termination(CLEANUP_TIMEOUT)
|
|
33
|
+
return :timed_out
|
|
34
|
+
end
|
|
35
|
+
:ok
|
|
27
36
|
rescue Interrupt
|
|
28
37
|
on_interrupt&.call
|
|
29
38
|
pool.kill
|
|
data/lib/search_engine/logging/live_renderer.rb
CHANGED
|
@@ -16,7 +16,7 @@ module SearchEngine
|
|
|
16
16
|
# using {PartitionProgress.line}, preserving CI/pipe compatibility.
|
|
17
17
|
#
|
|
18
18
|
# @example
|
|
19
|
-
# renderer = LiveRenderer.new(labels: parts.map(&:inspect),
|
|
19
|
+
# renderer = LiveRenderer.new(labels: parts.map(&:inspect), per_partition_docs_estimate: 5000)
|
|
20
20
|
# renderer.start
|
|
21
21
|
# parts.each_with_index do |part, i|
|
|
22
22
|
# renderer[i].start
|
|
@@ -34,9 +34,12 @@ module SearchEngine
|
|
|
34
34
|
|
|
35
35
|
# @param labels [Array<String>] display label for each slot (partition key)
|
|
36
36
|
# @param partitions [Array, nil] raw partition values for non-TTY output (defaults to labels)
|
|
37
|
-
# @param
|
|
37
|
+
# @param per_partition_docs_estimates [Array<Integer, nil>, nil] per-slot doc estimates (takes priority)
|
|
38
|
+
# @param per_partition_docs_estimate [Integer, nil] uniform doc estimate for all slots (fallback)
|
|
39
|
+
# @param per_partition_estimate [Integer, nil] deprecated batch-based estimate (last resort fallback)
|
|
38
40
|
# @param io [IO] output stream (defaults to $stdout)
|
|
39
|
-
def initialize(labels:, partitions: nil,
|
|
41
|
+
def initialize(labels:, partitions: nil, per_partition_docs_estimates: nil,
|
|
42
|
+
per_partition_docs_estimate: nil, per_partition_estimate: nil, io: $stdout)
|
|
40
43
|
@io = io
|
|
41
44
|
@tty = Color.enabled?
|
|
42
45
|
@mutex = Mutex.new
|
|
@@ -46,8 +49,13 @@ module SearchEngine
|
|
|
46
49
|
@rendered_once = false
|
|
47
50
|
nontty_cb = @tty ? nil : method(:flush_nontty_slot)
|
|
48
51
|
raw = partitions || labels
|
|
52
|
+
per_slot = per_partition_docs_estimates || []
|
|
53
|
+
global_est = per_partition_docs_estimate || per_partition_estimate
|
|
49
54
|
@slots = labels.each_with_index.map do |label, idx|
|
|
50
|
-
Slot.new(
|
|
55
|
+
Slot.new(
|
|
56
|
+
label: label, partition: raw[idx], docs_estimate: per_slot[idx] || global_est,
|
|
57
|
+
on_done: nontty_cb
|
|
58
|
+
)
|
|
51
59
|
end
|
|
52
60
|
@viewport = resolve_viewport
|
|
53
61
|
end
|
|
@@ -238,12 +246,12 @@ module SearchEngine
|
|
|
238
246
|
|
|
239
247
|
# @param label [String] partition display label (e.g. partition key inspect)
|
|
240
248
|
# @param partition [Object, nil] raw partition value for non-TTY output (defaults to label)
|
|
241
|
-
# @param
|
|
249
|
+
# @param docs_estimate [Integer, nil] estimated total docs for doc-based progress bar
|
|
242
250
|
# @param on_done [Proc, nil] callback invoked after finish/finish_error (non-TTY flush)
|
|
243
|
-
def initialize(label:, partition: nil,
|
|
251
|
+
def initialize(label:, partition: nil, docs_estimate: nil, on_done: nil)
|
|
244
252
|
@label = label
|
|
245
253
|
@partition = partition.nil? ? label : partition
|
|
246
|
-
@
|
|
254
|
+
@docs_estimate = docs_estimate
|
|
247
255
|
@on_done = on_done
|
|
248
256
|
@state = :pending
|
|
249
257
|
@batches_done = 0
|
|
@@ -387,15 +395,16 @@ module SearchEngine
|
|
|
387
395
|
end
|
|
388
396
|
|
|
389
397
|
def build_progress_part
|
|
390
|
-
if @
|
|
391
|
-
ratio = @
|
|
398
|
+
if @docs_estimate&.positive? && @docs_total.positive?
|
|
399
|
+
ratio = @docs_total.to_f / @docs_estimate
|
|
392
400
|
pct = [100, (ratio * 100).round].min
|
|
393
401
|
filled = [(ratio * BAR_WIDTH).round, BAR_WIDTH].min
|
|
394
402
|
empty = BAR_WIDTH - filled
|
|
395
403
|
bar = "\u2588" * filled + "\u2591" * empty
|
|
396
|
-
"#{
|
|
404
|
+
batch_info = @batches_done.positive? ? "#{@batches_done} batches " : ''
|
|
405
|
+
"#{bar} #{pct}% #{batch_info}(#{@docs_total}/#{@docs_estimate} docs)"
|
|
397
406
|
elsif @batches_done.positive?
|
|
398
|
-
"#{@batches_done} batches
|
|
407
|
+
"#{@batches_done} batches (#{@docs_total} docs)"
|
|
399
408
|
else
|
|
400
409
|
''
|
|
401
410
|
end
|
|
data/lib/search_engine/partitioner.rb
CHANGED
|
@@ -76,8 +76,50 @@ module SearchEngine
|
|
|
76
76
|
end
|
|
77
77
|
end
|
|
78
78
|
|
|
79
|
+
# Auto-detect doc count for a partition from the partition_fetch result.
|
|
80
|
+
# Counts the same scope that partition_fetch will enumerate — single source of truth.
|
|
81
|
+
#
|
|
82
|
+
# @param partition [Object] partition key
|
|
83
|
+
# @return [Integer, nil] doc count or nil when unavailable
|
|
84
|
+
def partition_doc_count(partition)
|
|
85
|
+
count = auto_count_from_fetch(partition)
|
|
86
|
+
count.is_a?(Integer) && count.positive? ? count : nil
|
|
87
|
+
rescue StandardError
|
|
88
|
+
nil
|
|
89
|
+
end
|
|
90
|
+
|
|
79
91
|
private
|
|
80
92
|
|
|
93
|
+
# Call partition_fetch_proc and try to extract a countable relation.
|
|
94
|
+
# Calling the proc only builds a lazy AR object — no batch queries fire.
|
|
95
|
+
# The single COUNT query is the only DB cost.
|
|
96
|
+
def auto_count_from_fetch(partition)
|
|
97
|
+
return nil unless @partition_fetch_proc
|
|
98
|
+
|
|
99
|
+
result = @partition_fetch_proc.call(partition)
|
|
100
|
+
extract_count(result)
|
|
101
|
+
rescue StandardError
|
|
102
|
+
nil
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def extract_count(result)
|
|
106
|
+
relation = countable_relation_from(result)
|
|
107
|
+
relation&.count
|
|
108
|
+
rescue StandardError
|
|
109
|
+
nil
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def countable_relation_from(result)
|
|
113
|
+
if defined?(ActiveRecord::Batches::BatchEnumerator) &&
|
|
114
|
+
result.is_a?(ActiveRecord::Batches::BatchEnumerator)
|
|
115
|
+
return result.instance_variable_get(:@relation)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
return result if defined?(ActiveRecord::Relation) && result.is_a?(ActiveRecord::Relation)
|
|
119
|
+
|
|
120
|
+
nil
|
|
121
|
+
end
|
|
122
|
+
|
|
81
123
|
def validate_hook_arity!(proc_obj, name:)
|
|
82
124
|
ar = proc_obj.arity
|
|
83
125
|
return if ar == 1 || ar.negative?
|