search-engine-for-typesense 30.1.6.6 → 30.1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 685951054b6b65954ff6517a08176c8bec124c197980dec5fcfc0ebbd19f6691
4
- data.tar.gz: 12e290d5e45325cb031b06f87d158b33300fdc8682d3a417ba7191940e35211e
3
+ metadata.gz: ed93c9aa019a0f633ad5c7cc47ec08ea303f5e51491cfc3130e677271b000e1f
4
+ data.tar.gz: 6446746c790fc6b1c0fa084a5ba9bebd555f5e298738f7b8800fd2717257659f
5
5
  SHA512:
6
- metadata.gz: f7e7e8ab135067d43fd2cffed2dfc0fdeac7f3861c267dc47d5131c2600a7e5adee5b9267d5b326517cfe32c526a0e67d13d614312183c59ff0e496def671990
7
- data.tar.gz: dcb0366fa3ddd2dbe03bc50dec6032093761eba588ae2e88116e2b939cbc8f62ea442c0857448b3d69d20cdf7eccc98b0f5d9d1ee2b2d6ce593453539d889b97
6
+ metadata.gz: 99aa8f0be6a9b43d0de16e402fe22cb60614861c73f26f122f0c155e3d35de33053a7822170c440bb407bcac73d7a5ba42f57ba0eda825a49fa210e369ba7ea3
7
+ data.tar.gz: a6526cc8752cdc5a8f61c03a55519e5cf18da1ac538e1028a881b8e5a2d147d5c44cb26b3f4fbbb46907589985564baa895166320b3c1eee230d6557f1890976
@@ -133,22 +133,37 @@ module SearchEngine
133
133
  result = nil
134
134
  step = SearchEngine::Logging::StepLine.new('Indexing')
135
135
  if applied && indexed_inside_apply
136
- step.skip('performed during schema apply')
137
136
  result = indexed_inside_apply if indexed_inside_apply.is_a?(Hash)
137
+ if __se_result_status(result) == :ok
138
+ step.skip('performed during schema apply')
139
+ else
140
+ __se_finish_indexation_step(step, result)
141
+ end
138
142
  else
139
143
  step.update('indexing')
140
144
  step.yield_line!
141
145
  result = __se_index_partitions!(into: nil)
142
- step.finish('done')
146
+ __se_finish_indexation_step(step, result)
143
147
  end
144
148
 
145
- cascade_ok = result.is_a?(Hash) ? result[:status] == :ok : false
146
- __se_cascade_after_indexation!(context: :full) if cascade_ok
149
+ __se_cascade_after_indexation!(context: :full) if __se_result_status(result) == :ok
147
150
  result
148
151
  ensure
149
152
  step&.close
150
153
  end
151
154
 
155
+ def __se_result_status(result)
156
+ result.is_a?(Hash) ? result[:status] : :ok
157
+ end
158
+
159
+ def __se_finish_indexation_step(step, result)
160
+ case __se_result_status(result)
161
+ when :ok then step.finish('done')
162
+ when :partial then step.finish_warn('partial')
163
+ else step.finish_warn('failed')
164
+ end
165
+ end
166
+
152
167
  def __se_full_retention(applied, logical, client)
153
168
  step = SearchEngine::Logging::StepLine.new('Retention')
154
169
  if applied
@@ -270,9 +270,9 @@ module SearchEngine
270
270
  if compiled
271
271
  parts = Array(compiled.partitions)
272
272
  max_p = compiled.max_parallel.to_i
273
- return __se_index_partitions_seq!(parts, into) if max_p <= 1 || parts.size <= 1
273
+ return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
274
274
 
275
- __se_index_partitions_parallel!(parts, into, max_p)
275
+ __se_index_partitions_parallel!(parts, into, max_p, compiled)
276
276
  else
277
277
  summary = SearchEngine::Indexer.rebuild_partition!(self, partition: nil, into: into)
278
278
  __se_build_index_result([summary])
@@ -314,10 +314,10 @@ module SearchEngine
314
314
 
315
315
  class_methods do
316
316
  # Sequential processing of partition list with live progress rendering.
317
- def __se_index_partitions_seq!(parts, into)
318
- estimate = __se_per_partition_estimate(parts.size)
317
+ def __se_index_partitions_seq!(parts, into, compiled)
318
+ docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
319
319
  renderer = SearchEngine::Logging::LiveRenderer.new(
320
- labels: parts.map(&:inspect), partitions: parts, per_partition_estimate: estimate
320
+ labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
321
321
  )
322
322
  renderer.start
323
323
 
@@ -351,12 +351,12 @@ module SearchEngine
351
351
 
352
352
  class_methods do
353
353
  # Parallel processing via bounded thread pool with live progress rendering.
354
- def __se_index_partitions_parallel!(parts, into, max_p)
354
+ def __se_index_partitions_parallel!(parts, into, max_p, compiled)
355
355
  require 'concurrent-ruby'
356
356
 
357
- estimate = __se_per_partition_estimate(parts.size)
357
+ docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
358
358
  renderer = SearchEngine::Logging::LiveRenderer.new(
359
- labels: parts.map(&:inspect), partitions: parts, per_partition_estimate: estimate
359
+ labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
360
360
  )
361
361
  renderer.start
362
362
 
@@ -373,7 +373,15 @@ module SearchEngine
373
373
  warn("\n Interrupted \u2014 stopping parallel partition workers\u2026")
374
374
  end
375
375
 
376
- SearchEngine::InterruptiblePool.run(pool, on_interrupt: on_interrupt) do
376
+ pool_timeout = begin
377
+ SearchEngine.config.indexer.pool_timeout
378
+ rescue StandardError
379
+ nil
380
+ end
381
+
382
+ pool_status = SearchEngine::InterruptiblePool.run(
383
+ pool, on_interrupt: on_interrupt, timeout: pool_timeout
384
+ ) do
377
385
  parts.each_with_index do |part, idx|
378
386
  break if cancelled.true?
379
387
 
@@ -400,6 +408,8 @@ module SearchEngine
400
408
  end
401
409
  end
402
410
 
411
+ __se_flag_incomplete_slots!(renderer, parts, partition_errors, mtx, pool_timeout) if pool_status == :timed_out
412
+
403
413
  begin
404
414
  renderer.stop
405
415
  rescue StandardError
@@ -414,11 +424,27 @@ module SearchEngine
414
424
  end
415
425
 
416
426
  class_methods do
417
- # Heuristic per-partition batch estimate for progress bars.
427
+ # Build an array of per-partition doc estimates.
428
+ # Tries per-partition counting (auto-detected from AR fetch result),
429
+ # fills any nils with the equal-split heuristic fallback.
430
+ #
431
+ # @param parts [Array] partition keys
432
+ # @param compiled [SearchEngine::Partitioner::Compiled, nil]
433
+ # @return [Array<Integer, nil>]
434
+ def __se_per_partition_docs_estimates(parts, compiled)
435
+ estimates = parts.map { |part| compiled&.partition_doc_count(part) }
436
+
437
+ heuristic = __se_heuristic_docs_estimate(parts.size)
438
+ parts.each_index.map { |i| estimates[i] || heuristic }
439
+ rescue StandardError
440
+ Array.new(parts.size, nil)
441
+ end
442
+
443
+ # Equal-split heuristic: total_docs / partition_count.
418
444
  # @param partition_count [Integer]
419
445
  # @return [Integer, nil]
420
- def __se_per_partition_estimate(partition_count)
421
- total = SearchEngine::Indexer::BulkImport.estimate_total_batches(self)
446
+ def __se_heuristic_docs_estimate(partition_count)
447
+ total = SearchEngine::Indexer::BulkImport.estimate_total_docs(self)
422
448
  return nil unless total
423
449
 
424
450
  (total.to_f / partition_count).ceil
@@ -426,7 +452,33 @@ module SearchEngine
426
452
  nil
427
453
  end
428
454
 
429
- private :__se_per_partition_estimate
455
+ # Flag renderer slots that are still pending/in-progress after the pool
456
+ # timed out. Marks each as errored and appends to partition_errors so the
457
+ # caller raises and reports failure.
458
+ #
459
+ # @param renderer [SearchEngine::Logging::LiveRenderer]
460
+ # @param parts [Array] partition keys
461
+ # @param partition_errors [Array<StandardError>]
462
+ # @param mtx [Mutex]
463
+ # @param pool_timeout [Integer, nil] resolved timeout (seconds) from caller
464
+ # @return [void]
465
+ def __se_flag_incomplete_slots!(renderer, parts, partition_errors, mtx, pool_timeout)
466
+ effective_timeout = pool_timeout || SearchEngine::InterruptiblePool::GRACEFUL_TIMEOUT
467
+
468
+ parts.each_index do |idx|
469
+ slot = renderer[idx]
470
+ next if %i[done error].include?(slot.state)
471
+
472
+ error = SearchEngine::Errors::PartitionTimeout.new(
473
+ "partition #{parts[idx].inspect} was not processed — " \
474
+ "parallel pool timed out after #{effective_timeout}s"
475
+ )
476
+ slot.finish_error(error)
477
+ mtx.synchronize { partition_errors << error }
478
+ end
479
+ end
480
+
481
+ private :__se_per_partition_docs_estimates, :__se_heuristic_docs_estimate, :__se_flag_incomplete_slots!
430
482
  end
431
483
 
432
484
  class_methods do
@@ -105,6 +105,9 @@ module SearchEngine
105
105
  attr_accessor :queue_name
106
106
  # @return [Boolean] whether to run model.count for progress bar estimates (default true)
107
107
  attr_accessor :estimate_progress
108
+ # @return [Integer, nil] graceful-shutdown timeout (seconds) for the parallel
109
+ # partition pool. When nil, falls back to InterruptiblePool::GRACEFUL_TIMEOUT (3600s).
110
+ attr_accessor :pool_timeout
108
111
 
109
112
  def initialize
110
113
  @batch_size = 2000
@@ -114,6 +117,7 @@ module SearchEngine
114
117
  @dispatch = active_job_available? ? :active_job : :inline
115
118
  @queue_name = 'search_index'
116
119
  @estimate_progress = true
120
+ @pool_timeout = nil
117
121
  end
118
122
 
119
123
  private
@@ -120,6 +120,13 @@ module SearchEngine
120
120
  # the underlying HTTP client used by the official Typesense gem.
121
121
  class Timeout < Error; end
122
122
 
123
+ # Raised when one or more partitions were not processed because the
124
+ # parallel pool's graceful-shutdown timeout was exceeded.
125
+ #
126
+ # The pool kills remaining queued/running tasks after the timeout,
127
+ # leaving those partitions unindexed.
128
+ class PartitionTimeout < Error; end
129
+
123
130
  # Raised for network-level connectivity issues prior to receiving a response.
124
131
  #
125
132
  # Examples: DNS resolution failures, refused TCP connections, TLS handshake
@@ -72,7 +72,7 @@ module SearchEngine
72
72
  failed_total = 0
73
73
  failed_batches_total = 0
74
74
  batches_total = 0
75
- # Capture start time before processing any batches to measure total wall-clock duration
75
+ source_batches_done = 0
76
76
  started_at = monotonic_ms
77
77
 
78
78
  docs_enum.each do |raw_batch|
@@ -95,11 +95,13 @@ module SearchEngine
95
95
  batches << stats
96
96
  validate_soft_batch_size!(batch_size, stats[:docs_count])
97
97
  log_batch(stats, batches_total) if log_batches
98
- on_batch&.call(
99
- batches_done: batches_total, docs_total: docs_total,
100
- success_total: success_total, failed_total: failed_total
101
- )
102
98
  end
99
+
100
+ source_batches_done += 1
101
+ on_batch&.call(
102
+ batches_done: source_batches_done, docs_total: docs_total,
103
+ success_total: success_total, failed_total: failed_total
104
+ )
103
105
  end
104
106
 
105
107
  # Calculate total duration as wall-clock time from start to finish (not sum of batch durations)
@@ -195,6 +197,7 @@ module SearchEngine
195
197
  failed_total: 0,
196
198
  failed_batches_total: 0,
197
199
  batches_total: 0,
200
+ source_batches_done: 0,
198
201
  idx_counter: -1,
199
202
  started_at: monotonic_ms,
200
203
  mtx: Mutex.new,
@@ -314,12 +317,11 @@ module SearchEngine
314
317
  # @return [void]
315
318
  def process_single_batch_parallel(raw_batch:, into:, action:, retry_policy:, batch_size:, log_batches:,
316
319
  shared_state:)
317
- # Each thread gets its own resources
318
320
  thread_client = SearchEngine.client
319
321
  thread_buffer = +''
320
322
  thread_idx = shared_state[:mtx].synchronize { shared_state[:idx_counter] += 1 }
321
323
 
322
- snapshots = begin
324
+ snapshot = begin
323
325
  stats_list = import_batch_with_handling(
324
326
  client: thread_client,
325
327
  collection: into,
@@ -332,6 +334,8 @@ module SearchEngine
332
334
 
333
335
  shared_state[:mtx].synchronize do
334
336
  aggregate_stats(stats_list, shared_state, batch_size, log_batches)
337
+ shared_state[:source_batches_done] += 1
338
+ progress_snapshot(shared_state)
335
339
  end
336
340
  rescue StandardError => error
337
341
  docs_count = begin
@@ -346,27 +350,26 @@ module SearchEngine
346
350
  err_msg = " batch_index=#{thread_idx} → error=#{error.class}: #{error.message.to_s[0, 200]}"
347
351
  warn(SearchEngine::Logging::Color.apply(err_msg, :red))
348
352
  aggregate_stats([failure_stat], shared_state, batch_size, log_batches)
353
+ shared_state[:source_batches_done] += 1
354
+ progress_snapshot(shared_state)
349
355
  end
350
356
  end
351
357
 
352
- on_batch = shared_state[:on_batch]
353
- snapshots&.each { |snap| on_batch&.call(**snap) }
358
+ shared_state[:on_batch]&.call(**snapshot) if snapshot
354
359
  end
355
360
 
356
361
  # Aggregate batch statistics thread-safely into shared state.
357
362
  #
358
363
  # Must be called within a mutex synchronization block. Updates counters,
359
364
  # appends to batches array, validates batch size, and optionally logs.
360
- # Returns counter snapshots (one per stats entry) for firing callbacks
361
- # outside the lock.
362
365
  #
363
366
  # @param stats_list [Array<Hash>] array of stats hashes from batch processing
364
367
  # @param shared_state [Hash] shared state hash to update (must be mutex-protected)
365
368
  # @param batch_size [Integer, nil] soft guard for logging when exceeded
366
369
  # @param log_batches [Boolean] whether to log each batch as it completes
367
- # @return [Array<Hash>] counter snapshots suitable for on_batch callbacks
370
+ # @return [void]
368
371
  def aggregate_stats(stats_list, shared_state, batch_size, log_batches)
369
- stats_list.map do |stats|
372
+ stats_list.each do |stats|
370
373
  shared_state[:docs_total] += stats[:docs_count].to_i
371
374
  shared_state[:success_total] += stats[:success_count].to_i
372
375
  shared_state[:failed_total] += stats[:failure_count].to_i
@@ -375,13 +378,23 @@ module SearchEngine
375
378
  shared_state[:batches] << stats
376
379
  validate_soft_batch_size!(batch_size, stats[:docs_count])
377
380
  log_batch(stats, shared_state[:batches_total]) if log_batches
378
- {
379
- batches_done: shared_state[:batches_total], docs_total: shared_state[:docs_total],
380
- success_total: shared_state[:success_total], failed_total: shared_state[:failed_total]
381
- }
382
381
  end
383
382
  end
384
383
 
384
+ # Build a progress snapshot from shared state for the on_batch callback.
385
+ # Must be called within a mutex synchronization block.
386
+ #
387
+ # @param shared_state [Hash] shared state hash (must be mutex-protected)
388
+ # @return [Hash] progress counters keyed by :batches_done, :docs_total, etc.
389
+ def progress_snapshot(shared_state)
390
+ {
391
+ batches_done: shared_state[:source_batches_done],
392
+ docs_total: shared_state[:docs_total],
393
+ success_total: shared_state[:success_total],
394
+ failed_total: shared_state[:failed_total]
395
+ }
396
+ end
397
+
385
398
  # Build a Summary object from aggregated shared state.
386
399
  #
387
400
  # Calculates total duration and constructs a Summary with all aggregated
@@ -426,14 +439,14 @@ module SearchEngine
426
439
  enum.is_a?(Enumerator) ? enum : enum.each
427
440
  end
428
441
 
429
- # Estimate total batch count for progress logging.
442
+ # Estimate total source record count for the given model class.
430
443
  #
431
- # Attempts to estimate batch count for ActiveRecord sources by counting records
432
- # and dividing by batch_size. Returns nil for other source types or when estimation fails.
444
+ # Shared foundation for batch and doc estimates. Performs a model.count
445
+ # with a soft timeout to avoid blocking on slow tables.
433
446
  #
434
447
  # @param klass [Class] a {SearchEngine::Base} subclass
435
- # @return [Integer, nil] estimated total batch count or nil if not estimable
436
- def estimate_total_batches(klass)
448
+ # @return [Integer, nil] record count or nil if not estimable
449
+ def estimate_source_record_count(klass)
437
450
  return nil if SearchEngine.config.indexer.estimate_progress == false
438
451
  return nil unless klass.is_a?(Class)
439
452
 
@@ -447,22 +460,39 @@ module SearchEngine
447
460
  model = source_def.dig(:options, :model)
448
461
  return nil unless model.respond_to?(:count)
449
462
 
450
- batch_size = source_def.dig(:options, :batch_size)
451
- batch_size ||= SearchEngine.config.sources.active_record.batch_size
452
- batch_size = batch_size.to_i
453
- return nil unless batch_size.positive?
463
+ count_with_timeout(model, 10)
464
+ rescue StandardError
465
+ nil
466
+ end
454
467
 
455
- begin
456
- total_records = count_with_timeout(model, 10)
457
- return nil unless total_records&.positive?
468
+ # Estimate total batch count for progress logging.
469
+ #
470
+ # @param klass [Class] a {SearchEngine::Base} subclass
471
+ # @return [Integer, nil] estimated total batch count or nil if not estimable
472
+ def estimate_total_batches(klass)
473
+ total_records = estimate_source_record_count(klass)
474
+ return nil unless total_records&.positive?
458
475
 
459
- (total_records.to_f / batch_size).ceil
460
- rescue StandardError
461
- nil
462
- end
476
+ batch_size = batch_size_for_klass(klass)
477
+ return nil unless batch_size&.positive?
478
+
479
+ (total_records.to_f / batch_size).ceil
480
+ rescue StandardError
481
+ nil
463
482
  end
464
483
 
465
- public :estimate_total_batches
484
+ # Estimate total document count for doc-based progress tracking.
485
+ #
486
+ # @param klass [Class] a {SearchEngine::Base} subclass
487
+ # @return [Integer, nil] estimated total docs or nil if not estimable
488
+ def estimate_total_docs(klass)
489
+ count = estimate_source_record_count(klass)
490
+ count&.positive? ? count : nil
491
+ rescue StandardError
492
+ nil
493
+ end
494
+
495
+ public :estimate_total_batches, :estimate_total_docs
466
496
 
467
497
  # Thread-based soft timeout for model.count, avoiding Timeout.timeout
468
498
  # which can corrupt ActiveRecord connection state.
@@ -497,6 +527,14 @@ module SearchEngine
497
527
  klass.instance_variable_get(:@__mapper_dsl__)
498
528
  end
499
529
 
530
+ def batch_size_for_klass(klass)
531
+ dsl = mapper_dsl_for_klass(klass)
532
+ source_def = dsl&.dig(:source)
533
+ batch_size = source_def&.dig(:options, :batch_size)
534
+ batch_size ||= SearchEngine.config.sources.active_record.batch_size
535
+ batch_size.to_i
536
+ end
537
+
500
538
  # Import a single batch with error handling and recursive 413 splitting.
501
539
  #
502
540
  # Public wrapper that delegates to the internal method with batch_index set to nil,
@@ -11,19 +11,28 @@ module SearchEngine
11
11
 
12
12
  # Execute a block that posts work to a thread pool, then wait for completion.
13
13
  #
14
- # Normal path: graceful shutdown → long wait.
14
+ # Normal path: graceful shutdown → long wait → :ok.
15
+ # Timeout: graceful wait expires → pool.kill → :timed_out.
15
16
  # Interrupt: on_interrupt callback → pool.kill → short wait → re-raise.
16
17
  # Other error: ensure kill → short wait.
17
18
  #
18
19
  # @param pool [Concurrent::FixedThreadPool]
19
20
  # @param on_interrupt [Proc, nil] callback invoked before killing the pool
21
+ # @param timeout [Integer, nil] override for graceful-shutdown timeout (seconds);
22
+ # defaults to {GRACEFUL_TIMEOUT} when nil
20
23
  # @yield block that posts work to the pool
21
- # @return [void]
22
- def self.run(pool, on_interrupt: nil)
24
+ # @return [Symbol] :ok on clean completion, :timed_out when the graceful timeout was exceeded
25
+ def self.run(pool, on_interrupt: nil, timeout: nil)
23
26
  yield
24
27
  pool.shutdown
25
- pool.wait_for_termination(GRACEFUL_TIMEOUT) || pool.kill
26
- pool.wait_for_termination(CLEANUP_TIMEOUT)
28
+ effective_timeout = timeout || GRACEFUL_TIMEOUT
29
+ completed = pool.wait_for_termination(effective_timeout)
30
+ unless completed
31
+ pool.kill
32
+ pool.wait_for_termination(CLEANUP_TIMEOUT)
33
+ return :timed_out
34
+ end
35
+ :ok
27
36
  rescue Interrupt
28
37
  on_interrupt&.call
29
38
  pool.kill
@@ -16,7 +16,7 @@ module SearchEngine
16
16
  # using {PartitionProgress.line}, preserving CI/pipe compatibility.
17
17
  #
18
18
  # @example
19
- # renderer = LiveRenderer.new(labels: parts.map(&:inspect), per_partition_estimate: 50)
19
+ # renderer = LiveRenderer.new(labels: parts.map(&:inspect), per_partition_docs_estimate: 5000)
20
20
  # renderer.start
21
21
  # parts.each_with_index do |part, i|
22
22
  # renderer[i].start
@@ -34,9 +34,12 @@ module SearchEngine
34
34
 
35
35
  # @param labels [Array<String>] display label for each slot (partition key)
36
36
  # @param partitions [Array, nil] raw partition values for non-TTY output (defaults to labels)
37
- # @param per_partition_estimate [Integer, nil] estimated batches per partition (for progress bars)
37
+ # @param per_partition_docs_estimates [Array<Integer, nil>, nil] per-slot doc estimates (takes priority)
38
+ # @param per_partition_docs_estimate [Integer, nil] uniform doc estimate for all slots (fallback)
39
+ # @param per_partition_estimate [Integer, nil] deprecated batch-based estimate (last resort fallback)
38
40
  # @param io [IO] output stream (defaults to $stdout)
39
- def initialize(labels:, partitions: nil, per_partition_estimate: nil, io: $stdout)
41
+ def initialize(labels:, partitions: nil, per_partition_docs_estimates: nil,
42
+ per_partition_docs_estimate: nil, per_partition_estimate: nil, io: $stdout)
40
43
  @io = io
41
44
  @tty = Color.enabled?
42
45
  @mutex = Mutex.new
@@ -46,8 +49,13 @@ module SearchEngine
46
49
  @rendered_once = false
47
50
  nontty_cb = @tty ? nil : method(:flush_nontty_slot)
48
51
  raw = partitions || labels
52
+ per_slot = per_partition_docs_estimates || []
53
+ global_est = per_partition_docs_estimate || per_partition_estimate
49
54
  @slots = labels.each_with_index.map do |label, idx|
50
- Slot.new(label: label, partition: raw[idx], estimate: per_partition_estimate, on_done: nontty_cb)
55
+ Slot.new(
56
+ label: label, partition: raw[idx], docs_estimate: per_slot[idx] || global_est,
57
+ on_done: nontty_cb
58
+ )
51
59
  end
52
60
  @viewport = resolve_viewport
53
61
  end
@@ -238,12 +246,12 @@ module SearchEngine
238
246
 
239
247
  # @param label [String] partition display label (e.g. partition key inspect)
240
248
  # @param partition [Object, nil] raw partition value for non-TTY output (defaults to label)
241
- # @param estimate [Integer, nil] estimated total batches for progress bar
249
+ # @param docs_estimate [Integer, nil] estimated total docs for doc-based progress bar
242
250
  # @param on_done [Proc, nil] callback invoked after finish/finish_error (non-TTY flush)
243
- def initialize(label:, partition: nil, estimate: nil, on_done: nil)
251
+ def initialize(label:, partition: nil, docs_estimate: nil, on_done: nil)
244
252
  @label = label
245
253
  @partition = partition.nil? ? label : partition
246
- @estimate = estimate
254
+ @docs_estimate = docs_estimate
247
255
  @on_done = on_done
248
256
  @state = :pending
249
257
  @batches_done = 0
@@ -387,15 +395,16 @@ module SearchEngine
387
395
  end
388
396
 
389
397
  def build_progress_part
390
- if @estimate&.positive? && @batches_done.positive?
391
- ratio = @batches_done.to_f / @estimate
398
+ if @docs_estimate&.positive? && @docs_total.positive?
399
+ ratio = @docs_total.to_f / @docs_estimate
392
400
  pct = [100, (ratio * 100).round].min
393
401
  filled = [(ratio * BAR_WIDTH).round, BAR_WIDTH].min
394
402
  empty = BAR_WIDTH - filled
395
403
  bar = "\u2588" * filled + "\u2591" * empty
396
- "#{bar} #{pct}% #{@batches_done}/#{@estimate} batches (#{@docs_total} docs)"
404
+ batch_info = @batches_done.positive? ? "#{@batches_done} batches " : ''
405
+ "#{bar} #{pct}% #{batch_info}(#{@docs_total}/#{@docs_estimate} docs)"
397
406
  elsif @batches_done.positive?
398
- "#{@batches_done} batches, #{@docs_total} docs"
407
+ "#{@batches_done} batches (#{@docs_total} docs)"
399
408
  else
400
409
  ''
401
410
  end
@@ -76,8 +76,50 @@ module SearchEngine
76
76
  end
77
77
  end
78
78
 
79
+ # Auto-detect doc count for a partition from the partition_fetch result.
80
+ # Counts the same scope that partition_fetch will enumerate — single source of truth.
81
+ #
82
+ # @param partition [Object] partition key
83
+ # @return [Integer, nil] doc count or nil when unavailable
84
+ def partition_doc_count(partition)
85
+ count = auto_count_from_fetch(partition)
86
+ count.is_a?(Integer) && count.positive? ? count : nil
87
+ rescue StandardError
88
+ nil
89
+ end
90
+
79
91
  private
80
92
 
93
+ # Call partition_fetch_proc and try to extract a countable relation.
94
+ # Calling the proc only builds a lazy AR object — no batch queries fire.
95
+ # The single COUNT query is the only DB cost.
96
+ def auto_count_from_fetch(partition)
97
+ return nil unless @partition_fetch_proc
98
+
99
+ result = @partition_fetch_proc.call(partition)
100
+ extract_count(result)
101
+ rescue StandardError
102
+ nil
103
+ end
104
+
105
+ def extract_count(result)
106
+ relation = countable_relation_from(result)
107
+ relation&.count
108
+ rescue StandardError
109
+ nil
110
+ end
111
+
112
+ def countable_relation_from(result)
113
+ if defined?(ActiveRecord::Batches::BatchEnumerator) &&
114
+ result.is_a?(ActiveRecord::Batches::BatchEnumerator)
115
+ return result.instance_variable_get(:@relation)
116
+ end
117
+
118
+ return result if defined?(ActiveRecord::Relation) && result.is_a?(ActiveRecord::Relation)
119
+
120
+ nil
121
+ end
122
+
81
123
  def validate_hook_arity!(proc_obj, name:)
82
124
  ar = proc_obj.arity
83
125
  return if ar == 1 || ar.negative?
@@ -3,5 +3,5 @@
3
3
  module SearchEngine
4
4
  # Current gem version.
5
5
  # @return [String]
6
- VERSION = '30.1.6.6'
6
+ VERSION = '30.1.6.8'
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search-engine-for-typesense
3
3
  version: !ruby/object:Gem::Version
4
- version: 30.1.6.6
4
+ version: 30.1.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Shkoda