search-engine-for-typesense 30.1.6.6 → 30.1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 685951054b6b65954ff6517a08176c8bec124c197980dec5fcfc0ebbd19f6691
4
- data.tar.gz: 12e290d5e45325cb031b06f87d158b33300fdc8682d3a417ba7191940e35211e
3
+ metadata.gz: 17890bf666142ee9e0cd293c1ea5fc13dcf4089c50bb1e721708b7c9972f5e15
4
+ data.tar.gz: 84866d6a6d238f29e3eda054f445a88780c98ab9eff75673f6930c044a336a02
5
5
  SHA512:
6
- metadata.gz: f7e7e8ab135067d43fd2cffed2dfc0fdeac7f3861c267dc47d5131c2600a7e5adee5b9267d5b326517cfe32c526a0e67d13d614312183c59ff0e496def671990
7
- data.tar.gz: dcb0366fa3ddd2dbe03bc50dec6032093761eba588ae2e88116e2b939cbc8f62ea442c0857448b3d69d20cdf7eccc98b0f5d9d1ee2b2d6ce593453539d889b97
6
+ metadata.gz: 01aa100a2d5a612c56a85cd6f4e02d232476911ceea18cd7f024904a477874ab492d675d0ad9624212a4891e15e0a17371cc2c9314b38bb2bcb332e36212faba
7
+ data.tar.gz: fb6ba4067d04afdb1cc0bc2236c0040832fc6632d8cd0320080c77b7b9b6fa8d9b8ce6dca99ab725757ca4124107f67570ad2281e03404193c746258becef127
@@ -270,9 +270,9 @@ module SearchEngine
270
270
  if compiled
271
271
  parts = Array(compiled.partitions)
272
272
  max_p = compiled.max_parallel.to_i
273
- return __se_index_partitions_seq!(parts, into) if max_p <= 1 || parts.size <= 1
273
+ return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
274
274
 
275
- __se_index_partitions_parallel!(parts, into, max_p)
275
+ __se_index_partitions_parallel!(parts, into, max_p, compiled)
276
276
  else
277
277
  summary = SearchEngine::Indexer.rebuild_partition!(self, partition: nil, into: into)
278
278
  __se_build_index_result([summary])
@@ -314,10 +314,10 @@ module SearchEngine
314
314
 
315
315
  class_methods do
316
316
  # Sequential processing of partition list with live progress rendering.
317
- def __se_index_partitions_seq!(parts, into)
318
- estimate = __se_per_partition_estimate(parts.size)
317
+ def __se_index_partitions_seq!(parts, into, compiled)
318
+ docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
319
319
  renderer = SearchEngine::Logging::LiveRenderer.new(
320
- labels: parts.map(&:inspect), partitions: parts, per_partition_estimate: estimate
320
+ labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
321
321
  )
322
322
  renderer.start
323
323
 
@@ -351,12 +351,12 @@ module SearchEngine
351
351
 
352
352
  class_methods do
353
353
  # Parallel processing via bounded thread pool with live progress rendering.
354
- def __se_index_partitions_parallel!(parts, into, max_p)
354
+ def __se_index_partitions_parallel!(parts, into, max_p, compiled)
355
355
  require 'concurrent-ruby'
356
356
 
357
- estimate = __se_per_partition_estimate(parts.size)
357
+ docs_estimates = __se_per_partition_docs_estimates(parts, compiled)
358
358
  renderer = SearchEngine::Logging::LiveRenderer.new(
359
- labels: parts.map(&:inspect), partitions: parts, per_partition_estimate: estimate
359
+ labels: parts.map(&:inspect), partitions: parts, per_partition_docs_estimates: docs_estimates
360
360
  )
361
361
  renderer.start
362
362
 
@@ -414,11 +414,27 @@ module SearchEngine
414
414
  end
415
415
 
416
416
  class_methods do
417
- # Heuristic per-partition batch estimate for progress bars.
417
+ # Build an array of per-partition doc estimates.
418
+ # Tries per-partition counting (auto-detected from AR fetch result),
419
+ # fills any nils with the equal-split heuristic fallback.
420
+ #
421
+ # @param parts [Array] partition keys
422
+ # @param compiled [SearchEngine::Partitioner::Compiled, nil]
423
+ # @return [Array<Integer, nil>]
424
+ def __se_per_partition_docs_estimates(parts, compiled)
425
+ estimates = parts.map { |part| compiled&.partition_doc_count(part) }
426
+
427
+ heuristic = __se_heuristic_docs_estimate(parts.size)
428
+ parts.each_index.map { |i| estimates[i] || heuristic }
429
+ rescue StandardError
430
+ Array.new(parts.size, nil)
431
+ end
432
+
433
+ # Equal-split heuristic: total_docs / partition_count.
418
434
  # @param partition_count [Integer]
419
435
  # @return [Integer, nil]
420
- def __se_per_partition_estimate(partition_count)
421
- total = SearchEngine::Indexer::BulkImport.estimate_total_batches(self)
436
+ def __se_heuristic_docs_estimate(partition_count)
437
+ total = SearchEngine::Indexer::BulkImport.estimate_total_docs(self)
422
438
  return nil unless total
423
439
 
424
440
  (total.to_f / partition_count).ceil
@@ -426,7 +442,7 @@ module SearchEngine
426
442
  nil
427
443
  end
428
444
 
429
- private :__se_per_partition_estimate
445
+ private :__se_per_partition_docs_estimates, :__se_heuristic_docs_estimate
430
446
  end
431
447
 
432
448
  class_methods do
@@ -72,7 +72,7 @@ module SearchEngine
72
72
  failed_total = 0
73
73
  failed_batches_total = 0
74
74
  batches_total = 0
75
- # Capture start time before processing any batches to measure total wall-clock duration
75
+ source_batches_done = 0
76
76
  started_at = monotonic_ms
77
77
 
78
78
  docs_enum.each do |raw_batch|
@@ -95,11 +95,13 @@ module SearchEngine
95
95
  batches << stats
96
96
  validate_soft_batch_size!(batch_size, stats[:docs_count])
97
97
  log_batch(stats, batches_total) if log_batches
98
- on_batch&.call(
99
- batches_done: batches_total, docs_total: docs_total,
100
- success_total: success_total, failed_total: failed_total
101
- )
102
98
  end
99
+
100
+ source_batches_done += 1
101
+ on_batch&.call(
102
+ batches_done: source_batches_done, docs_total: docs_total,
103
+ success_total: success_total, failed_total: failed_total
104
+ )
103
105
  end
104
106
 
105
107
  # Calculate total duration as wall-clock time from start to finish (not sum of batch durations)
@@ -195,6 +197,7 @@ module SearchEngine
195
197
  failed_total: 0,
196
198
  failed_batches_total: 0,
197
199
  batches_total: 0,
200
+ source_batches_done: 0,
198
201
  idx_counter: -1,
199
202
  started_at: monotonic_ms,
200
203
  mtx: Mutex.new,
@@ -314,12 +317,11 @@ module SearchEngine
314
317
  # @return [void]
315
318
  def process_single_batch_parallel(raw_batch:, into:, action:, retry_policy:, batch_size:, log_batches:,
316
319
  shared_state:)
317
- # Each thread gets its own resources
318
320
  thread_client = SearchEngine.client
319
321
  thread_buffer = +''
320
322
  thread_idx = shared_state[:mtx].synchronize { shared_state[:idx_counter] += 1 }
321
323
 
322
- snapshots = begin
324
+ snapshot = begin
323
325
  stats_list = import_batch_with_handling(
324
326
  client: thread_client,
325
327
  collection: into,
@@ -332,6 +334,8 @@ module SearchEngine
332
334
 
333
335
  shared_state[:mtx].synchronize do
334
336
  aggregate_stats(stats_list, shared_state, batch_size, log_batches)
337
+ shared_state[:source_batches_done] += 1
338
+ progress_snapshot(shared_state)
335
339
  end
336
340
  rescue StandardError => error
337
341
  docs_count = begin
@@ -346,27 +350,26 @@ module SearchEngine
346
350
  err_msg = " batch_index=#{thread_idx} → error=#{error.class}: #{error.message.to_s[0, 200]}"
347
351
  warn(SearchEngine::Logging::Color.apply(err_msg, :red))
348
352
  aggregate_stats([failure_stat], shared_state, batch_size, log_batches)
353
+ shared_state[:source_batches_done] += 1
354
+ progress_snapshot(shared_state)
349
355
  end
350
356
  end
351
357
 
352
- on_batch = shared_state[:on_batch]
353
- snapshots&.each { |snap| on_batch&.call(**snap) }
358
+ shared_state[:on_batch]&.call(**snapshot) if snapshot
354
359
  end
355
360
 
356
361
  # Aggregate batch statistics thread-safely into shared state.
357
362
  #
358
363
  # Must be called within a mutex synchronization block. Updates counters,
359
364
  # appends to batches array, validates batch size, and optionally logs.
360
- # Returns counter snapshots (one per stats entry) for firing callbacks
361
- # outside the lock.
362
365
  #
363
366
  # @param stats_list [Array<Hash>] array of stats hashes from batch processing
364
367
  # @param shared_state [Hash] shared state hash to update (must be mutex-protected)
365
368
  # @param batch_size [Integer, nil] soft guard for logging when exceeded
366
369
  # @param log_batches [Boolean] whether to log each batch as it completes
367
- # @return [Array<Hash>] counter snapshots suitable for on_batch callbacks
370
+ # @return [void]
368
371
  def aggregate_stats(stats_list, shared_state, batch_size, log_batches)
369
- stats_list.map do |stats|
372
+ stats_list.each do |stats|
370
373
  shared_state[:docs_total] += stats[:docs_count].to_i
371
374
  shared_state[:success_total] += stats[:success_count].to_i
372
375
  shared_state[:failed_total] += stats[:failure_count].to_i
@@ -375,13 +378,23 @@ module SearchEngine
375
378
  shared_state[:batches] << stats
376
379
  validate_soft_batch_size!(batch_size, stats[:docs_count])
377
380
  log_batch(stats, shared_state[:batches_total]) if log_batches
378
- {
379
- batches_done: shared_state[:batches_total], docs_total: shared_state[:docs_total],
380
- success_total: shared_state[:success_total], failed_total: shared_state[:failed_total]
381
- }
382
381
  end
383
382
  end
384
383
 
384
+ # Build a progress snapshot from shared state for the on_batch callback.
385
+ # Must be called within a mutex synchronization block.
386
+ #
387
+ # @param shared_state [Hash] shared state hash (must be mutex-protected)
388
+ # @return [Hash] progress counters keyed by :batches_done, :docs_total, etc.
389
+ def progress_snapshot(shared_state)
390
+ {
391
+ batches_done: shared_state[:source_batches_done],
392
+ docs_total: shared_state[:docs_total],
393
+ success_total: shared_state[:success_total],
394
+ failed_total: shared_state[:failed_total]
395
+ }
396
+ end
397
+
385
398
  # Build a Summary object from aggregated shared state.
386
399
  #
387
400
  # Calculates total duration and constructs a Summary with all aggregated
@@ -426,14 +439,14 @@ module SearchEngine
426
439
  enum.is_a?(Enumerator) ? enum : enum.each
427
440
  end
428
441
 
429
- # Estimate total batch count for progress logging.
442
+ # Estimate total source record count for the given model class.
430
443
  #
431
- # Attempts to estimate batch count for ActiveRecord sources by counting records
432
- # and dividing by batch_size. Returns nil for other source types or when estimation fails.
444
+ # Shared foundation for batch and doc estimates. Performs a model.count
445
+ # with a soft timeout to avoid blocking on slow tables.
433
446
  #
434
447
  # @param klass [Class] a {SearchEngine::Base} subclass
435
- # @return [Integer, nil] estimated total batch count or nil if not estimable
436
- def estimate_total_batches(klass)
448
+ # @return [Integer, nil] record count or nil if not estimable
449
+ def estimate_source_record_count(klass)
437
450
  return nil if SearchEngine.config.indexer.estimate_progress == false
438
451
  return nil unless klass.is_a?(Class)
439
452
 
@@ -447,22 +460,39 @@ module SearchEngine
447
460
  model = source_def.dig(:options, :model)
448
461
  return nil unless model.respond_to?(:count)
449
462
 
450
- batch_size = source_def.dig(:options, :batch_size)
451
- batch_size ||= SearchEngine.config.sources.active_record.batch_size
452
- batch_size = batch_size.to_i
453
- return nil unless batch_size.positive?
463
+ count_with_timeout(model, 10)
464
+ rescue StandardError
465
+ nil
466
+ end
454
467
 
455
- begin
456
- total_records = count_with_timeout(model, 10)
457
- return nil unless total_records&.positive?
468
+ # Estimate total batch count for progress logging.
469
+ #
470
+ # @param klass [Class] a {SearchEngine::Base} subclass
471
+ # @return [Integer, nil] estimated total batch count or nil if not estimable
472
+ def estimate_total_batches(klass)
473
+ total_records = estimate_source_record_count(klass)
474
+ return nil unless total_records&.positive?
458
475
 
459
- (total_records.to_f / batch_size).ceil
460
- rescue StandardError
461
- nil
462
- end
476
+ batch_size = batch_size_for_klass(klass)
477
+ return nil unless batch_size&.positive?
478
+
479
+ (total_records.to_f / batch_size).ceil
480
+ rescue StandardError
481
+ nil
463
482
  end
464
483
 
465
- public :estimate_total_batches
484
+ # Estimate total document count for doc-based progress tracking.
485
+ #
486
+ # @param klass [Class] a {SearchEngine::Base} subclass
487
+ # @return [Integer, nil] estimated total docs or nil if not estimable
488
+ def estimate_total_docs(klass)
489
+ count = estimate_source_record_count(klass)
490
+ count&.positive? ? count : nil
491
+ rescue StandardError
492
+ nil
493
+ end
494
+
495
+ public :estimate_total_batches, :estimate_total_docs
466
496
 
467
497
  # Thread-based soft timeout for model.count, avoiding Timeout.timeout
468
498
  # which can corrupt ActiveRecord connection state.
@@ -497,6 +527,14 @@ module SearchEngine
497
527
  klass.instance_variable_get(:@__mapper_dsl__)
498
528
  end
499
529
 
530
+ def batch_size_for_klass(klass)
531
+ dsl = mapper_dsl_for_klass(klass)
532
+ source_def = dsl&.dig(:source)
533
+ batch_size = source_def&.dig(:options, :batch_size)
534
+ batch_size ||= SearchEngine.config.sources.active_record.batch_size
535
+ batch_size.to_i
536
+ end
537
+
500
538
  # Import a single batch with error handling and recursive 413 splitting.
501
539
  #
502
540
  # Public wrapper that delegates to the internal method with batch_index set to nil,
@@ -16,7 +16,7 @@ module SearchEngine
16
16
  # using {PartitionProgress.line}, preserving CI/pipe compatibility.
17
17
  #
18
18
  # @example
19
- # renderer = LiveRenderer.new(labels: parts.map(&:inspect), per_partition_estimate: 50)
19
+ # renderer = LiveRenderer.new(labels: parts.map(&:inspect), per_partition_docs_estimate: 5000)
20
20
  # renderer.start
21
21
  # parts.each_with_index do |part, i|
22
22
  # renderer[i].start
@@ -34,9 +34,12 @@ module SearchEngine
34
34
 
35
35
  # @param labels [Array<String>] display label for each slot (partition key)
36
36
  # @param partitions [Array, nil] raw partition values for non-TTY output (defaults to labels)
37
- # @param per_partition_estimate [Integer, nil] estimated batches per partition (for progress bars)
37
+ # @param per_partition_docs_estimates [Array<Integer, nil>, nil] per-slot doc estimates (takes priority)
38
+ # @param per_partition_docs_estimate [Integer, nil] uniform doc estimate for all slots (fallback)
39
+ # @param per_partition_estimate [Integer, nil] deprecated batch-based estimate (last resort fallback)
38
40
  # @param io [IO] output stream (defaults to $stdout)
39
- def initialize(labels:, partitions: nil, per_partition_estimate: nil, io: $stdout)
41
+ def initialize(labels:, partitions: nil, per_partition_docs_estimates: nil,
42
+ per_partition_docs_estimate: nil, per_partition_estimate: nil, io: $stdout)
40
43
  @io = io
41
44
  @tty = Color.enabled?
42
45
  @mutex = Mutex.new
@@ -46,8 +49,13 @@ module SearchEngine
46
49
  @rendered_once = false
47
50
  nontty_cb = @tty ? nil : method(:flush_nontty_slot)
48
51
  raw = partitions || labels
52
+ per_slot = per_partition_docs_estimates || []
53
+ global_est = per_partition_docs_estimate || per_partition_estimate
49
54
  @slots = labels.each_with_index.map do |label, idx|
50
- Slot.new(label: label, partition: raw[idx], estimate: per_partition_estimate, on_done: nontty_cb)
55
+ Slot.new(
56
+ label: label, partition: raw[idx], docs_estimate: per_slot[idx] || global_est,
57
+ on_done: nontty_cb
58
+ )
51
59
  end
52
60
  @viewport = resolve_viewport
53
61
  end
@@ -238,12 +246,12 @@ module SearchEngine
238
246
 
239
247
  # @param label [String] partition display label (e.g. partition key inspect)
240
248
  # @param partition [Object, nil] raw partition value for non-TTY output (defaults to label)
241
- # @param estimate [Integer, nil] estimated total batches for progress bar
249
+ # @param docs_estimate [Integer, nil] estimated total docs for doc-based progress bar
242
250
  # @param on_done [Proc, nil] callback invoked after finish/finish_error (non-TTY flush)
243
- def initialize(label:, partition: nil, estimate: nil, on_done: nil)
251
+ def initialize(label:, partition: nil, docs_estimate: nil, on_done: nil)
244
252
  @label = label
245
253
  @partition = partition.nil? ? label : partition
246
- @estimate = estimate
254
+ @docs_estimate = docs_estimate
247
255
  @on_done = on_done
248
256
  @state = :pending
249
257
  @batches_done = 0
@@ -387,15 +395,16 @@ module SearchEngine
387
395
  end
388
396
 
389
397
  def build_progress_part
390
- if @estimate&.positive? && @batches_done.positive?
391
- ratio = @batches_done.to_f / @estimate
398
+ if @docs_estimate&.positive? && @docs_total.positive?
399
+ ratio = @docs_total.to_f / @docs_estimate
392
400
  pct = [100, (ratio * 100).round].min
393
401
  filled = [(ratio * BAR_WIDTH).round, BAR_WIDTH].min
394
402
  empty = BAR_WIDTH - filled
395
403
  bar = "\u2588" * filled + "\u2591" * empty
396
- "#{bar} #{pct}% #{@batches_done}/#{@estimate} batches (#{@docs_total} docs)"
404
+ batch_info = @batches_done.positive? ? "#{@batches_done} batches " : ''
405
+ "#{bar} #{pct}% #{batch_info}(#{@docs_total}/#{@docs_estimate} docs)"
397
406
  elsif @batches_done.positive?
398
- "#{@batches_done} batches, #{@docs_total} docs"
407
+ "#{@batches_done} batches (#{@docs_total} docs)"
399
408
  else
400
409
  ''
401
410
  end
@@ -76,8 +76,50 @@ module SearchEngine
76
76
  end
77
77
  end
78
78
 
79
+ # Auto-detect doc count for a partition from the partition_fetch result.
80
+ # Counts the same scope that partition_fetch will enumerate — single source of truth.
81
+ #
82
+ # @param partition [Object] partition key
83
+ # @return [Integer, nil] doc count or nil when unavailable
84
+ def partition_doc_count(partition)
85
+ count = auto_count_from_fetch(partition)
86
+ count.is_a?(Integer) && count.positive? ? count : nil
87
+ rescue StandardError
88
+ nil
89
+ end
90
+
79
91
  private
80
92
 
93
+ # Call partition_fetch_proc and try to extract a countable relation.
94
+ # Calling the proc only builds a lazy AR object — no batch queries fire.
95
+ # The single COUNT query is the only DB cost.
96
+ def auto_count_from_fetch(partition)
97
+ return nil unless @partition_fetch_proc
98
+
99
+ result = @partition_fetch_proc.call(partition)
100
+ extract_count(result)
101
+ rescue StandardError
102
+ nil
103
+ end
104
+
105
+ def extract_count(result)
106
+ relation = countable_relation_from(result)
107
+ relation&.count
108
+ rescue StandardError
109
+ nil
110
+ end
111
+
112
+ def countable_relation_from(result)
113
+ if defined?(ActiveRecord::Batches::BatchEnumerator) &&
114
+ result.is_a?(ActiveRecord::Batches::BatchEnumerator)
115
+ return result.instance_variable_get(:@relation)
116
+ end
117
+
118
+ return result if defined?(ActiveRecord::Relation) && result.is_a?(ActiveRecord::Relation)
119
+
120
+ nil
121
+ end
122
+
81
123
  def validate_hook_arity!(proc_obj, name:)
82
124
  ar = proc_obj.arity
83
125
  return if ar == 1 || ar.negative?
@@ -3,5 +3,5 @@
3
3
  module SearchEngine
4
4
  # Current gem version.
5
5
  # @return [String]
6
- VERSION = '30.1.6.6'
6
+ VERSION = '30.1.6.7'
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search-engine-for-typesense
3
3
  version: !ruby/object:Gem::Version
4
- version: 30.1.6.6
4
+ version: 30.1.6.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Shkoda