search-engine-for-typesense 30.1.8.16 → 30.1.8.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd4cd9a02cea20dbf913f71c55c1ae57b77d29245be9e1f02e93a22e791cadcf
4
- data.tar.gz: 404f2b78d290227c1fdfcd0b82cfcd7d41f4ec353d5e551a20658cd9a78ff593
3
+ metadata.gz: d799dd8ebda35a7687a0043fa6c2e64405ff6a4adac15756f2483ecf1c8ea963
4
+ data.tar.gz: e408f30ec0d0cab5a053866f6db1985c9dd2e369555fc9412a9321ee3ac1ba5e
5
5
  SHA512:
6
- metadata.gz: 166cb097643cfb4376a1232eaab0e4f371fd1b3c409a9103f2a3d812dd1ba74f9812e13986e542abcbd086fe4cdcf05db3bca799ddaac2e5ee94215db5d83098
7
- data.tar.gz: 5a57d41c35a28022ca8384f8f7924b5d26e7210ac3a3681ccc8a9dcfaf7cba90ba5db6ea3914d09d2f9c19efa6fe43947778a086ecafc13fddc0a4ab1e751d5e
6
+ metadata.gz: 5b72688894bdb836651521e2217d305266096a18a4e616a6dc508e00c19ace5f2170baa79e127220ebd5fdb6ca21477b19a856c66d3e91b4b28d9fcec48f96d1
7
+ data.tar.gz: 13f44c384d0485688f399799efaab02645f1cc2b261359c13b7f688a853a536f9a57fc3e6fadea4fead6222114887d639ead85150184b93c2d2832cf74624d9f
data/README.md CHANGED
@@ -222,6 +222,11 @@ SearchEngine.configure do |c|
222
222
  c.postgres_outbox.channel = "search_engine_outbox"
223
223
  c.postgres_outbox.queue_name = "search_engine"
224
224
  c.postgres_outbox.batch_size = 1000
225
+ c.postgres_outbox.batch_sizes = {
226
+ product_balances: 10_000,
227
+ calculated_products: 1_000,
228
+ products: 2_000
229
+ }
225
230
  c.postgres_outbox.drain_target_parallelism = 1
226
231
  c.postgres_outbox.drain_job_max_batches = 1
227
232
  c.postgres_outbox.drain_job_max_runtime_s = nil
@@ -241,6 +246,10 @@ SearchEngine.configure do |c|
241
246
  end
242
247
  ```
243
248
 
249
+ `batch_size` is the global fallback for all collections. Use `batch_sizes` when some collections are much
250
+ lighter or heavier than others. Omitted drain limits use the per-collection values; explicit `limit:`
251
+ arguments still override the map and use one global cap for that drain.
252
+
244
253
  Generate and edit the migrations:
245
254
 
246
255
  ```bash
@@ -22,7 +22,7 @@ module SearchEngine
22
22
  return perform_with_drain_slot(limit: limit, target_key: target_key, drain_slot: drain_slot)
23
23
  end
24
24
 
25
- effective_limit = limit || SearchEngine.config.postgres_outbox.batch_size
25
+ effective_limit = drain_limit(limit)
26
26
  drainer = drainer_for(target_key)
27
27
  summary = drainer.drain_once(limit: effective_limit)
28
28
  enqueue_continuation(limit: limit, target_key: target_key) if continue_draining?(summary, effective_limit)
@@ -40,7 +40,7 @@ module SearchEngine
40
40
  def perform_with_drain_slot(limit:, target_key:, drain_slot:)
41
41
  target = delivery_target_for!(target_key)
42
42
  slot = drain_slot.to_i
43
- effective_limit = limit || SearchEngine.config.postgres_outbox.batch_size
43
+ effective_limit = drain_limit(limit)
44
44
  repository = repository_for_slot
45
45
  slot_requeued = false
46
46
  return stale_slot_summary(target.key, slot) unless repository.start_drain_slot!(
@@ -100,7 +100,7 @@ module SearchEngine
100
100
  end
101
101
 
102
102
  def continue_draining?(summary, effective_limit)
103
- summary[:continue] || summary[:claimed].to_i >= effective_limit.to_i
103
+ summary[:continue] || (!effective_limit.nil? && summary[:claimed].to_i >= effective_limit.to_i)
104
104
  end
105
105
 
106
106
  def drainer_for(target_key)
@@ -184,6 +184,13 @@ module SearchEngine
184
184
  [SearchEngine.config.postgres_outbox.drain_job_max_batches.to_i, 1].max
185
185
  end
186
186
 
187
+ def drain_limit(limit)
188
+ return limit unless limit.nil?
189
+ return nil if SearchEngine.config.postgres_outbox.collection_batch_sizes?
190
+
191
+ SearchEngine.config.postgres_outbox.batch_size
192
+ end
193
+
187
194
  def runtime_budget_exhausted?
188
195
  max_runtime_s = SearchEngine.config.postgres_outbox.drain_job_max_runtime_s.to_i
189
196
  return false unless max_runtime_s.positive?
@@ -2,8 +2,8 @@
2
2
 
3
3
  require 'active_support/concern'
4
4
  require 'set'
5
- require 'json'
6
5
  require 'search_engine/indexer/batch_planner'
6
+ require 'search_engine/indexer/import_response_parser'
7
7
 
8
8
  module SearchEngine
9
9
  class Base
@@ -531,67 +531,8 @@ module SearchEngine
531
531
  result
532
532
  end
533
533
 
534
- def safe_parse_json(str)
535
- JSON.parse(str)
536
- rescue StandardError
537
- nil
538
- end
539
-
540
534
  def parse_import_response(raw)
541
- return parse_import_response_from_string(raw) if raw.is_a?(String)
542
- return parse_import_response_from_array(raw) if raw.is_a?(Array)
543
-
544
- [0, 0, []]
545
- end
546
-
547
- def parse_import_response_from_string(str)
548
- success = 0
549
- failure = 0
550
- samples = []
551
-
552
- str.each_line do |line|
553
- line = line.strip
554
- next if line.empty?
555
-
556
- h = safe_parse_json(line)
557
- unless h
558
- failure += 1
559
- samples << 'invalid-json-line'
560
- next
561
- end
562
-
563
- if truthy?(h['success'] || h[:success])
564
- success += 1
565
- else
566
- failure += 1
567
- msg = h['error'] || h[:error] || h['message'] || h[:message]
568
- samples << msg.to_s[0, 200] if msg
569
- end
570
- end
571
-
572
- [success, failure, samples[0, 5]]
573
- end
574
-
575
- def parse_import_response_from_array(arr)
576
- success = 0
577
- failure = 0
578
- samples = []
579
-
580
- arr.each do |h|
581
- if h.is_a?(Hash) && truthy?(h['success'] || h[:success])
582
- success += 1
583
- else
584
- failure += 1
585
- msg = h.is_a?(Hash) ? (h['error'] || h[:error] || h['message'] || h[:message]) : nil
586
- samples << msg.to_s[0, 200] if msg
587
- end
588
- end
589
-
590
- [success, failure, samples[0, 5]]
591
- end
592
-
593
- def truthy?(val)
594
- val == true || val.to_s.downcase == 'true'
535
+ SearchEngine::Indexer::ImportResponseParser.parse(raw)
595
536
  end
596
537
 
597
538
  def normalize_records_input(records)
@@ -280,6 +280,8 @@ module SearchEngine
280
280
  attr_accessor :queue_name
281
281
  # @return [Integer] maximum events to claim per processing batch
282
282
  attr_accessor :batch_size
283
+ # @return [Hash] optional per-collection processing batch sizes
284
+ attr_accessor :batch_sizes
283
285
  # @return [Integer] maximum processing attempts before leaving an event failed
284
286
  attr_accessor :max_attempts
285
287
  # @return [Integer] polling interval in seconds
@@ -317,6 +319,7 @@ module SearchEngine
317
319
  @channel = 'search_engine_outbox'
318
320
  @queue_name = 'search_engine'
319
321
  @batch_size = 1000
322
+ @batch_sizes = {}
320
323
  @max_attempts = 10
321
324
  @poll_interval_s = 5
322
325
  @listener_wait_timeout_s = 30
@@ -332,6 +335,34 @@ module SearchEngine
332
335
  @drain_job_max_batches = 1
333
336
  @drain_job_max_runtime_s = nil
334
337
  end
338
+
339
+ # Resolve the processing batch size for a collection.
340
+ #
341
+ # @param collection [String, Symbol, nil] collection name
342
+ # @return [Integer] positive per-collection batch size or global fallback
343
+ def batch_size_for(collection)
344
+ configured = normalized_batch_sizes[collection.to_s]
345
+ return configured if configured&.positive?
346
+
347
+ batch_size.to_i
348
+ end
349
+
350
+ # Whether any positive per-collection batch sizes are configured.
351
+ #
352
+ # @return [Boolean]
353
+ def collection_batch_sizes?
354
+ normalized_batch_sizes.any?
355
+ end
356
+
357
+ # Normalized positive per-collection batch sizes keyed by collection name.
358
+ #
359
+ # @return [Hash<String, Integer>]
360
+ def normalized_batch_sizes
361
+ Hash(batch_sizes).each_with_object({}) do |(key, value), result|
362
+ size = value.to_i
363
+ result[key.to_s] = size if size.positive?
364
+ end
365
+ end
335
366
  end
336
367
 
337
368
  # Lightweight nested configuration for observability/logging.
@@ -886,6 +917,7 @@ module SearchEngine
886
917
  channel: postgres_outbox.channel,
887
918
  queue_name: postgres_outbox.queue_name,
888
919
  batch_size: postgres_outbox.batch_size,
920
+ batch_sizes: postgres_outbox.batch_sizes,
889
921
  max_attempts: postgres_outbox.max_attempts,
890
922
  poll_interval_s: postgres_outbox.poll_interval_s,
891
923
  listener_wait_timeout_s: postgres_outbox.listener_wait_timeout_s,
@@ -28,7 +28,7 @@ module SearchEngine
28
28
  docs.each_with_index do |raw, idx|
29
29
  doc = ensure_hash_document(raw)
30
30
  ensure_id!(doc)
31
- doc[:doc_updated_at] = now_i if doc.is_a?(Hash)
31
+ assign_doc_updated_at!(doc, now_i)
32
32
  buffer << JSON.generate(doc)
33
33
  buffer << "\n" if idx < (size - 1)
34
34
  count += 1
@@ -65,6 +65,15 @@ module SearchEngine
65
65
  has_id = doc.key?(:id) || doc.key?('id')
66
66
  raise SearchEngine::Errors::InvalidParams, 'document is missing required id' unless has_id
67
67
  end
68
+
69
+ def assign_doc_updated_at!(doc, timestamp)
70
+ if doc.key?('doc_updated_at')
71
+ doc.delete(:doc_updated_at)
72
+ doc['doc_updated_at'] = timestamp
73
+ else
74
+ doc[:doc_updated_at] = timestamp
75
+ end
76
+ end
68
77
  end
69
78
  end
70
79
  end
@@ -24,8 +24,10 @@ module SearchEngine
24
24
  def parse(raw)
25
25
  return parse_from_string(raw) if raw.is_a?(String)
26
26
  return parse_from_array(raw) if raw.is_a?(Array)
27
+ return [0, 0, []] if raw.nil?
27
28
 
28
- [0, 0, []]
29
+ raise SearchEngine::Errors::InvalidParams,
30
+ "Unsupported Typesense import response shape: #{raw.class.name}"
29
31
  end
30
32
 
31
33
  def parse_from_string(str)
@@ -69,6 +69,13 @@ module SearchEngine
69
69
  kwargs = { target_key: slot.fetch(:target_key), drain_slot: slot.fetch(:slot) }
70
70
  kwargs[:limit] = limit unless limit.nil?
71
71
  job.perform_later(**kwargs)
72
+ rescue StandardError => error
73
+ repository.release_requeued_drain_slot!(
74
+ target_key: slot.fetch(:target_key),
75
+ slot: slot.fetch(:slot),
76
+ error: error
77
+ )
78
+ raise
72
79
  end
73
80
 
74
81
  def delivery_targets
@@ -22,7 +22,7 @@ module SearchEngine
22
22
  # Claim, coalesce, order, process, and mark one batch.
23
23
  # @param limit [Integer]
24
24
  # @return [Hash]
25
- def drain_once(limit: SearchEngine.config.postgres_outbox.batch_size)
25
+ def drain_once(limit: nil)
26
26
  SearchEngine::Instrumentation.instrument(
27
27
  'search_engine.postgres_outbox.drain',
28
28
  drain_payload(limit)
@@ -31,7 +31,7 @@ module SearchEngine
31
31
  summary = empty_summary(events)
32
32
  next summary if events.empty?
33
33
 
34
- summary[:continue] = true if continue_after_nonempty_target_batch?
34
+ summary[:continue] = true if continue_after_nonempty_batch?
35
35
 
36
36
  kept, superseded_ids = coalesce(events)
37
37
  repository.mark_superseded!(superseded_ids)
@@ -61,8 +61,8 @@ module SearchEngine
61
61
  summary
62
62
  end
63
63
 
64
- def continue_after_nonempty_target_batch?
65
- !target_key.nil?
64
+ def continue_after_nonempty_batch?
65
+ !target_key.nil? || SearchEngine.config.postgres_outbox.collection_batch_sizes?
66
66
  end
67
67
 
68
68
  def coalesce(events)
@@ -42,6 +42,9 @@ module SearchEngine
42
42
  add_index table_name,
43
43
  %i[collection document_id status id],
44
44
  name: 'idx_search_engine_outbox_coalescing'
45
+ add_index table_name,
46
+ %i[collection document_id id],
47
+ name: 'idx_search_engine_outbox_coalesce_lookup'
45
48
  add_index table_name,
46
49
  :locked_at,
47
50
  name: 'idx_search_engine_outbox_processing',
@@ -24,7 +24,7 @@ module SearchEngine
24
24
  rows = []
25
25
 
26
26
  connection.transaction do
27
- rows = select_rows(claim_select_sql(limit.to_i))
27
+ rows = select_rows(claim_select_sql(limit))
28
28
  ids = rows.map { |row| row_value(row, :id) }
29
29
  execute(supersede_older_pending_sql(rows)) unless rows.empty?
30
30
  execute(claim_update_sql(ids, worker_id)) unless ids.empty?
@@ -111,13 +111,13 @@ module SearchEngine
111
111
 
112
112
  # Create missing delivery rows for all configured delivery targets.
113
113
  # @return [void]
114
- def materialize_deliveries!(limit: SearchEngine.config.postgres_outbox.batch_size)
114
+ def materialize_deliveries!(limit: nil)
115
115
  targets = materialization_delivery_targets
116
116
  return if targets.empty?
117
117
 
118
118
  rows = []
119
119
  connection.transaction do
120
- rows = select_rows(delivery_materialization_select_sql(limit.to_i, targets))
120
+ rows = select_rows(delivery_materialization_select_sql(limit, targets))
121
121
  next if rows.empty?
122
122
 
123
123
  execute(materialization_supersede_older_deliveries_sql(rows, targets))
@@ -267,6 +267,9 @@ module SearchEngine
267
267
  end
268
268
 
269
269
  def delivery_materialization_select_sql(limit, targets)
270
+ return collection_limited_materialization_select_sql(targets) if collection_limited_batch?(limit)
271
+
272
+ limit = global_limit_for(limit)
270
273
  <<~SQL
271
274
  WITH target(target_key, queue_name) AS (
272
275
  VALUES #{delivery_target_values_sql(targets)}
@@ -346,7 +349,7 @@ module SearchEngine
346
349
  rows = []
347
350
 
348
351
  connection.transaction do
349
- rows = select_rows(delivery_claim_select_sql(limit.to_i))
352
+ rows = select_rows(delivery_claim_select_sql(limit))
350
353
  delivery_ids = rows.map { |row| row_value(row, :delivery_id) }
351
354
  execute(delivery_supersede_older_pending_sql(rows)) unless rows.empty?
352
355
  execute(delivery_claim_update_sql(delivery_ids, worker_id)) unless delivery_ids.empty?
@@ -369,6 +372,9 @@ module SearchEngine
369
372
  end
370
373
 
371
374
  def claim_select_sql(limit)
375
+ return collection_limited_claim_select_sql if collection_limited_batch?(limit)
376
+
377
+ limit = global_limit_for(limit)
372
378
  <<~SQL
373
379
  WITH ranked_pending AS (
374
380
  SELECT id,
@@ -410,6 +416,9 @@ module SearchEngine
410
416
  end
411
417
 
412
418
  def delivery_claim_select_sql(limit)
419
+ return collection_limited_delivery_claim_select_sql if collection_limited_batch?(limit)
420
+
421
+ limit = global_limit_for(limit)
413
422
  <<~SQL
414
423
  WITH ranked_pending AS (
415
424
  SELECT deliveries.id AS delivery_id,
@@ -461,26 +470,33 @@ module SearchEngine
461
470
 
462
471
  def materialization_supersede_older_deliveries_sql(rows, targets)
463
472
  <<~SQL
464
- WITH updated_deliveries AS (
473
+ WITH latest(collection, document_id, id) AS (
474
+ VALUES #{coalesce_values_sql(rows)}
475
+ ),
476
+ target(target_key, queue_name) AS (
477
+ VALUES #{delivery_target_values_sql(targets)}
478
+ ),
479
+ older_event_targets AS MATERIALIZED (
480
+ SELECT older_events.id AS event_id,
481
+ target.target_key
482
+ FROM latest
483
+ CROSS JOIN target
484
+ INNER JOIN #{quoted_table} older_events
485
+ ON older_events.collection = latest.collection
486
+ AND older_events.document_id = latest.document_id
487
+ AND older_events.id < latest.id
488
+ ),
489
+ updated_deliveries AS (
465
490
  UPDATE #{quoted_delivery_table} older_deliveries
466
491
  SET status = 'superseded',
467
492
  processed_at = CURRENT_TIMESTAMP,
468
493
  locked_at = NULL,
469
494
  locked_by = NULL,
470
495
  updated_at = CURRENT_TIMESTAMP
471
- FROM #{quoted_table} older_events,
472
- (
473
- VALUES #{coalesce_values_sql(rows)}
474
- ) AS latest(collection, document_id, id),
475
- (
476
- VALUES #{delivery_target_values_sql(targets)}
477
- ) AS target(target_key, queue_name)
478
- WHERE older_deliveries.event_id = older_events.id
496
+ FROM older_event_targets
497
+ WHERE older_deliveries.event_id = older_event_targets.event_id
479
498
  AND older_deliveries.status = 'pending'
480
- AND older_deliveries.target_key = target.target_key
481
- AND older_events.collection = latest.collection
482
- AND older_events.document_id = latest.document_id
483
- AND older_events.id < latest.id
499
+ AND older_deliveries.target_key = older_event_targets.target_key
484
500
  RETURNING older_deliveries.event_id
485
501
  ),
486
502
  aggregate AS (
@@ -519,23 +535,29 @@ module SearchEngine
519
535
 
520
536
  def delivery_supersede_older_pending_sql(rows)
521
537
  <<~SQL
522
- WITH updated_deliveries AS (
538
+ WITH latest(target_key, collection, document_id, event_id, delivery_id) AS (
539
+ VALUES #{delivery_coalesce_values_sql(rows)}
540
+ ),
541
+ older_event_targets AS MATERIALIZED (
542
+ SELECT older_events.id AS event_id,
543
+ latest.target_key
544
+ FROM latest
545
+ INNER JOIN #{quoted_table} older_events
546
+ ON older_events.collection = latest.collection
547
+ AND older_events.document_id = latest.document_id
548
+ AND older_events.id < latest.event_id
549
+ ),
550
+ updated_deliveries AS (
523
551
  UPDATE #{quoted_delivery_table} older_deliveries
524
552
  SET status = 'superseded',
525
553
  processed_at = CURRENT_TIMESTAMP,
526
554
  locked_at = NULL,
527
555
  locked_by = NULL,
528
556
  updated_at = CURRENT_TIMESTAMP
529
- FROM #{quoted_table} older_events,
530
- (
531
- VALUES #{delivery_coalesce_values_sql(rows)}
532
- ) AS latest(target_key, collection, document_id, event_id, delivery_id)
533
- WHERE older_deliveries.event_id = older_events.id
557
+ FROM older_event_targets
558
+ WHERE older_deliveries.event_id = older_event_targets.event_id
534
559
  AND older_deliveries.status = 'pending'
535
- AND older_deliveries.target_key = latest.target_key
536
- AND older_events.collection = latest.collection
537
- AND older_events.document_id = latest.document_id
538
- AND older_events.id < latest.event_id
560
+ AND older_deliveries.target_key = older_event_targets.target_key
539
561
  RETURNING older_deliveries.event_id
540
562
  ),
541
563
  aggregate AS (
@@ -864,6 +886,200 @@ module SearchEngine
864
886
  [delay.to_i, 0].max
865
887
  end
866
888
 
889
+ def collection_limited_claim_select_sql
890
+ <<~SQL
891
+ WITH #{collection_limits_cte_sql},
892
+ ranked_pending AS (
893
+ SELECT id,
894
+ ROW_NUMBER() OVER (
895
+ PARTITION BY collection, document_id
896
+ ORDER BY id DESC
897
+ ) AS row_number
898
+ FROM #{quoted_table}
899
+ WHERE status = 'pending'
900
+ ),
901
+ latest_due AS (
902
+ SELECT outbox.id,
903
+ outbox.collection
904
+ FROM #{quoted_table} outbox
905
+ INNER JOIN ranked_pending
906
+ ON ranked_pending.id = outbox.id
907
+ WHERE ranked_pending.row_number = 1
908
+ AND (outbox.next_attempt_at IS NULL OR outbox.next_attempt_at <= CURRENT_TIMESTAMP)
909
+ ORDER BY outbox.id ASC
910
+ LIMIT #{collection_limited_candidate_limit}
911
+ ),
912
+ ranked_by_collection AS (
913
+ SELECT latest_due.id,
914
+ ROW_NUMBER() OVER (
915
+ PARTITION BY latest_due.collection
916
+ ORDER BY latest_due.id ASC
917
+ ) AS collection_row_number,
918
+ COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
919
+ FROM latest_due
920
+ LEFT JOIN collection_limits
921
+ ON collection_limits.collection = latest_due.collection
922
+ ),
923
+ selected_due AS (
924
+ SELECT id
925
+ FROM ranked_by_collection
926
+ WHERE collection_row_number <= collection_batch_size
927
+ )
928
+ SELECT outbox.*
929
+ FROM #{quoted_table} outbox
930
+ INNER JOIN selected_due
931
+ ON selected_due.id = outbox.id
932
+ ORDER BY outbox.id ASC
933
+ FOR UPDATE SKIP LOCKED
934
+ SQL
935
+ end
936
+
937
+ def collection_limited_materialization_select_sql(targets)
938
+ <<~SQL
939
+ WITH target(target_key, queue_name) AS (
940
+ VALUES #{delivery_target_values_sql(targets)}
941
+ ),
942
+ #{collection_limits_cte_sql},
943
+ candidate_events AS MATERIALIZED (
944
+ SELECT outbox.id,
945
+ outbox.collection,
946
+ outbox.document_id
947
+ FROM #{quoted_table} outbox
948
+ WHERE outbox.status IN ('pending', 'processing', 'failed')
949
+ AND (outbox.next_attempt_at IS NULL OR outbox.next_attempt_at <= CURRENT_TIMESTAMP)
950
+ AND EXISTS (
951
+ SELECT 1
952
+ FROM target
953
+ WHERE NOT EXISTS (
954
+ SELECT 1
955
+ FROM #{quoted_delivery_table} deliveries
956
+ WHERE deliveries.event_id = outbox.id
957
+ AND deliveries.target_key = target.target_key
958
+ )
959
+ )
960
+ ORDER BY outbox.id ASC
961
+ LIMIT #{collection_limited_candidate_limit}
962
+ FOR UPDATE SKIP LOCKED
963
+ ),
964
+ latest_candidate_ids AS (
965
+ SELECT id,
966
+ collection
967
+ FROM (
968
+ SELECT id,
969
+ collection,
970
+ ROW_NUMBER() OVER (
971
+ PARTITION BY collection, document_id
972
+ ORDER BY id DESC
973
+ ) AS row_number
974
+ FROM candidate_events
975
+ ) ranked_candidate_events
976
+ WHERE row_number = 1
977
+ ),
978
+ ranked_by_collection AS (
979
+ SELECT latest_candidate_ids.id,
980
+ ROW_NUMBER() OVER (
981
+ PARTITION BY latest_candidate_ids.collection
982
+ ORDER BY latest_candidate_ids.id ASC
983
+ ) AS collection_row_number,
984
+ COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
985
+ FROM latest_candidate_ids
986
+ LEFT JOIN collection_limits
987
+ ON collection_limits.collection = latest_candidate_ids.collection
988
+ ),
989
+ selected_candidate_ids AS (
990
+ SELECT id
991
+ FROM ranked_by_collection
992
+ WHERE collection_row_number <= collection_batch_size
993
+ )
994
+ SELECT outbox.*
995
+ FROM #{quoted_table} outbox
996
+ INNER JOIN selected_candidate_ids
997
+ ON selected_candidate_ids.id = outbox.id
998
+ ORDER BY outbox.id ASC
999
+ SQL
1000
+ end
1001
+
1002
+ def collection_limited_delivery_claim_select_sql
1003
+ <<~SQL
1004
+ WITH #{collection_limits_cte_sql},
1005
+ ranked_pending AS (
1006
+ SELECT deliveries.id AS delivery_id,
1007
+ events.id AS event_id,
1008
+ events.collection,
1009
+ ROW_NUMBER() OVER (
1010
+ PARTITION BY deliveries.target_key, events.collection, events.document_id
1011
+ ORDER BY events.id DESC, deliveries.id DESC
1012
+ ) AS row_number
1013
+ FROM #{quoted_delivery_table} deliveries
1014
+ INNER JOIN #{quoted_table} events
1015
+ ON events.id = deliveries.event_id
1016
+ WHERE deliveries.target_key = #{quote(target_key)}
1017
+ AND deliveries.status = 'pending'
1018
+ ),
1019
+ latest_due AS (
1020
+ SELECT deliveries.id,
1021
+ ranked_pending.collection
1022
+ FROM #{quoted_delivery_table} deliveries
1023
+ INNER JOIN ranked_pending
1024
+ ON ranked_pending.delivery_id = deliveries.id
1025
+ WHERE ranked_pending.row_number = 1
1026
+ AND (deliveries.next_attempt_at IS NULL OR deliveries.next_attempt_at <= CURRENT_TIMESTAMP)
1027
+ ORDER BY deliveries.id ASC
1028
+ LIMIT #{collection_limited_candidate_limit}
1029
+ ),
1030
+ ranked_by_collection AS (
1031
+ SELECT latest_due.id,
1032
+ ROW_NUMBER() OVER (
1033
+ PARTITION BY latest_due.collection
1034
+ ORDER BY latest_due.id ASC
1035
+ ) AS collection_row_number,
1036
+ COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
1037
+ FROM latest_due
1038
+ LEFT JOIN collection_limits
1039
+ ON collection_limits.collection = latest_due.collection
1040
+ ),
1041
+ selected_due AS (
1042
+ SELECT id
1043
+ FROM ranked_by_collection
1044
+ WHERE collection_row_number <= collection_batch_size
1045
+ )
1046
+ SELECT events.*,
1047
+ deliveries.id AS delivery_id,
1048
+ deliveries.target_key,
1049
+ deliveries.attempts AS delivery_attempts
1050
+ FROM #{quoted_delivery_table} deliveries
1051
+ INNER JOIN #{quoted_table} events
1052
+ ON events.id = deliveries.event_id
1053
+ INNER JOIN selected_due
1054
+ ON selected_due.id = deliveries.id
1055
+ ORDER BY deliveries.id ASC
1056
+ FOR UPDATE SKIP LOCKED
1057
+ SQL
1058
+ end
1059
+
1060
+ def collection_limited_batch?(limit)
1061
+ limit.nil? && SearchEngine.config.postgres_outbox.collection_batch_sizes?
1062
+ end
1063
+
1064
+ def global_limit_for(limit)
1065
+ (limit || SearchEngine.config.postgres_outbox.batch_size).to_i
1066
+ end
1067
+
1068
+ def collection_limited_candidate_limit
1069
+ batch_size_sum = SearchEngine.config.postgres_outbox.normalized_batch_sizes.values.sum
1070
+ [global_limit_for(nil), 1].max + batch_size_sum
1071
+ end
1072
+
1073
+ def collection_limits_cte_sql
1074
+ "collection_limits(collection, batch_size) AS (VALUES #{collection_batch_size_values_sql})"
1075
+ end
1076
+
1077
+ def collection_batch_size_values_sql
1078
+ SearchEngine.config.postgres_outbox.normalized_batch_sizes.sort.map do |collection, size|
1079
+ "(#{quote(collection)}, #{size})"
1080
+ end.join(', ')
1081
+ end
1082
+
867
1083
  def truncate_error(error)
868
1084
  message = error.respond_to?(:message) ? error.message : error.to_s
869
1085
  message.to_s[0, ERROR_LIMIT]
@@ -3,5 +3,5 @@
3
3
  module SearchEngine
4
4
  # Current gem version.
5
5
  # @return [String]
6
- VERSION = '30.1.8.16'
6
+ VERSION = '30.1.8.18'
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search-engine-for-typesense
3
3
  version: !ruby/object:Gem::Version
4
- version: 30.1.8.16
4
+ version: 30.1.8.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Shkoda