search-engine-for-typesense 30.1.8.16 → 30.1.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/app/search_engine/search_engine/postgres_outbox/drain_job.rb +10 -3
- data/lib/search_engine/base/creation.rb +2 -61
- data/lib/search_engine/config.rb +32 -0
- data/lib/search_engine/indexer/batch_planner.rb +10 -1
- data/lib/search_engine/indexer/import_response_parser.rb +3 -1
- data/lib/search_engine/postgres_outbox/drain_enqueuer.rb +7 -0
- data/lib/search_engine/postgres_outbox/drainer.rb +4 -4
- data/lib/search_engine/postgres_outbox/repository.rb +207 -4
- data/lib/search_engine/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3a268323694adc2817c238b7ae9f522a7685b55b240691fb5891a98b4bc3d371
|
|
4
|
+
data.tar.gz: 3fcdddf6f9bdd81ecca5aae6755e3d55f7d5a9fd52e50ed3ed2d5bbdaa163b53
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 88f09c6d71f2a6c91505b8d83e57b6e989de2f8fbbc477b7b35185996da5cb0eae918dcf3ed14f8c9f3c69760469b5da9d32da2336b5cd640bffc386e31869b0
|
|
7
|
+
data.tar.gz: ab29408bb29a73d210aa29c3db62bb3bf684b8105a6dbe5aaf918ed299f8849b3eadb8ecb406d30db11e2353941aafb5ec8cb58ae4c180ca684ad720a5a55697
|
data/README.md
CHANGED
|
@@ -222,6 +222,11 @@ SearchEngine.configure do |c|
|
|
|
222
222
|
c.postgres_outbox.channel = "search_engine_outbox"
|
|
223
223
|
c.postgres_outbox.queue_name = "search_engine"
|
|
224
224
|
c.postgres_outbox.batch_size = 1000
|
|
225
|
+
c.postgres_outbox.batch_sizes = {
|
|
226
|
+
product_balances: 10_000,
|
|
227
|
+
calculated_products: 1_000,
|
|
228
|
+
products: 2_000
|
|
229
|
+
}
|
|
225
230
|
c.postgres_outbox.drain_target_parallelism = 1
|
|
226
231
|
c.postgres_outbox.drain_job_max_batches = 1
|
|
227
232
|
c.postgres_outbox.drain_job_max_runtime_s = nil
|
|
@@ -241,6 +246,10 @@ SearchEngine.configure do |c|
|
|
|
241
246
|
end
|
|
242
247
|
```
|
|
243
248
|
|
|
249
|
+
`batch_size` is the global fallback for all collections. Use `batch_sizes` when some collections are much
|
|
250
|
+
lighter or heavier than others. Omitted drain limits use the per-collection values; explicit `limit:`
|
|
251
|
+
arguments still override the map and use one global cap for that drain.
|
|
252
|
+
|
|
244
253
|
Generate and edit the migrations:
|
|
245
254
|
|
|
246
255
|
```bash
|
|
@@ -22,7 +22,7 @@ module SearchEngine
|
|
|
22
22
|
return perform_with_drain_slot(limit: limit, target_key: target_key, drain_slot: drain_slot)
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
effective_limit = limit
|
|
25
|
+
effective_limit = drain_limit(limit)
|
|
26
26
|
drainer = drainer_for(target_key)
|
|
27
27
|
summary = drainer.drain_once(limit: effective_limit)
|
|
28
28
|
enqueue_continuation(limit: limit, target_key: target_key) if continue_draining?(summary, effective_limit)
|
|
@@ -40,7 +40,7 @@ module SearchEngine
|
|
|
40
40
|
def perform_with_drain_slot(limit:, target_key:, drain_slot:)
|
|
41
41
|
target = delivery_target_for!(target_key)
|
|
42
42
|
slot = drain_slot.to_i
|
|
43
|
-
effective_limit = limit
|
|
43
|
+
effective_limit = drain_limit(limit)
|
|
44
44
|
repository = repository_for_slot
|
|
45
45
|
slot_requeued = false
|
|
46
46
|
return stale_slot_summary(target.key, slot) unless repository.start_drain_slot!(
|
|
@@ -100,7 +100,7 @@ module SearchEngine
|
|
|
100
100
|
end
|
|
101
101
|
|
|
102
102
|
def continue_draining?(summary, effective_limit)
|
|
103
|
-
summary[:continue] || summary[:claimed].to_i >= effective_limit.to_i
|
|
103
|
+
summary[:continue] || (!effective_limit.nil? && summary[:claimed].to_i >= effective_limit.to_i)
|
|
104
104
|
end
|
|
105
105
|
|
|
106
106
|
def drainer_for(target_key)
|
|
@@ -184,6 +184,13 @@ module SearchEngine
|
|
|
184
184
|
[SearchEngine.config.postgres_outbox.drain_job_max_batches.to_i, 1].max
|
|
185
185
|
end
|
|
186
186
|
|
|
187
|
+
def drain_limit(limit)
|
|
188
|
+
return limit unless limit.nil?
|
|
189
|
+
return nil if SearchEngine.config.postgres_outbox.collection_batch_sizes?
|
|
190
|
+
|
|
191
|
+
SearchEngine.config.postgres_outbox.batch_size
|
|
192
|
+
end
|
|
193
|
+
|
|
187
194
|
def runtime_budget_exhausted?
|
|
188
195
|
max_runtime_s = SearchEngine.config.postgres_outbox.drain_job_max_runtime_s.to_i
|
|
189
196
|
return false unless max_runtime_s.positive?
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
require 'active_support/concern'
|
|
4
4
|
require 'set'
|
|
5
|
-
require 'json'
|
|
6
5
|
require 'search_engine/indexer/batch_planner'
|
|
6
|
+
require 'search_engine/indexer/import_response_parser'
|
|
7
7
|
|
|
8
8
|
module SearchEngine
|
|
9
9
|
class Base
|
|
@@ -531,67 +531,8 @@ module SearchEngine
|
|
|
531
531
|
result
|
|
532
532
|
end
|
|
533
533
|
|
|
534
|
-
def safe_parse_json(str)
|
|
535
|
-
JSON.parse(str)
|
|
536
|
-
rescue StandardError
|
|
537
|
-
nil
|
|
538
|
-
end
|
|
539
|
-
|
|
540
534
|
def parse_import_response(raw)
|
|
541
|
-
|
|
542
|
-
return parse_import_response_from_array(raw) if raw.is_a?(Array)
|
|
543
|
-
|
|
544
|
-
[0, 0, []]
|
|
545
|
-
end
|
|
546
|
-
|
|
547
|
-
def parse_import_response_from_string(str)
|
|
548
|
-
success = 0
|
|
549
|
-
failure = 0
|
|
550
|
-
samples = []
|
|
551
|
-
|
|
552
|
-
str.each_line do |line|
|
|
553
|
-
line = line.strip
|
|
554
|
-
next if line.empty?
|
|
555
|
-
|
|
556
|
-
h = safe_parse_json(line)
|
|
557
|
-
unless h
|
|
558
|
-
failure += 1
|
|
559
|
-
samples << 'invalid-json-line'
|
|
560
|
-
next
|
|
561
|
-
end
|
|
562
|
-
|
|
563
|
-
if truthy?(h['success'] || h[:success])
|
|
564
|
-
success += 1
|
|
565
|
-
else
|
|
566
|
-
failure += 1
|
|
567
|
-
msg = h['error'] || h[:error] || h['message'] || h[:message]
|
|
568
|
-
samples << msg.to_s[0, 200] if msg
|
|
569
|
-
end
|
|
570
|
-
end
|
|
571
|
-
|
|
572
|
-
[success, failure, samples[0, 5]]
|
|
573
|
-
end
|
|
574
|
-
|
|
575
|
-
def parse_import_response_from_array(arr)
|
|
576
|
-
success = 0
|
|
577
|
-
failure = 0
|
|
578
|
-
samples = []
|
|
579
|
-
|
|
580
|
-
arr.each do |h|
|
|
581
|
-
if h.is_a?(Hash) && truthy?(h['success'] || h[:success])
|
|
582
|
-
success += 1
|
|
583
|
-
else
|
|
584
|
-
failure += 1
|
|
585
|
-
msg = h.is_a?(Hash) ? (h['error'] || h[:error] || h['message'] || h[:message]) : nil
|
|
586
|
-
samples << msg.to_s[0, 200] if msg
|
|
587
|
-
end
|
|
588
|
-
end
|
|
589
|
-
|
|
590
|
-
[success, failure, samples[0, 5]]
|
|
591
|
-
end
|
|
592
|
-
|
|
593
|
-
def truthy?(val)
|
|
594
|
-
val == true || val.to_s.downcase == 'true'
|
|
535
|
+
SearchEngine::Indexer::ImportResponseParser.parse(raw)
|
|
595
536
|
end
|
|
596
537
|
|
|
597
538
|
def normalize_records_input(records)
|
data/lib/search_engine/config.rb
CHANGED
|
@@ -280,6 +280,8 @@ module SearchEngine
|
|
|
280
280
|
attr_accessor :queue_name
|
|
281
281
|
# @return [Integer] maximum events to claim per processing batch
|
|
282
282
|
attr_accessor :batch_size
|
|
283
|
+
# @return [Hash] optional per-collection processing batch sizes
|
|
284
|
+
attr_accessor :batch_sizes
|
|
283
285
|
# @return [Integer] maximum processing attempts before leaving an event failed
|
|
284
286
|
attr_accessor :max_attempts
|
|
285
287
|
# @return [Integer] polling interval in seconds
|
|
@@ -317,6 +319,7 @@ module SearchEngine
|
|
|
317
319
|
@channel = 'search_engine_outbox'
|
|
318
320
|
@queue_name = 'search_engine'
|
|
319
321
|
@batch_size = 1000
|
|
322
|
+
@batch_sizes = {}
|
|
320
323
|
@max_attempts = 10
|
|
321
324
|
@poll_interval_s = 5
|
|
322
325
|
@listener_wait_timeout_s = 30
|
|
@@ -332,6 +335,34 @@ module SearchEngine
|
|
|
332
335
|
@drain_job_max_batches = 1
|
|
333
336
|
@drain_job_max_runtime_s = nil
|
|
334
337
|
end
|
|
338
|
+
|
|
339
|
+
# Resolve the processing batch size for a collection.
|
|
340
|
+
#
|
|
341
|
+
# @param collection [String, Symbol, nil] collection name
|
|
342
|
+
# @return [Integer] positive per-collection batch size or global fallback
|
|
343
|
+
def batch_size_for(collection)
|
|
344
|
+
configured = normalized_batch_sizes[collection.to_s]
|
|
345
|
+
return configured if configured&.positive?
|
|
346
|
+
|
|
347
|
+
batch_size.to_i
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# Whether any positive per-collection batch sizes are configured.
|
|
351
|
+
#
|
|
352
|
+
# @return [Boolean]
|
|
353
|
+
def collection_batch_sizes?
|
|
354
|
+
normalized_batch_sizes.any?
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
# Normalized positive per-collection batch sizes keyed by collection name.
|
|
358
|
+
#
|
|
359
|
+
# @return [Hash<String, Integer>]
|
|
360
|
+
def normalized_batch_sizes
|
|
361
|
+
Hash(batch_sizes).each_with_object({}) do |(key, value), result|
|
|
362
|
+
size = value.to_i
|
|
363
|
+
result[key.to_s] = size if size.positive?
|
|
364
|
+
end
|
|
365
|
+
end
|
|
335
366
|
end
|
|
336
367
|
|
|
337
368
|
# Lightweight nested configuration for observability/logging.
|
|
@@ -886,6 +917,7 @@ module SearchEngine
|
|
|
886
917
|
channel: postgres_outbox.channel,
|
|
887
918
|
queue_name: postgres_outbox.queue_name,
|
|
888
919
|
batch_size: postgres_outbox.batch_size,
|
|
920
|
+
batch_sizes: postgres_outbox.batch_sizes,
|
|
889
921
|
max_attempts: postgres_outbox.max_attempts,
|
|
890
922
|
poll_interval_s: postgres_outbox.poll_interval_s,
|
|
891
923
|
listener_wait_timeout_s: postgres_outbox.listener_wait_timeout_s,
|
|
@@ -28,7 +28,7 @@ module SearchEngine
|
|
|
28
28
|
docs.each_with_index do |raw, idx|
|
|
29
29
|
doc = ensure_hash_document(raw)
|
|
30
30
|
ensure_id!(doc)
|
|
31
|
-
doc
|
|
31
|
+
assign_doc_updated_at!(doc, now_i)
|
|
32
32
|
buffer << JSON.generate(doc)
|
|
33
33
|
buffer << "\n" if idx < (size - 1)
|
|
34
34
|
count += 1
|
|
@@ -65,6 +65,15 @@ module SearchEngine
|
|
|
65
65
|
has_id = doc.key?(:id) || doc.key?('id')
|
|
66
66
|
raise SearchEngine::Errors::InvalidParams, 'document is missing required id' unless has_id
|
|
67
67
|
end
|
|
68
|
+
|
|
69
|
+
def assign_doc_updated_at!(doc, timestamp)
|
|
70
|
+
if doc.key?('doc_updated_at')
|
|
71
|
+
doc.delete(:doc_updated_at)
|
|
72
|
+
doc['doc_updated_at'] = timestamp
|
|
73
|
+
else
|
|
74
|
+
doc[:doc_updated_at] = timestamp
|
|
75
|
+
end
|
|
76
|
+
end
|
|
68
77
|
end
|
|
69
78
|
end
|
|
70
79
|
end
|
|
@@ -24,8 +24,10 @@ module SearchEngine
|
|
|
24
24
|
def parse(raw)
|
|
25
25
|
return parse_from_string(raw) if raw.is_a?(String)
|
|
26
26
|
return parse_from_array(raw) if raw.is_a?(Array)
|
|
27
|
+
return [0, 0, []] if raw.nil?
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
raise SearchEngine::Errors::InvalidParams,
|
|
30
|
+
"Unsupported Typesense import response shape: #{raw.class.name}"
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
def parse_from_string(str)
|
|
@@ -69,6 +69,13 @@ module SearchEngine
|
|
|
69
69
|
kwargs = { target_key: slot.fetch(:target_key), drain_slot: slot.fetch(:slot) }
|
|
70
70
|
kwargs[:limit] = limit unless limit.nil?
|
|
71
71
|
job.perform_later(**kwargs)
|
|
72
|
+
rescue StandardError => error
|
|
73
|
+
repository.release_requeued_drain_slot!(
|
|
74
|
+
target_key: slot.fetch(:target_key),
|
|
75
|
+
slot: slot.fetch(:slot),
|
|
76
|
+
error: error
|
|
77
|
+
)
|
|
78
|
+
raise
|
|
72
79
|
end
|
|
73
80
|
|
|
74
81
|
def delivery_targets
|
|
@@ -22,7 +22,7 @@ module SearchEngine
|
|
|
22
22
|
# Claim, coalesce, order, process, and mark one batch.
|
|
23
23
|
# @param limit [Integer]
|
|
24
24
|
# @return [Hash]
|
|
25
|
-
def drain_once(limit:
|
|
25
|
+
def drain_once(limit: nil)
|
|
26
26
|
SearchEngine::Instrumentation.instrument(
|
|
27
27
|
'search_engine.postgres_outbox.drain',
|
|
28
28
|
drain_payload(limit)
|
|
@@ -31,7 +31,7 @@ module SearchEngine
|
|
|
31
31
|
summary = empty_summary(events)
|
|
32
32
|
next summary if events.empty?
|
|
33
33
|
|
|
34
|
-
summary[:continue] = true if
|
|
34
|
+
summary[:continue] = true if continue_after_nonempty_batch?
|
|
35
35
|
|
|
36
36
|
kept, superseded_ids = coalesce(events)
|
|
37
37
|
repository.mark_superseded!(superseded_ids)
|
|
@@ -61,8 +61,8 @@ module SearchEngine
|
|
|
61
61
|
summary
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
-
def
|
|
65
|
-
!target_key.nil?
|
|
64
|
+
def continue_after_nonempty_batch?
|
|
65
|
+
!target_key.nil? || SearchEngine.config.postgres_outbox.collection_batch_sizes?
|
|
66
66
|
end
|
|
67
67
|
|
|
68
68
|
def coalesce(events)
|
|
@@ -24,7 +24,7 @@ module SearchEngine
|
|
|
24
24
|
rows = []
|
|
25
25
|
|
|
26
26
|
connection.transaction do
|
|
27
|
-
rows = select_rows(claim_select_sql(limit
|
|
27
|
+
rows = select_rows(claim_select_sql(limit))
|
|
28
28
|
ids = rows.map { |row| row_value(row, :id) }
|
|
29
29
|
execute(supersede_older_pending_sql(rows)) unless rows.empty?
|
|
30
30
|
execute(claim_update_sql(ids, worker_id)) unless ids.empty?
|
|
@@ -111,13 +111,13 @@ module SearchEngine
|
|
|
111
111
|
|
|
112
112
|
# Create missing delivery rows for all configured delivery targets.
|
|
113
113
|
# @return [void]
|
|
114
|
-
def materialize_deliveries!(limit:
|
|
114
|
+
def materialize_deliveries!(limit: nil)
|
|
115
115
|
targets = materialization_delivery_targets
|
|
116
116
|
return if targets.empty?
|
|
117
117
|
|
|
118
118
|
rows = []
|
|
119
119
|
connection.transaction do
|
|
120
|
-
rows = select_rows(delivery_materialization_select_sql(limit
|
|
120
|
+
rows = select_rows(delivery_materialization_select_sql(limit, targets))
|
|
121
121
|
next if rows.empty?
|
|
122
122
|
|
|
123
123
|
execute(materialization_supersede_older_deliveries_sql(rows, targets))
|
|
@@ -267,6 +267,9 @@ module SearchEngine
|
|
|
267
267
|
end
|
|
268
268
|
|
|
269
269
|
def delivery_materialization_select_sql(limit, targets)
|
|
270
|
+
return collection_limited_materialization_select_sql(targets) if collection_limited_batch?(limit)
|
|
271
|
+
|
|
272
|
+
limit = global_limit_for(limit)
|
|
270
273
|
<<~SQL
|
|
271
274
|
WITH target(target_key, queue_name) AS (
|
|
272
275
|
VALUES #{delivery_target_values_sql(targets)}
|
|
@@ -346,7 +349,7 @@ module SearchEngine
|
|
|
346
349
|
rows = []
|
|
347
350
|
|
|
348
351
|
connection.transaction do
|
|
349
|
-
rows = select_rows(delivery_claim_select_sql(limit
|
|
352
|
+
rows = select_rows(delivery_claim_select_sql(limit))
|
|
350
353
|
delivery_ids = rows.map { |row| row_value(row, :delivery_id) }
|
|
351
354
|
execute(delivery_supersede_older_pending_sql(rows)) unless rows.empty?
|
|
352
355
|
execute(delivery_claim_update_sql(delivery_ids, worker_id)) unless delivery_ids.empty?
|
|
@@ -369,6 +372,9 @@ module SearchEngine
|
|
|
369
372
|
end
|
|
370
373
|
|
|
371
374
|
def claim_select_sql(limit)
|
|
375
|
+
return collection_limited_claim_select_sql if collection_limited_batch?(limit)
|
|
376
|
+
|
|
377
|
+
limit = global_limit_for(limit)
|
|
372
378
|
<<~SQL
|
|
373
379
|
WITH ranked_pending AS (
|
|
374
380
|
SELECT id,
|
|
@@ -410,6 +416,9 @@ module SearchEngine
|
|
|
410
416
|
end
|
|
411
417
|
|
|
412
418
|
def delivery_claim_select_sql(limit)
|
|
419
|
+
return collection_limited_delivery_claim_select_sql if collection_limited_batch?(limit)
|
|
420
|
+
|
|
421
|
+
limit = global_limit_for(limit)
|
|
413
422
|
<<~SQL
|
|
414
423
|
WITH ranked_pending AS (
|
|
415
424
|
SELECT deliveries.id AS delivery_id,
|
|
@@ -864,6 +873,200 @@ module SearchEngine
|
|
|
864
873
|
[delay.to_i, 0].max
|
|
865
874
|
end
|
|
866
875
|
|
|
876
|
+
def collection_limited_claim_select_sql
|
|
877
|
+
<<~SQL
|
|
878
|
+
WITH #{collection_limits_cte_sql},
|
|
879
|
+
ranked_pending AS (
|
|
880
|
+
SELECT id,
|
|
881
|
+
ROW_NUMBER() OVER (
|
|
882
|
+
PARTITION BY collection, document_id
|
|
883
|
+
ORDER BY id DESC
|
|
884
|
+
) AS row_number
|
|
885
|
+
FROM #{quoted_table}
|
|
886
|
+
WHERE status = 'pending'
|
|
887
|
+
),
|
|
888
|
+
latest_due AS (
|
|
889
|
+
SELECT outbox.id,
|
|
890
|
+
outbox.collection
|
|
891
|
+
FROM #{quoted_table} outbox
|
|
892
|
+
INNER JOIN ranked_pending
|
|
893
|
+
ON ranked_pending.id = outbox.id
|
|
894
|
+
WHERE ranked_pending.row_number = 1
|
|
895
|
+
AND (outbox.next_attempt_at IS NULL OR outbox.next_attempt_at <= CURRENT_TIMESTAMP)
|
|
896
|
+
ORDER BY outbox.id ASC
|
|
897
|
+
LIMIT #{collection_limited_candidate_limit}
|
|
898
|
+
),
|
|
899
|
+
ranked_by_collection AS (
|
|
900
|
+
SELECT latest_due.id,
|
|
901
|
+
ROW_NUMBER() OVER (
|
|
902
|
+
PARTITION BY latest_due.collection
|
|
903
|
+
ORDER BY latest_due.id ASC
|
|
904
|
+
) AS collection_row_number,
|
|
905
|
+
COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
|
|
906
|
+
FROM latest_due
|
|
907
|
+
LEFT JOIN collection_limits
|
|
908
|
+
ON collection_limits.collection = latest_due.collection
|
|
909
|
+
),
|
|
910
|
+
selected_due AS (
|
|
911
|
+
SELECT id
|
|
912
|
+
FROM ranked_by_collection
|
|
913
|
+
WHERE collection_row_number <= collection_batch_size
|
|
914
|
+
)
|
|
915
|
+
SELECT outbox.*
|
|
916
|
+
FROM #{quoted_table} outbox
|
|
917
|
+
INNER JOIN selected_due
|
|
918
|
+
ON selected_due.id = outbox.id
|
|
919
|
+
ORDER BY outbox.id ASC
|
|
920
|
+
FOR UPDATE SKIP LOCKED
|
|
921
|
+
SQL
|
|
922
|
+
end
|
|
923
|
+
|
|
924
|
+
def collection_limited_materialization_select_sql(targets)
|
|
925
|
+
<<~SQL
|
|
926
|
+
WITH target(target_key, queue_name) AS (
|
|
927
|
+
VALUES #{delivery_target_values_sql(targets)}
|
|
928
|
+
),
|
|
929
|
+
#{collection_limits_cte_sql},
|
|
930
|
+
candidate_events AS MATERIALIZED (
|
|
931
|
+
SELECT outbox.id,
|
|
932
|
+
outbox.collection,
|
|
933
|
+
outbox.document_id
|
|
934
|
+
FROM #{quoted_table} outbox
|
|
935
|
+
WHERE outbox.status IN ('pending', 'processing', 'failed')
|
|
936
|
+
AND (outbox.next_attempt_at IS NULL OR outbox.next_attempt_at <= CURRENT_TIMESTAMP)
|
|
937
|
+
AND EXISTS (
|
|
938
|
+
SELECT 1
|
|
939
|
+
FROM target
|
|
940
|
+
WHERE NOT EXISTS (
|
|
941
|
+
SELECT 1
|
|
942
|
+
FROM #{quoted_delivery_table} deliveries
|
|
943
|
+
WHERE deliveries.event_id = outbox.id
|
|
944
|
+
AND deliveries.target_key = target.target_key
|
|
945
|
+
)
|
|
946
|
+
)
|
|
947
|
+
ORDER BY outbox.id ASC
|
|
948
|
+
LIMIT #{collection_limited_candidate_limit}
|
|
949
|
+
FOR UPDATE SKIP LOCKED
|
|
950
|
+
),
|
|
951
|
+
latest_candidate_ids AS (
|
|
952
|
+
SELECT id,
|
|
953
|
+
collection
|
|
954
|
+
FROM (
|
|
955
|
+
SELECT id,
|
|
956
|
+
collection,
|
|
957
|
+
ROW_NUMBER() OVER (
|
|
958
|
+
PARTITION BY collection, document_id
|
|
959
|
+
ORDER BY id DESC
|
|
960
|
+
) AS row_number
|
|
961
|
+
FROM candidate_events
|
|
962
|
+
) ranked_candidate_events
|
|
963
|
+
WHERE row_number = 1
|
|
964
|
+
),
|
|
965
|
+
ranked_by_collection AS (
|
|
966
|
+
SELECT latest_candidate_ids.id,
|
|
967
|
+
ROW_NUMBER() OVER (
|
|
968
|
+
PARTITION BY latest_candidate_ids.collection
|
|
969
|
+
ORDER BY latest_candidate_ids.id ASC
|
|
970
|
+
) AS collection_row_number,
|
|
971
|
+
COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
|
|
972
|
+
FROM latest_candidate_ids
|
|
973
|
+
LEFT JOIN collection_limits
|
|
974
|
+
ON collection_limits.collection = latest_candidate_ids.collection
|
|
975
|
+
),
|
|
976
|
+
selected_candidate_ids AS (
|
|
977
|
+
SELECT id
|
|
978
|
+
FROM ranked_by_collection
|
|
979
|
+
WHERE collection_row_number <= collection_batch_size
|
|
980
|
+
)
|
|
981
|
+
SELECT outbox.*
|
|
982
|
+
FROM #{quoted_table} outbox
|
|
983
|
+
INNER JOIN selected_candidate_ids
|
|
984
|
+
ON selected_candidate_ids.id = outbox.id
|
|
985
|
+
ORDER BY outbox.id ASC
|
|
986
|
+
SQL
|
|
987
|
+
end
|
|
988
|
+
|
|
989
|
+
def collection_limited_delivery_claim_select_sql
|
|
990
|
+
<<~SQL
|
|
991
|
+
WITH #{collection_limits_cte_sql},
|
|
992
|
+
ranked_pending AS (
|
|
993
|
+
SELECT deliveries.id AS delivery_id,
|
|
994
|
+
events.id AS event_id,
|
|
995
|
+
events.collection,
|
|
996
|
+
ROW_NUMBER() OVER (
|
|
997
|
+
PARTITION BY deliveries.target_key, events.collection, events.document_id
|
|
998
|
+
ORDER BY events.id DESC, deliveries.id DESC
|
|
999
|
+
) AS row_number
|
|
1000
|
+
FROM #{quoted_delivery_table} deliveries
|
|
1001
|
+
INNER JOIN #{quoted_table} events
|
|
1002
|
+
ON events.id = deliveries.event_id
|
|
1003
|
+
WHERE deliveries.target_key = #{quote(target_key)}
|
|
1004
|
+
AND deliveries.status = 'pending'
|
|
1005
|
+
),
|
|
1006
|
+
latest_due AS (
|
|
1007
|
+
SELECT deliveries.id,
|
|
1008
|
+
ranked_pending.collection
|
|
1009
|
+
FROM #{quoted_delivery_table} deliveries
|
|
1010
|
+
INNER JOIN ranked_pending
|
|
1011
|
+
ON ranked_pending.delivery_id = deliveries.id
|
|
1012
|
+
WHERE ranked_pending.row_number = 1
|
|
1013
|
+
AND (deliveries.next_attempt_at IS NULL OR deliveries.next_attempt_at <= CURRENT_TIMESTAMP)
|
|
1014
|
+
ORDER BY deliveries.id ASC
|
|
1015
|
+
LIMIT #{collection_limited_candidate_limit}
|
|
1016
|
+
),
|
|
1017
|
+
ranked_by_collection AS (
|
|
1018
|
+
SELECT latest_due.id,
|
|
1019
|
+
ROW_NUMBER() OVER (
|
|
1020
|
+
PARTITION BY latest_due.collection
|
|
1021
|
+
ORDER BY latest_due.id ASC
|
|
1022
|
+
) AS collection_row_number,
|
|
1023
|
+
COALESCE(collection_limits.batch_size, #{global_limit_for(nil)}) AS collection_batch_size
|
|
1024
|
+
FROM latest_due
|
|
1025
|
+
LEFT JOIN collection_limits
|
|
1026
|
+
ON collection_limits.collection = latest_due.collection
|
|
1027
|
+
),
|
|
1028
|
+
selected_due AS (
|
|
1029
|
+
SELECT id
|
|
1030
|
+
FROM ranked_by_collection
|
|
1031
|
+
WHERE collection_row_number <= collection_batch_size
|
|
1032
|
+
)
|
|
1033
|
+
SELECT events.*,
|
|
1034
|
+
deliveries.id AS delivery_id,
|
|
1035
|
+
deliveries.target_key,
|
|
1036
|
+
deliveries.attempts AS delivery_attempts
|
|
1037
|
+
FROM #{quoted_delivery_table} deliveries
|
|
1038
|
+
INNER JOIN #{quoted_table} events
|
|
1039
|
+
ON events.id = deliveries.event_id
|
|
1040
|
+
INNER JOIN selected_due
|
|
1041
|
+
ON selected_due.id = deliveries.id
|
|
1042
|
+
ORDER BY deliveries.id ASC
|
|
1043
|
+
FOR UPDATE SKIP LOCKED
|
|
1044
|
+
SQL
|
|
1045
|
+
end
|
|
1046
|
+
|
|
1047
|
+
def collection_limited_batch?(limit)
|
|
1048
|
+
limit.nil? && SearchEngine.config.postgres_outbox.collection_batch_sizes?
|
|
1049
|
+
end
|
|
1050
|
+
|
|
1051
|
+
def global_limit_for(limit)
|
|
1052
|
+
(limit || SearchEngine.config.postgres_outbox.batch_size).to_i
|
|
1053
|
+
end
|
|
1054
|
+
|
|
1055
|
+
def collection_limited_candidate_limit
|
|
1056
|
+
batch_size_sum = SearchEngine.config.postgres_outbox.normalized_batch_sizes.values.sum
|
|
1057
|
+
[global_limit_for(nil), 1].max + batch_size_sum
|
|
1058
|
+
end
|
|
1059
|
+
|
|
1060
|
+
def collection_limits_cte_sql
|
|
1061
|
+
"collection_limits(collection, batch_size) AS (VALUES #{collection_batch_size_values_sql})"
|
|
1062
|
+
end
|
|
1063
|
+
|
|
1064
|
+
def collection_batch_size_values_sql
|
|
1065
|
+
SearchEngine.config.postgres_outbox.normalized_batch_sizes.sort.map do |collection, size|
|
|
1066
|
+
"(#{quote(collection)}, #{size})"
|
|
1067
|
+
end.join(', ')
|
|
1068
|
+
end
|
|
1069
|
+
|
|
867
1070
|
def truncate_error(error)
|
|
868
1071
|
message = error.respond_to?(:message) ? error.message : error.to_s
|
|
869
1072
|
message.to_s[0, ERROR_LIMIT]
|