sbmt-outbox 6.16.0 → 6.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86044914d6babd961a882ecfd9394875e0b67f74a2b92f44db48480bb5096448
4
- data.tar.gz: 3ae09e5801c2dcacc9096bae5c2428f2c57445394dd5a3d036c58b1fa63121d2
3
+ metadata.gz: 3145b7891faf0d1c657121122757608c910d34812d5084674dd77b1d925d8a43
4
+ data.tar.gz: de2766b4beb880123d46224b6d978c8bf2b031ad8dfcb22722acca72d3d94d0b
5
5
  SHA512:
6
- metadata.gz: 9f180114bce2540c91f6c942195fa59c51bcfea67bd984ee5b7db19fd95653933492ce30f7890833e879469b2bb08ca5bcfad2b5a8efa8221eae904b2839a42f
7
- data.tar.gz: e9aeadbc87d76a0f86c7b7c9ddad157c45851e3941901bf4ff249c116a7d32827645157ed644709ba59756569af0d9637e0b66c0c9b0c10937ba6b5878ac740d
6
+ metadata.gz: a129a09ceb3b19931a6eb698dabbf4438628b209297f1d32d4e80e7d141a2ea3dd5f70e5a34cfe999da793731c836c94e55e93d6127a9e3e11a9021d62163c6a
7
+ data.tar.gz: 0b9a632efe465d1554ddedc11197d752996f6dc621b51c831e69c1e9b1acd528f3180be1107bfcc43dd796cb48f9459fc135f79ae46bd46a30b1362135e9045d
data/README.md CHANGED
@@ -273,6 +273,7 @@ default: &default
273
273
  delivered_min_retention_period: PT1H #optional, default: PT1H, for statuses: delivered, retention period for delivered items, https://en.wikipedia.org/wiki/ISO_8601#Durations
274
274
  deletion_batch_size: 1_000 #optional, default: 1_000
275
275
  deletion_sleep_time: 0.5 #optional, default: 0.5
276
+ deletion_time_window: PT4H #optional, default: PT4H, width of the created_at time window processed per deletion pass (used for delivered, failed and discarded items), https://en.wikipedia.org/wiki/ISO_8601#Durations
276
277
  max_retries: 3 # default 0, the number of retries before the item will be marked as failed
277
278
  strict_order: false # optional, default
278
279
  transports: # transports section
@@ -353,6 +354,7 @@ inbox_items: # inbox items section
353
354
  delivered_min_retention_period: PT1H #optional, default: PT1H, for statuses: delivered, retention period for delivered items, https://en.wikipedia.org/wiki/ISO_8601#Durations
354
355
  deletion_batch_size: 1_000 #optional, default: 1_000
355
356
  deletion_sleep_time: 0.5 #optional, default: 0.5
357
+ deletion_time_window: PT4H #optional, default: PT4H, width of the created_at time window processed per deletion pass (used for delivered, failed and discarded items), https://en.wikipedia.org/wiki/ISO_8601#Durations
356
358
  max_retries: 3 # default 0, the number of retries before the item will be marked as failed
357
359
  transports: # transports section
358
360
  import_order: # underscored transport class name
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sbmt/outbox/metrics/utils"
4
+ require "sbmt/outbox/v2/redis_item_meta"
4
5
 
5
6
  module Sbmt
6
7
  module Outbox
@@ -8,14 +9,16 @@ module Sbmt
8
9
  param :item_class, reader: :private
9
10
  param :item_id, reader: :private
10
11
  option :worker_version, reader: :private, optional: true, default: -> { 1 }
12
+ option :cache_ttl_sec, reader: :private, optional: true, default: -> { 5 * 60 }
13
+ option :redis, reader: :private, optional: true, default: -> {}
11
14
 
12
15
  METRICS_COUNTERS = %i[error_counter retry_counter sent_counter fetch_error_counter discarded_counter].freeze
13
16
 
14
- delegate :log_success, :log_info, :log_failure, to: "Sbmt::Outbox.logger"
17
+ delegate :log_success, :log_info, :log_failure, :log_debug, to: "Sbmt::Outbox.logger"
15
18
  delegate :item_process_middlewares, to: "Sbmt::Outbox"
16
19
  delegate :box_type, :box_name, :owner, to: :item_class
17
20
 
18
- attr_accessor :process_latency
21
+ attr_accessor :process_latency, :retry_latency
19
22
 
20
23
  def call
21
24
  log_success(
@@ -26,9 +29,23 @@ module Sbmt
26
29
  item = nil
27
30
 
28
31
  item_class.transaction do
29
- item = yield fetch_item
32
+ item = yield fetch_item_and_lock_for_update
33
+
34
+ cached_item = fetch_redis_item_meta(redis_item_key(item_id))
35
+ if cached_retries_exceeded?(cached_item)
36
+ msg = "max retries exceeded: marking item as failed based on cached data: #{cached_item}"
37
+ item.set_errors_count(cached_item.errors_count)
38
+ track_failed(msg, item)
39
+ next Failure(msg)
40
+ end
41
+
42
+ if cached_greater_errors_count?(item, cached_item)
43
+ log_failure("inconsistent item: cached_errors_count:#{cached_item.errors_count} > db_errors_count:#{item.errors_count}: setting errors_count based on cached data:#{cached_item}")
44
+ item.set_errors_count(cached_item.errors_count)
45
+ end
30
46
 
31
47
  if item.processed_at?
48
+ self.retry_latency = Time.current - item.created_at
32
49
  item.config.retry_strategies.each do |retry_strategy|
33
50
  yield check_retry_strategy(item, retry_strategy)
34
51
  end
@@ -62,7 +79,48 @@ module Sbmt
62
79
 
63
80
  private
64
81
 
65
- def fetch_item
82
+ def cached_retries_exceeded?(cached_item)
83
+ return false unless cached_item
84
+
85
+ item_class.max_retries_exceeded?(cached_item.errors_count)
86
+ end
87
+
88
+ def cached_greater_errors_count?(db_item, cached_item)
89
+ return false unless cached_item
90
+
91
+ cached_item.errors_count > db_item.errors_count
92
+ end
93
+
94
+ def fetch_redis_item_meta(redis_key)
95
+ return if worker_version < 2
96
+
97
+ data = redis.call("GET", redis_key)
98
+ return if data.blank?
99
+
100
+ Sbmt::Outbox::V2::RedisItemMeta.deserialize!(data)
101
+ rescue => ex
102
+ log_debug("error while fetching redis meta: #{ex.message}")
103
+ nil
104
+ end
105
+
106
+ def set_redis_item_meta(item, ex)
107
+ return if worker_version < 2
108
+ return if item.nil?
109
+
110
+ redis_key = redis_item_key(item.id)
111
+ error_msg = format_exception_error(ex, extract_cause: false)
112
+ data = Sbmt::Outbox::V2::RedisItemMeta.new(errors_count: item.errors_count, error_msg: error_msg)
113
+ redis.call("SET", redis_key, data.to_s, "EX", cache_ttl_sec)
114
+ rescue => ex
115
+ log_debug("error while fetching redis meta: #{ex.message}")
116
+ nil
117
+ end
118
+
119
+ def redis_item_key(item_id)
120
+ "#{box_type}:#{item_class.box_name}:#{item_id}"
121
+ end
122
+
123
+ def fetch_item_and_lock_for_update
66
124
  item = item_class
67
125
  .lock("FOR UPDATE")
68
126
  .find_by(id: item_id)
@@ -171,6 +229,7 @@ module Sbmt
171
229
  item.pending!
172
230
  end
173
231
  rescue => e
232
+ set_redis_item_meta(item, e)
174
233
  log_error_handling_error(e, item)
175
234
  end
176
235
 
@@ -259,6 +318,7 @@ module Sbmt
259
318
  end
260
319
 
261
320
  track_process_latency(labels) if process_latency
321
+ track_retry_latency(labels) if retry_latency
262
322
 
263
323
  return unless counters[:sent_counter].positive?
264
324
 
@@ -279,6 +339,10 @@ module Sbmt
279
339
  def track_process_latency(labels)
280
340
  Yabeda.outbox.process_latency.measure(labels, process_latency.round(3))
281
341
  end
342
+
343
+ def track_retry_latency(labels)
344
+ Yabeda.outbox.retry_latency.measure(labels, retry_latency.round(3))
345
+ end
282
346
  end
283
347
  end
284
348
  end
@@ -102,45 +102,50 @@ module Sbmt
102
102
  # SELECT "items"."id"
103
103
  # FROM "items"
104
104
  # WHERE (
105
- # "items"."status" = 1 AND "items"."created_at" < '2023-05-01 00:00:00'
105
+ # "items"."status" IN (2) AND "items"."created_at" BETWEEN '2025-01-29 12:18:32.917836' AND '2025-01-29 12:18:32.927596' LIMIT 1000
106
106
  # )
107
- # LIMIT 1000
108
107
  # )
109
108
  def postgres_delete_in_batches(waterline_failed, waterline_delivered)
110
- table = item_class.arel_table
111
-
112
109
  status_delivered = item_class.statuses[:delivered]
113
110
  status_failed_discarded = item_class.statuses.values_at(:failed, :discarded)
114
111
 
115
- delete_items_in_batches(table, table[:status].eq(status_delivered).and(table[:created_at].lt(waterline_delivered)))
116
- delete_items_in_batches(table, table[:status].in(status_failed_discarded).and(table[:created_at].lt(waterline_failed)))
112
+ delete_items_in_batches_with_between(waterline_delivered, status_delivered)
113
+ delete_items_in_batches_with_between(waterline_failed, status_failed_discarded)
117
114
  end
118
115
 
119
- def delete_items_in_batches(table, condition)
120
- subquery = table
121
- .project(table[:id])
122
- .where(condition)
123
- .take(item_class.config.deletion_batch_size)
124
-
125
- delete_statement = Arel::Nodes::DeleteStatement.new
126
- delete_statement.relation = table
127
- delete_statement.wheres = [table[:id].in(subquery)]
116
+ def delete_items_in_batches_with_between(waterline, statuses)
117
+ table = item_class.arel_table
118
+ batch_size = item_class.config.deletion_batch_size
119
+ time_window = item_class.config.deletion_time_window
120
+ min_date = item_class.where(table[:status].in(statuses)).minimum(:created_at)
128
121
  deleted_count = nil
129
122
 
130
- loop do
131
- track_deleted_latency do
132
- deleted_count = item_class
133
- .connection
134
- .execute(delete_statement.to_sql)
135
- .cmd_tuples
136
- end
123
+ while min_date && min_date < waterline
124
+ max_date = [min_date + time_window, waterline].min
125
+
126
+ loop do
127
+ subquery = table
128
+ .project(table[:id])
129
+ .where(table[:status].in(statuses))
130
+ .where(table[:created_at].between(min_date..max_date))
131
+ .take(batch_size)
132
+
133
+ delete_statement = Arel::Nodes::DeleteStatement.new
134
+ delete_statement.relation = table
135
+ delete_statement.wheres = [table[:id].in(subquery)]
136
+
137
+ track_deleted_latency do
138
+ deleted_count = item_class.connection.execute(delete_statement.to_sql).cmd_tuples
139
+ end
137
140
 
138
- track_deleted_counter(deleted_count)
141
+ track_deleted_counter(deleted_count)
139
142
 
140
- logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} items")
141
- break if deleted_count == 0
142
- lock_timer.checkpoint!
143
- sleep(item_class.config.deletion_sleep_time)
143
+ logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} between #{min_date} and #{max_date}")
144
+ break if deleted_count < batch_size
145
+ lock_timer.checkpoint!
146
+ sleep(item_class.config.deletion_sleep_time) if deleted_count > 0
147
+ end
148
+ min_date = max_date
144
149
  end
145
150
  end
146
151
 
@@ -154,37 +159,43 @@ module Sbmt
154
159
  # This approach doesn't require a subquery, making it more straightforward.
155
160
  #
156
161
  # Example SQL generated for deletion:
157
- # DELETE FROM `items`
162
+ # DELETE FROM `items`
158
163
  # WHERE (
159
- # `items`.`status` = 1 AND `items`.`created_at` < '2023-05-01 00:00:00'
164
+ # `items`.`status` IN (2) AND `items`.`created_at` BETWEEN '2024-12-29 18:34:25.369234' AND '2024-12-29 22:34:25.369234' LIMIT 1000
160
165
  # )
161
- # LIMIT 1000
162
166
  def mysql_delete_in_batches(waterline_failed, waterline_delivered)
163
167
  status_delivered = item_class.statuses[:delivered]
164
168
  status_failed_discarded = [item_class.statuses.values_at(:failed, :discarded)]
165
169
 
166
- delete_items_in_batches_mysql(
167
- item_class.where(status: status_delivered, created_at: ...waterline_delivered)
168
- )
169
- delete_items_in_batches_mysql(
170
- item_class.where(status: status_failed_discarded).where(created_at: ...waterline_failed)
171
- )
170
+ delete_items_in_batches_with_between_mysql(waterline_delivered, status_delivered)
171
+ delete_items_in_batches_with_between_mysql(waterline_failed, status_failed_discarded)
172
172
  end
173
173
 
174
- def delete_items_in_batches_mysql(query)
174
+ def delete_items_in_batches_with_between_mysql(waterline, statuses)
175
+ batch_size = item_class.config.deletion_batch_size
176
+ time_window = item_class.config.deletion_time_window
177
+ min_date = item_class.where(status: statuses).minimum(:created_at)
175
178
  deleted_count = nil
176
179
 
177
- loop do
178
- track_deleted_latency do
179
- deleted_count = query.limit(item_class.config.deletion_batch_size).delete_all
180
- end
180
+ while min_date && min_date < waterline
181
+ max_date = [min_date + time_window, waterline].min
182
+
183
+ loop do
184
+ track_deleted_latency do
185
+ deleted_count = item_class
186
+ .where(status: statuses, created_at: min_date..max_date)
187
+ .limit(batch_size)
188
+ .delete_all
189
+ end
181
190
 
182
- track_deleted_counter(deleted_count)
191
+ track_deleted_counter(deleted_count)
183
192
 
184
- logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} items")
185
- break if deleted_count == 0
186
- lock_timer.checkpoint!
187
- sleep(item_class.config.deletion_sleep_time)
193
+ logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} between #{min_date} and #{max_date}")
194
+ break if deleted_count < batch_size
195
+ lock_timer.checkpoint!
196
+ sleep(item_class.config.deletion_sleep_time) if deleted_count > 0
197
+ end
198
+ min_date = max_date
188
199
  end
189
200
  end
190
201
 
@@ -49,6 +49,17 @@ module Sbmt
49
49
  end
50
50
  end
51
51
  end
52
+
53
+ def max_retries_exceeded?(count)
54
+ return false if config.strict_order
55
+ return true unless retriable?
56
+
57
+ count > config.max_retries
58
+ end
59
+
60
+ def retriable?
61
+ config.max_retries > 0
62
+ end
52
63
  end
53
64
 
54
65
  enum :status, {
@@ -135,20 +146,21 @@ module Sbmt
135
146
  end
136
147
 
137
148
  def retriable?
138
- config.max_retries > 0
149
+ self.class.retriable?
139
150
  end
140
151
 
141
152
  def max_retries_exceeded?
142
- return false if config.strict_order
143
- return true unless retriable?
144
-
145
- errors_count > config.max_retries
153
+ self.class.max_retries_exceeded?(errors_count)
146
154
  end
147
155
 
148
156
  def increment_errors_counter
149
157
  increment(:errors_count)
150
158
  end
151
159
 
160
+ def set_errors_count(count)
161
+ self.errors_count = count
162
+ end
163
+
152
164
  def add_error(ex_or_msg)
153
165
  increment_errors_counter
154
166
 
@@ -60,6 +60,10 @@ module Sbmt
60
60
  @delivered_min_retention_period ||= ActiveSupport::Duration.parse(options[:delivered_min_retention_period] || "PT1H")
61
61
  end
62
62
 
63
+ def deletion_time_window
64
+ @deletion_time_window ||= ActiveSupport::Duration.parse(options[:deletion_time_window] || "PT4H")
65
+ end
66
+
63
67
  def max_retries
64
68
  @max_retries ||= (options[:max_retries] || 0).to_i
65
69
  end
@@ -50,6 +50,12 @@ Yabeda.configure do
50
50
  buckets: [0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 20, 30].freeze,
51
51
  comment: "A histogram for outbox/inbox deletion latency"
52
52
 
53
+ histogram :retry_latency,
54
+ tags: %i[type name partition owner],
55
+ unit: :seconds,
56
+ buckets: [1, 10, 20, 50, 120, 300, 900, 1800, 3600].freeze,
57
+ comment: "A histogram outbox retry latency"
58
+
53
59
  counter :deleted_counter,
54
60
  tags: %i[box_type box_name],
55
61
  comment: "A counter for the number of deleted outbox/inbox items"
@@ -25,8 +25,8 @@ module Sbmt
25
25
  c.cdn_url = "https://cdn.jsdelivr.net/npm/sbmt-outbox-ui@0.0.8/dist/assets/index.js"
26
26
  end
27
27
  c.process_items = ActiveSupport::OrderedOptions.new.tap do |c|
28
- c.general_timeout = 120
29
- c.cutoff_timeout = 60
28
+ c.general_timeout = 180
29
+ c.cutoff_timeout = 90
30
30
  c.batch_size = 200
31
31
  end
32
32
  c.worker = ActiveSupport::OrderedOptions.new.tap do |c|
@@ -54,8 +54,8 @@ module Sbmt
54
54
  end
55
55
  c.processor = ActiveSupport::OrderedOptions.new.tap do |pc|
56
56
  pc.threads_count = 4
57
- pc.general_timeout = 120
58
- pc.cutoff_timeout = 60
57
+ pc.general_timeout = 180
58
+ pc.cutoff_timeout = 90
59
59
  pc.brpop_delay = 1
60
60
  end
61
61
 
@@ -10,7 +10,7 @@ module Sbmt
10
10
  module V2
11
11
  class Processor < BoxProcessor
12
12
  delegate :processor_config, :batch_process_middlewares, :logger, to: "Sbmt::Outbox"
13
- attr_reader :lock_timeout, :brpop_delay
13
+ attr_reader :lock_timeout, :cache_ttl, :cutoff_timeout, :brpop_delay
14
14
 
15
15
  REDIS_BRPOP_MIN_DELAY = 0.1
16
16
 
@@ -18,11 +18,16 @@ module Sbmt
18
18
  boxes,
19
19
  threads_count: nil,
20
20
  lock_timeout: nil,
21
+ cache_ttl: nil,
22
+ cutoff_timeout: nil,
21
23
  brpop_delay: nil,
22
24
  redis: nil
23
25
  )
24
26
  @lock_timeout = lock_timeout || processor_config.general_timeout
27
+ @cache_ttl = cache_ttl || @lock_timeout * 10
28
+ @cutoff_timeout = cutoff_timeout || processor_config.cutoff_timeout
25
29
  @brpop_delay = brpop_delay || redis_brpop_delay(boxes.count, processor_config.brpop_delay)
30
+ @redis = redis
26
31
 
27
32
  super(boxes: boxes, threads_count: threads_count || processor_config.threads_count, name: "processor", redis: redis)
28
33
  end
@@ -66,14 +71,19 @@ module Sbmt
66
71
  end
67
72
 
68
73
  def process(task)
69
- lock_timer = Cutoff.new(lock_timeout)
74
+ lock_timer = Cutoff.new(cutoff_timeout)
70
75
  last_id = 0
71
76
  strict_order = task.item_class.config.strict_order
72
77
 
73
78
  box_worker.item_execution_runtime.measure(task.yabeda_labels) do
74
79
  Outbox.database_switcher.use_master do
75
80
  task.ids.each do |id|
76
- result = ProcessItem.call(task.item_class, id, worker_version: task.yabeda_labels[:worker_version])
81
+ result = ProcessItem.call(
82
+ task.item_class, id,
83
+ worker_version: task.yabeda_labels[:worker_version],
84
+ cache_ttl_sec: cache_ttl,
85
+ redis: @redis
86
+ )
77
87
 
78
88
  box_worker.job_items_counter.increment(task.yabeda_labels)
79
89
  last_id = id
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sbmt
4
+ module Outbox
5
+ module V2
6
+ class RedisItemMeta
7
+ attr_reader :version, :timestamp, :errors_count, :error_msg
8
+
9
+ CURRENT_VERSION = 1
10
+ MAX_ERROR_LEN = 200
11
+
12
+ def initialize(errors_count:, error_msg:, timestamp: Time.current.to_i, version: CURRENT_VERSION)
13
+ @errors_count = errors_count
14
+ @error_msg = error_msg
15
+ @timestamp = timestamp
16
+ @version = version
17
+ end
18
+
19
+ def to_s
20
+ serialize
21
+ end
22
+
23
+ def serialize
24
+ JSON.generate({
25
+ version: version,
26
+ timestamp: timestamp,
27
+ errors_count: errors_count,
28
+ error_msg: error_msg.slice(0, MAX_ERROR_LEN)
29
+ })
30
+ end
31
+
32
+ def self.deserialize!(value)
33
+ raise "invalid data type: string is required" unless value.is_a?(String)
34
+
35
+ data = JSON.parse!(value, max_nesting: 1)
36
+ new(
37
+ version: data["version"],
38
+ timestamp: data["timestamp"].to_i,
39
+ errors_count: data["errors_count"].to_i,
40
+ error_msg: data["error_msg"]
41
+ )
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Sbmt
4
4
  module Outbox
5
- VERSION = "6.16.0"
5
+ VERSION = "6.18.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sbmt-outbox
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.16.0
4
+ version: 6.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sbermarket Ruby-Platform Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-01-28 00:00:00.000000000 Z
11
+ date: 2025-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: connection_pool
@@ -600,6 +600,7 @@ files:
600
600
  - lib/sbmt/outbox/v2/poll_throttler/redis_queue_time_lag.rb
601
601
  - lib/sbmt/outbox/v2/poller.rb
602
602
  - lib/sbmt/outbox/v2/processor.rb
603
+ - lib/sbmt/outbox/v2/redis_item_meta.rb
603
604
  - lib/sbmt/outbox/v2/redis_job.rb
604
605
  - lib/sbmt/outbox/v2/tasks/base.rb
605
606
  - lib/sbmt/outbox/v2/tasks/default.rb
@@ -629,7 +630,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
629
630
  - !ruby/object:Gem::Version
630
631
  version: '0'
631
632
  requirements: []
632
- rubygems_version: 3.1.6
633
+ rubygems_version: 3.3.7
633
634
  signing_key:
634
635
  specification_version: 4
635
636
  summary: Outbox service