sbmt-outbox 6.16.0 → 6.18.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86044914d6babd961a882ecfd9394875e0b67f74a2b92f44db48480bb5096448
4
- data.tar.gz: 3ae09e5801c2dcacc9096bae5c2428f2c57445394dd5a3d036c58b1fa63121d2
3
+ metadata.gz: 3145b7891faf0d1c657121122757608c910d34812d5084674dd77b1d925d8a43
4
+ data.tar.gz: de2766b4beb880123d46224b6d978c8bf2b031ad8dfcb22722acca72d3d94d0b
5
5
  SHA512:
6
- metadata.gz: 9f180114bce2540c91f6c942195fa59c51bcfea67bd984ee5b7db19fd95653933492ce30f7890833e879469b2bb08ca5bcfad2b5a8efa8221eae904b2839a42f
7
- data.tar.gz: e9aeadbc87d76a0f86c7b7c9ddad157c45851e3941901bf4ff249c116a7d32827645157ed644709ba59756569af0d9637e0b66c0c9b0c10937ba6b5878ac740d
6
+ metadata.gz: a129a09ceb3b19931a6eb698dabbf4438628b209297f1d32d4e80e7d141a2ea3dd5f70e5a34cfe999da793731c836c94e55e93d6127a9e3e11a9021d62163c6a
7
+ data.tar.gz: 0b9a632efe465d1554ddedc11197d752996f6dc621b51c831e69c1e9b1acd528f3180be1107bfcc43dd796cb48f9459fc135f79ae46bd46a30b1362135e9045d
data/README.md CHANGED
@@ -273,6 +273,7 @@ default: &default
273
273
  delivered_min_retention_period: PT1H #optional, default: PT1H, for statuses: delivered, retention period for delivered items, https://en.wikipedia.org/wiki/ISO_8601#Durations
274
274
  deletion_batch_size: 1_000 #optional, default: 1_000
275
275
  deletion_sleep_time: 0.5 #optional, default: 0.5
276
+ deletion_time_window: PT4H #optional, default: PT4H, the size of the created_at time window used when deleting items in batches, https://en.wikipedia.org/wiki/ISO_8601#Durations
276
277
  max_retries: 3 # default 0, the number of retries before the item will be marked as failed
277
278
  strict_order: false # optional, default: false
278
279
  transports: # transports section
@@ -353,6 +354,7 @@ inbox_items: # inbox items section
353
354
  delivered_min_retention_period: PT1H #optional, default: PT1H, for statuses: delivered, retention period for delivered items, https://en.wikipedia.org/wiki/ISO_8601#Durations
354
355
  deletion_batch_size: 1_000 #optional, default: 1_000
355
356
  deletion_sleep_time: 0.5 #optional, default: 0.5
357
+ deletion_time_window: PT4H #optional, default: PT4H, the size of the created_at time window used when deleting items in batches, https://en.wikipedia.org/wiki/ISO_8601#Durations
356
358
  max_retries: 3 # default 0, the number of retries before the item will be marked as failed
357
359
  transports: # transports section
358
360
  import_order: # underscored transport class name
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sbmt/outbox/metrics/utils"
4
+ require "sbmt/outbox/v2/redis_item_meta"
4
5
 
5
6
  module Sbmt
6
7
  module Outbox
@@ -8,14 +9,16 @@ module Sbmt
8
9
  param :item_class, reader: :private
9
10
  param :item_id, reader: :private
10
11
  option :worker_version, reader: :private, optional: true, default: -> { 1 }
12
+ option :cache_ttl_sec, reader: :private, optional: true, default: -> { 5 * 60 }
13
+ option :redis, reader: :private, optional: true, default: -> {}
11
14
 
12
15
  METRICS_COUNTERS = %i[error_counter retry_counter sent_counter fetch_error_counter discarded_counter].freeze
13
16
 
14
- delegate :log_success, :log_info, :log_failure, to: "Sbmt::Outbox.logger"
17
+ delegate :log_success, :log_info, :log_failure, :log_debug, to: "Sbmt::Outbox.logger"
15
18
  delegate :item_process_middlewares, to: "Sbmt::Outbox"
16
19
  delegate :box_type, :box_name, :owner, to: :item_class
17
20
 
18
- attr_accessor :process_latency
21
+ attr_accessor :process_latency, :retry_latency
19
22
 
20
23
  def call
21
24
  log_success(
@@ -26,9 +29,23 @@ module Sbmt
26
29
  item = nil
27
30
 
28
31
  item_class.transaction do
29
- item = yield fetch_item
32
+ item = yield fetch_item_and_lock_for_update
33
+
34
+ cached_item = fetch_redis_item_meta(redis_item_key(item_id))
35
+ if cached_retries_exceeded?(cached_item)
36
+ msg = "max retries exceeded: marking item as failed based on cached data: #{cached_item}"
37
+ item.set_errors_count(cached_item.errors_count)
38
+ track_failed(msg, item)
39
+ next Failure(msg)
40
+ end
41
+
42
+ if cached_greater_errors_count?(item, cached_item)
43
+ log_failure("inconsistent item: cached_errors_count:#{cached_item.errors_count} > db_errors_count:#{item.errors_count}: setting errors_count based on cached data:#{cached_item}")
44
+ item.set_errors_count(cached_item.errors_count)
45
+ end
30
46
 
31
47
  if item.processed_at?
48
+ self.retry_latency = Time.current - item.created_at
32
49
  item.config.retry_strategies.each do |retry_strategy|
33
50
  yield check_retry_strategy(item, retry_strategy)
34
51
  end
@@ -62,7 +79,48 @@ module Sbmt
62
79
 
63
80
  private
64
81
 
65
- def fetch_item
82
+ def cached_retries_exceeded?(cached_item)
83
+ return false unless cached_item
84
+
85
+ item_class.max_retries_exceeded?(cached_item.errors_count)
86
+ end
87
+
88
+ def cached_greater_errors_count?(db_item, cached_item)
89
+ return false unless cached_item
90
+
91
+ cached_item.errors_count > db_item.errors_count
92
+ end
93
+
94
+ def fetch_redis_item_meta(redis_key)
95
+ return if worker_version < 2
96
+
97
+ data = redis.call("GET", redis_key)
98
+ return if data.blank?
99
+
100
+ Sbmt::Outbox::V2::RedisItemMeta.deserialize!(data)
101
+ rescue => ex
102
+ log_debug("error while fetching redis meta: #{ex.message}")
103
+ nil
104
+ end
105
+
106
+ def set_redis_item_meta(item, ex)
107
+ return if worker_version < 2
108
+ return if item.nil?
109
+
110
+ redis_key = redis_item_key(item.id)
111
+ error_msg = format_exception_error(ex, extract_cause: false)
112
+ data = Sbmt::Outbox::V2::RedisItemMeta.new(errors_count: item.errors_count, error_msg: error_msg)
113
+ redis.call("SET", redis_key, data.to_s, "EX", cache_ttl_sec)
114
+ rescue => ex
115
+ log_debug("error while saving redis meta: #{ex.message}")
116
+ nil
117
+ end
118
+
119
+ def redis_item_key(item_id)
120
+ "#{box_type}:#{item_class.box_name}:#{item_id}"
121
+ end
122
+
123
+ def fetch_item_and_lock_for_update
66
124
  item = item_class
67
125
  .lock("FOR UPDATE")
68
126
  .find_by(id: item_id)
@@ -171,6 +229,7 @@ module Sbmt
171
229
  item.pending!
172
230
  end
173
231
  rescue => e
232
+ set_redis_item_meta(item, e)
174
233
  log_error_handling_error(e, item)
175
234
  end
176
235
 
@@ -259,6 +318,7 @@ module Sbmt
259
318
  end
260
319
 
261
320
  track_process_latency(labels) if process_latency
321
+ track_retry_latency(labels) if retry_latency
262
322
 
263
323
  return unless counters[:sent_counter].positive?
264
324
 
@@ -279,6 +339,10 @@ module Sbmt
279
339
  def track_process_latency(labels)
280
340
  Yabeda.outbox.process_latency.measure(labels, process_latency.round(3))
281
341
  end
342
+
343
+ def track_retry_latency(labels)
344
+ Yabeda.outbox.retry_latency.measure(labels, retry_latency.round(3))
345
+ end
282
346
  end
283
347
  end
284
348
  end
@@ -102,45 +102,50 @@ module Sbmt
102
102
  # SELECT "items"."id"
103
103
  # FROM "items"
104
104
  # WHERE (
105
- # "items"."status" = 1 AND "items"."created_at" < '2023-05-01 00:00:00'
105
+ # "items"."status" IN (2) AND "items"."created_at" BETWEEN "2025-01-29 12:18:32.917836" AND "2025-01-29 12:18:32.927596" LIMIT 1000
106
106
  # )
107
- # LIMIT 1000
108
107
  # )
109
108
  def postgres_delete_in_batches(waterline_failed, waterline_delivered)
110
- table = item_class.arel_table
111
-
112
109
  status_delivered = item_class.statuses[:delivered]
113
110
  status_failed_discarded = item_class.statuses.values_at(:failed, :discarded)
114
111
 
115
- delete_items_in_batches(table, table[:status].eq(status_delivered).and(table[:created_at].lt(waterline_delivered)))
116
- delete_items_in_batches(table, table[:status].in(status_failed_discarded).and(table[:created_at].lt(waterline_failed)))
112
+ delete_items_in_batches_with_between(waterline_delivered, status_delivered)
113
+ delete_items_in_batches_with_between(waterline_failed, status_failed_discarded)
117
114
  end
118
115
 
119
- def delete_items_in_batches(table, condition)
120
- subquery = table
121
- .project(table[:id])
122
- .where(condition)
123
- .take(item_class.config.deletion_batch_size)
124
-
125
- delete_statement = Arel::Nodes::DeleteStatement.new
126
- delete_statement.relation = table
127
- delete_statement.wheres = [table[:id].in(subquery)]
116
+ def delete_items_in_batches_with_between(waterline, statuses)
117
+ table = item_class.arel_table
118
+ batch_size = item_class.config.deletion_batch_size
119
+ time_window = item_class.config.deletion_time_window
120
+ min_date = item_class.where(table[:status].in(statuses)).minimum(:created_at)
128
121
  deleted_count = nil
129
122
 
130
- loop do
131
- track_deleted_latency do
132
- deleted_count = item_class
133
- .connection
134
- .execute(delete_statement.to_sql)
135
- .cmd_tuples
136
- end
123
+ while min_date && min_date < waterline
124
+ max_date = [min_date + time_window, waterline].min
125
+
126
+ loop do
127
+ subquery = table
128
+ .project(table[:id])
129
+ .where(table[:status].in(statuses))
130
+ .where(table[:created_at].between(min_date..max_date))
131
+ .take(batch_size)
132
+
133
+ delete_statement = Arel::Nodes::DeleteStatement.new
134
+ delete_statement.relation = table
135
+ delete_statement.wheres = [table[:id].in(subquery)]
136
+
137
+ track_deleted_latency do
138
+ deleted_count = item_class.connection.execute(delete_statement.to_sql).cmd_tuples
139
+ end
137
140
 
138
- track_deleted_counter(deleted_count)
141
+ track_deleted_counter(deleted_count)
139
142
 
140
- logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} items")
141
- break if deleted_count == 0
142
- lock_timer.checkpoint!
143
- sleep(item_class.config.deletion_sleep_time)
143
+ logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} between #{min_date} and #{max_date}")
144
+ break if deleted_count < batch_size
145
+ lock_timer.checkpoint!
146
+ sleep(item_class.config.deletion_sleep_time) if deleted_count > 0
147
+ end
148
+ min_date = max_date
144
149
  end
145
150
  end
146
151
 
@@ -154,37 +159,43 @@ module Sbmt
154
159
  # This approach doesn't require a subquery, making it more straightforward.
155
160
  #
156
161
  # Example SQL generated for deletion:
157
- # DELETE FROM `items`
162
+ # DELETE FROM "items"
158
163
  # WHERE (
159
- # `items`.`status` = 1 AND `items`.`created_at` < '2023-05-01 00:00:00'
164
+ # "items"."status" IN (2) AND "items"."created_at" BETWEEN "2024-12-29 18:34:25.369234" AND "2024-12-29 22:34:25.369234" LIMIT 1000
160
165
  # )
161
- # LIMIT 1000
162
166
  def mysql_delete_in_batches(waterline_failed, waterline_delivered)
163
167
  status_delivered = item_class.statuses[:delivered]
164
168
  status_failed_discarded = [item_class.statuses.values_at(:failed, :discarded)]
165
169
 
166
- delete_items_in_batches_mysql(
167
- item_class.where(status: status_delivered, created_at: ...waterline_delivered)
168
- )
169
- delete_items_in_batches_mysql(
170
- item_class.where(status: status_failed_discarded).where(created_at: ...waterline_failed)
171
- )
170
+ delete_items_in_batches_with_between_mysql(waterline_delivered, status_delivered)
171
+ delete_items_in_batches_with_between_mysql(waterline_failed, status_failed_discarded)
172
172
  end
173
173
 
174
- def delete_items_in_batches_mysql(query)
174
+ def delete_items_in_batches_with_between_mysql(waterline, statuses)
175
+ batch_size = item_class.config.deletion_batch_size
176
+ time_window = item_class.config.deletion_time_window
177
+ min_date = item_class.where(status: statuses).minimum(:created_at)
175
178
  deleted_count = nil
176
179
 
177
- loop do
178
- track_deleted_latency do
179
- deleted_count = query.limit(item_class.config.deletion_batch_size).delete_all
180
- end
180
+ while min_date && min_date < waterline
181
+ max_date = [min_date + time_window, waterline].min
182
+
183
+ loop do
184
+ track_deleted_latency do
185
+ deleted_count = item_class
186
+ .where(status: statuses, created_at: min_date..max_date)
187
+ .limit(batch_size)
188
+ .delete_all
189
+ end
181
190
 
182
- track_deleted_counter(deleted_count)
191
+ track_deleted_counter(deleted_count)
183
192
 
184
- logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} items")
185
- break if deleted_count == 0
186
- lock_timer.checkpoint!
187
- sleep(item_class.config.deletion_sleep_time)
193
+ logger.log_info("Deleted #{deleted_count} #{box_type} items for #{box_name} between #{min_date} and #{max_date}")
194
+ break if deleted_count < batch_size
195
+ lock_timer.checkpoint!
196
+ sleep(item_class.config.deletion_sleep_time) if deleted_count > 0
197
+ end
198
+ min_date = max_date
188
199
  end
189
200
  end
190
201
 
@@ -49,6 +49,17 @@ module Sbmt
49
49
  end
50
50
  end
51
51
  end
52
+
53
+ def max_retries_exceeded?(count)
54
+ return false if config.strict_order
55
+ return true unless retriable?
56
+
57
+ count > config.max_retries
58
+ end
59
+
60
+ def retriable?
61
+ config.max_retries > 0
62
+ end
52
63
  end
53
64
 
54
65
  enum :status, {
@@ -135,20 +146,21 @@ module Sbmt
135
146
  end
136
147
 
137
148
  def retriable?
138
- config.max_retries > 0
149
+ self.class.retriable?
139
150
  end
140
151
 
141
152
  def max_retries_exceeded?
142
- return false if config.strict_order
143
- return true unless retriable?
144
-
145
- errors_count > config.max_retries
153
+ self.class.max_retries_exceeded?(errors_count)
146
154
  end
147
155
 
148
156
  def increment_errors_counter
149
157
  increment(:errors_count)
150
158
  end
151
159
 
160
+ def set_errors_count(count)
161
+ self.errors_count = count
162
+ end
163
+
152
164
  def add_error(ex_or_msg)
153
165
  increment_errors_counter
154
166
 
@@ -60,6 +60,10 @@ module Sbmt
60
60
  @delivered_min_retention_period ||= ActiveSupport::Duration.parse(options[:delivered_min_retention_period] || "PT1H")
61
61
  end
62
62
 
63
+ def deletion_time_window
64
+ @deletion_time_window ||= ActiveSupport::Duration.parse(options[:deletion_time_window] || "PT4H")
65
+ end
66
+
63
67
  def max_retries
64
68
  @max_retries ||= (options[:max_retries] || 0).to_i
65
69
  end
@@ -50,6 +50,12 @@ Yabeda.configure do
50
50
  buckets: [0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 20, 30].freeze,
51
51
  comment: "A histogram for outbox/inbox deletion latency"
52
52
 
53
+ histogram :retry_latency,
54
+ tags: %i[type name partition owner],
55
+ unit: :seconds,
56
+ buckets: [1, 10, 20, 50, 120, 300, 900, 1800, 3600].freeze,
57
+ comment: "A histogram of outbox retry latency"
58
+
53
59
  counter :deleted_counter,
54
60
  tags: %i[box_type box_name],
55
61
  comment: "A counter for the number of deleted outbox/inbox items"
@@ -25,8 +25,8 @@ module Sbmt
25
25
  c.cdn_url = "https://cdn.jsdelivr.net/npm/sbmt-outbox-ui@0.0.8/dist/assets/index.js"
26
26
  end
27
27
  c.process_items = ActiveSupport::OrderedOptions.new.tap do |c|
28
- c.general_timeout = 120
29
- c.cutoff_timeout = 60
28
+ c.general_timeout = 180
29
+ c.cutoff_timeout = 90
30
30
  c.batch_size = 200
31
31
  end
32
32
  c.worker = ActiveSupport::OrderedOptions.new.tap do |c|
@@ -54,8 +54,8 @@ module Sbmt
54
54
  end
55
55
  c.processor = ActiveSupport::OrderedOptions.new.tap do |pc|
56
56
  pc.threads_count = 4
57
- pc.general_timeout = 120
58
- pc.cutoff_timeout = 60
57
+ pc.general_timeout = 180
58
+ pc.cutoff_timeout = 90
59
59
  pc.brpop_delay = 1
60
60
  end
61
61
 
@@ -10,7 +10,7 @@ module Sbmt
10
10
  module V2
11
11
  class Processor < BoxProcessor
12
12
  delegate :processor_config, :batch_process_middlewares, :logger, to: "Sbmt::Outbox"
13
- attr_reader :lock_timeout, :brpop_delay
13
+ attr_reader :lock_timeout, :cache_ttl, :cutoff_timeout, :brpop_delay
14
14
 
15
15
  REDIS_BRPOP_MIN_DELAY = 0.1
16
16
 
@@ -18,11 +18,16 @@ module Sbmt
18
18
  boxes,
19
19
  threads_count: nil,
20
20
  lock_timeout: nil,
21
+ cache_ttl: nil,
22
+ cutoff_timeout: nil,
21
23
  brpop_delay: nil,
22
24
  redis: nil
23
25
  )
24
26
  @lock_timeout = lock_timeout || processor_config.general_timeout
27
+ @cache_ttl = cache_ttl || @lock_timeout * 10
28
+ @cutoff_timeout = cutoff_timeout || processor_config.cutoff_timeout
25
29
  @brpop_delay = brpop_delay || redis_brpop_delay(boxes.count, processor_config.brpop_delay)
30
+ @redis = redis
26
31
 
27
32
  super(boxes: boxes, threads_count: threads_count || processor_config.threads_count, name: "processor", redis: redis)
28
33
  end
@@ -66,14 +71,19 @@ module Sbmt
66
71
  end
67
72
 
68
73
  def process(task)
69
- lock_timer = Cutoff.new(lock_timeout)
74
+ lock_timer = Cutoff.new(cutoff_timeout)
70
75
  last_id = 0
71
76
  strict_order = task.item_class.config.strict_order
72
77
 
73
78
  box_worker.item_execution_runtime.measure(task.yabeda_labels) do
74
79
  Outbox.database_switcher.use_master do
75
80
  task.ids.each do |id|
76
- result = ProcessItem.call(task.item_class, id, worker_version: task.yabeda_labels[:worker_version])
81
+ result = ProcessItem.call(
82
+ task.item_class, id,
83
+ worker_version: task.yabeda_labels[:worker_version],
84
+ cache_ttl_sec: cache_ttl,
85
+ redis: @redis
86
+ )
77
87
 
78
88
  box_worker.job_items_counter.increment(task.yabeda_labels)
79
89
  last_id = id
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Sbmt
4
+ module Outbox
5
+ module V2
6
+ class RedisItemMeta
7
+ attr_reader :version, :timestamp, :errors_count, :error_msg
8
+
9
+ CURRENT_VERSION = 1
10
+ MAX_ERROR_LEN = 200
11
+
12
+ def initialize(errors_count:, error_msg:, timestamp: Time.current.to_i, version: CURRENT_VERSION)
13
+ @errors_count = errors_count
14
+ @error_msg = error_msg
15
+ @timestamp = timestamp
16
+ @version = version
17
+ end
18
+
19
+ def to_s
20
+ serialize
21
+ end
22
+
23
+ def serialize
24
+ JSON.generate({
25
+ version: version,
26
+ timestamp: timestamp,
27
+ errors_count: errors_count,
28
+ error_msg: error_msg.slice(0, MAX_ERROR_LEN)
29
+ })
30
+ end
31
+
32
+ def self.deserialize!(value)
33
+ raise "invalid data type: string is required" unless value.is_a?(String)
34
+
35
+ data = JSON.parse!(value, max_nesting: 1)
36
+ new(
37
+ version: data["version"],
38
+ timestamp: data["timestamp"].to_i,
39
+ errors_count: data["errors_count"].to_i,
40
+ error_msg: data["error_msg"]
41
+ )
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Sbmt
4
4
  module Outbox
5
- VERSION = "6.16.0"
5
+ VERSION = "6.18.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sbmt-outbox
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.16.0
4
+ version: 6.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sbermarket Ruby-Platform Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-01-28 00:00:00.000000000 Z
11
+ date: 2025-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: connection_pool
@@ -600,6 +600,7 @@ files:
600
600
  - lib/sbmt/outbox/v2/poll_throttler/redis_queue_time_lag.rb
601
601
  - lib/sbmt/outbox/v2/poller.rb
602
602
  - lib/sbmt/outbox/v2/processor.rb
603
+ - lib/sbmt/outbox/v2/redis_item_meta.rb
603
604
  - lib/sbmt/outbox/v2/redis_job.rb
604
605
  - lib/sbmt/outbox/v2/tasks/base.rb
605
606
  - lib/sbmt/outbox/v2/tasks/default.rb
@@ -629,7 +630,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
629
630
  - !ruby/object:Gem::Version
630
631
  version: '0'
631
632
  requirements: []
632
- rubygems_version: 3.1.6
633
+ rubygems_version: 3.3.7
633
634
  signing_key:
634
635
  specification_version: 4
635
636
  summary: Outbox service