hutch-schedule 0.7.2 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51507037110c8e884ac519fe99c27f8547da70f5ada9744bc034a7ec10da5ee3
4
- data.tar.gz: 45f2fcb4e686f86a13bcded021c661b69227c8eca5f5b0b96764a9ac840a8372
3
+ metadata.gz: adda9d8410e98a3010611c8bda21d0e79c98d840340a0f27fd5e3d4e0b60b46e
4
+ data.tar.gz: dd678456a60980e5e96fc4d85898afb5b71f663f5286026c0777287ee819956f
5
5
  SHA512:
6
- metadata.gz: 1c507eead9eef80897516c1296cf3eaa3df4674fbba1b188138ce06317a332cfe886291e596a403f5fa3573079f4b9650a0206252e5567b451bda54dadeb07e4
7
- data.tar.gz: 2f38719bc7da6b3ea9a7826a5e82555336b81b46acb62622af6d33f7ab1f52679af6f77d7e048274168dcb1166a10eb5ead8fe29fe1ffa5200a4022022444e18
6
+ metadata.gz: 07d613f9c09fe6dbccc0424af9edc536082b58e252d9a393685f30653c2d2564d28017eac8619299c14d5a5dae18a3fd61a897532c7ee3a8dac8ce069d0c6aed
7
+ data.tar.gz: 2303ce0e4213d387a96b6d1377c798e85ec36514099eb46b7a52296bc92922fe6f88f958815a035c30c66738da67802961a41b478d5c65ca30c9c20e95c48083
@@ -1,6 +1,16 @@
1
1
  # Change Log
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
+ ## [0.7.3] - 2020-04-16
5
+ ### Fixed
6
+ - add #threshold Proc to support passing the enqueued msg to lambda args
7
+ - add check interval for flushing Hutch::Worker.buffer_queue to RabbitMQ to avoid blocking on rate-limited messages
8
+
9
+ ## [0.7.1] - 2020-04-16
10
+ ### Fixed
11
+ - add threshold default {context, rate, interval} value
12
+ - fix monkey patch Hutch::Config.define_methods
13
+
4
14
  ## [0.7.0] - 2020-04-13
5
15
  ### Fixed
6
16
  - Use monkey patch to support Consumer ratelimit through Hutch::Threshold
data/README.md CHANGED
@@ -62,13 +62,17 @@ poller_interval| 1 | seconds of the poller to trigger, poller the message in Buf
62
62
  poller_batch_size | 100 | the message size of every batch triggerd by the poller
63
63
  redis_url | redis://127.0.0.1:6379/0 | Redis backend url for Ratelimit and Unique Job
64
64
  ratelimit_bucket_interval | 1 | Ratelimit use the time bucket (seconds) to store the counts, lower the more accurate
65
+ worker_buffer_flush_interval | 6 | Monkey patch, check interval (seconds) for flushing Hutch::Worker @buffer_queue messages to RabbitMQ
65
66
 
66
67
  ## Hutch::Enqueue
67
- Let consumer to include `Hutch::Enqueue` then it has the ability of publishing message to RabbitMQ with the `consume '<routing_key>'`
68
+ Let consumer to include `Hutch::Enqueue` then it has the ability of publishing message to RabbitMQ with the `consume '<routing_key>'`.
69
+
70
+ *Only support enqueue `Hash` format message*
68
71
 
69
72
  * enqueue: just publish one message
70
73
  * enqueue_in: publish one message and delay <interval> seconds
71
74
  * enqueue_at: publish one message and auto calculate the <interval> seconds need to delay
75
+ * enqueue_uniq(_in/at): publish uniq message with uniq_key
72
76
 
73
77
  According to the RabbitMQ [TTL Message design limits](http://www.rabbitmq.com/ttl.html#per-message-ttl-caveats) ([discus](https://github.com/rebus-org/Rebus/issues/594#issuecomment-289961537)),
74
78
  We design the fixed delay level from seconds to hours, below is the details:
@@ -111,7 +115,7 @@ end
111
115
  ```
112
116
 
113
117
  the threshold lambda's return value must be a Hash and include:
114
- * context: the limit context with currency threshold
118
+ * context: the limit context with current threshold
115
119
  * rate: the rate speed of threshold
116
120
  * interval: the time range of threshold
117
121
 
@@ -11,21 +11,22 @@ module Hutch
11
11
 
12
12
  # Add Consumer methods
13
13
  class_methods do
14
+
14
15
  # Publish the message to this consumer with one routing_key
15
- def enqueue(message)
16
- Hutch.publish(enqueue_routing_key, message)
16
+ def enqueue(msg = {})
17
+ Hutch.publish(enqueue_routing_key, msg)
17
18
  end
18
19
 
19
20
  # enqueue unique message
20
- def enqueue_uniq(uniq_key, message)
21
+ def enqueue_uniq(uniq_key, msg = {})
21
22
  return false unless uniq_key_check(uniq_key)
22
- enqueue(message)
23
+ enqueue(msg)
23
24
  end
24
25
 
25
26
  # publish message at a delay times
26
27
  # interval: delay interval seconds
27
28
  # message: publish message
28
- def enqueue_in(interval, message, props = {})
29
+ def enqueue_in(interval, message = {}, props = {})
29
30
  # TODO: 超过 3h 的延迟也会接收, 但是不会延迟那么长时间, 但给予 warn
30
31
  delay_seconds = delay_seconds_level(interval)
31
32
 
@@ -40,13 +41,13 @@ module Hutch
40
41
  Hutch::Schedule.publish(delay_routing_key, message, properties)
41
42
  end
42
43
 
43
- def enqueue_uniq_in(uniq_key, interval, message, props = {})
44
+ def enqueue_uniq_in(uniq_key, interval, message = {}, props = {})
44
45
  return false unless uniq_key_check(uniq_key)
45
46
  enqueue_in(interval, message, props)
46
47
  end
47
48
 
48
49
  # delay at exactly the given time point
49
- def enqueue_at(time, message, props = {})
50
+ def enqueue_at(time, message = {}, props = {})
50
51
  # compatible with the ActiveJob API
51
52
  time_or_timestamp = time.respond_to?(:utc) ? time.utc.to_f : time
52
53
  # if time is early then now then just delay 1 second
@@ -54,7 +55,7 @@ module Hutch
54
55
  enqueue_in(interval, message, props)
55
56
  end
56
57
 
57
- def enqueue_uniq_at(uniq_key, time, message, props = {})
58
+ def enqueue_uniq_at(uniq_key, time, message = {}, props = {})
58
59
  return false unless uniq_key_check(uniq_key)
59
60
  enqueue_at(time, message, props)
60
61
  end
@@ -19,6 +19,10 @@ module Hutch
19
19
  # Ratelimit redis backend reconnect attempts
20
20
  number_setting :ratelimit_redis_reconnect_attempts, 10
21
21
 
22
+ # Hutch::Worker buffer flush interval in seconds
23
+ # 这个时间长度决定了 worker.buffer_queue 中长周期等待的任务交换给 RabbitMQ 的检查周期, 不适合太过频繁
24
+ number_setting :worker_buffer_flush_interval, 6
25
+
22
26
  initialize(
23
27
  worker_pool_size: 20,
24
28
  poller_interval: 1,
@@ -26,7 +30,10 @@ module Hutch
26
30
  # @see Redis::Client
27
31
  redis_url: "redis://127.0.0.1:6379/0",
28
32
  ratelimit_bucket_interval: 1,
29
- ratelimit_redis_reconnect_attempts: 10
33
+ ratelimit_redis_reconnect_attempts: 10,
34
+ worker_buffer_flush_interval: 6,
35
+ # need a positive channel_prefetch
36
+ channel_prefetch: 20
30
37
  )
31
38
  define_methods
32
39
  end
@@ -10,30 +10,36 @@ module Hutch
10
10
  # 明确告知 RabbitMQ 此任务完成.
11
11
  class Worker
12
12
  def initialize(broker, consumers, setup_procs)
13
+ raise "use Hutch::Schedule must set an positive channel_prefetch" if Hutch::Config.get(:channel_prefetch) < 1
13
14
  @broker = broker
14
15
  self.consumers = consumers
15
16
  self.setup_procs = setup_procs
16
17
 
17
18
  @message_worker = Concurrent::FixedThreadPool.new(Hutch::Config.get(:worker_pool_size))
18
19
  @timer_worker = Concurrent::TimerTask.execute(execution_interval: Hutch::Config.get(:poller_interval)) do
20
+ # all checkers run in the same thread
19
21
  heartbeat_connection
22
+ flush_to_retry
20
23
  retry_buffer_queue
21
24
  end
22
25
 
23
- # The queue size maybe the same as channel[prefetch] and every Consumer have it's own buffer queue with the same prefetch size,
24
- # when the buffer queue have the prefetch size message rabbitmq will stop push message to this consumer but it's ok.
25
- # The consumer will threshold by the shared redis instace.
26
- @buffer_queue = ::Queue.new
27
- @batch_size = Hutch::Config.get(:poller_batch_size)
28
- @connected = Hutch.connected?
26
+ # The queue size maybe the same as channel[prefetch] and every Consumer shared one buffer queue with the
27
+ # same prefetch size, when current consumer have unack messages reach the prefetch size rabbitmq will stop push
28
+ # message to this consumer.
29
+ # Because the buffer queue is shared by all consumers so the max queue size is [prefetch * consumer count],
30
+ # if prefetch is 20 and have 30 consumer the max queue size is 20 * 30 = 600.
31
+ @buffer_queue = ::Queue.new
32
+ @batch_size = Hutch::Config.get(:poller_batch_size)
33
+ @connected = Hutch.connected?
34
+ @last_flush_time = Time.now.utc
29
35
  end
30
36
 
31
37
  # Stop a running worker by killing all subscriber threads.
32
38
  # Stop two thread pool
33
39
  def stop
34
40
  @timer_worker.shutdown
35
- @message_worker.shutdown
36
41
  @broker.stop
42
+ @message_worker.shutdown
37
43
  end
38
44
 
39
45
  # Bind a consumer's routing keys to its queue, and set up a subscription to
@@ -50,33 +56,93 @@ module Hutch
50
56
  end
51
57
  end
52
58
 
53
- def handle_message_with_limits(consumer, delivery_info, properties, payload)
54
- # 1. consumer.limit?
55
- # 2. yes: make and ConsumerMsg to queue
56
- # 3. no: post handle
59
+ # cmsg: ConsumerMsg
60
+ def handle_cmsg_with_limits(cmsg)
61
+ # 正常的任务处理 ratelimit 的处理逻辑, 如果有限制那么就进入 buffer 缓冲
62
+ consumer = cmsg.consumer
57
63
  @message_worker.post do
58
- if consumer.ratelimit_exceeded?
59
- @buffer_queue.push(ConsumerMsg.new(consumer, delivery_info, properties, payload))
64
+ if consumer.ratelimit_exceeded?(cmsg.message)
65
+ @buffer_queue.push(cmsg)
60
66
  else
61
67
  # if Hutch disconnect skip do work let message timeout in rabbitmq waiting message push again
62
68
  return unless @connected
63
- consumer.ratelimit_add
64
- handle_message(consumer, delivery_info, properties, payload)
69
+ consumer.ratelimit_add(cmsg.message)
70
+ handle_cmsg(*cmsg.handle_cmsg_args)
65
71
  end
66
72
  end
67
73
  end
68
74
 
75
+ def handle_message_with_limits(consumer, delivery_info, properties, payload)
76
+ handle_cmsg_with_limits(consumer_msg(consumer, delivery_info, properties, payload))
77
+ end
78
+
79
+ # change args to message reuse the code from #handle_message
80
+ def consumer_msg(consumer, delivery_info, properties, payload)
81
+ serializer = consumer.get_serializer || Hutch::Config[:serializer]
82
+ logger.debug {
83
+ spec = serializer.binary? ? "#{payload.bytesize} bytes" : "#{payload}"
84
+ "message(#{properties.message_id || '-'}): " +
85
+ "routing key: #{delivery_info.routing_key}, " +
86
+ "consumer: #{consumer}, " +
87
+ "payload: #{spec}"
88
+ }
89
+
90
+ ConsumerMsg.new(consumer, Hutch::Message.new(delivery_info, properties, payload, serializer))
91
+ end
92
+
93
+ def handle_cmsg(consumer, delivery_info, properties, payload, message)
94
+ consumer_instance = consumer.new.tap { |c| c.broker, c.delivery_info = @broker, delivery_info }
95
+ with_tracing(consumer_instance).handle(message)
96
+ @broker.ack(delivery_info.delivery_tag)
97
+ rescue => ex
98
+ acknowledge_error(delivery_info, properties, @broker, ex)
99
+ handle_error(properties, payload, consumer, ex)
100
+ end
101
+
102
+
69
103
  # 心跳检查 Hutch 的连接
70
104
  def heartbeat_connection
71
105
  @connected = Hutch.connected?
72
106
  end
73
107
 
74
- # 每隔一段时间, 从 buffer queue 中转移任务到执行
108
+ # 每隔一段时间, 从 buffer queue 中转移任务到执行, interval 比较短的会立即执行掉
75
109
  def retry_buffer_queue
110
+ begin_size = @buffer_queue.size
111
+ now = Time.now.utc
112
+ stat = {}
76
113
  @batch_size.times do
77
114
  cmsg = peak
78
- return if cmsg.blank?
79
- handle_message_with_limits(cmsg.consumer, cmsg.delivery_info, cmsg.properties, cmsg.payload)
115
+ break if cmsg.blank?
116
+ handle_cmsg_with_limits(cmsg)
117
+
118
+ next unless logger.level == Logger::DEBUG
119
+ if stat.key?(cmsg.message.body[:b])
120
+ stat[cmsg.message.body[:b]] += 1
121
+ else
122
+ stat[cmsg.message.body[:b]] = 1
123
+ end
124
+ end
125
+ logger.debug "retry_buffer_queue #{Time.now.utc - now}, size from #{begin_size} to #{@buffer_queue.size}, stat: #{stat}"
126
+ end
127
+
128
+ # 对于 rate 间隔比较长的, 不适合一直存储在 buffer 中, 所以需要根据 interval 的值将长周期的 message 重新入队给 RabbitMQ 让其进行
129
+ # 等待, 但同时不可以让其直接 Requeue, 这样会导致频繁的与 RabbitMQ 来往交换. 需要让消息根据周期以及执行次数逐步拉长等待, 直到最终最长
130
+ # 时间的等待.
131
+ #
132
+ # 有下面几个要求:
133
+ # - 在 retry_buffer_queue 之前调用
134
+ # - 整个方法调用时间长度需要在 1s 之内
135
+ def flush_to_retry
136
+ now = Time.now.utc
137
+ if now - @last_flush_time >= Hutch::Config.get(:worker_buffer_flush_interval)
138
+ @buffer_queue.size.times do
139
+ cmsg = peak
140
+ break if cmsg.blank?
141
+ # 如果没有被处理, 重新放回 buffer
142
+ @buffer_queue.push(cmsg) unless cmsg.enqueue_in_or_not
143
+ end
144
+ @last_flush_time = now
145
+ logger.debug "flush_to_retry #{Time.now.utc - now}"
80
146
  end
81
147
  end
82
148
 
@@ -88,15 +154,41 @@ module Hutch
88
154
  end
89
155
  end
90
156
 
91
- # Consumer Message wrap rabbitmq message infomation
157
+ # Consumer Message wrap Hutch::Message and Consumer
92
158
  class ConsumerMsg
93
- attr_reader :consumer, :delivery_info, :properties, :payload
159
+ attr_reader :consumer, :message
160
+
161
+ def logger
162
+ Hutch::Logging.logger
163
+ end
164
+
165
+ def initialize(consumer, message)
166
+ @consumer = consumer
167
+ @message = message
168
+ end
169
+
170
+ def handle_cmsg_args
171
+ [consumer, message.delivery_info, message.properties, message.payload, message]
172
+ end
173
+
174
+ def interval
175
+ @interval ||= consumer.interval(message)
176
+ end
94
177
 
95
- def initialize(consumer, delivery_info, properties, payload)
96
- @consumer = consumer
97
- @delivery_info = delivery_info
98
- @properties = properties
99
- @payload = payload
178
+ # if delays > 10s then let the message go to rabbitmq to delay and enqueue again instead of rabbitmq requeue
179
+ def enqueue_in_or_not
180
+ # interval 小于 5s, 的则不会传, 在自己的 buffer 中等待
181
+ return false if interval < Hutch::Config.get(:worker_buffer_flush_interval)
182
+ # 等待时间过长的消息, 交给远端的 rabbitmq 去进行等待, 不占用 buffer 空间
183
+ # 如果数据量特别大, 但 ratelimit 特别严格, 那么也会变为固定周期的积压, 需要增加对执行次数的记录以及延长
184
+ # 市场 30s 执行一次的任务, 积累了 200 个, 那么这个积压会越来越多, 直到保持到一个 RabbitMQ 与 hutch 之间的最长等待周期, 会一直空转
185
+ # - 要么增加对执行次数的考虑, 拉长延长. 但最终会有一个最长的延长 10800 (3h), 这个问题最终仍然会存在
186
+ # - 设置延长多长之后, 就舍弃这个任务, 因为由于 ratelimit 的存在, 但又持续的积压, 不可能处理完这个任务
187
+ # 这个方案没有很好的解决方法, 这是一个典型的 "生产速度大于消费速度" 的问题, 如果长时间的 生产 > 消费, 这个问题是无解的
188
+ Hutch.broker.ack(message.delivery_info.delivery_tag)
189
+ # TODO: 如果存在 x-death 的 count 需要额外考虑, 解决与 error retry 的 x-death 复用的问题
190
+ # 临时给一个随机的 1,2 倍率的延迟, 大概率为 1 倍,小概率为 2 倍
191
+ consumer.enqueue_in(interval * [rand(3), 1].max, message.body, message.properties.to_hash)
100
192
  end
101
193
  end
102
194
  end
@@ -1,5 +1,5 @@
1
1
  module Hutch
2
2
  module Schedule
3
- VERSION = "0.7.2"
3
+ VERSION = "0.7.3"
4
4
  end
5
5
  end
@@ -31,6 +31,7 @@ module Hutch
31
31
  def threshold(args)
32
32
  @block_given = args.is_a?(Proc)
33
33
  if @block_given
34
+ raise "block only can have zero or one arguments" if args.arity > 1
34
35
  @threshold_block = args
35
36
  else
36
37
  raise "need args or block" if args.blank?
@@ -49,34 +50,48 @@ module Hutch
49
50
  redis: Hutch::Schedule.redis)
50
51
  end
51
52
 
53
+
52
54
  # is class level @rate_limiter _context exceeded?
53
55
  # if class level @rate_limiter is nil always return false
54
- def ratelimit_exceeded?
56
+ def ratelimit_exceeded?(message)
55
57
  return false if @rate_limiter.blank?
56
- @rate_limiter.exceeded?(_context, threshold: _rate, interval: _interval)
58
+ args = threshold_args(message)
59
+ @rate_limiter.exceeded?(_context(args), threshold: _rate(args), interval: _interval(args))
57
60
  rescue Redis::BaseError
58
61
  # when redis cann't connect return exceeded limit
59
62
  true
60
63
  end
61
64
 
62
65
  # 增加一次调用
63
- def ratelimit_add
66
+ def ratelimit_add(message)
64
67
  return if @rate_limiter.blank?
65
- @rate_limiter.add(_context)
68
+ @rate_limiter.add(_context(threshold_args(message)))
66
69
  rescue Redis::BaseError
67
70
  nil
68
71
  end
69
72
 
70
- def _context
71
- @block_given ? @threshold_block.call[:context].presence || default_context : @context
73
+ def threshold_args(message)
74
+ if @block_given
75
+ @threshold_block.arity == 0 ? @threshold_block.call : @threshold_block.call(message.body)
76
+ else
77
+ { context: @context, rate: @rate, interval: @interval }
78
+ end
79
+ end
80
+
81
+ def interval(message)
82
+ _interval(threshold_args(message))
83
+ end
84
+
85
+ def _context(args)
86
+ args.fetch(:context, default_context)
72
87
  end
73
88
 
74
- def _rate
75
- @block_given ? @threshold_block.call[:rate].presence || default_rate : @rate
89
+ def _rate(args)
90
+ args.fetch(:rate, default_rate)
76
91
  end
77
92
 
78
- def _interval
79
- @block_given ? @threshold_block.call[:interval].presence || default_interval : @interval
93
+ def _interval(args)
94
+ args.fetch(:interval, default_interval)
80
95
  end
81
96
  end
82
97
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hutch-schedule
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wyatt pan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-15 00:00:00.000000000 Z
11
+ date: 2020-04-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hutch