hutch-schedule 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +6 -2
- data/lib/hutch/enqueue.rb +9 -8
- data/lib/hutch/patch/config.rb +8 -1
- data/lib/hutch/patch/worker.rb +117 -25
- data/lib/hutch/schedule/version.rb +1 -1
- data/lib/hutch/threshold.rb +25 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: adda9d8410e98a3010611c8bda21d0e79c98d840340a0f27fd5e3d4e0b60b46e
|
4
|
+
data.tar.gz: dd678456a60980e5e96fc4d85898afb5b71f663f5286026c0777287ee819956f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07d613f9c09fe6dbccc0424af9edc536082b58e252d9a393685f30653c2d2564d28017eac8619299c14d5a5dae18a3fd61a897532c7ee3a8dac8ce069d0c6aed
|
7
|
+
data.tar.gz: 2303ce0e4213d387a96b6d1377c798e85ec36514099eb46b7a52296bc92922fe6f88f958815a035c30c66738da67802961a41b478d5c65ca30c9c20e95c48083
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,16 @@
|
|
1
1
|
# Change Log
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
|
4
|
+
## [0.7.3] - 2020-04-16
|
5
|
+
### Fixed
|
6
|
+
- add #threshold Proc to support passing the enqueue msg to the lambda args
|
7
|
+
- add check interval for flush Hutch::Worker.buffer_queue to RabbitMQ to avoid blocking for handling limited message
|
8
|
+
|
9
|
+
## [0.7.1] - 2020-04-16
|
10
|
+
### Fixed
|
11
|
+
- add threshold default {context, rate, interval} value
|
12
|
+
- fix monkey patch Hutch::Config.define_methods
|
13
|
+
|
4
14
|
## [0.7.0] - 2020-04-13
|
5
15
|
### Fixed
|
6
16
|
- Use monkey patch to support Consumer ratelimit through Hutch::Threshold
|
data/README.md
CHANGED
@@ -62,13 +62,17 @@ poller_interval| 1 | seconds of the poller to trigger, poller the message in Buf
|
|
62
62
|
poller_batch_size | 100 | the message size of every batch triggered by the poller
|
63
63
|
redis_url | redis://127.0.0.1:6379/0 | Redis backend url for Ratelimit and Unique Job
|
64
64
|
ratelimit_bucket_interval | 1 | Ratelimit use the time bucket (seconds) to store the counts, lower the more accurate
|
65
|
+
worker_buffer_flush_interval | 6 | Monkey patch, flush the Hutch::Worker.@buffer_queue message to RabbitMQ check interval
|
65
66
|
|
66
67
|
## Hutch::Enqueue
|
67
|
-
Let consumer to include `Hutch::Enqueue` then it has the ability of publishing message to RabbitMQ with the `consume '<routing_key>'
|
68
|
+
Let consumer to include `Hutch::Enqueue` then it has the ability of publishing message to RabbitMQ with the `consume '<routing_key>'`.
|
69
|
+
|
70
|
+
*Only support enqueue `Hash` format message*
|
68
71
|
|
69
72
|
* enqueue: just publish one message
|
70
73
|
* enqueue_in: publish one message and delay <interval> seconds
|
71
74
|
* enqueue_at: publish one message and auto calculate the <interval> seconds need to delay
|
75
|
+
* enqueue_uniq(_in/at): publish uniq message with uniq_key
|
72
76
|
|
73
77
|
According to the RabbitMQ [TTL Message design limits](http://www.rabbitmq.com/ttl.html#per-message-ttl-caveats) ([discus](https://github.com/rebus-org/Rebus/issues/594#issuecomment-289961537)),
|
74
78
|
We design the fixed delay level from seconds to hours, below is the details:
|
@@ -111,7 +115,7 @@ end
|
|
111
115
|
```
|
112
116
|
|
113
117
|
the threshold lambda's return value must be a Hash and include:
|
114
|
-
* context: the limit context with
|
118
|
+
* context: the limit context with current threshold
|
115
119
|
* rate: the rate speed of threshold
|
116
120
|
* interval: the time range of threshold
|
117
121
|
|
data/lib/hutch/enqueue.rb
CHANGED
@@ -11,21 +11,22 @@ module Hutch
|
|
11
11
|
|
12
12
|
# Add Consumer methods
|
13
13
|
class_methods do
|
14
|
+
|
14
15
|
# Publish the message to this consumer with one routing_key
|
15
|
-
def enqueue(
|
16
|
-
Hutch.publish(enqueue_routing_key,
|
16
|
+
def enqueue(msg = {})
|
17
|
+
Hutch.publish(enqueue_routing_key, msg)
|
17
18
|
end
|
18
19
|
|
19
20
|
# enqueue unique message
|
20
|
-
def enqueue_uniq(uniq_key,
|
21
|
+
def enqueue_uniq(uniq_key, msg = {})
|
21
22
|
return false unless uniq_key_check(uniq_key)
|
22
|
-
enqueue(
|
23
|
+
enqueue(msg)
|
23
24
|
end
|
24
25
|
|
25
26
|
# publish message at a delay times
|
26
27
|
# interval: delay interval seconds
|
27
28
|
# message: publish message
|
28
|
-
def enqueue_in(interval, message, props = {})
|
29
|
+
def enqueue_in(interval, message = {}, props = {})
|
29
30
|
# TODO: 超过 3h 的延迟也会接收, 但是不会延迟那么长时间, 但给予 warn
|
30
31
|
delay_seconds = delay_seconds_level(interval)
|
31
32
|
|
@@ -40,13 +41,13 @@ module Hutch
|
|
40
41
|
Hutch::Schedule.publish(delay_routing_key, message, properties)
|
41
42
|
end
|
42
43
|
|
43
|
-
def enqueue_uniq_in(uniq_key, interval, message, props = {})
|
44
|
+
def enqueue_uniq_in(uniq_key, interval, message = {}, props = {})
|
44
45
|
return false unless uniq_key_check(uniq_key)
|
45
46
|
enqueue_in(interval, message, props)
|
46
47
|
end
|
47
48
|
|
48
49
|
# delay until an exact time point
|
49
|
-
def enqueue_at(time, message, props = {})
|
50
|
+
def enqueue_at(time, message = {}, props = {})
|
50
51
|
# compatible with the ActiveJob API
|
51
52
|
time_or_timestamp = time.respond_to?(:utc) ? time.utc.to_f : time
|
52
53
|
# if time is earlier than now, just delay 1 second
|
@@ -54,7 +55,7 @@ module Hutch
|
|
54
55
|
enqueue_in(interval, message, props)
|
55
56
|
end
|
56
57
|
|
57
|
-
def enqueue_uniq_at(uniq_key, time, message, props = {})
|
58
|
+
def enqueue_uniq_at(uniq_key, time, message = {}, props = {})
|
58
59
|
return false unless uniq_key_check(uniq_key)
|
59
60
|
enqueue_at(time, message, props)
|
60
61
|
end
|
data/lib/hutch/patch/config.rb
CHANGED
@@ -19,6 +19,10 @@ module Hutch
|
|
19
19
|
# Ratelimit redis backend reconnect attempts
|
20
20
|
number_setting :ratelimit_redis_reconnect_attempts, 10
|
21
21
|
|
22
|
+
# Hutch::Worker buffer flush interval in seconds
|
23
|
+
# 这个时间长度决定了 woker.buffer_queue 中长周期等待的任务交换给 RabbitMQ 的检查周期, 不适合太过频繁
|
24
|
+
number_setting :worker_buffer_flush_interval, 6
|
25
|
+
|
22
26
|
initialize(
|
23
27
|
worker_pool_size: 20,
|
24
28
|
poller_interval: 1,
|
@@ -26,7 +30,10 @@ module Hutch
|
|
26
30
|
# @see Redis::Client
|
27
31
|
redis_url: "redis://127.0.0.1:6379/0",
|
28
32
|
ratelimit_bucket_interval: 1,
|
29
|
-
ratelimit_redis_reconnect_attempts: 10
|
33
|
+
ratelimit_redis_reconnect_attempts: 10,
|
34
|
+
worker_buffer_flush_interval: 6,
|
35
|
+
# need a positive channel_prefetch
|
36
|
+
channel_prefetch: 20
|
30
37
|
)
|
31
38
|
define_methods
|
32
39
|
end
|
data/lib/hutch/patch/worker.rb
CHANGED
@@ -10,30 +10,36 @@ module Hutch
|
|
10
10
|
# 明确告知 RabbitMQ 此任务完成.
|
11
11
|
class Worker
|
12
12
|
def initialize(broker, consumers, setup_procs)
|
13
|
+
raise "use Hutch::Schedule must set an positive channel_prefetch" if Hutch::Config.get(:channel_prefetch) < 1
|
13
14
|
@broker = broker
|
14
15
|
self.consumers = consumers
|
15
16
|
self.setup_procs = setup_procs
|
16
17
|
|
17
18
|
@message_worker = Concurrent::FixedThreadPool.new(Hutch::Config.get(:worker_pool_size))
|
18
19
|
@timer_worker = Concurrent::TimerTask.execute(execution_interval: Hutch::Config.get(:poller_interval)) do
|
20
|
+
# all checkers run in the same thread
|
19
21
|
heartbeat_connection
|
22
|
+
flush_to_retry
|
20
23
|
retry_buffer_queue
|
21
24
|
end
|
22
25
|
|
23
|
-
# The queue size maybe the same as channel[prefetch] and every Consumer
|
24
|
-
# when
|
25
|
-
#
|
26
|
-
|
27
|
-
|
28
|
-
@
|
26
|
+
# The queue size may be the same as channel[prefetch]; every Consumer shares one buffer queue with the
|
27
|
+
# same prefetch size, when current consumer have unack messages reach the prefetch size rabbitmq will stop push
|
28
|
+
# message to this consumer.
|
29
|
+
# Because the buffer queue is shared by all consumers so the max queue size is [prefetch * consumer count],
|
30
|
+
# if prefetch is 20 and have 30 consumer the max queue size is 20 * 30 = 600.
|
31
|
+
@buffer_queue = ::Queue.new
|
32
|
+
@batch_size = Hutch::Config.get(:poller_batch_size)
|
33
|
+
@connected = Hutch.connected?
|
34
|
+
@last_flush_time = Time.now.utc
|
29
35
|
end
|
30
36
|
|
31
37
|
# Stop a running worker by killing all subscriber threads.
|
32
38
|
# Stop two thread pool
|
33
39
|
def stop
|
34
40
|
@timer_worker.shutdown
|
35
|
-
@message_worker.shutdown
|
36
41
|
@broker.stop
|
42
|
+
@message_worker.shutdown
|
37
43
|
end
|
38
44
|
|
39
45
|
# Bind a consumer's routing keys to its queue, and set up a subscription to
|
@@ -50,33 +56,93 @@ module Hutch
|
|
50
56
|
end
|
51
57
|
end
|
52
58
|
|
53
|
-
|
54
|
-
|
55
|
-
#
|
56
|
-
|
59
|
+
# cmsg: ConsumerMsg
|
60
|
+
def handle_cmsg_with_limits(cmsg)
|
61
|
+
# 正常的任务处理 ratelimit 的处理逻辑, 如果有限制那么就进入 buffer 缓冲
|
62
|
+
consumer = cmsg.consumer
|
57
63
|
@message_worker.post do
|
58
|
-
if consumer.ratelimit_exceeded?
|
59
|
-
@buffer_queue.push(
|
64
|
+
if consumer.ratelimit_exceeded?(cmsg.message)
|
65
|
+
@buffer_queue.push(cmsg)
|
60
66
|
else
|
61
67
|
# if Hutch is disconnected, skip the work and let the message time out in RabbitMQ so it is pushed again
|
62
68
|
return unless @connected
|
63
|
-
consumer.ratelimit_add
|
64
|
-
|
69
|
+
consumer.ratelimit_add(cmsg.message)
|
70
|
+
handle_cmsg(*cmsg.handle_cmsg_args)
|
65
71
|
end
|
66
72
|
end
|
67
73
|
end
|
68
74
|
|
75
|
+
def handle_message_with_limits(consumer, delivery_info, properties, payload)
|
76
|
+
handle_cmsg_with_limits(consumer_msg(consumer, delivery_info, properties, payload))
|
77
|
+
end
|
78
|
+
|
79
|
+
# convert the args to a message, reusing the code from #handle_message
|
80
|
+
def consumer_msg(consumer, delivery_info, properties, payload)
|
81
|
+
serializer = consumer.get_serializer || Hutch::Config[:serializer]
|
82
|
+
logger.debug {
|
83
|
+
spec = serializer.binary? ? "#{payload.bytesize} bytes" : "#{payload}"
|
84
|
+
"message(#{properties.message_id || '-'}): " +
|
85
|
+
"routing key: #{delivery_info.routing_key}, " +
|
86
|
+
"consumer: #{consumer}, " +
|
87
|
+
"payload: #{spec}"
|
88
|
+
}
|
89
|
+
|
90
|
+
ConsumerMsg.new(consumer, Hutch::Message.new(delivery_info, properties, payload, serializer))
|
91
|
+
end
|
92
|
+
|
93
|
+
def handle_cmsg(consumer, delivery_info, properties, payload, message)
|
94
|
+
consumer_instance = consumer.new.tap { |c| c.broker, c.delivery_info = @broker, delivery_info }
|
95
|
+
with_tracing(consumer_instance).handle(message)
|
96
|
+
@broker.ack(delivery_info.delivery_tag)
|
97
|
+
rescue => ex
|
98
|
+
acknowledge_error(delivery_info, properties, @broker, ex)
|
99
|
+
handle_error(properties, payload, consumer, ex)
|
100
|
+
end
|
101
|
+
|
102
|
+
|
69
103
|
# 心跳检查 Hutch 的连接
|
70
104
|
def heartbeat_connection
|
71
105
|
@connected = Hutch.connected?
|
72
106
|
end
|
73
107
|
|
74
|
-
# 每隔一段时间, 从 buffer queue
|
108
|
+
# 每隔一段时间, 从 buffer queue 中转移任务到执行, interval 比较短的会立即执行掉
|
75
109
|
def retry_buffer_queue
|
110
|
+
begin_size = @buffer_queue.size
|
111
|
+
now = Time.now.utc
|
112
|
+
stat = {}
|
76
113
|
@batch_size.times do
|
77
114
|
cmsg = peak
|
78
|
-
|
79
|
-
|
115
|
+
break if cmsg.blank?
|
116
|
+
handle_cmsg_with_limits(cmsg)
|
117
|
+
|
118
|
+
next unless logger.level == Logger::DEBUG
|
119
|
+
if stat.key?(cmsg.message.body[:b])
|
120
|
+
stat[cmsg.message.body[:b]] += 1
|
121
|
+
else
|
122
|
+
stat[cmsg.message.body[:b]] = 1
|
123
|
+
end
|
124
|
+
end
|
125
|
+
logger.debug "retry_buffer_queue #{Time.now.utc - now}, size from #{begin_size} to #{@buffer_queue.size}, stat: #{stat}"
|
126
|
+
end
|
127
|
+
|
128
|
+
# 对于 rate 间隔比较长的, 不适合一直存储在 buffer 中, 所以需要根据 interval 的值将长周期的 message 重新入队给 RabbitMQ 让其进行
|
129
|
+
# 等待, 但同时不可以让其直接 Requeue, 这样会导致频繁的与 RabbitMQ 来往交换. 需要让消息根据周期以及执行次数逐步拉长等待, 直到最终最长
|
130
|
+
# 时间的等待.
|
131
|
+
#
|
132
|
+
# 有下面几个要求:
|
133
|
+
# - 在 retry_buffer_queue 之前调用
|
134
|
+
# - 整个方法调用时间长度需要在 1s 之内
|
135
|
+
def flush_to_retry
|
136
|
+
now = Time.now.utc
|
137
|
+
if now - @last_flush_time >= Hutch::Config.get(:worker_buffer_flush_interval)
|
138
|
+
@buffer_queue.size.times do
|
139
|
+
cmsg = peak
|
140
|
+
break if cmsg.blank?
|
141
|
+
# 如果没有被处理, 重新放回 buffer
|
142
|
+
@buffer_queue.push(cmsg) unless cmsg.enqueue_in_or_not
|
143
|
+
end
|
144
|
+
@last_flush_time = now
|
145
|
+
logger.debug "flush_to_retry #{Time.now.utc - now}"
|
80
146
|
end
|
81
147
|
end
|
82
148
|
|
@@ -88,15 +154,41 @@ module Hutch
|
|
88
154
|
end
|
89
155
|
end
|
90
156
|
|
91
|
-
# Consumer Message wrap
|
157
|
+
# Consumer Message wraps Hutch::Message and Consumer
|
92
158
|
class ConsumerMsg
|
93
|
-
attr_reader :consumer, :
|
159
|
+
attr_reader :consumer, :message
|
160
|
+
|
161
|
+
def logger
|
162
|
+
Hutch::Logging.logger
|
163
|
+
end
|
164
|
+
|
165
|
+
def initialize(consumer, message)
|
166
|
+
@consumer = consumer
|
167
|
+
@message = message
|
168
|
+
end
|
169
|
+
|
170
|
+
def handle_cmsg_args
|
171
|
+
[consumer, message.delivery_info, message.properties, message.payload, message]
|
172
|
+
end
|
173
|
+
|
174
|
+
def interval
|
175
|
+
@interval ||= consumer.interval(message)
|
176
|
+
end
|
94
177
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
178
|
+
# if the delay is long enough, send the message to RabbitMQ to delay and enqueue again instead of a RabbitMQ requeue
|
179
|
+
def enqueue_in_or_not
|
180
|
+
# interval 小于 5s, 的则不会传, 在自己的 buffer 中等待
|
181
|
+
return false if interval < Hutch::Config.get(:worker_buffer_flush_interval)
|
182
|
+
# 等待时间过长的消息, 交给远端的 rabbitmq 去进行等待, 不占用 buffer 空间
|
183
|
+
# 如果数据量特别大, 但 ratelimit 特别严格, 那么也会变为固定周期的积压, 需要增加对执行次数的记录以及延长
|
184
|
+
# 市场 30s 执行一次的任务, 积累了 200 个, 那么这个积压会越来越多, 直到保持到一个 RabbitMQ 与 hutch 之间的最长等待周期, 会一直空转
|
185
|
+
# - 要么增加对执行次数的考虑, 拉长延长. 但最终会有一个最长的延长 10800 (3h), 这个问题最终仍然会存在
|
186
|
+
# - 设置延长多长之后, 就舍弃这个任务, 因为由于 ratelimit 的存在, 但又持续的积压, 不可能处理完这个任务
|
187
|
+
# 这个方案没有很好的解决方法, 这是一个典型的 "生产速度大于消费速度" 的问题, 如果长时间的 生产 > 消费, 这个问题是无解的
|
188
|
+
Hutch.broker.ack(message.delivery_info.delivery_tag)
|
189
|
+
# TODO: 如果存在 x-death 的 count 需要额外考虑, 解决与 error retry 的 x-death 复用的问题
|
190
|
+
# 临时给一个随机的 1,2 倍率的延迟, 大概率为 1 倍,小概率为 2 倍
|
191
|
+
consumer.enqueue_in(interval * [rand(3), 1].max, message.body, message.properties.to_hash)
|
100
192
|
end
|
101
193
|
end
|
102
194
|
end
|
data/lib/hutch/threshold.rb
CHANGED
@@ -31,6 +31,7 @@ module Hutch
|
|
31
31
|
def threshold(args)
|
32
32
|
@block_given = args.is_a?(Proc)
|
33
33
|
if @block_given
|
34
|
+
raise "block only can have zero or one arguments" if args.arity > 1
|
34
35
|
@threshold_block = args
|
35
36
|
else
|
36
37
|
raise "need args or block" if args.blank?
|
@@ -49,34 +50,48 @@ module Hutch
|
|
49
50
|
redis: Hutch::Schedule.redis)
|
50
51
|
end
|
51
52
|
|
53
|
+
|
52
54
|
# is class level @rate_limiter _context exceeded?
|
53
55
|
# if the class level @rate_limiter is nil, always return false
|
54
|
-
def ratelimit_exceeded?
|
56
|
+
def ratelimit_exceeded?(message)
|
55
57
|
return false if @rate_limiter.blank?
|
56
|
-
|
58
|
+
args = threshold_args(message)
|
59
|
+
@rate_limiter.exceeded?(_context(args), threshold: _rate(args), interval: _interval(args))
|
57
60
|
rescue Redis::BaseError
|
58
61
|
# when redis cann't connect return exceeded limit
|
59
62
|
true
|
60
63
|
end
|
61
64
|
|
62
65
|
# 增加一次调用
|
63
|
-
def ratelimit_add
|
66
|
+
def ratelimit_add(message)
|
64
67
|
return if @rate_limiter.blank?
|
65
|
-
@rate_limiter.add(_context)
|
68
|
+
@rate_limiter.add(_context(threshold_args(message)))
|
66
69
|
rescue Redis::BaseError
|
67
70
|
nil
|
68
71
|
end
|
69
72
|
|
70
|
-
def
|
71
|
-
@block_given
|
73
|
+
def threshold_args(message)
|
74
|
+
if @block_given
|
75
|
+
@threshold_block.arity == 0 ? @threshold_block.call : @threshold_block.call(message.body)
|
76
|
+
else
|
77
|
+
{ context: @context, rate: @rate, interval: @interval }
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def interval(message)
|
82
|
+
_interval(threshold_args(message))
|
83
|
+
end
|
84
|
+
|
85
|
+
def _context(args)
|
86
|
+
args.fetch(:context, default_context)
|
72
87
|
end
|
73
88
|
|
74
|
-
def _rate
|
75
|
-
|
89
|
+
def _rate(args)
|
90
|
+
args.fetch(:rate, default_rate)
|
76
91
|
end
|
77
92
|
|
78
|
-
def _interval
|
79
|
-
|
93
|
+
def _interval(args)
|
94
|
+
args.fetch(:interval, default_interval)
|
80
95
|
end
|
81
96
|
end
|
82
97
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hutch-schedule
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wyatt pan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hutch
|