hutch-schedule 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +6 -2
- data/lib/hutch/enqueue.rb +9 -8
- data/lib/hutch/patch/config.rb +8 -1
- data/lib/hutch/patch/worker.rb +117 -25
- data/lib/hutch/schedule/version.rb +1 -1
- data/lib/hutch/threshold.rb +25 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: adda9d8410e98a3010611c8bda21d0e79c98d840340a0f27fd5e3d4e0b60b46e
+  data.tar.gz: dd678456a60980e5e96fc4d85898afb5b71f663f5286026c0777287ee819956f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 07d613f9c09fe6dbccc0424af9edc536082b58e252d9a393685f30653c2d2564d28017eac8619299c14d5a5dae18a3fd61a897532c7ee3a8dac8ce069d0c6aed
+  data.tar.gz: 2303ce0e4213d387a96b6d1377c798e85ec36514099eb46b7a52296bc92922fe6f88f958815a035c30c66738da67802961a41b478d5c65ca30c9c20e95c48083
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,16 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## [0.7.3] - 2020-04-16
+### Fixed
+- add a #threshold Proc form, so the enqueued msg can be passed to the lambda's args
+- add a check interval for flushing Hutch::Worker.buffer_queue to RabbitMQ, to avoid blocking while handling rate-limited messages
+
+## [0.7.1] - 2020-04-16
+### Fixed
+- add default threshold {context, rate, interval} values
+- fix the Hutch::Config.define_methods monkey patch
+
 ## [0.7.0] - 2020-04-13
 ### Fixed
 - Use monkey patch to support Consumer ratelimit through Hutch::Threshold
data/README.md
CHANGED
@@ -62,13 +62,17 @@ poller_interval| 1 | seconds of the poller to trigger, poller the message in Buf
 poller_batch_size | 100 | the message size of every batch triggered by the poller
 redis_url | redis://127.0.0.1:6379/0 | Redis backend url for Ratelimit and Unique Job
 ratelimit_bucket_interval | 1 | Ratelimit uses time buckets (seconds) to store the counts; the lower, the more accurate
+worker_buffer_flush_interval | 6 | Monkey patch; check interval (seconds) for flushing Hutch::Worker @buffer_queue messages to RabbitMQ
 
 ## Hutch::Enqueue
-Let consumer to include `Hutch::Enqueue` then it has the ability of publishing message to RabbitMQ with the `consume '<routing_key>'
+Let the consumer include `Hutch::Enqueue`; it then gains the ability to publish messages to RabbitMQ with the `consume '<routing_key>'`.
+
+*Only `Hash` format messages can be enqueued*
 
 * enqueue: just publish one message
 * enqueue_in: publish one message and delay it <interval> seconds
 * enqueue_at: publish one message and automatically calculate the <interval> seconds it needs to be delayed
+* enqueue_uniq(_in/_at): publish a unique message identified by a uniq_key
 
 According to the RabbitMQ [TTL Message design limits](http://www.rabbitmq.com/ttl.html#per-message-ttl-caveats) ([discussion](https://github.com/rebus-org/Rebus/issues/594#issuecomment-289961537)),
 We design the fixed delay levels from seconds to hours, below are the details:
@@ -111,7 +115,7 @@ end
 ```
 
 the threshold lambda's return value must be a Hash that includes:
-* context: the limit context with
+* context: the limit context of the current threshold
 * rate: the rate (speed) of the threshold
 * interval: the time window of the threshold
 
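As a quick orientation for this diff, here is a minimal sketch of how the Enqueue API described in the README reads from a consumer. `OrderConsumer`, its routing key, and the payload fields are hypothetical names chosen for illustration; only the method names and the Hash-only message rule come from the README and code in this diff.

```ruby
class OrderConsumer
  include Hutch::Consumer
  include Hutch::Enqueue

  consume 'orders.created'

  def process(message)
    # message.body is the Hash that was enqueued
  end
end

# publish immediately (only Hash messages are supported)
OrderConsumer.enqueue(order_id: 1)
# publish after roughly 30 seconds (mapped onto a fixed delay level)
OrderConsumer.enqueue_in(30, order_id: 1)
# publish at (approximately) a point in time
OrderConsumer.enqueue_at(Time.now + 3600, order_id: 1)
# publish only if the uniq_key passes the uniqueness check
OrderConsumer.enqueue_uniq('order-1', order_id: 1)
```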
data/lib/hutch/enqueue.rb
CHANGED
@@ -11,21 +11,22 @@ module Hutch
 
   # Add Consumer methods
   class_methods do
+
     # Publish the message to this consumer with one routing_key
-    def enqueue(
-      Hutch.publish(enqueue_routing_key,
+    def enqueue(msg = {})
+      Hutch.publish(enqueue_routing_key, msg)
     end
 
     # enqueue unique message
-    def enqueue_uniq(uniq_key,
+    def enqueue_uniq(uniq_key, msg = {})
       return false unless uniq_key_check(uniq_key)
-      enqueue(
+      enqueue(msg)
     end
 
     # publish a message after a delay
     # interval: delay interval seconds
     # message: publish message
-    def enqueue_in(interval, message, props = {})
+    def enqueue_in(interval, message = {}, props = {})
      # TODO: delays longer than 3h are still accepted, but will not be delayed that long; a warning is given
      delay_seconds = delay_seconds_level(interval)
 
@@ -40,13 +41,13 @@ module Hutch
       Hutch::Schedule.publish(delay_routing_key, message, properties)
     end
 
-    def enqueue_uniq_in(uniq_key, interval, message, props = {})
+    def enqueue_uniq_in(uniq_key, interval, message = {}, props = {})
       return false unless uniq_key_check(uniq_key)
       enqueue_in(interval, message, props)
     end
 
    # delay until an exact point in time
-    def enqueue_at(time, message, props = {})
+    def enqueue_at(time, message = {}, props = {})
      # compatible with the ActiveJob API
      time_or_timestamp = time.respond_to?(:utc) ? time.utc.to_f : time
      # if time is earlier than now, just delay 1 second
@@ -54,7 +55,7 @@ module Hutch
       enqueue_in(interval, message, props)
     end
 
-    def enqueue_uniq_at(uniq_key, time, message, props = {})
+    def enqueue_uniq_at(uniq_key, time, message = {}, props = {})
       return false unless uniq_key_check(uniq_key)
       enqueue_at(time, message, props)
     end
data/lib/hutch/patch/config.rb
CHANGED
@@ -19,6 +19,10 @@ module Hutch
   # Ratelimit redis backend reconnect attempts
   number_setting :ratelimit_redis_reconnect_attempts, 10
 
+  # Hutch::Worker buffer flush interval in seconds
+  # This duration controls how often long-waiting tasks in worker.buffer_queue are handed back to RabbitMQ; it should not be too frequent
+  number_setting :worker_buffer_flush_interval, 6
+
   initialize(
     worker_pool_size: 20,
     poller_interval: 1,
@@ -26,7 +30,10 @@ module Hutch
     # @see Redis::Client
     redis_url: "redis://127.0.0.1:6379/0",
     ratelimit_bucket_interval: 1,
-    ratelimit_redis_reconnect_attempts: 10
+    ratelimit_redis_reconnect_attempts: 10,
+    worker_buffer_flush_interval: 6,
+    # need a positive channel_prefetch
+    channel_prefetch: 20
   )
   define_methods
 end
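For context on the two new settings introduced above, a rough sketch of overriding them at boot via `Hutch::Config.set` (the setter Hutch itself provides). The values shown are simply the defaults from this diff, not recommendations.

```ruby
require 'hutch'

Hutch::Config.set(:worker_buffer_flush_interval, 6) # seconds between buffer-flush checks
Hutch::Config.set(:channel_prefetch, 20)            # must be >= 1 when using Hutch::Schedule
Hutch::Config.set(:ratelimit_bucket_interval, 1)
Hutch::Config.set(:redis_url, 'redis://127.0.0.1:6379/0')
```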
data/lib/hutch/patch/worker.rb
CHANGED
@@ -10,30 +10,36 @@ module Hutch
   # Explicitly tell RabbitMQ that this task is done.
   class Worker
     def initialize(broker, consumers, setup_procs)
+      raise "use Hutch::Schedule must set an positive channel_prefetch" if Hutch::Config.get(:channel_prefetch) < 1
       @broker = broker
       self.consumers = consumers
       self.setup_procs = setup_procs
 
       @message_worker = Concurrent::FixedThreadPool.new(Hutch::Config.get(:worker_pool_size))
       @timer_worker = Concurrent::TimerTask.execute(execution_interval: Hutch::Config.get(:poller_interval)) do
+        # all checkers run in the same thread
         heartbeat_connection
+        flush_to_retry
         retry_buffer_queue
       end
 
-      # The queue size maybe the same as channel[prefetch] and every Consumer
-      # when
-      #
-
-
-      @
+      # The queue size may be the same as channel[prefetch]: every Consumer shares one buffer queue with the
+      # same prefetch size. When the current consumer's unacked messages reach the prefetch size, RabbitMQ stops
+      # pushing messages to this consumer.
+      # Because the buffer queue is shared by all consumers, the max queue size is [prefetch * consumer count];
+      # if prefetch is 20 and there are 30 consumers, the max queue size is 20 * 30 = 600.
+      @buffer_queue = ::Queue.new
+      @batch_size = Hutch::Config.get(:poller_batch_size)
+      @connected = Hutch.connected?
+      @last_flush_time = Time.now.utc
     end
 
     # Stop a running worker by killing all subscriber threads.
     # Stop the two thread pools
     def stop
       @timer_worker.shutdown
-      @message_worker.shutdown
       @broker.stop
+      @message_worker.shutdown
     end
 
     # Bind a consumer's routing keys to its queue, and set up a subscription to
@@ -50,33 +56,93 @@ module Hutch
       end
     end
 
-
-
-    #
-
+    # cmsg: ConsumerMsg
+    def handle_cmsg_with_limits(cmsg)
+      # normal ratelimit handling for a task: if the limit is exceeded, the message goes into the buffer
+      consumer = cmsg.consumer
       @message_worker.post do
-        if consumer.ratelimit_exceeded?
-          @buffer_queue.push(
+        if consumer.ratelimit_exceeded?(cmsg.message)
+          @buffer_queue.push(cmsg)
         else
           # if Hutch disconnect skip do work let message timeout in rabbitmq waiting message push again
           return unless @connected
-          consumer.ratelimit_add
-
+          consumer.ratelimit_add(cmsg.message)
+          handle_cmsg(*cmsg.handle_cmsg_args)
         end
       end
     end
 
+    def handle_message_with_limits(consumer, delivery_info, properties, payload)
+      handle_cmsg_with_limits(consumer_msg(consumer, delivery_info, properties, payload))
+    end
+
+    # wrap the args into a ConsumerMsg, reusing the code from #handle_message
+    def consumer_msg(consumer, delivery_info, properties, payload)
+      serializer = consumer.get_serializer || Hutch::Config[:serializer]
+      logger.debug {
+        spec = serializer.binary? ? "#{payload.bytesize} bytes" : "#{payload}"
+        "message(#{properties.message_id || '-'}): " +
+          "routing key: #{delivery_info.routing_key}, " +
+          "consumer: #{consumer}, " +
+          "payload: #{spec}"
+      }
+
+      ConsumerMsg.new(consumer, Hutch::Message.new(delivery_info, properties, payload, serializer))
+    end
+
+    def handle_cmsg(consumer, delivery_info, properties, payload, message)
+      consumer_instance = consumer.new.tap { |c| c.broker, c.delivery_info = @broker, delivery_info }
+      with_tracing(consumer_instance).handle(message)
+      @broker.ack(delivery_info.delivery_tag)
+    rescue => ex
+      acknowledge_error(delivery_info, properties, @broker, ex)
+      handle_error(properties, payload, consumer, ex)
+    end
+
+
     # heartbeat check of the Hutch connection
     def heartbeat_connection
      @connected = Hutch.connected?
     end
 
-    # Every once in a while, from the buffer queue
+    # Every once in a while, move tasks from the buffer queue back into execution; those with a short interval run right away
     def retry_buffer_queue
+      begin_size = @buffer_queue.size
+      now = Time.now.utc
+      stat = {}
       @batch_size.times do
         cmsg = peak
-
-
+        break if cmsg.blank?
+        handle_cmsg_with_limits(cmsg)
+
+        next unless logger.level == Logger::DEBUG
+        if stat.key?(cmsg.message.body[:b])
+          stat[cmsg.message.body[:b]] += 1
+        else
+          stat[cmsg.message.body[:b]] = 1
+        end
+      end
+      logger.debug "retry_buffer_queue #{Time.now.utc - now}, size from #{begin_size} to #{@buffer_queue.size}, stat: #{stat}"
+    end
+
+    # Messages with a long rate interval should not sit in the buffer indefinitely, so long-period messages are re-enqueued to
+    # RabbitMQ (based on the interval value) and wait there. They must not simply be requeued directly, because that causes
+    # constant round trips with RabbitMQ; the wait has to be stretched gradually, based on the period and the execution count,
+    # up to a final maximum wait.
+    #
+    # Requirements:
+    # - must be called before retry_buffer_queue
+    # - the whole method call must finish within 1s
+    def flush_to_retry
+      now = Time.now.utc
+      if now - @last_flush_time >= Hutch::Config.get(:worker_buffer_flush_interval)
+        @buffer_queue.size.times do
+          cmsg = peak
+          break if cmsg.blank?
+          # if it was not handed off, put it back into the buffer
+          @buffer_queue.push(cmsg) unless cmsg.enqueue_in_or_not
+        end
+        @last_flush_time = now
+        logger.debug "flush_to_retry #{Time.now.utc - now}"
       end
     end
 
@@ -88,15 +154,41 @@ module Hutch
     end
   end
 
-  # Consumer Message wrap
+  # ConsumerMsg wraps a Hutch::Message together with its Consumer
   class ConsumerMsg
-    attr_reader :consumer, :
+    attr_reader :consumer, :message
+
+    def logger
+      Hutch::Logging.logger
+    end
+
+    def initialize(consumer, message)
+      @consumer = consumer
+      @message = message
+    end
+
+    def handle_cmsg_args
+      [consumer, message.delivery_info, message.properties, message.payload, message]
+    end
+
+    def interval
+      @interval ||= consumer.interval(message)
+    end
 
-
-
-
-
-
+    # if the delay is > 10s, let RabbitMQ do the delaying and enqueue the message again, instead of a RabbitMQ requeue
+    def enqueue_in_or_not
+      # intervals shorter than 5s are not handed off; they wait in our own buffer
+      return false if interval < Hutch::Config.get(:worker_buffer_flush_interval)
+      # messages with a long wait are handed off to the remote RabbitMQ to wait there, so they do not occupy buffer space
+      # if the volume is very large but the ratelimit is very strict, this still becomes a fixed-period backlog; the execution count would need to be tracked and the delay stretched
+      # e.g. a task allowed to run once every 30s with 200 messages piled up: the backlog keeps growing until it settles at the longest waiting period between RabbitMQ and hutch, and then it churns forever
+      # - either factor the execution count into a longer delay, but the longest delay is 10800 (3h), so the problem ultimately remains
+      # - or drop the task once the delay has been stretched far enough, because with the ratelimit in place and a constant backlog it can never be worked off
+      # there is no good fix here; it is the classic "producing faster than consuming" problem, and if production > consumption for long enough it has no solution
+      Hutch.broker.ack(message.delivery_info.delivery_tag)
+      # TODO: if an x-death count is present it needs extra handling, to avoid clashing with the x-death used by error retry
+      # for now apply a random 1x or 2x delay: most likely 1x, occasionally 2x
+      consumer.enqueue_in(interval * [rand(3), 1].max, message.body, message.properties.to_hash)
     end
   end
 end
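To make the hand-off rule in `flush_to_retry` / `enqueue_in_or_not` above concrete: messages whose ratelimit interval is below `worker_buffer_flush_interval` stay in the local buffer, everything else is acked and re-published with a randomized 1x or 2x delay. The snippet below is a standalone illustration, not code from the gem; the 6-second flush interval is just the default from this release.

```ruby
FLUSH_INTERVAL = 6 # default worker_buffer_flush_interval in 0.7.3

# mostly a 1x delay, occasionally 2x, mirroring `interval * [rand(3), 1].max`
def requeue_delay(interval)
  interval * [rand(3), 1].max
end

[3, 6, 30].each do |interval|
  if interval < FLUSH_INTERVAL
    puts "interval=#{interval}s: keep waiting in the local buffer"
  else
    puts "interval=#{interval}s: ack and re-enqueue to RabbitMQ with a #{requeue_delay(interval)}s delay"
  end
end
```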
data/lib/hutch/threshold.rb
CHANGED
@@ -31,6 +31,7 @@ module Hutch
     def threshold(args)
       @block_given = args.is_a?(Proc)
       if @block_given
+        raise "block only can have zero or one arguments" if args.arity > 1
         @threshold_block = args
       else
         raise "need args or block" if args.blank?
@@ -49,34 +50,48 @@ module Hutch
                               redis: Hutch::Schedule.redis)
     end
 
+
     # is class level @rate_limiter _context exceeded?
     # if the class level @rate_limiter is nil, always return false
-    def ratelimit_exceeded?
+    def ratelimit_exceeded?(message)
       return false if @rate_limiter.blank?
-
+      args = threshold_args(message)
+      @rate_limiter.exceeded?(_context(args), threshold: _rate(args), interval: _interval(args))
     rescue Redis::BaseError
       # when redis can't connect, report the limit as exceeded
       true
     end
 
    # record one call
-    def ratelimit_add
+    def ratelimit_add(message)
       return if @rate_limiter.blank?
-      @rate_limiter.add(_context)
+      @rate_limiter.add(_context(threshold_args(message)))
     rescue Redis::BaseError
       nil
     end
 
-    def
-      @block_given
+    def threshold_args(message)
+      if @block_given
+        @threshold_block.arity == 0 ? @threshold_block.call : @threshold_block.call(message.body)
+      else
+        { context: @context, rate: @rate, interval: @interval }
+      end
+    end
+
+    def interval(message)
+      _interval(threshold_args(message))
+    end
+
+    def _context(args)
+      args.fetch(:context, default_context)
     end
 
-    def _rate
-
+    def _rate(args)
+      args.fetch(:rate, default_rate)
     end
 
-    def _interval
-
+    def _interval(args)
+      args.fetch(:interval, default_interval)
     end
   end
 end
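To show what the new one-argument Proc form enables, here is a minimal sketch of the two ways a consumer can declare a threshold, assuming Hutch::Threshold is included on the consumer alongside Hutch::Enqueue (as the modules in this diff suggest). `ReportConsumer`, its routing key, and the `:tenant` field are hypothetical; the shape of the declarations (a Hash with `context`/`rate`/`interval`, or a Proc of arity 0 or 1 returning such a Hash, where the one-argument form receives the message body) follows the code above.

```ruby
class ReportConsumer
  include Hutch::Consumer
  include Hutch::Enqueue
  include Hutch::Threshold

  consume 'reports.generate'

  # static limit: at most 10 messages every 60 seconds in the 'reports' context
  threshold context: 'reports', rate: 10, interval: 60

  # or, new in 0.7.3, a per-message limit derived from the message body:
  # threshold ->(body) { { context: "reports.#{body[:tenant]}", rate: 5, interval: 60 } }

  def process(message)
    # rate-limited work goes here
  end
end
```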
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hutch-schedule
 version: !ruby/object:Gem::Version
-  version: 0.7.
+  version: 0.7.3
 platform: ruby
 authors:
 - Wyatt pan
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-04-
+date: 2020-04-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: hutch