redstream 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,115 @@
1
+
2
require "thread"

module Redstream
  # The Redstream::Consumer class to read messages from a specified redis
  # stream in batches.
  #
  # @example
  #   Redstream::Consumer.new(name: "user_indexer", stream_name: "users").run do |messages|
  #     # ...
  #   end

  class Consumer
    # Initializes a new consumer instance. Please note that you can have
    # multiple consumers per stream, by specifying different names.
    #
    # @param name [String] The consumer name. The name is used for locking
    # @param stream_name [String] The name of the redis stream. Please note
    #   that redstream adds a prefix to the redis keys. However, the
    #   stream_name param must be specified without any prefixes here.
    #   When using Redstream::Model, the stream name is the downcased,
    #   pluralized and underscored version of the model name. I.e., the
    #   stream name for a 'User' model will be 'users'
    # @param batch_size [Fixnum] The desired batch size, that is the number
    #   of messages yielded at max. More concretely, the number of messages
    #   yielded may be lower than the batch_size, but not higher
    # @param logger [Logger] The logger used for error logging

    def initialize(name:, stream_name:, batch_size: 1_000, logger: Logger.new("/dev/null"))
      @name = name
      @stream_name = stream_name
      @batch_size = batch_size
      @logger = logger

      # Duplicate a pooled connection for exclusive use by this consumer,
      # because the blocking XREAD below must not tie up a shared connection.
      @redis = Redstream.connection_pool.with(&:dup)

      # Only one consumer per stream/name combination may read at a time;
      # the lock enforces that across processes.
      @lock = Lock.new(name: "consumer:#{@stream_name}:#{@name}")
    end

    # Returns its maximum committed id, i.e. the consumer's offset.
    #
    # @return [String, nil] The committed id, or nil

    def max_committed_id
      @redis.get Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name)
    end

    # Loops and thus blocks forever while reading messages from the specified
    # stream and yielding them in batches.
    #
    # @example
    #   consumer.run do |messages|
    #     # ...
    #   end

    def run(&block)
      loop { run_once(&block) }
    end

    # Reads a single batch from the specified stream and yields it. You usually
    # want to use the #run method instead, which loops/blocks forever.
    #
    # Note that the offset is committed only AFTER the block has been called,
    # i.e. messages are delivered at least once and the block must be able to
    # cope with seeing a message again after a crash.
    #
    # @example
    #   consumer.run_once do |messages|
    #     # ...
    #   end

    def run_once(&block)
      got_lock = @lock.acquire do
        # Resume from the last committed offset; "0-0" reads the stream
        # from its very beginning when nothing has been committed yet.
        offset = @redis.get(Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name))
        offset ||= "0-0"

        stream_key_name = Redstream.stream_key_name(@stream_name)

        # Block for up to 5 seconds waiting for new entries. A client-side
        # timeout is treated like an empty read.
        response = begin
          @redis.xread(stream_key_name, offset, count: @batch_size, block: 5_000)
        rescue Redis::TimeoutError
          nil
        end

        # NOTE: `return` inside this block exits run_once itself (not just the
        # lock block), intentionally skipping the `sleep(5) unless got_lock`
        # below, since the lock was in fact acquired.
        return if response.nil? || response[stream_key_name].nil? || response[stream_key_name].empty?

        messages = response[stream_key_name].map do |raw_message|
          Message.new(raw_message)
        end

        block.call(messages)

        # Commit the id of the last message of the batch as the new offset.
        offset = response[stream_key_name].last[0]

        return unless offset

        commit offset
      end

      # Somebody else holds the lock: back off before trying again.
      sleep(5) unless got_lock
    rescue => e
      @logger.error e

      sleep 5

      retry
    end

    # @api private
    #
    # Commits the specified offset/ID as the maximum ID already read, such that
    # subsequent read calls will use this offset/ID as a starting point.
    #
    # @param offset [String] The offset/ID to commit

    def commit(offset)
      @redis.set Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name), offset
    end
  end
end
@@ -0,0 +1,100 @@
1
+
2
module Redstream
  # The Redstream::Delayer class is responsible for reading messages from
  # special delay streams which are used to fix inconsistencies resulting from
  # network or other issues in between after_save and after_commit callbacks.
  # To be able to fix such issues, delay messages will be added to a delay
  # stream within an after_save callback. The delay messages aren't fetched
  # immediately, but e.g. 5 minutes later, such that we can be sure that the
  # database transaction is committed or has been rolled back, but is no longer
  # running.
  #
  # @example
  #   Redstream::Delayer.new(stream_name: "users", delay: 5.minutes, logger: Logger.new(STDOUT)).run

  class Delayer
    # Initializes the delayer for the specified stream name and delay.
    #
    # @param stream_name [String] The stream name. Please note, that redstream
    #   adds a prefix to the redis keys. However, the stream_name param must
    #   be specified without any prefixes here. When using Redstream::Model,
    #   the stream name is the downcased, pluralized and underscored version
    #   of the model name. I.e., the stream name for a 'User' model will be
    #   'users'
    # @param delay [Fixnum, Float, ActiveSupport::Duration] The delay, i.e.
    #   the age a message must have before processing it.
    # @param logger [Logger] The logger used for logging debug and error
    #   messages to.

    def initialize(stream_name:, delay:, logger: Logger.new("/dev/null"))
      @stream_name = stream_name
      @delay = delay
      @logger = logger

      # The delayer is itself a consumer of the "<stream_name>.delay" stream.
      @consumer = Consumer.new(name: "delayer", stream_name: "#{stream_name}.delay", logger: logger)
      @batch = []
    end

    # Loops and blocks forever processing delay messages read from a delay
    # stream.

    def run
      loop { run_once }
    end

    # Reads and processes a single batch of delay messages from a delay
    # stream. You usually want to use the #run method instead, which
    # loops/blocks forever.

    def run_once
      @consumer.run_once do |messages|
        messages.each do |message|
          # A redis stream id starts with a millisecond timestamp, so the
          # message becomes due at (timestamp/1000 + delay) epoch seconds.
          seconds_to_sleep = message.message_id.to_f / 1_000 + @delay.to_f - Time.now.to_f

          if seconds_to_sleep > 0
            # Before going to sleep for a potentially long time, flush the
            # already-due messages collected so far and persist the progress,
            # so a crash while sleeping doesn't lose or redo that work.
            if @batch.size > 0
              id = @batch.last.message_id

              deliver

              @consumer.commit id
            end

            # Sleep one extra second to stay safely past the due time.
            sleep(seconds_to_sleep + 1)
          end

          @batch << message
        end

        deliver
      end
    rescue => e
      @logger.error e

      sleep 5

      retry
    end

    private

    # Re-publishes all batched delay messages to the main stream, removes them
    # from the delay stream and resets the batch. No-op for an empty batch.

    def deliver
      return if @batch.size.zero?

      @logger.debug "Delayed #{@batch.size} messages for #{@delay.to_f} seconds on stream #{@stream_name}"

      Redstream.connection_pool.with do |redis|
        redis.pipelined do
          @batch.each do |message|
            redis.xadd Redstream.stream_key_name(@stream_name), payload: message.fields["payload"]
          end
        end

        # Only after the re-publish pipeline has been sent are the delay
        # entries deleted from the delay stream.
        redis.xdel Redstream.stream_key_name("#{@stream_name}.delay"), @batch.map(&:message_id)
      end

      @batch = []
    end
  end
end
@@ -0,0 +1,80 @@
1
+
2
require "securerandom"

module Redstream
  # @api private
  #
  # As the name suggests, the Redstream::Lock class implements a redis based
  # locking mechanism. It atomically (lua script) gets/sets the lock key and
  # updates its expire timeout, in case it currently holds the lock. Moreover,
  # once it got the lock, it tries to keep it by updating the lock expire
  # timeout from within a thread every 3 seconds.
  #
  # @example
  #   lock = Redstream::Lock.new(name: "user_stream_lock")
  #
  #   loop do
  #     got_lock = lock.acquire do
  #       # ...
  #     end
  #
  #     sleep(5) unless got_lock
  #   end

  class Lock
    # Initializes the lock.
    #
    # @param name [String] The lock name, used to build the redis lock key

    def initialize(name:)
      @name = name

      # Unique per lock instance, such that only the instance which acquired
      # the lock can refresh its expiry (see lua script below).
      @id = SecureRandom.hex
    end

    # Tries to acquire the lock and, on success, yields the block while a
    # background thread keeps refreshing the lock expiry.
    #
    # @return [Object] A truthy value if the lock was acquired, falsy otherwise

    def acquire(&block)
      got_lock = get_lock
      keep_lock(&block) if got_lock
      got_lock
    end

    private

    # Runs the block while a refresher thread re-sets the 5 second lock expiry
    # every 3 seconds, so the lock is held for as long as the block runs.

    def keep_lock(&block)
      stop = false
      mutex = Mutex.new

      Thread.new do
        until mutex.synchronize { stop }
          Redstream.connection_pool.with { |redis| redis.expire(Redstream.lock_key_name(@name), 5) }

          sleep 3
        end
      end

      block.call
    ensure
      # Signal the refresher thread to terminate, even if the block raised.
      mutex.synchronize do
        stop = true
      end
    end

    # Atomically acquires or refreshes the lock via a lua script: sets the key
    # with a 5 second expiry if it is free, or refreshes the expiry if this
    # instance already holds it. Returns falsy if somebody else holds the lock.

    def get_lock
      # Memoize the script string; it was previously re-built on every call,
      # although get_lock runs repeatedly in polling loops.
      @get_lock_script ||= <<-EOF
        local lock_key_name, id = ARGV[1], ARGV[2]

        local cur = redis.call('get', lock_key_name)

        if not cur then
          redis.call('setex', lock_key_name, 5, id)

          return true
        elseif cur == id then
          redis.call('expire', lock_key_name, 5)

          return true
        end

        return false
      EOF

      Redstream.connection_pool.with { |redis| redis.eval(@get_lock_script, argv: [Redstream.lock_key_name(@name), @id]) }
    end
  end
end
@@ -0,0 +1,52 @@
1
+
2
module Redstream
  # The Redstream::Message class wraps a raw redis stream message to allow hash
  # and id/offset access as well as convenient parsing of the json payload.

  class Message
    # Returns the message id, i.e. the redis message id consisting of a
    # timestamp plus sequence number.
    #
    # @returns [String] The message id
    attr_reader :message_id

    # Returns the raw message content as returned by redis.
    attr_reader :raw_message

    # @api private
    #
    # Initializes the message.
    #
    # @param raw_message [Array] The raw message as returned by redis

    def initialize(raw_message)
      @raw_message = raw_message
      @message_id = raw_message[0]
    end

    # Returns the parsed message payload as provided by the model's
    # #redstream_payload method. Check out Redstream::Model for more details.
    #
    # @return [Hash] The parsed payload

    def payload
      @payload ||= JSON.parse(fields["payload"])
    end

    # As a redis stream message allows to specify fields,
    # this allows to retrieve the fields as a hash.
    #
    # @returns The fields hash

    def fields
      @fields ||= raw_message[1]
    end
  end
end
@@ -0,0 +1,57 @@
1
+
2
module Redstream
  # Include Redstream::Model in your model to stream the model's updates via
  # redis streams.
  #
  # @example
  #   class User < ActiveRecord::Base
  #     include Redstream::Model
  #
  #     # ...
  #
  #     redstream_callbacks
  #   end

  module Model
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Adds after_save, after_touch, after_destroy and, most importantly,
      # after_commit callbacks. after_save, after_touch and after_destroy write
      # a delay message to a delay stream. The delay messages are exactly like
      # other messages, but will be read and replayed by a Redstream::Delayer
      # only after a certain amount of time has passed (5 minutes usually) to
      # fix potential inconsistencies which result from network or other issues
      # in between database commit and the rails after_commit callback.
      #
      # @param producer [Redstream::Producer] A Redstream::Producer that is
      #   responsible for writing to a redis stream

      def redstream_callbacks(producer: Producer.new)
        # The pre-commit callbacks all write a delay message ...
        %i[after_save after_touch after_destroy].each do |callback|
          send(callback) { |object| producer.delay object }
        end

        # ... while after_commit writes the message for immediate retrieval.
        after_commit { |object| producer.queue object }
      end

      # Returns the stream name derived from the model name, i.e. the
      # pluralized, underscored version ('User' => 'users').

      def redstream_name
        name.pluralize.underscore
      end
    end

    # Override to customize the message payload. By default, the payload
    # consists of the record id only (see example 1).
    #
    # @example Default
    #   def redstream_payload
    #     { id: id }
    #   end

    def redstream_payload
      { id: id }
    end
  end
end
@@ -0,0 +1,145 @@
1
+
2
module Redstream
  # A Redstream::Producer is responsible for writing the actual messages to
  # redis. This includes the delay messages as well as the messages for
  # immediate retrieval. Usually, you don't have to use a producer directly.
  # Instead, Redstream::Model handles all producer related interaction.
  # However, Redstream::Model is not able to recognize model updates resulting
  # from model updates via e.g. #update_all, #delete_all, etc, i.e. updates
  # which by-pass model callbacks. Thus, calls to e.g. #update_all must be
  # wrapped with `find_in_batches` and Redstream::Producer#bulk (see example),
  # to write these updates to the redis streams as well.
  #
  # @example
  #   producer = Redstream::Producer.new
  #
  #   User.where(confirmed: true).find_in_batches do |users|
  #     producer.bulk users do
  #       User.where(id: users.map(&:id)).update_all(send_mailing: true)
  #     end
  #   end

  class Producer
    include MonitorMixin

    # Initializes a new producer. In case you're using a distributed redis
    # setup, you can use redis WAIT to improve real world data safety via the
    # wait param.
    #
    # @param wait [Boolean, Integer] Defaults to false. Specify an integer to
    #   enable using redis WAIT for writing delay messages. Check out the
    #   redis docs for more info regarding WAIT.

    def initialize(wait: false)
      @wait = wait
      @stream_name_cache = {}

      super()
    end

    # Use to wrap calls to #update_all, #delete_all, etc. I.e. methods, which
    # by-pass model lifecycle callbacks (after_save, etc.), as Redstream::Model
    # can't recognize these updates and write them to redis streams
    # automatically. You need to pass the records to be updated to the bulk
    # method. The bulk method writes delay messages for the records to redis,
    # then yields and then writes the messages for immediate retrieval. The
    # method must ensure that the same set of records is used for the delay
    # messages and the instant messages. Thus, you optimally, pass an array of
    # records to it. If you pass an ActiveRecord::Relation, the method
    # converts it to an array, i.e. loading all matching records into memory.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk(records)
      records_array = Array(records)

      bulk_delay(records_array)

      yield

      bulk_queue(records_array)
    end

    # @api private
    #
    # Writes delay messages to a delay stream in redis.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk_delay(records)
      records.each_slice(250) do |slice|
        Redstream.connection_pool.with do |redis|
          redis.pipelined do
            # each, not map: the xadd results are discarded (consistent with
            # bulk_queue below).
            slice.each do |object|
              redis.xadd Redstream.stream_key_name("#{stream_name(object)}.delay"), payload: JSON.dump(object.redstream_payload)
            end
          end
        end
      end

      # WAIT only for the delay messages: they are the safety net that must
      # be replicated before the database write proceeds.
      Redstream.connection_pool.with do |redis|
        redis.wait(@wait, 0) if @wait
      end

      true
    end

    # @api private
    #
    # Writes messages to a stream in redis for immediate retrieval.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk_queue(records)
      records.each_slice(250) do |slice|
        Redstream.connection_pool.with do |redis|
          redis.pipelined do
            slice.each do |object|
              redis.xadd Redstream.stream_key_name(stream_name(object)), payload: JSON.dump(object.redstream_payload)
            end
          end
        end
      end

      true
    end

    # @api private
    #
    # Writes a single delay message to a delay stream in redis.
    #
    # @param object The object that will be updated, deleted, etc.

    def delay(object)
      Redstream.connection_pool.with do |redis|
        redis.xadd Redstream.stream_key_name("#{stream_name(object)}.delay"), payload: JSON.dump(object.redstream_payload)
        redis.wait(@wait, 0) if @wait
      end

      true
    end

    # @api private
    #
    # Writes a single message to a stream in redis for immediate retrieval.
    #
    # @param object The object that will be updated, deleted, etc.

    def queue(object)
      Redstream.connection_pool.with do |redis|
        redis.xadd Redstream.stream_key_name(stream_name(object)), payload: JSON.dump(object.redstream_payload)
      end

      true
    end

    private

    # Caches the stream name per model class; synchronized (MonitorMixin) as
    # producers may be shared between threads.

    def stream_name(object)
      synchronize do
        @stream_name_cache[object.class] ||= object.class.redstream_name
      end
    end
  end
end