redstream 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+
2
+ require "thread"
3
+
4
module Redstream
  # The Redstream::Consumer class to read messages from a specified redis
  # stream in batches.
  #
  # @example
  #   Redstream::Consumer.new(name: "user_indexer", stream_name: "users").run do |messages|
  #     # ...
  #   end

  class Consumer
    # Initializes a new consumer instance. Please note that you can have
    # multiple consumers per stream, by specifying different names.
    #
    # @param name [String] The consumer name. The name is used for locking
    # @param stream_name [String] The name of the redis stream. Please note
    #   that redstream adds a prefix to the redis keys. However, the
    #   stream_name param must be specified without any prefixes here.
    #   When using Redstream::Model, the stream name is the downcased,
    #   pluralized and underscored version of the model name. I.e., the
    #   stream name for a 'User' model will be 'users'
    # @param batch_size [Fixnum] The desired batch size, that is the number
    #   of messages yielded at max. More concretely, the number of messages
    #   yielded may be lower than the batch_size, but not higher
    # @param logger [Logger] The logger used for error logging

    def initialize(name:, stream_name:, batch_size: 1_000, logger: Logger.new("/dev/null"))
      @name = name
      @stream_name = stream_name
      @batch_size = batch_size
      @logger = logger

      # A dedicated connection (a dup of a pooled one) is kept for this
      # consumer — presumably so blocking XREAD calls don't pin a shared
      # pooled connection; TODO confirm.
      @redis = Redstream.connection_pool.with(&:dup)

      # Only one consumer per (stream, name) pair may be active at a time;
      # the lock below enforces that in run_once.
      @lock = Lock.new(name: "consumer:#{@stream_name}:#{@name}")
    end

    # Returns its maximum committed id, i.e. the consumer's offset.
    #
    # @return [String, nil] The committed id, or nil

    def max_committed_id
      @redis.get Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name)
    end

    # Loops and thus blocks forever while reading messages from the specified
    # stream and yielding them in batches.
    #
    # @example
    #   consumer.run do |messages|
    #     # ...
    #   end

    def run(&block)
      loop { run_once(&block) }
    end

    # Reads a single batch from the specified stream and yields it. You usually
    # want to use the #run method instead, which loops/blocks forever.
    # Retries forever on errors (after a 5 second pause), logging each error.
    #
    # @example
    #   consumer.run_once do |messages|
    #     # ...
    #   end

    def run_once(&block)
      got_lock = @lock.acquire do
        # Resume from the last committed offset; "0-0" starts at the very
        # beginning of the stream.
        offset = @redis.get(Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name))
        offset ||= "0-0"

        stream_key_name = Redstream.stream_key_name(@stream_name)

        # XREAD blocks up to 5 seconds; a client-side read timeout is
        # treated exactly like an empty response.
        response = begin
          @redis.xread(stream_key_name, offset, count: @batch_size, block: 5_000)
        rescue Redis::TimeoutError
          nil
        end

        # NOTE: returning from inside the acquire block exits run_once;
        # Lock#keep_lock's ensure still stops the lock keep-alive thread.
        return if response.nil? || response[stream_key_name].nil? || response[stream_key_name].empty?

        messages = response[stream_key_name].map do |raw_message|
          Message.new(raw_message)
        end

        block.call(messages)

        # Commit only after the block has processed the batch, i.e.
        # at-least-once delivery: a crash before commit re-yields the batch.
        offset = response[stream_key_name].last[0]

        return unless offset

        commit offset
      end

      # Another consumer instance holds the lock; back off before retrying.
      sleep(5) unless got_lock
    rescue => e
      @logger.error e

      sleep 5

      retry
    end

    # @api private
    #
    # Commits the specified offset/ID as the maximum ID already read, such that
    # subsequent read calls will use this offset/ID as a starting point.
    #
    # @param offset [String] The offset/ID to commit

    def commit(offset)
      @redis.set Redstream.offset_key_name(stream_name: @stream_name, consumer_name: @name), offset
    end
  end
end
115
+
@@ -0,0 +1,100 @@
1
+
2
module Redstream
  # The Redstream::Delayer class is responsible for reading messages from
  # special delay streams which are used to fix inconsistencies resulting from
  # network or other issues in between after_save and after_commit callbacks.
  # To be able to fix such issues, delay messages will be added to a delay
  # stream within an after_save callback. The delay messages aren't fetched
  # immediately, but e.g. 5 minutes later, such that we can be sure that the
  # database transaction is committed or has been rolled back, but is no longer
  # running.
  #
  # @example
  #   Redstream::Delayer.new(stream_name: "users", delay: 5.minutes, logger: Logger.new(STDOUT)).run

  class Delayer
    # Initializes the delayer for the specified stream name and delay.
    #
    # @param stream_name [String] The stream name. Please note, that redstream
    #   adds a prefix to the redis keys. However, the stream_name param must
    #   be specified without any prefixes here. When using Redstream::Model,
    #   the stream name is the downcased, pluralized and underscored version
    #   of the model name. I.e., the stream name for a 'User' model will be
    #   'users'
    # @param delay [Fixnum, Float, ActiveSupport::Duration] The delay, i.e.
    #   the age a message must have before processing it.
    # @param logger [Logger] The logger used for logging debug and error
    #   messages to.

    def initialize(stream_name:, delay:, logger: Logger.new("/dev/null"))
      @stream_name = stream_name
      @delay = delay
      @logger = logger

      # The delayer is itself just a consumer of the "<stream>.delay" stream.
      @consumer = Consumer.new(name: "delayer", stream_name: "#{stream_name}.delay", logger: logger)
      @batch = []
    end

    # Loops and blocks forever processing delay messages read from a delay
    # stream.

    def run
      loop { run_once }
    end

    # Reads and processes a single batch of delay messages from a delay
    # stream. You usually want to use the #run method instead, which
    # loops/blocks forever. Retries forever on errors (after a 5 second
    # pause), logging each error.

    def run_once
      @consumer.run_once do |messages|
        messages.each do |message|
          # A redis stream id begins with a millisecond timestamp, so this
          # is the remaining time until the message's delay has elapsed.
          seconds_to_sleep = message.message_id.to_f / 1_000 + @delay.to_f - Time.now.to_f

          if seconds_to_sleep > 0
            # Before sleeping, deliver the already-due messages and commit
            # their offset — presumably so a restart during the sleep won't
            # redeliver them; TODO confirm.
            if @batch.size > 0
              id = @batch.last.message_id

              deliver

              @consumer.commit id
            end

            # Sleep one extra second to be safely past the delay boundary.
            sleep(seconds_to_sleep + 1)
          end

          @batch << message
        end

        deliver
      end
    rescue => e
      @logger.error e

      sleep 5

      retry
    end

    private

    # Re-publishes all buffered delay messages to the main stream and then
    # removes them from the delay stream. No-op when the buffer is empty.
    def deliver
      return if @batch.size.zero?

      @logger.debug "Delayed #{@batch.size} messages for #{@delay.to_f} seconds on stream #{@stream_name}"

      Redstream.connection_pool.with do |redis|
        redis.pipelined do
          @batch.each do |message|
            redis.xadd Redstream.stream_key_name(@stream_name), payload: message.fields["payload"]
          end
        end

        redis.xdel Redstream.stream_key_name("#{@stream_name}.delay"), @batch.map(&:message_id)
      end

      @batch = []
    end
  end
end
100
+
@@ -0,0 +1,80 @@
1
+
2
+ require "securerandom"
3
+
4
module Redstream
  # @api private
  #
  # As the name suggests, the Redstream::Lock class implements a redis based
  # locking mechanism. It atomically (lua script) gets/sets the lock key and
  # updates its expire timeout, in case it currently holds the lock. Moreover,
  # once it got the lock, it tries to keep it by updating the lock expire
  # timeout from within a thread every 3 seconds.
  #
  # @example
  #   lock = Redstream::Lock.new(name: "user_stream_lock")
  #
  #   loop do
  #     got_lock = lock.acquire do
  #       # ...
  #     end
  #
  #     sleep(5) unless got_lock
  #   end

  class Lock
    # Lua script that acquires or refreshes the lock in one atomic step:
    # it sets the key with a 5 second TTL when the lock is free, refreshes
    # the TTL when this instance already holds it, and fails otherwise.
    # Hoisted to a frozen constant so the heredoc isn't rebuilt and
    # re-assigned on every get_lock call, as it previously was.
    GET_LOCK_SCRIPT = <<-EOF.freeze
      local lock_key_name, id = ARGV[1], ARGV[2]

      local cur = redis.call('get', lock_key_name)

      if not cur then
        redis.call('setex', lock_key_name, 5, id)

        return true
      elseif cur == id then
        redis.call('expire', lock_key_name, 5)

        return true
      end

      return false
    EOF

    # Initializes the lock.
    #
    # @param name [String] The lock name, used to build the redis lock key

    def initialize(name:)
      @name = name

      # A random id identifies this lock instance, so the lua script can
      # tell whether the lock is held by us or by someone else.
      @id = SecureRandom.hex
    end

    # Tries to acquire the lock and, on success, runs the block while a
    # background thread keeps refreshing the lock's expiry.
    #
    # @return A truthy value when the lock was acquired, falsy otherwise

    def acquire(&block)
      got_lock = get_lock
      keep_lock(&block) if got_lock
      got_lock
    end

    private

    # Runs the block while a keep-alive thread extends the lock TTL every
    # 3 seconds. The thread is told to stop via the ensure clause, even if
    # the block raises or returns early.
    def keep_lock(&block)
      stop = false
      mutex = Mutex.new

      Thread.new do
        until mutex.synchronize { stop }
          Redstream.connection_pool.with { |redis| redis.expire(Redstream.lock_key_name(@name), 5) }

          sleep 3
        end
      end

      block.call
    ensure
      mutex.synchronize do
        stop = true
      end
    end

    # Atomically acquires or refreshes the lock via GET_LOCK_SCRIPT.
    #
    # @return A truthy value when this instance holds the lock
    def get_lock
      Redstream.connection_pool.with { |redis| redis.eval(GET_LOCK_SCRIPT, argv: [Redstream.lock_key_name(@name), @id]) }
    end
  end
end
80
+
@@ -0,0 +1,52 @@
1
+
2
module Redstream
  # The Redstream::Message class wraps a raw redis stream message and offers
  # convenient access to the message id, the fields hash and the parsed json
  # payload.

  class Message
    # Returns the message id, i.e. the redis message id consisting of a
    # timestamp plus sequence number.
    #
    # @return [String] The message id
    attr_reader :message_id

    # Returns the raw message content as returned by redis.
    #
    # @return [Array] The raw message
    attr_reader :raw_message

    # @api private
    #
    # Initializes the message.
    #
    # @param raw_message [Array] The raw message as returned by redis
    def initialize(raw_message)
      @raw_message = raw_message
      @message_id = raw_message[0]
    end

    # Returns the parsed message payload as provided by the model's
    # #redstream_payload method. Check out Redstream::Model for more details.
    # Parsed lazily and memoized.
    #
    # @return [Hash] The parsed payload
    def payload
      @payload ||= JSON.parse(fields["payload"])
    end

    # As a redis stream message allows to specify fields, this returns the
    # fields as a hash. Memoized after the first access.
    #
    # @return [Hash] The fields hash
    def fields
      @fields ||= raw_message[1]
    end
  end
end
52
+
@@ -0,0 +1,57 @@
1
+
2
module Redstream
  # Include Redstream::Model in your model to stream the model's updates via
  # redis streams.
  #
  # @example
  #   class User < ActiveRecord::Base
  #     include Redstream::Model
  #
  #     # ...
  #
  #     redstream_callbacks
  #   end

  module Model
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Adds after_save, after_touch, after_destroy and, most importantly,
      # after_commit callbacks. after_save, after_touch and after_destroy write
      # a delay message to a delay stream. The delay messages are exactly like
      # other messages, but will be read and replayed by a Redstream::Delayer
      # only after a certain amount of time has passed (5 minutes usually) to
      # fix potential inconsistencies which result from network or other issues
      # in between database commit and the rails after_commit callback.
      #
      # @param producer [Redstream::Producer] A Redstream::Producer that is
      #   responsible for writing to a redis stream

      def redstream_callbacks(producer: Producer.new)
        # The three pre-commit callbacks all enqueue a delay message.
        [:after_save, :after_touch, :after_destroy].each do |hook|
          send(hook) { |object| producer.delay object }
        end

        # Only a successful commit triggers the instant message.
        after_commit { |object| producer.queue object }
      end

      # Returns the stream name derived from the model class name, i.e. the
      # pluralized, underscored version of it ('User' => 'users').

      def redstream_name
        name.pluralize.underscore
      end
    end

    # Override to customize the message payload. By default, the payload
    # consists of the record id only (see example 1).
    #
    # @example Default
    #   def redstream_payload
    #     { id: id }
    #   end

    def redstream_payload
      { id: id }
    end
  end
end
57
+
@@ -0,0 +1,145 @@
1
+
2
module Redstream
  # A Redstream::Producer is responsible for writing the actual messages to
  # redis. This includes the delay messages as well as the messages for
  # immediate retrieval. Usually, you don't have to use a producer directly.
  # Instead, Redstream::Model handles all producer related interaction.
  # However, Redstream::Model is not able to recognize model updates resulting
  # from model updates via e.g. #update_all, #delete_all, etc, i.e. updates
  # which by-pass model callbacks. Thus, calls to e.g. #update_all must be
  # wrapped with `find_in_batches` and Redstream::Producer#bulk (see example),
  # to write these updates to the redis streams as well.
  #
  # @example
  #   producer = Redstream::Producer.new
  #
  #   User.where(confirmed: true).find_in_batches do |users|
  #     producer.bulk users do
  #       User.where(id: users.map(&:id)).update_all(send_mailing: true)
  #     end
  #   end

  class Producer
    include MonitorMixin

    # Initializes a new producer. In case you're using a distributed redis
    # setup, you can use redis WAIT to improve real world data safety via the
    # wait param.
    #
    # @param wait [Boolean, Integer] Defaults to false. Specify an integer to
    #   enable using redis WAIT for writing delay messages. Check out the
    #   redis docs for more info regarding WAIT.

    def initialize(wait: false)
      @wait = wait
      @stream_name_cache = {}

      # MonitorMixin needs its super() call to set up the internal monitor
      # used by #synchronize.
      super()
    end

    # Use to wrap calls to #update_all, #delete_all, etc. I.e. methods, which
    # by-pass model lifecycle callbacks (after_save, etc.), as Redstream::Model
    # can't recognize these updates and write them to redis streams
    # automatically. You need to pass the records to be updated to the bulk
    # method. The bulk method writes delay messages for the records to redis,
    # then yields and then writes the messages for immediate retrieval. The
    # method must ensure that the same set of records is used for the delay
    # messages and the instant messages. Thus, you optimally pass an array of
    # records to it. If you pass an ActiveRecord::Relation, the method
    # converts it to an array, i.e. loading all matching records into memory.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk(records)
      # Materialize once so delay and instant messages cover the same set.
      records_array = Array(records)

      bulk_delay(records_array)

      yield

      bulk_queue(records_array)
    end

    # @api private
    #
    # Writes delay messages to a delay stream in redis.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk_delay(records)
      records.each_slice(250) do |slice|
        Redstream.connection_pool.with do |redis|
          redis.pipelined do
            # each, not map: the pipelined XADDs are pure side effects and
            # the intermediate array was never used.
            slice.each do |object|
              redis.xadd Redstream.stream_key_name("#{stream_name(object)}.delay"), payload: JSON.dump(object.redstream_payload)
            end
          end
        end
      end

      # Only check out a connection when WAIT is actually enabled.
      if @wait
        Redstream.connection_pool.with { |redis| redis.wait(@wait, 0) }
      end

      true
    end

    # @api private
    #
    # Writes messages to a stream in redis for immediate retrieval.
    #
    # @param records [#to_a] The object/objects that will be updated or deleted

    def bulk_queue(records)
      records.each_slice(250) do |slice|
        Redstream.connection_pool.with do |redis|
          redis.pipelined do
            slice.each do |object|
              redis.xadd Redstream.stream_key_name(stream_name(object)), payload: JSON.dump(object.redstream_payload)
            end
          end
        end
      end

      true
    end

    # @api private
    #
    # Writes a single delay message to a delay stream in redis.
    #
    # @param object The object that will be updated, deleted, etc.

    def delay(object)
      Redstream.connection_pool.with do |redis|
        redis.xadd Redstream.stream_key_name("#{stream_name(object)}.delay"), payload: JSON.dump(object.redstream_payload)
        redis.wait(@wait, 0) if @wait
      end

      true
    end

    # @api private
    #
    # Writes a single message to a stream in redis for immediate retrieval.
    #
    # @param object The object that will be updated, deleted, etc.

    def queue(object)
      Redstream.connection_pool.with do |redis|
        redis.xadd Redstream.stream_key_name(stream_name(object)), payload: JSON.dump(object.redstream_payload)
      end

      true
    end

    private

    # Resolves and caches the stream name for the object's class. The cache
    # is guarded by the monitor since producers may be shared across threads.
    def stream_name(object)
      synchronize do
        @stream_name_cache[object.class] ||= object.class.redstream_name
      end
    end
  end
end
+