karafka 2.4.9 → 2.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.ruby-version +1 -1
  4. data/CHANGELOG.md +14 -0
  5. data/Gemfile.lock +6 -6
  6. data/config/locales/errors.yml +1 -0
  7. data/config/locales/pro_errors.yml +18 -0
  8. data/lib/active_job/queue_adapters/karafka_adapter.rb +6 -0
  9. data/lib/karafka/base_consumer.rb +23 -0
  10. data/lib/karafka/contracts/consumer_group.rb +17 -0
  11. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  12. data/lib/karafka/instrumentation/notifications.rb +3 -0
  13. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +32 -11
  14. data/lib/karafka/instrumentation/vendors/appsignal/errors_listener.rb +1 -1
  15. data/lib/karafka/messages/message.rb +6 -0
  16. data/lib/karafka/pro/loader.rb +2 -1
  17. data/lib/karafka/pro/routing/features/recurring_tasks/builder.rb +9 -8
  18. data/lib/karafka/pro/routing/features/scheduled_messages/builder.rb +131 -0
  19. data/lib/karafka/pro/routing/features/scheduled_messages/config.rb +28 -0
  20. data/lib/karafka/pro/routing/features/scheduled_messages/contracts/topic.rb +40 -0
  21. data/lib/karafka/pro/routing/features/scheduled_messages/proxy.rb +27 -0
  22. data/lib/karafka/pro/routing/features/scheduled_messages/topic.rb +44 -0
  23. data/lib/karafka/pro/routing/features/scheduled_messages.rb +24 -0
  24. data/lib/karafka/pro/scheduled_messages/consumer.rb +185 -0
  25. data/lib/karafka/pro/scheduled_messages/contracts/config.rb +56 -0
  26. data/lib/karafka/pro/scheduled_messages/contracts/message.rb +77 -0
  27. data/lib/karafka/pro/scheduled_messages/daily_buffer.rb +79 -0
  28. data/lib/karafka/pro/scheduled_messages/day.rb +45 -0
  29. data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb +46 -0
  30. data/lib/karafka/pro/scheduled_messages/deserializers/payload.rb +35 -0
  31. data/lib/karafka/pro/scheduled_messages/dispatcher.rb +122 -0
  32. data/lib/karafka/pro/scheduled_messages/errors.rb +28 -0
  33. data/lib/karafka/pro/scheduled_messages/max_epoch.rb +41 -0
  34. data/lib/karafka/pro/scheduled_messages/proxy.rb +176 -0
  35. data/lib/karafka/pro/scheduled_messages/schema_validator.rb +37 -0
  36. data/lib/karafka/pro/scheduled_messages/serializer.rb +55 -0
  37. data/lib/karafka/pro/scheduled_messages/setup/config.rb +60 -0
  38. data/lib/karafka/pro/scheduled_messages/state.rb +62 -0
  39. data/lib/karafka/pro/scheduled_messages/tracker.rb +64 -0
  40. data/lib/karafka/pro/scheduled_messages.rb +67 -0
  41. data/lib/karafka/processing/executor.rb +6 -0
  42. data/lib/karafka/processing/strategies/default.rb +10 -0
  43. data/lib/karafka/railtie.rb +0 -20
  44. data/lib/karafka/version.rb +1 -1
  45. data.tar.gz.sig +0 -0
  46. metadata +26 -3
  47. metadata.gz.sig +3 -2
--- /dev/null
+++ b/data/lib/karafka/pro/routing/features/scheduled_messages.rb
@@ -0,0 +1,24 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Routing
+       module Features
+         # Scheduled messages functionality routing namespace
+         class ScheduledMessages < Base
+         end
+       end
+     end
+   end
+ end
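The builder listed in the file index (routing/features/scheduled_messages/builder.rb, +131, not shown in this section) is what exposes this feature class in the routing DSL. A hedged sketch of how an application might declare a schedules topic, assuming a `scheduled_messages` routing method named after the builder:

# Hypothetical routing declaration; not an excerpt from the package.
class KarafkaApp < Karafka::App
  routes.draw do
    # Declares the schedules topic (and, per the config contract further below,
    # its states-postfixed companion) and attaches the Pro scheduling consumer
    scheduled_messages(:scheduled_messages_topic)
  end
end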
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/consumer.rb
@@ -0,0 +1,185 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Consumer that coordinates scheduling of messages when the time comes
+       class Consumer < ::Karafka::BaseConsumer
+         # Prepares the initial state of all stateful components
+         def initialized
+           clear!
+           # Max epoch always moves forward with time, never backwards, hence we do not
+           # reset it at all.
+           @max_epoch = MaxEpoch.new
+           @state = State.new(nil)
+         end
+
+         # Processes messages and runs dispatch (via tick) if needed
+         def consume
+           return if reload!
+
+           messages.each do |message|
+             SchemaValidator.call(message)
+             process_message(message)
+           end
+
+           @states_reporter.call
+
+           eofed if eofed?
+
+           # Unless the given day's data is fully loaded, we should not dispatch any notifications
+           # nor should we mark messages.
+           return unless @state.loaded?
+
+           tick
+
+           # Despite the fact that we need to load the whole stream once a day, we do mark.
+           # We mark as consumed for two main reasons:
+           #   - by marking we can indicate to Web UI and other monitoring tools that we have a
+           #     potential real lag with loading schedules in case there would be a lot of messages
+           #     added to the schedules topic
+           #   - we prevent a situation where there is no notion of this consumer group in the
+           #     reporting, allowing us to establish "presence"
+           mark_as_consumed(messages.last)
+         end
+
+         # Runs end of file operations
+         def eofed
+           return if reload!
+
+           # If the end of the partition is reached, it always means all data is loaded
+           @state.loaded!
+           @states_reporter.call
+         end
+
+         # Performs periodic operations when no new data is provided to the topic partition
+         def tick
+           return if reload!
+
+           # We should not dispatch any data until the whole state is loaded. We need to make sure
+           # that all tombstone events are loaded so we do not duplicate dispatches
+           return unless @state.loaded?
+
+           keys = []
+           epochs = []
+
+           # We first collect all the data for dispatch, then dispatch, and **only** after the
+           # sync dispatch is successful do we remove those messages from the daily buffer
+           # and update the max epoch. Since only the dispatch itself is volatile and can crash
+           # with timeouts, etc., we need to be sure it went through prior to deleting those
+           # messages from the daily buffer. That way we ensure at-least-once delivery and, in
+           # case of a transactional producer, exactly-once delivery.
+           @daily_buffer.for_dispatch do |epoch, message|
+             epochs << epoch
+             keys << message.key
+             @dispatcher << message
+           end
+
+           @dispatcher.flush
+
+           @max_epoch.update(epochs.max)
+
+           keys.each { |key| @daily_buffer.delete(key) }
+
+           @states_reporter.call
+         end
+
+         private
+
+         # Takes each message and adds it to the daily accumulator if needed or performs other
+         # accumulator- and time-related per-message operations.
+         # @param message [Karafka::Messages::Message]
+         def process_message(message)
+           # If we started to receive messages younger than the moment we created the consumer for
+           # the given day, it means we have loaded all the history and we are no longer in the
+           # loading phase.
+           if message.timestamp.to_i > @today.created_at
+             @state.loaded!
+             tags.add(:state, @state.to_s)
+           end
+
+           # If this is a schedule message, we need to check if it is for today. Tombstone events
+           # are always considered immediate as they indicate that a message with a given key
+           # was already dispatched, or that the user decided not to dispatch and cancelled the
+           # dispatch via tombstone publishing.
+           if message.headers['schedule_source_type'] == 'schedule'
+             time = message.headers['schedule_target_epoch']
+
+             # Do not track historical epochs below today as those will be reflected in the daily buffer
+             @tracker.track(message) if time >= @today.starts_at
+
+             if time > @today.ends_at || time < @max_epoch.to_i
+               # Clean the message immediately when not needed (won't be scheduled) to preserve
+               # memory
+               message.clean!
+
+               return
+             end
+           end
+
+           # Add to the buffer all tombstones and messages for the same day
+           @daily_buffer << message
+         end
+
+         # Moves the offset back to the beginning and reloads the current day's undispatched schedules
+         def reload!
+           # If this is a new assignment, we always need to seek from the beginning to load the data
+           if @state.fresh?
+             clear!
+             seek(0)
+
+             return true
+           end
+
+           # Unless the state is loaded, we do nothing more because we're in the loading process
+           return false unless @state.loaded?
+
+           # If the day has ended, we reload and start a new day with new schedules
+           if @today.ended?
+             clear!
+             seek(0)
+
+             return true
+           end
+
+           false
+         end
+
+         # Resets all buffers and states so we can start a new day with a clean slate.
+         # We can fully recreate the dispatcher because any undispatched messages will be dispatched
+         # with the new day's dispatcher after it is reloaded.
+         def clear!
+           @daily_buffer = DailyBuffer.new
+           @today = Day.new
+           @tracker = Tracker.new
+           @state = State.new(false)
+           @dispatcher = config.dispatcher_class.new(topic.name, partition)
+           @states_reporter = Helpers::IntervalRunner.new do
+             @tracker.today = @daily_buffer.size
+             @tracker.state = @state.to_s
+
+             @dispatcher.state(@tracker)
+           end
+
+           tags.add(:state, @state.to_s)
+         end
+
+         # @return [Karafka::Core::Configurable::Node] Schedules config node
+         def config
+           @config ||= Karafka::App.config.scheduled_messages
+         end
+       end
+     end
+   end
+ end
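The ordering inside `tick` carries the delivery guarantee, so it is worth isolating: the buffer is only trimmed after the synchronous flush succeeds. A standalone sketch of that pattern with a stubbed producer (all names below are illustrative, not from the package):

# Stub standing in for Dispatcher#flush (produce_many_sync under the hood)
def dispatch_sync(batch)
  !batch.empty? # pretend the sync produce succeeded
end

pending = { 'order-1' => 'payload-1', 'order-2' => 'payload-2' } # assumed buffer state

# A crash between dispatch and eviction re-dispatches on restart
# (at-least-once) instead of silently dropping messages
raise 'dispatch failed, buffer kept intact' unless dispatch_sync(pending.values)

# Only after a successful sync dispatch is it safe to evict
pending.clear
puts pending.size # => 0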
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/contracts/config.rb
@@ -0,0 +1,56 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Scheduled messages related contracts
+       module Contracts
+         # Makes sure all the expected config is defined as it should be
+         class Config < ::Karafka::Contracts::Base
+           configure do |config|
+             config.error_messages = YAML.safe_load(
+               File.read(
+                 File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+               )
+             ).fetch('en').fetch('validations').fetch('config')
+           end
+
+           nested(:scheduled_messages) do
+             required(:consumer_class) { |val| val < ::Karafka::BaseConsumer }
+
+             # Do not allow running more often than once every second
+             required(:interval) { |val| val.is_a?(Integer) && val >= 1_000 }
+
+             required(:flush_batch_size) { |val| val.is_a?(Integer) && val.positive? }
+
+             required(:dispatcher_class) { |val| !val.nil? }
+
+             required(:group_id) do |val|
+               val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+             end
+
+             required(:states_postfix) do |val|
+               val.is_a?(String) && Karafka::Contracts::TOPIC_REGEXP.match?(val)
+             end
+
+             nested(:deserializers) do
+               required(:headers) { |val| !val.nil? }
+               required(:payload) { |val| !val.nil? }
+             end
+           end
+         end
+       end
+     end
+   end
+ end
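For reference, a settings shape that would satisfy this contract. The actual defaults live in scheduled_messages/setup/config.rb (+60 in the file index, not shown here); the concrete values below are assumptions for illustration only:

# Illustrative values only; real defaults are defined in setup/config.rb
scheduled_messages_settings = {
  consumer_class: Karafka::Pro::ScheduledMessages::Consumer,
  interval: 15_000,            # ms between ticks; contract requires an Integer >= 1_000
  flush_batch_size: 1_000,     # messages per produce_many_sync batch
  dispatcher_class: Karafka::Pro::ScheduledMessages::Dispatcher,
  group_id: 'karafka_scheduled_messages',
  states_postfix: '_states',   # appended to the topic name for state reports
  deserializers: {
    headers: Karafka::Pro::ScheduledMessages::Deserializers::Headers.new,
    payload: Karafka::Pro::ScheduledMessages::Deserializers::Payload.new
  }
}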
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/contracts/message.rb
@@ -0,0 +1,77 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       module Contracts
+         # Future message expected format.
+         #
+         # Our envelope always needs to comply with this format, otherwise we won't have enough
+         # details to be able to dispatch the message
+         class Message < ::Karafka::Contracts::Base
+           configure do |config|
+             config.error_messages = YAML.safe_load(
+               File.read(
+                 File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
+               )
+             ).fetch('en').fetch('validations').fetch('scheduled_messages_message')
+           end
+
+           # Headers we expect in each message of type "message" that goes to our scheduled messages
+           # topic
+           EXPECTED_HEADERS = %w[
+             schedule_schema_version
+             schedule_target_epoch
+             schedule_source_type
+             schedule_target_topic
+           ].freeze
+
+           required(:key) { |val| val.is_a?(String) && val.size.positive? }
+           required(:headers) { |val| val.is_a?(Hash) && (val.keys & EXPECTED_HEADERS).size == 4 }
+
+           # Makes sure that schedule_target_epoch is not older than the grace period behind us.
+           # While this is not an ideal verification of scheduling in the past, at least it will
+           # prevent user errors such as scheduling at epoch 0, etc.
+           virtual do |data, errors|
+             next unless errors.empty?
+
+             epoch_time = data[:headers].fetch('schedule_target_epoch').to_i
+
+             # We allow for a small lag as those will be dispatched, but we should prevent dispatching
+             # in the past in general as it is often a source of errors
+             next if epoch_time >= Time.now.to_i - 10
+
+             [[[:headers], :schedule_target_epoch_in_the_past]]
+           end
+
+           # Makes sure that the target envelope topic we dispatch to is a scheduled messages topic
+           virtual do |data, errors|
+             next unless errors.empty?
+
+             scheduled_topics = Karafka::App
+                                .routes
+                                .flat_map(&:topics)
+                                .flat_map(&:to_a)
+                                .select(&:scheduled_messages?)
+                                .map(&:name)
+
+             next if scheduled_topics.include?(data[:topic].to_s)
+
+             [[[:topic], :not_a_scheduled_messages_topic]]
+           end
+         end
+       end
+     end
+   end
+ end
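Putting the rules together, here is an envelope that would pass this contract, assuming 'scheduled_messages_topic' is declared as a scheduled messages topic in the routes. The header values are illustrative; the schema version string in particular is an assumption:

# Illustrative envelope satisfying the Message contract above
envelope = {
  topic: 'scheduled_messages_topic',        # must be a scheduled messages topic
  key: 'order-1',                           # required non-empty String
  headers: {
    'schedule_schema_version' => '1.0.0',   # assumed version value
    'schedule_target_epoch' => (Time.now.to_i + 3_600).to_s, # future, outside the 10 s grace period
    'schedule_source_type' => 'schedule',
    'schedule_target_topic' => 'orders_events'
  }
}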
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/daily_buffer.rb
@@ -0,0 +1,79 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Stores schedules for the current day and gives back those that should be dispatched.
+       # We do not use a min-heap implementation, just a regular hash, because we want to be able
+       # to update the schedules based on the key as well as remove schedules in case they get
+       # cancelled. While removals could be implemented, updates with a different timestamp would
+       # be more complex. At the moment a lookup of 8 640 000 messages (100 per second) takes
+       # up to 1.5 seconds, thus it is acceptable. Please ping me if you encounter performance
+       # issues with this naive implementation so it can be improved.
+       class DailyBuffer
+         def initialize
+           @accu = {}
+         end
+
+         # @return [Integer] number of elements to schedule today
+         def size
+           @accu.size
+         end
+
+         # Adds a message to the buffer or removes it from the buffer if it is a tombstone
+         # message for a given key
+         #
+         # @param message [Karafka::Messages::Message]
+         #
+         # @note Only messages for a given day should be added here.
+         def <<(message)
+           # Non-schedule messages are only tombstones and cancellations
+           schedule = message.headers['schedule_source_type'] == 'schedule'
+
+           key = message.key
+
+           if schedule
+             epoch = message.headers['schedule_target_epoch']
+             @accu[key] = [epoch, message]
+           else
+             @accu.delete(key)
+           end
+         end
+
+         # Yields messages that should be dispatched (sent) to Kafka
+         #
+         # @yieldparam [Integer, Karafka::Messages::Message] epoch of the message and the message
+         #   itself
+         #
+         # @note We yield the epoch alongside the message so we do not have to extract it several
+         #   times later on. This simplifies the API
+         def for_dispatch
+           dispatch = Time.now.to_i
+
+           @accu.each_value do |epoch, message|
+             next unless epoch <= dispatch
+
+             yield(epoch, message)
+           end
+         end
+
+         # Removes the given key from the accumulator
+         # @param key [String] key to remove
+         def delete(key)
+           @accu.delete(key)
+         end
+       end
+     end
+   end
+ end
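The upsert-by-key semantics are easiest to see in isolation. A standalone sketch, assuming the Pro code above is loadable, using a Struct in place of Karafka::Messages::Message (only #key and #headers matter here):

# Minimal stand-in for Karafka::Messages::Message
Msg = Struct.new(:key, :headers)

buffer = Karafka::Pro::ScheduledMessages::DailyBuffer.new

schedule = Msg.new(
  'order-1',
  { 'schedule_source_type' => 'schedule', 'schedule_target_epoch' => Time.now.to_i - 1 }
)
tombstone = Msg.new('order-1', { 'schedule_source_type' => 'tombstone' })

buffer << schedule   # upserts under 'order-1'
buffer << schedule   # same key: replaces the entry, size stays 1
buffer << tombstone  # cancels the pending schedule for that key
puts buffer.size     # => 0: nothing left for dispatch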
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/day.rb
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Just a simple UTC day implementation.
+       # Since we operate on the scope of one day, this allows us to encapsulate when a given day ends
+       class Day
+         # @return [Integer] utc timestamp when this day object was created. Keep in mind that
+         #   this is **not** when the day started but when this object was created.
+         attr_reader :created_at
+         # @return [Integer] utc timestamp when this day ends (last second of the day).
+         #   Equal to 23:59:59.
+         attr_reader :ends_at
+         # @return [Integer] utc timestamp when this day starts. Equal to 00:00:00
+         attr_reader :starts_at
+
+         def initialize
+           @created_at = Time.now.to_i
+
+           time = Time.at(@created_at).utc # normalize so day boundaries are computed in UTC
+
+           @starts_at = Time.utc(time.year, time.month, time.day).to_i
+           @ends_at = @starts_at + 86_399
+         end
+
+         # @return [Boolean] whether the current day we operate on has ended
+         def ended?
+           @ends_at < Time.now.to_i
+         end
+       end
+     end
+   end
+ end
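A quick self-check of the boundary arithmetic (pure stdlib): the [starts_at, ends_at] window covers exactly 86 400 seconds of the current UTC day, from 00:00:00 through 23:59:59:

now = Time.now.utc
starts_at = Time.utc(now.year, now.month, now.day).to_i
ends_at = starts_at + 86_399

puts Time.at(starts_at).utc # today 00:00:00 UTC
puts Time.at(ends_at).utc   # today 23:59:59 UTC
puts now.to_i.between?(starts_at, ends_at) # => true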
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/deserializers/headers.rb
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Namespace for schedules data-related deserializers.
+       module Deserializers
+         # Converts certain header values into their integer form for schedule messages
+         class Headers
+           # @param metadata [Karafka::Messages::Metadata]
+           # @return [Hash] headers
+           def call(metadata)
+             raw_headers = metadata.raw_headers
+
+             type = raw_headers.fetch('schedule_source_type')
+
+             # Tombstone and cancellation events are not operable, thus we do not have to cast any
+             # of the header values
+             return raw_headers unless type == 'schedule'
+
+             headers = raw_headers.dup
+             headers['schedule_target_epoch'] = headers['schedule_target_epoch'].to_i
+
+             # This attribute is optional, which is why we have to check for its existence
+             if headers.key?('schedule_target_partition')
+               headers['schedule_target_partition'] = headers['schedule_target_partition'].to_i
+             end
+
+             headers
+           end
+         end
+       end
+     end
+   end
+ end
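The cast matters because Kafka headers arrive as strings; without it, the consumer's epoch comparisons against the day window would be lexicographic rather than numeric. A pure-Ruby illustration:

raw  = { 'schedule_source_type' => 'schedule', 'schedule_target_epoch' => '1735689600' }
cast = raw.merge('schedule_target_epoch' => raw['schedule_target_epoch'].to_i)

puts raw['schedule_target_epoch'] > '200' # => false ('1' sorts before '2')
puts cast['schedule_target_epoch'] > 200  # => true (numeric comparison)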
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/deserializers/payload.rb
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       module Deserializers
+         # States payload deserializer.
+         # We only deserialize states data and never anything else. Other payloads are the payloads
+         # we are expected to proxy, thus there is no need to deserialize them in any context.
+         # Their appropriate target topics should have the expected deserializers
+         class Payload
+           # @param message [::Karafka::Messages::Message]
+           # @return [Hash] deserialized data
+           def call(message)
+             ::JSON.parse(
+               Zlib::Inflate.inflate(message.raw_payload),
+               symbolize_names: true
+             )
+           end
+         end
+       end
+     end
+   end
+ end
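For context, a round-trip sketch matching this deserializer. The producing side lives in serializer.rb (+55 in the file index, not shown); that it deflates JSON is inferred from the Zlib::Inflate call here and the 'zlib' => 'true' header the dispatcher sets below, so treat the serializer half as an assumption:

require 'json'
require 'zlib'

state = { schedules: 42, state: 'loaded' } # illustrative state report

raw_payload = Zlib::Deflate.deflate(JSON.generate(state)) # assumed serializer behavior

restored = JSON.parse(Zlib::Inflate.inflate(raw_payload), symbolize_names: true)
puts restored.inspect # => {:schedules=>42, :state=>"loaded"}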
--- /dev/null
+++ b/data/lib/karafka/pro/scheduled_messages/dispatcher.rb
@@ -0,0 +1,122 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component under a commercial license.
+ # This Karafka component is NOT licensed under LGPL.
+ #
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires a commercial license agreement.
+ #
+ # Karafka also has a commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ScheduledMessages
+       # Dispatcher responsible for dispatching the messages to appropriate target topics and for
+       # dispatching other messages. All messages (aside from the ones users dispatch with the
+       # envelope) are sent via this dispatcher.
+       #
+       # Messages are buffered and dispatched in batches to improve dispatch performance.
+       class Dispatcher
+         # @return [Array<Hash>] buffer with message hashes for dispatch
+         attr_reader :buffer
+
+         # @param topic [String] consumed topic name
+         # @param partition [Integer] consumed partition
+         def initialize(topic, partition)
+           @topic = topic
+           @partition = partition
+           @buffer = []
+           @serializer = Serializer.new
+         end
+
+         # Prepares the scheduled message for dispatch to the target topic. Extracts all the
+         # "schedule_" details and prepares them, so the dispatched message goes with the expected
+         # attributes to the desired location. Alongside that, it actually builds two messages
+         # (one if logs are off): a tombstone event matching the schedule so it is no longer valid,
+         # and a log message that has the same data as the dispatched message. Helpful when
+         # debugging.
+         #
+         # @param message [Karafka::Messages::Message] message from the schedules topic.
+         #
+         # @note This method adds the message to the buffer, it does **not** dispatch it.
+         # @note It also produces the needed tombstone event as well as an audit log message
+         def <<(message)
+           target_headers = message.raw_headers.merge(
+             'schedule_source_topic' => @topic,
+             'schedule_source_partition' => @partition.to_s,
+             'schedule_source_offset' => message.offset.to_s,
+             'schedule_source_key' => message.key
+           ).compact
+
+           target = {
+             payload: message.raw_payload,
+             headers: target_headers
+           }
+
+           extract(target, message.headers, :topic)
+           extract(target, message.headers, :partition)
+           extract(target, message.headers, :key)
+           extract(target, message.headers, :partition_key)
+
+           @buffer << target
+
+           # Tombstone message so this schedule is no longer in use and gets removed from Kafka by
+           # Kafka itself during compaction. It will not cancel it (it was already dispatched) but
+           # will prevent it from being sent again and mark it as dispatched.
+           @buffer << Proxy.tombstone(message: message)
+         end
+
+         # Builds and dispatches the state report message with schedules details
+         #
+         # @param tracker [Tracker]
+         #
+         # @note This is dispatched async because it's just a statistical metric.
+         def state(tracker)
+           config.producer.produce_async(
+             topic: "#{@topic}#{config.states_postfix}",
+             payload: @serializer.state(tracker),
+             key: 'state',
+             partition: @partition,
+             headers: { 'zlib' => 'true' }
+           )
+         end
+
+         # Sends all messages to Kafka in a sync way.
+         # We use sync with batches to prevent overloading.
+         # When a transactional producer is in use, this will be wrapped in a transaction automatically.
+         def flush
+           until @buffer.empty?
+             config.producer.produce_many_sync(
+               # We can remove this prior to the dispatch because we only evict messages from the
+               # daily buffer once dispatch is successful
+               @buffer.shift(config.flush_batch_size)
+             )
+           end
+         end
+
+         private
+
+         # @return [Karafka::Core::Configurable::Node] scheduled messages config node
+         def config
+           @config ||= Karafka::App.config.scheduled_messages
+         end
+
+         # Extracts and copies the future attribute to a proper place in the target message.
+         #
+         # @param target [Hash]
+         # @param headers [Hash]
+         # @param attribute [Symbol]
+         def extract(target, headers, attribute)
+           schedule_attribute = "schedule_target_#{attribute}"
+
+           return unless headers.key?(schedule_attribute)
+
+           target[attribute] = headers[schedule_attribute]
+         end
+       end
+     end
+   end
+ end
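The producer-facing entry point is the Proxy (+176 in the file index, not shown in this section). A hedged sketch of wrapping a future message into the schedules-topic envelope, assuming a `schedule` helper that mirrors the `Proxy.tombstone` call used by the dispatcher above; the exact released signature may differ:

# Assumed API: proxy.rb is not shown in this section, so `schedule` here is
# inferred from the Proxy.tombstone helper seen in the dispatcher.
envelope = Karafka::Pro::ScheduledMessages::Proxy.schedule(
  # The message that should eventually reach its own target topic
  message: { topic: 'orders_events', key: 'order-1', payload: '{"id":1}' },
  # When to dispatch it (unix epoch; validated against the contract's grace period)
  epoch: Time.now.to_i + 3_600,
  # Which scheduled messages topic holds the envelope until then
  envelope: { topic: 'scheduled_messages_topic' }
)

Karafka.producer.produce_async(envelope)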