karafka 1.0.1 → 1.1.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -3
- data/Gemfile +1 -0
- data/Gemfile.lock +14 -32
- data/README.md +1 -1
- data/karafka.gemspec +2 -3
- data/lib/karafka.rb +2 -3
- data/lib/karafka/attributes_map.rb +3 -3
- data/lib/karafka/backends/inline.rb +2 -2
- data/lib/karafka/base_controller.rb +19 -69
- data/lib/karafka/base_responder.rb +10 -5
- data/lib/karafka/cli/info.rb +1 -2
- data/lib/karafka/cli/server.rb +6 -8
- data/lib/karafka/connection/{messages_consumer.rb → consumer.rb} +27 -12
- data/lib/karafka/connection/listener.rb +6 -13
- data/lib/karafka/connection/{messages_processor.rb → processor.rb} +3 -3
- data/lib/karafka/controllers/callbacks.rb +54 -0
- data/lib/karafka/controllers/includer.rb +1 -1
- data/lib/karafka/controllers/single_params.rb +2 -2
- data/lib/karafka/errors.rb +7 -0
- data/lib/karafka/fetcher.rb +11 -5
- data/lib/karafka/monitor.rb +2 -2
- data/lib/karafka/params/params.rb +3 -1
- data/lib/karafka/params/params_batch.rb +1 -1
- data/lib/karafka/patches/dry_configurable.rb +0 -2
- data/lib/karafka/patches/ruby_kafka.rb +34 -0
- data/lib/karafka/persistence/consumer.rb +25 -0
- data/lib/karafka/persistence/controller.rb +24 -9
- data/lib/karafka/process.rb +1 -1
- data/lib/karafka/responders/topic.rb +8 -1
- data/lib/karafka/schemas/config.rb +0 -10
- data/lib/karafka/schemas/consumer_group.rb +9 -8
- data/lib/karafka/schemas/consumer_group_topic.rb +1 -1
- data/lib/karafka/schemas/responder_usage.rb +1 -0
- data/lib/karafka/server.rb +6 -19
- data/lib/karafka/setup/config.rb +15 -34
- data/lib/karafka/setup/configurators/base.rb +1 -1
- data/lib/karafka/setup/configurators/water_drop.rb +11 -13
- data/lib/karafka/templates/karafka.rb.example +1 -1
- data/lib/karafka/version.rb +1 -1
- metadata +15 -28
- data/Rakefile +0 -7
- data/lib/karafka/setup/configurators/celluloid.rb +0 -19
@@ -3,15 +3,16 @@
|
|
3
3
|
module Karafka
|
4
4
|
module Connection
|
5
5
|
# Class used as a wrapper around Ruby-Kafka to simplify additional
|
6
|
-
# features that we provide/might provide in future
|
7
|
-
class
|
6
|
+
# features that we provide/might provide in future and to hide the internal implementation
|
7
|
+
class Consumer
|
8
8
|
# Creates a queue consumer that will pull the data from Kafka
|
9
9
|
# @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
|
10
10
|
# we create a client
|
11
|
-
# @return [Karafka::Connection::
|
11
|
+
# @return [Karafka::Connection::Consumer] group consumer that can subscribe to
|
12
12
|
# multiple topics
|
13
13
|
def initialize(consumer_group)
|
14
14
|
@consumer_group = consumer_group
|
15
|
+
Persistence::Consumer.write(self)
|
15
16
|
end
|
16
17
|
|
17
18
|
# Opens connection, gets messages and calls a block for each of the incoming messages
|
@@ -19,10 +20,10 @@ module Karafka
|
|
19
20
|
# @note This will yield with raw messages - no preprocessing or reformatting.
|
20
21
|
def fetch_loop
|
21
22
|
send(
|
22
|
-
consumer_group.
|
23
|
+
consumer_group.batch_fetching ? :consume_each_batch : :consume_each_message
|
23
24
|
) { |messages| yield(messages) }
|
24
25
|
rescue Kafka::ProcessingError => e
|
25
|
-
# If there was an error during
|
26
|
+
# If there was an error during consumption, we have to log it, pause current partition
|
26
27
|
# and process other things
|
27
28
|
Karafka.monitor.notice_error(self.class, e.cause)
|
28
29
|
pause(e.topic, e.partition)
|
@@ -36,25 +37,39 @@ module Karafka
|
|
36
37
|
end
|
37
38
|
|
38
39
|
# Gracefully stops topic consumption
|
40
|
+
# @note Stopping running consumers without a really important reason is not recommended
|
41
|
+
# as until all the consumers are stopped, the server will keep running serving only
|
42
|
+
# part of the messages
|
39
43
|
def stop
|
40
44
|
@kafka_consumer&.stop
|
41
45
|
@kafka_consumer = nil
|
42
46
|
end
|
43
47
|
|
44
|
-
|
45
|
-
|
46
|
-
attr_reader :consumer_group
|
47
|
-
|
48
|
-
# Pauses processing of a given topic partition
|
48
|
+
# Pauses fetching and consumption of a given topic partition
|
49
49
|
# @param topic [String] topic that we want to pause
|
50
50
|
# @param partition [Integer] number partition that we want to pause
|
51
51
|
def pause(topic, partition)
|
52
52
|
settings = ConfigAdapter.pausing(consumer_group)
|
53
|
-
|
53
|
+
timeout = settings[:timeout]
|
54
|
+
raise(Errors::InvalidPauseTimeout, timeout) unless timeout.positive?
|
54
55
|
kafka_consumer.pause(topic, partition, settings)
|
55
|
-
true
|
56
56
|
end
|
57
57
|
|
58
|
+
# Marks a given message as consumed and commits the offsets
|
59
|
+
# @note In opposite to ruby-kafka, we commit the offset for each manual marking to be sure
|
60
|
+
# that the offset commit happens asap in case of a crash
|
61
|
+
# @param [Karafka::Params::Params] params message that we want to mark as processed
|
62
|
+
def mark_as_consumed(params)
|
63
|
+
kafka_consumer.mark_message_as_processed(params)
|
64
|
+
# Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
|
65
|
+
# before the automatic triggers have kicked in.
|
66
|
+
kafka_consumer.commit_offsets
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
attr_reader :consumer_group
|
72
|
+
|
58
73
|
# Consumes messages from Kafka in batches
|
59
74
|
# @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
|
60
75
|
def consume_each_batch
|
@@ -7,10 +7,6 @@ module Karafka
|
|
7
7
|
# @note Listener itself does nothing with the message - it will return to the block
|
8
8
|
# a raw Kafka::FetchedMessage
|
9
9
|
class Listener
|
10
|
-
include Celluloid
|
11
|
-
|
12
|
-
execute_block_on_receiver :fetch_loop
|
13
|
-
|
14
10
|
attr_reader :consumer_group
|
15
11
|
|
16
12
|
# @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
|
@@ -31,7 +27,7 @@ module Karafka
|
|
31
27
|
# Kafka connections / Internet connection issues / Etc. Business logic problems should not
|
32
28
|
# propagate this far
|
33
29
|
def fetch_loop(block)
|
34
|
-
|
30
|
+
consumer.fetch_loop do |raw_messages|
|
35
31
|
block.call(consumer_group.id, raw_messages)
|
36
32
|
end
|
37
33
|
# This is on purpose - see the notes for this method
|
@@ -39,19 +35,16 @@ module Karafka
|
|
39
35
|
rescue Exception => e
|
40
36
|
# rubocop:enable RescueException
|
41
37
|
Karafka.monitor.notice_error(self.class, e)
|
42
|
-
@
|
43
|
-
retry if @
|
38
|
+
@consumer&.stop
|
39
|
+
retry if @consumer
|
44
40
|
end
|
45
41
|
|
46
42
|
private
|
47
43
|
|
48
|
-
# @return [Karafka::Connection::
|
44
|
+
# @return [Karafka::Connection::Consumer] wrapped kafka consumer for a given topic
|
49
45
|
# consumption
|
50
|
-
|
51
|
-
|
52
|
-
@messages_consumer ||= MessagesConsumer.new(consumer_group).tap do |consumer|
|
53
|
-
Karafka::Server.consumers << consumer if Karafka::Server.consumers
|
54
|
-
end
|
46
|
+
def consumer
|
47
|
+
@consumer ||= Consumer.new(consumer_group)
|
55
48
|
end
|
56
49
|
end
|
57
50
|
end
|
@@ -3,14 +3,14 @@
|
|
3
3
|
module Karafka
|
4
4
|
module Connection
|
5
5
|
# Class that consumes messages for which we listen
|
6
|
-
module
|
6
|
+
module Processor
|
7
7
|
class << self
|
8
8
|
# Processes messages (does something with them)
|
9
9
|
# It will either schedule or run a proper controller action for messages
|
10
10
|
# @note This should be looped to obtain a constant listening
|
11
11
|
# @note We catch all the errors here, to make sure that no failures
|
12
12
|
# for a given consumption will affect other consumed messages
|
13
|
-
# If we wouldn't catch it, it would propagate up until killing the
|
13
|
+
# If we wouldn't catch it, it would propagate up until killing the thread
|
14
14
|
# @param group_id [String] group_id of a group from which a given message came
|
15
15
|
# @param kafka_messages [Array<Kafka::FetchedMessage>] raw messages fetched from kafka
|
16
16
|
def process(group_id, kafka_messages)
|
@@ -27,7 +27,7 @@ module Karafka
|
|
27
27
|
# Depending on a case (persisted or not) we might use new controller instance per each
|
28
28
|
# batch, or use the same instance for all of them (for implementing buffering, etc)
|
29
29
|
send(
|
30
|
-
topic.
|
30
|
+
topic.batch_consuming ? :process_batch : :process_each,
|
31
31
|
controller,
|
32
32
|
kafka_messages
|
33
33
|
)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Controllers
|
5
|
+
# Additional callbacks that can be used to trigger some actions on certain moments like
|
6
|
+
# manual offset management, committing or anything else outside of a standard messages flow
|
7
|
+
# They are not included by default, as we don't want to provide functionalities that are
|
8
|
+
# not required by users by default
|
9
|
+
# Please refer to the wiki callbacks page for more details on how to use them
|
10
|
+
module Callbacks
|
11
|
+
# Types of events on which we run callbacks
|
12
|
+
TYPES = %i[
|
13
|
+
after_fetched
|
14
|
+
after_poll
|
15
|
+
before_poll
|
16
|
+
before_stop
|
17
|
+
].freeze
|
18
|
+
|
19
|
+
# Class methods needed to make callbacks run
|
20
|
+
module ClassMethods
|
21
|
+
TYPES.each do |type|
|
22
|
+
# Creates a callback wrapper
|
23
|
+
# @param method_name [Symbol, String] method name or nil if we plan to provide a block
|
24
|
+
# @yield A block with a code that should be executed before scheduling
|
25
|
+
define_method type do |method_name = nil, &block|
|
26
|
+
set_callback type, :before, method_name ? method_name : block
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# @param controller_class [Class] controller class that we extend with callbacks
|
32
|
+
def self.included(controller_class)
|
33
|
+
controller_class.class_eval do
|
34
|
+
extend ClassMethods
|
35
|
+
include ActiveSupport::Callbacks
|
36
|
+
|
37
|
+
# The call method is wrapped with a set of callbacks
|
38
|
+
# We won't run process if any of the callbacks throw abort
|
39
|
+
# @see http://api.rubyonrails.org/classes/ActiveSupport/Callbacks/ClassMethods.html#method-i-get_callbacks
|
40
|
+
TYPES.each { |type| define_callbacks type }
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Executes the default controller flow, runs callbacks and if not halted will call process
|
45
|
+
# method of a proper backend. This is here because it interacts with the default Karafka
|
46
|
+
# call flow and needs to be overwritten in order to support callbacks
|
47
|
+
def call
|
48
|
+
run_callbacks :after_fetched do
|
49
|
+
process
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -34,7 +34,7 @@ module Karafka
|
|
34
34
|
# @param controller_class [Class] controller class
|
35
35
|
# @param topic [Karafka::Routing::Topic] topic of a controller class
|
36
36
|
def bind_params(controller_class, topic)
|
37
|
-
return if topic.
|
37
|
+
return if topic.batch_consuming
|
38
38
|
controller_class.include SingleParams
|
39
39
|
end
|
40
40
|
|
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
module Karafka
|
4
4
|
module Controllers
|
5
|
-
# Params alias for single message
|
5
|
+
# Params alias for single message consumption controllers
|
6
6
|
module SingleParams
|
7
7
|
private
|
8
8
|
|
9
|
-
# @return [Karafka::Params::Params] params instance for non batch
|
9
|
+
# @return [Karafka::Params::Params] params instance for non batch consumption controllers
|
10
10
|
def params
|
11
11
|
params_batch.first
|
12
12
|
end
|
data/lib/karafka/errors.rb
CHANGED
@@ -32,5 +32,12 @@ module Karafka
|
|
32
32
|
|
33
33
|
# Raised when we try to use Karafka CLI commands (except install) without a bootfile
|
34
34
|
MissingBootFile = Class.new(BaseError)
|
35
|
+
|
36
|
+
# Raised when we want to read a persisted thread messages consumer but it is unavailable
|
37
|
+
# This should never happen and if it does, please contact us
|
38
|
+
MissingConsumer = Class.new(BaseError)
|
39
|
+
|
40
|
+
# Raised when we attempt to pause a partition but the pause timeout is not a positive value
|
41
|
+
InvalidPauseTimeout = Class.new(BaseError)
|
35
42
|
end
|
36
43
|
end
|
data/lib/karafka/fetcher.rb
CHANGED
@@ -9,14 +9,20 @@ module Karafka
|
|
9
9
|
# Fetch loop should never end, which means that we won't create more actor clusters
|
10
10
|
# so we don't have to terminate them
|
11
11
|
def fetch_loop
|
12
|
-
|
13
|
-
|
12
|
+
threads = listeners.map do |listener|
|
13
|
+
# We abort on exception because there should be an exception handling developed for
|
14
|
+
# each listener running in separate threads, so the exceptions should never leak
|
15
|
+
# and if that happens, it means that something really bad happened and we should stop
|
16
|
+
# the whole process
|
17
|
+
Thread
|
18
|
+
.new { listener.fetch_loop(processor) }
|
19
|
+
.tap { |thread| thread.abort_on_exception = true }
|
14
20
|
end
|
15
21
|
|
16
|
-
|
22
|
+
threads.each(&:join)
|
17
23
|
# If anything crashes here, we need to raise the error and crash the runner because it means
|
18
24
|
# that something really bad happened
|
19
|
-
rescue => e
|
25
|
+
rescue StandardError => e
|
20
26
|
Karafka.monitor.notice_error(self.class, e)
|
21
27
|
Karafka::App.stop!
|
22
28
|
raise e
|
@@ -35,7 +41,7 @@ module Karafka
|
|
35
41
|
# @yieldparam messages [Array<Kafka::FetchedMessage>] messages from kafka (raw)
|
36
42
|
def processor
|
37
43
|
lambda do |group_id, messages|
|
38
|
-
Karafka::Connection::
|
44
|
+
Karafka::Connection::Processor.process(group_id, messages)
|
39
45
|
end
|
40
46
|
end
|
41
47
|
end
|
data/lib/karafka/monitor.rb
CHANGED
@@ -13,7 +13,7 @@ module Karafka
|
|
13
13
|
include Singleton
|
14
14
|
|
15
15
|
# This method is executed in many important places in the code (during data flow), like
|
16
|
-
# the moment before #
|
16
|
+
# the moment before #consume_async, etc. For full list just grep for 'monitor.notice'
|
17
17
|
# @param caller_class [Class] class of object that executed this call
|
18
18
|
# @param options [Hash] hash with options that we passed to notice. It differs based
|
19
19
|
# on of who and when is calling
|
@@ -55,7 +55,7 @@ module Karafka
|
|
55
55
|
def caller_exceptions_map
|
56
56
|
@caller_exceptions_map ||= {
|
57
57
|
error: [
|
58
|
-
Karafka::Connection::
|
58
|
+
Karafka::Connection::Consumer,
|
59
59
|
Karafka::Connection::Listener,
|
60
60
|
Karafka::Params::Params
|
61
61
|
],
|
@@ -14,6 +14,7 @@ module Karafka
|
|
14
14
|
partition
|
15
15
|
offset
|
16
16
|
key
|
17
|
+
create_time
|
17
18
|
].freeze
|
18
19
|
|
19
20
|
# Params attributes that should be available via a method call invocation for Kafka
|
@@ -26,6 +27,7 @@ module Karafka
|
|
26
27
|
partition
|
27
28
|
offset
|
28
29
|
key
|
30
|
+
create_time
|
29
31
|
].freeze
|
30
32
|
|
31
33
|
class << self
|
@@ -46,7 +48,7 @@ module Karafka
|
|
46
48
|
if message.is_a?(Hash)
|
47
49
|
new(parser: parser).send(:merge!, message)
|
48
50
|
else
|
49
|
-
# This happens inside Kafka::
|
51
|
+
# This happens inside Kafka::FetchedProcessor
|
50
52
|
new(
|
51
53
|
parser: parser,
|
52
54
|
parsed: false,
|
@@ -4,7 +4,7 @@ module Karafka
|
|
4
4
|
module Params
|
5
5
|
# Params batch represents a set of messages received from Kafka.
|
6
6
|
# @note Params internally are lazy loaded before first use. That way we can skip parsing
|
7
|
-
# process if we have
|
7
|
+
# process if we have after_fetched that rejects some incoming messages without using params
|
8
8
|
# It can be also used when handling really heavy data (in terms of parsing).
|
9
9
|
class ParamsBatch
|
10
10
|
include Enumerable
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Patches
|
5
|
+
# Patches for Ruby Kafka gem
|
6
|
+
module RubyKafka
|
7
|
+
# This patch allows us to inject business logic in between fetches and before the consumer
|
8
|
+
# stop, so we can perform stop commit or anything else that we need since
|
9
|
+
# ruby-kafka fetch loop does not allow that directly
|
10
|
+
# We don't want to use poll ruby-kafka api as it brings many more problems that we would
|
11
|
+
# have to take care of. That way, nothing like that ever happens but we get the control
|
12
|
+
# over the stopping process that we need (since we're the ones that initiate it for each
|
13
|
+
# thread)
|
14
|
+
def consumer_loop
|
15
|
+
super do
|
16
|
+
controllers = Karafka::Persistence::Controller
|
17
|
+
.all
|
18
|
+
.values
|
19
|
+
.flat_map(&:values)
|
20
|
+
.select { |ctrl| ctrl.respond_to?(:run_callbacks) }
|
21
|
+
|
22
|
+
if Karafka::App.stopped?
|
23
|
+
controllers.each { |ctrl| ctrl.run_callbacks :before_stop }
|
24
|
+
Karafka::Persistence::Consumer.read.stop
|
25
|
+
else
|
26
|
+
controllers.each { |ctrl| ctrl.run_callbacks :before_poll }
|
27
|
+
yield
|
28
|
+
controllers.each { |ctrl| ctrl.run_callbacks :after_poll }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Persistence
|
5
|
+
# Persistence layer to store current thread messages consumer for further use
|
6
|
+
class Consumer
|
7
|
+
# Thread.current key under which we store current thread messages consumer
|
8
|
+
PERSISTENCE_SCOPE = :consumer
|
9
|
+
|
10
|
+
# @param consumer [Karafka::Connection::Consumer] messages consumer of
|
11
|
+
# a current thread
|
12
|
+
# @return [Karafka::Connection::Consumer] persisted messages consumer
|
13
|
+
def self.write(consumer)
|
14
|
+
Thread.current[PERSISTENCE_SCOPE] = consumer
|
15
|
+
end
|
16
|
+
|
17
|
+
# @return [Karafka::Connection::Consumer] persisted messages consumer
|
18
|
+
# @raise [Karafka::Errors::MissingConsumer] raised when no thread messages consumer
|
19
|
+
# but we try to use it anyway
|
20
|
+
def self.read
|
21
|
+
Thread.current[PERSISTENCE_SCOPE] || raise(Errors::MissingConsumer)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -8,15 +8,30 @@ module Karafka
|
|
8
8
|
# topic and partition to store some additional details when the persistent mode
|
9
9
|
# for a given topic is turned on
|
10
10
|
class Controller
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# Thread.current scope under which we store controllers data
|
12
|
+
PERSISTENCE_SCOPE = :controllers
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# @return [Hash] current thread persistence scope hash with all the controllers
|
16
|
+
def all
|
17
|
+
Thread.current[PERSISTENCE_SCOPE] ||= {}
|
18
|
+
end
|
19
|
+
|
20
|
+
# Used to build (if block given) and/or fetch a current controller instance that will be
|
21
|
+
# used to process messages from a given topic and partition
|
22
|
+
# @return [Karafka::BaseController] base controller descendant
|
23
|
+
# @param topic [Karafka::Routing::Topic] topic instance for which we might cache
|
24
|
+
# @param partition [Integer] number of partition for which we want to cache
|
25
|
+
def fetch(topic, partition)
|
26
|
+
all[topic.id] ||= {}
|
27
|
+
|
28
|
+
# We always store a current instance
|
29
|
+
if topic.persistent
|
30
|
+
all[topic.id][partition] ||= yield
|
31
|
+
else
|
32
|
+
all[topic.id][partition] = yield
|
33
|
+
end
|
34
|
+
end
|
20
35
|
end
|
21
36
|
end
|
22
37
|
end
|
data/lib/karafka/process.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Karafka
|
4
|
-
# Class used to catch signals from ruby Signal class in order to manage Karafka
|
4
|
+
# Class used to catch signals from ruby Signal class in order to manage Karafka stop
|
5
5
|
# @note There might be only one process - this class is a singleton
|
6
6
|
class Process
|
7
7
|
include Singleton
|