karafka 1.0.1 → 1.4.14

Files changed (121)
  1. checksums.yaml +5 -5
  2. checksums.yaml.gz.sig +0 -0
  3. data/.coditsu/ci.yml +3 -0
  4. data/.console_irbrc +1 -3
  5. data/.diffend.yml +3 -0
  6. data/.github/ISSUE_TEMPLATE/bug_report.md +50 -0
  7. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  8. data/.github/workflows/ci.yml +76 -0
  9. data/.gitignore +1 -0
  10. data/.ruby-version +1 -1
  11. data/CHANGELOG.md +286 -16
  12. data/CODE_OF_CONDUCT.md +1 -1
  13. data/CONTRIBUTING.md +6 -7
  14. data/Gemfile +5 -2
  15. data/Gemfile.lock +100 -103
  16. data/README.md +54 -74
  17. data/bin/karafka +1 -1
  18. data/certs/mensfeld.pem +26 -0
  19. data/config/errors.yml +40 -5
  20. data/docker-compose.yml +17 -0
  21. data/karafka.gemspec +31 -15
  22. data/lib/karafka/app.rb +19 -18
  23. data/lib/karafka/assignment_strategies/round_robin.rb +13 -0
  24. data/lib/karafka/attributes_map.rb +17 -21
  25. data/lib/karafka/backends/inline.rb +2 -3
  26. data/lib/karafka/base_consumer.rb +57 -0
  27. data/lib/karafka/base_responder.rb +77 -31
  28. data/lib/karafka/cli/base.rb +4 -4
  29. data/lib/karafka/cli/console.rb +11 -9
  30. data/lib/karafka/cli/flow.rb +9 -7
  31. data/lib/karafka/cli/info.rb +5 -4
  32. data/lib/karafka/cli/install.rb +32 -8
  33. data/lib/karafka/cli/missingno.rb +19 -0
  34. data/lib/karafka/cli/server.rb +18 -16
  35. data/lib/karafka/cli.rb +10 -2
  36. data/lib/karafka/code_reloader.rb +67 -0
  37. data/lib/karafka/connection/{config_adapter.rb → api_adapter.rb} +71 -22
  38. data/lib/karafka/connection/batch_delegator.rb +55 -0
  39. data/lib/karafka/connection/builder.rb +23 -0
  40. data/lib/karafka/connection/client.rb +120 -0
  41. data/lib/karafka/connection/listener.rb +39 -26
  42. data/lib/karafka/connection/message_delegator.rb +36 -0
  43. data/lib/karafka/consumers/batch_metadata.rb +10 -0
  44. data/lib/karafka/consumers/callbacks.rb +71 -0
  45. data/lib/karafka/consumers/includer.rb +64 -0
  46. data/lib/karafka/consumers/responders.rb +24 -0
  47. data/lib/karafka/{controllers → consumers}/single_params.rb +3 -3
  48. data/lib/karafka/contracts/config.rb +21 -0
  49. data/lib/karafka/contracts/consumer_group.rb +211 -0
  50. data/lib/karafka/contracts/consumer_group_topic.rb +19 -0
  51. data/lib/karafka/contracts/responder_usage.rb +54 -0
  52. data/lib/karafka/contracts/server_cli_options.rb +31 -0
  53. data/lib/karafka/contracts.rb +10 -0
  54. data/lib/karafka/errors.rb +27 -12
  55. data/lib/karafka/fetcher.rb +15 -15
  56. data/lib/karafka/helpers/class_matcher.rb +20 -10
  57. data/lib/karafka/helpers/config_retriever.rb +3 -3
  58. data/lib/karafka/helpers/inflector.rb +26 -0
  59. data/lib/karafka/helpers/multi_delegator.rb +0 -1
  60. data/lib/karafka/instrumentation/logger.rb +54 -0
  61. data/lib/karafka/instrumentation/monitor.rb +70 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +36 -0
  63. data/lib/karafka/instrumentation/stdout_listener.rb +140 -0
  64. data/lib/karafka/params/batch_metadata.rb +26 -0
  65. data/lib/karafka/params/builders/batch_metadata.rb +30 -0
  66. data/lib/karafka/params/builders/params.rb +38 -0
  67. data/lib/karafka/params/builders/params_batch.rb +25 -0
  68. data/lib/karafka/params/metadata.rb +20 -0
  69. data/lib/karafka/params/params.rb +35 -107
  70. data/lib/karafka/params/params_batch.rb +38 -19
  71. data/lib/karafka/patches/ruby_kafka.rb +47 -0
  72. data/lib/karafka/persistence/client.rb +29 -0
  73. data/lib/karafka/persistence/consumers.rb +45 -0
  74. data/lib/karafka/persistence/topics.rb +48 -0
  75. data/lib/karafka/process.rb +6 -9
  76. data/lib/karafka/responders/builder.rb +15 -14
  77. data/lib/karafka/responders/topic.rb +14 -9
  78. data/lib/karafka/routing/builder.rb +38 -9
  79. data/lib/karafka/routing/consumer_group.rb +6 -4
  80. data/lib/karafka/routing/consumer_mapper.rb +10 -9
  81. data/lib/karafka/routing/proxy.rb +10 -1
  82. data/lib/karafka/routing/router.rb +1 -1
  83. data/lib/karafka/routing/topic.rb +8 -12
  84. data/lib/karafka/routing/topic_mapper.rb +16 -18
  85. data/lib/karafka/serialization/json/deserializer.rb +27 -0
  86. data/lib/karafka/serialization/json/serializer.rb +31 -0
  87. data/lib/karafka/server.rb +50 -39
  88. data/lib/karafka/setup/config.rb +138 -91
  89. data/lib/karafka/setup/configurators/water_drop.rb +21 -16
  90. data/lib/karafka/setup/dsl.rb +21 -0
  91. data/lib/karafka/status.rb +7 -3
  92. data/lib/karafka/templates/{application_controller.rb.example → application_consumer.rb.erb} +2 -2
  93. data/lib/karafka/templates/karafka.rb.erb +92 -0
  94. data/lib/karafka/version.rb +1 -1
  95. data/lib/karafka.rb +19 -15
  96. data.tar.gz.sig +0 -0
  97. metadata +119 -81
  98. metadata.gz.sig +5 -0
  99. data/.github/ISSUE_TEMPLATE.md +0 -2
  100. data/.travis.yml +0 -17
  101. data/Rakefile +0 -7
  102. data/lib/karafka/base_controller.rb +0 -117
  103. data/lib/karafka/connection/messages_consumer.rb +0 -106
  104. data/lib/karafka/connection/messages_processor.rb +0 -61
  105. data/lib/karafka/controllers/includer.rb +0 -51
  106. data/lib/karafka/controllers/responders.rb +0 -19
  107. data/lib/karafka/loader.rb +0 -29
  108. data/lib/karafka/logger.rb +0 -53
  109. data/lib/karafka/monitor.rb +0 -98
  110. data/lib/karafka/parsers/json.rb +0 -38
  111. data/lib/karafka/patches/dry_configurable.rb +0 -33
  112. data/lib/karafka/persistence/controller.rb +0 -23
  113. data/lib/karafka/schemas/config.rb +0 -31
  114. data/lib/karafka/schemas/consumer_group.rb +0 -64
  115. data/lib/karafka/schemas/consumer_group_topic.rb +0 -18
  116. data/lib/karafka/schemas/responder_usage.rb +0 -38
  117. data/lib/karafka/schemas/server_cli_options.rb +0 -43
  118. data/lib/karafka/setup/configurators/base.rb +0 -35
  119. data/lib/karafka/setup/configurators/celluloid.rb +0 -19
  120. data/lib/karafka/templates/karafka.rb.example +0 -41
  121. /data/lib/karafka/templates/{application_responder.rb.example → application_responder.rb.erb} +0 -0
data/lib/karafka/cli.rb CHANGED
@@ -10,6 +10,8 @@ module Karafka
   class Cli < Thor
     package_name 'Karafka'
 
+    default_task :missingno
+
     class << self
       # Loads all Cli commands into Thor framework
       # This method should be executed before we run Karafka::Cli.start, otherwise we won't
@@ -20,6 +22,12 @@ module Karafka
         end
       end
 
+      # When there is a CLI crash, exit
+      # @return [true]
+      def exit_on_failure?
+        true
+      end
+
       private
 
       # @return [Array<Class>] Array with Cli action classes that can be used as commands
@@ -37,7 +45,7 @@ end
 # This is kinda trick - since we don't have a autoload and other magic stuff
 # like Rails does, so instead this method allows us to replace currently running
 # console with a new one via Kernel.exec. It will start console with new code loaded
-# Yes we know that it is not turbofast, however it is turbo convinient and small
+# Yes, we know that it is not turbo fast, however it is turbo convenient and small
 #
 # Also - the KARAFKA_CONSOLE is used to detect that we're executing the irb session
 # so this method is only available when the Karafka console is running
@@ -47,7 +55,7 @@ end
 if ENV['KARAFKA_CONSOLE']
   # Reloads Karafka irb console session
   def reload!
-    puts "Reloading...\n"
+    Karafka.logger.info "Reloading...\n"
     Kernel.exec Karafka::Cli::Console.command
   end
 end
data/lib/karafka/code_reloader.rb ADDED
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Special type of a listener, that is not an instrumentation one, but one that triggers
+  # code reload in the development mode after each fetched batch (or message)
+  #
+  # Please refer to the development code reload sections for details on the benefits and downsides
+  # of the in-process code reloading
+  class CodeReloader
+    # This mutex is needed as we might have an application that has multiple consumer groups
+    # running in separate threads and we should not trigger reload before fully reloading the app
+    # in previous thread
+    MUTEX = Mutex.new
+
+    private_constant :MUTEX
+
+    # @param reloaders [Array<Object>] any code loaders that we use in this app. Whether it is
+    # the Rails loader, Zeitwerk or anything else that allows reloading triggering
+    # @param block [Proc] yields given block just before reloading. This can be used to hook custom
+    # reloading stuff, that ain't reloaders (for example for resetting dry-events registry)
+    def initialize(*reloaders, &block)
+      @reloaders = reloaders
+      @block = block
+    end
+
+    # Binds to the instrumentation events and triggers reload
+    # @param _event [Dry::Event] empty dry event
+    # @note Since we de-register all the user defined objects and redraw routes, it means that
+    # we won't be able to do a multi-batch buffering in the development mode as each of the
+    # batches will be buffered on a newly created "per fetch" instance.
+    def on_connection_listener_fetch_loop(_event)
+      reload
+    end
+
+    private
+
+    # Triggers reload of both standard and Rails reloaders as well as expires all internals of
+    # Karafka, so it can be rediscovered and rebuilt
+    def reload
+      MUTEX.synchronize do
+        if @reloaders[0].respond_to?(:execute)
+          reload_with_rails
+        else
+          reload_without_rails
+        end
+      end
+    end
+
+    # Rails reloading procedure
+    def reload_with_rails
+      updatable = @reloaders.select(&:updated?)
+
+      return if updatable.empty?
+
+      updatable.each(&:execute)
+      @block&.call
+      Karafka::App.reload
+    end
+
+    # Zeitwerk and other reloaders
+    def reload_without_rails
+      @reloaders.each(&:reload)
+      @block&.call
+      Karafka::App.reload
+    end
+  end
+end
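For context (not part of this diff): the reloader above is used by subscribing an instance of it to Karafka's monitor, typically from the karafka.rb boot file, so that it runs before every fetch loop in development. A minimal sketch, assuming a Rails application where Rails.application.reloaders is available:

    # karafka.rb (sketch; for non-Rails apps pass Zeitwerk or other reloaders instead)
    if Karafka::App.env.development?
      Karafka.monitor.subscribe(
        Karafka::CodeReloader.new(
          *Rails.application.reloaders
        )
      )
    end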
data/lib/karafka/connection/{config_adapter.rb → api_adapter.rb} RENAMED
@@ -3,19 +3,23 @@
 module Karafka
   # Namespace for all the things related to Kafka connection
   module Connection
-    # Mapper used to convert our internal settings into ruby-kafka settings
+    # Mapper used to convert our internal settings into ruby-kafka settings based on their
+    # API requirements.
     # Since ruby-kafka has more and more options and there are few "levels" on which
     # we have to apply them (despite the fact, that in Karafka you configure all of it
     # in one place), we have to remap it into what ruby-kafka driver requires
     # @note The good thing about Kafka.new method is that it ignores all options that
     # do nothing. So we don't have to worry about injecting our internal settings
     # into the client and breaking stuff
-    module ConfigAdapter
+    module ApiAdapter
       class << self
         # Builds all the configuration settings for Kafka.new method
-        # @param _consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka.new method
-        def client(_consumer_group)
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
+        # @return [Array<Hash>] Array with all the client arguments including hash with all
+        # the settings required by Kafka.new method
+        # @note We return array, so we can inject any arguments we want, in case of changes in the
+        # raw driver
+        def client(consumer_group)
           # This one is a default that takes all the settings except special
           # cases defined in the map
           settings = {
@@ -23,22 +27,29 @@ module Karafka
             client_id: ::Karafka::App.config.client_id
           }
 
-          kafka_configs.each do |setting_name, setting_value|
+          kafka_configs.each_key do |setting_name|
             # All options for config adapter should be ignored as we're just interested
             # in what is left, as we want to pass all the options that are "typical"
-            # and not listed in the config_adapter special cases mapping. All the values
-            # from the config_adapter mapping go somewhere else, not to the client directly
-            next if AttributesMap.config_adapter.values.flatten.include?(setting_name)
+            # and not listed in the api_adapter special cases mapping. All the values
+            # from the api_adapter mapping go somewhere else, not to the client directly
+            next if AttributesMap.api_adapter.values.flatten.include?(setting_name)
 
-            settings[setting_name] = setting_value
+            # Settings for each consumer group are either defined per consumer group or are
+            # inherited from the global/general settings level, thus we don't have to fetch them
+            # from the kafka settings as they are already on a consumer group level
+            settings[setting_name] = consumer_group.public_send(setting_name)
           end
 
-          sanitize(settings)
+          settings_hash = sanitize(settings)
+
+          # Normalization for the way Kafka::Client accepts arguments from 0.5.3
+          [settings_hash.delete(:seed_brokers), settings_hash]
         end
 
         # Builds all the configuration settings for kafka#consumer method
         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by Kafka#consumer method
+        # @return [Hash] all the consumer keyword arguments including hash with all
+        # the settings required by Kafka#consumer
         def consumer(consumer_group)
           settings = { group_id: consumer_group.id }
           settings = fetch_for(:consumer, consumer_group, settings)
@@ -48,25 +59,61 @@ module Karafka
         # Builds all the configuration settings for kafka consumer consume_each_batch and
         # consume_each_message methods
         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required by
+        # @return [Hash] hash with all the arguments required by consuming method
+        # including all the settings required by
         # Kafka::Consumer#consume_each_message and Kafka::Consumer#consume_each_batch method
-        def consuming(consumer_group)
-          sanitize(fetch_for(:consuming, consumer_group))
+        def consumption(consumer_group)
+          sanitize(
+            fetch_for(
+              :consumption,
+              consumer_group,
+              automatically_mark_as_processed: consumer_group.automatically_mark_as_consumed
+            )
+          )
         end
 
         # Builds all the configuration settings for kafka consumer#subscribe method
         # @param topic [Karafka::Routing::Topic] topic that holds details for a given subscription
         # @return [Hash] hash with all the settings required by kafka consumer#subscribe method
-        def subscription(topic)
-          settings = fetch_for(:subscription, topic)
+        def subscribe(topic)
+          settings = fetch_for(:subscribe, topic)
           [Karafka::App.config.topic_mapper.outgoing(topic.name), sanitize(settings)]
         end
 
         # Builds all the configuration settings required by kafka consumer#pause method
+        # @param topic [String] topic that we want to pause
+        # @param partition [Integer] number partition that we want to pause
         # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group details
-        # @return [Hash] hash with all the settings required to pause kafka consumer
-        def pausing(consumer_group)
-          { timeout: consumer_group.pause_timeout }
+        # @return [Hash] hash with all the details required to pause kafka consumer
+        def pause(topic, partition, consumer_group)
+          {
+            args: [Karafka::App.config.topic_mapper.outgoing(topic), partition],
+            kwargs: {
+              timeout: consumer_group.pause_timeout,
+              max_timeout: consumer_group.pause_max_timeout,
+              exponential_backoff: consumer_group.pause_exponential_backoff
+            }
+          }
+        end
+
+        # Remaps topic details taking the topic mapper feature into consideration.
+        # @param params [Karafka::Params::Params] params instance
+        # @return [Array] array with all the details needed by ruby-kafka to mark message
+        # as processed
+        # @note When default empty topic mapper is used, no need for any conversion as the
+        # internal and external format are exactly the same
+        def mark_message_as_processed(params)
+          # Majority of users don't use custom topic mappers. No need to change anything when it
+          # is a default mapper that does not change anything. Only some cloud providers require
+          # topics to be remapped
+          return [params.metadata] if Karafka::App.config.topic_mapper.is_a?(
+            Karafka::Routing::TopicMapper
+          )
+
+          # @note We don't use tap as it is around 13% slower than non-dup version
+          dupped = params.metadata.dup
+          dupped['topic'] = Karafka::App.config.topic_mapper.outgoing(params.metadata.topic)
+          [dupped]
         end
 
         private
@@ -79,10 +126,12 @@ module Karafka
         def fetch_for(namespace_key, route_layer, preexisting_settings = {})
           kafka_configs.each_key do |setting_name|
             # Ignore settings that are not related to our namespace
-            next unless AttributesMap.config_adapter[namespace_key].include?(setting_name)
+            next unless AttributesMap.api_adapter[namespace_key].include?(setting_name)
+
             # Ignore settings that are already initialized
             # In case they are in preexisting settings fetched differently
-            next if preexisting_settings.keys.include?(setting_name)
+            next if preexisting_settings.key?(setting_name)
+
             # Fetch all the settings from a given layer object. Objects can handle the fallback
             # to the kafka settings, so
             preexisting_settings[setting_name] = route_layer.send(setting_name)
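A side note (not part of the diff): mark_message_as_processed only remaps the topic when a non-default topic mapper is configured. A sketch of such a mapper, with the prefix value purely illustrative; incoming/outgoing is the interface Karafka expects from a topic mapper:

    # Hypothetical mapper in the spirit of setups that force-prefix topic names
    class PrefixedTopicMapper
      PREFIX = 'myteam.'

      # Name Karafka (and your consumers) see internally
      def incoming(topic)
        topic.to_s.sub(/\A#{Regexp.escape(PREFIX)}/, '')
      end

      # Name used against the Kafka cluster (also by mark_message_as_processed above)
      def outgoing(topic)
        "#{PREFIX}#{topic}"
      end
    end

    # karafka.rb (sketch): config.topic_mapper = PrefixedTopicMapper.new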
data/lib/karafka/connection/batch_delegator.rb ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class that delegates processing of batch received messages for which we listen to
+    # a proper processor
+    module BatchDelegator
+      class << self
+        # Delegates messages (does something with them)
+        # It will either schedule or run a proper processor action for messages
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_batch [<Kafka::FetchedBatch>] raw messages fetched batch
+        # @note This should be looped to obtain a constant delegating of new messages
+        def call(group_id, kafka_batch)
+          topic = Persistence::Topics.fetch(group_id, kafka_batch.topic)
+          consumer = Persistence::Consumers.fetch(topic, kafka_batch.partition)
+
+          Karafka.monitor.instrument(
+            'connection.batch_delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_batch: kafka_batch
+          ) do
+            # Due to how ruby-kafka is built, we have the metadata that is stored on the batch
+            # level only available for batch consuming
+            consumer.batch_metadata = Params::Builders::BatchMetadata.from_kafka_batch(
+              kafka_batch,
+              topic
+            )
+
+            kafka_messages = kafka_batch.messages
+
+            # Depending on a case (persisted or not) we might use new consumer instance per
+            # each batch, or use the same one for all of them (for implementing buffering, etc.)
+            if topic.batch_consuming
+              consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+                kafka_messages,
+                topic
+              )
+              consumer.call
+            else
+              kafka_messages.each do |kafka_message|
+                consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+                  [kafka_message],
+                  topic
+                )
+                consumer.call
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
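For orientation (not part of the diff): whether BatchDelegator hands a consumer the whole batch or one message at a time is controlled by the batch_consuming routing flag. A minimal routing sketch; the app class, group, topic and consumer names are placeholders, and the DSL shown is the karafka 1.x consumer_groups.draw style:

    # karafka.rb (sketch)
    class KarafkaApp < Karafka::App
      consumer_groups.draw do
        consumer_group :example_group do
          topic :example_events do
            consumer ExampleEventsConsumer
            # true  -> one #consume call per fetched batch (params_batch holds the whole batch)
            # false -> one #consume call per message, even when batches are fetched
            batch_consuming true
          end
        end
      end
    end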
data/lib/karafka/connection/builder.rb ADDED
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Builder used to construct Kafka client
+    module Builder
+      class << self
+        # Builds a Kafka::Client instance that we use to work with Kafka cluster
+        # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which we want
+        # to have a new Kafka client
+        # @return [::Kafka::Client] returns a Kafka client
+        def call(consumer_group)
+          settings = ApiAdapter.client(consumer_group)
+
+          Kafka.new(
+            settings[0],
+            **settings[1]
+          )
+        end
+      end
+    end
+  end
+end
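To make the two-element return value of ApiAdapter.client above concrete: the first element becomes ruby-kafka's positional seed brokers argument and the second becomes the keyword options. Purely illustrative values (not part of the diff):

    require 'kafka'

    # What Builder.call effectively executes once ApiAdapter.client has done its mapping
    seed_brokers = ['kafka://127.0.0.1:9092']
    options      = { client_id: 'example_app' }

    kafka = Kafka.new(seed_brokers, **options)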
data/lib/karafka/connection/client.rb ADDED
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class used as a wrapper around Ruby-Kafka client to simplify additional
+    # features that we provide/might provide in future and to hide the internal implementation
+    class Client
+      extend Forwardable
+
+      %i[
+        seek
+        trigger_heartbeat
+        trigger_heartbeat!
+      ].each do |delegated_method|
+        def_delegator :kafka_consumer, delegated_method
+      end
+
+      # Creates a queue consumer client that will pull the data from Kafka
+      # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group for which
+      # we create a client
+      # @return [Karafka::Connection::Client] group consumer that can subscribe to
+      # multiple topics
+      def initialize(consumer_group)
+        @consumer_group = consumer_group
+        Persistence::Client.write(self)
+      end
+
+      # Opens connection, gets messages and calls a block for each of the incoming messages
+      # @yieldparam [Array<Kafka::FetchedMessage>, Symbol] kafka response with an info about
+      # the type of the fetcher that is being used
+      # @note This will yield with raw messages - no preprocessing or reformatting.
+      def fetch_loop
+        settings = ApiAdapter.consumption(consumer_group)
+
+        if consumer_group.batch_fetching
+          kafka_consumer.each_batch(**settings) { |batch| yield(batch, :batch) }
+        else
+          kafka_consumer.each_message(**settings) { |message| yield(message, :message) }
+        end
+      # @note We catch only the processing errors as any other are considered critical (exceptions)
+      # and should require a client restart with a backoff
+      rescue Kafka::ProcessingError => e
+        # If there was an error during consumption, we have to log it, pause current partition
+        # and process other things
+        Karafka.monitor.instrument(
+          'connection.client.fetch_loop.error',
+          caller: self,
+          error: e.cause
+        )
+        pause(e.topic, e.partition)
+        retry
+      end
+
+      # Gracefully stops topic consumption
+      # @note Stopping running consumers without a really important reason is not recommended
+      # as until all the consumers are stopped, the server will keep running serving only
+      # part of the messages
+      def stop
+        @kafka_consumer&.stop
+        @kafka_consumer = nil
+      end
+
+      # Pauses fetching and consumption of a given topic partition
+      # @param topic [String] topic that we want to pause
+      # @param partition [Integer] number partition that we want to pause
+      def pause(topic, partition)
+        args, kwargs = ApiAdapter.pause(topic, partition, consumer_group).values_at(:args, :kwargs)
+        kafka_consumer.pause(*args, **kwargs)
+      end
+
+      # Marks given message as consumed
+      # @param [Karafka::Params::Params] params message that we want to mark as processed
+      # @note This method won't trigger automatic offsets commits, rather relying on the ruby-kafka
+      # offsets time-interval based committing
+      def mark_as_consumed(params)
+        kafka_consumer.mark_message_as_processed(
+          *ApiAdapter.mark_message_as_processed(params)
+        )
+      end
+
+      # Marks a given message as consumed and commit the offsets in a blocking way
+      # @param [Karafka::Params::Params] params message that we want to mark as processed
+      # @note This method commits the offset for each manual marking to be sure
+      # that offset commit happen asap in case of a crash
+      def mark_as_consumed!(params)
+        mark_as_consumed(params)
+        # Trigger an immediate, blocking offset commit in order to minimize the risk of crashing
+        # before the automatic triggers have kicked in.
+        kafka_consumer.commit_offsets
+      end
+
+      private
+
+      attr_reader :consumer_group
+
+      # @return [Kafka::Consumer] returns a ready to consume Kafka consumer
+      # that is set up to consume from topics of a given consumer group
+      def kafka_consumer
+        # @note We don't cache the connection internally because we cache kafka_consumer that uses
+        # kafka client object instance
+        @kafka_consumer ||= Builder.call(consumer_group).consumer(
+          **ApiAdapter.consumer(consumer_group)
+        ).tap do |consumer|
+          consumer_group.topics.each do |topic|
+            settings = ApiAdapter.subscribe(topic)
+
+            consumer.subscribe(settings[0], **settings[1])
+          end
+        end
+      rescue Kafka::ConnectionError
+        # If we would not wait it will spam log file with failed
+        # attempts if Kafka is down
+        sleep(consumer_group.reconnect_timeout)
+        # We don't log and just re-raise - this will be logged
+        # down the road
+        raise
+      end
+    end
+  end
+end
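A small sketch (not part of the diff) of the contract this wrapper exposes: fetch_loop yields the raw ruby-kafka object together with a type symbol, which is exactly what the Listener changes below dispatch on:

    # consumer_group is assumed to be a Karafka::Routing::ConsumerGroup instance
    client = Karafka::Connection::Client.new(consumer_group)

    client.fetch_loop do |raw_data, type|
      case type
      when :batch   # raw_data is a Kafka::FetchedBatch (batch_fetching enabled)
        Karafka::Connection::BatchDelegator.call(consumer_group.id, raw_data)
      when :message # raw_data is a Kafka::FetchedMessage
        Karafka::Connection::MessageDelegator.call(consumer_group.id, raw_data)
      end
    end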
data/lib/karafka/connection/listener.rb CHANGED
@@ -7,12 +7,6 @@ module Karafka
     # @note Listener itself does nothing with the message - it will return to the block
     # a raw Kafka::FetchedMessage
     class Listener
-      include Celluloid
-
-      execute_block_on_receiver :fetch_loop
-
-      attr_reader :consumer_group
-
       # @param consumer_group [Karafka::Routing::ConsumerGroup] consumer group that holds details
       # on what topics and with what settings should we listen
       # @return [Karafka::Connection::Listener] listener instance
@@ -20,38 +14,57 @@ module Karafka
         @consumer_group = consumer_group
       end
 
+      # Runs prefetch callbacks and executes the main listener fetch loop
+      def call
+        Karafka.monitor.instrument(
+          'connection.listener.before_fetch_loop',
+          consumer_group: @consumer_group,
+          client: client
+        )
+        fetch_loop
+      end
+
+      private
+
       # Opens connection, gets messages and calls a block for each of the incoming messages
-      # @yieldparam [String] consumer group id
-      # @yieldparam [Array<Kafka::FetchedMessage>] kafka fetched messages
-      # @note This will yield with a raw message - no preprocessing or reformatting
       # @note We catch all the errors here, so they don't affect other listeners (or this one)
       # so we will be able to listen and consume other incoming messages.
       # Since it is run inside Karafka::Connection::ActorCluster - catching all the exceptions
-      # won't crash the whole cluster. Here we mostly focus on catchin the exceptions related to
+      # won't crash the whole cluster. Here we mostly focus on catching the exceptions related to
       # Kafka connections / Internet connection issues / Etc. Business logic problems should not
       # propagate this far
-      def fetch_loop(block)
-        messages_consumer.fetch_loop do |raw_messages|
-          block.call(consumer_group.id, raw_messages)
+      def fetch_loop
+        # @note What happens here is a delegation of processing to a proper processor based
+        # on the incoming messages characteristics
+        client.fetch_loop do |raw_data, type|
+          Karafka.monitor.instrument('connection.listener.fetch_loop')
+
+          case type
+          when :message
+            MessageDelegator.call(@consumer_group.id, raw_data)
+          when :batch
+            BatchDelegator.call(@consumer_group.id, raw_data)
+          end
         end
       # This is on purpose - see the notes for this method
-      # rubocop:disable RescueException
+      # rubocop:disable Lint/RescueException
       rescue Exception => e
-        # rubocop:enable RescueException
-        Karafka.monitor.notice_error(self.class, e)
-        @messages_consumer&.stop
-        retry if @messages_consumer
+        Karafka.monitor.instrument('connection.listener.fetch_loop.error', caller: self, error: e)
+        # rubocop:enable Lint/RescueException
+        # We can stop client without a problem, as it will reinitialize itself when running the
+        # `fetch_loop` again
+        @client.stop
+        # We need to clear the consumers cache for current connection when fatal error happens and
+        # we reset the connection. Otherwise for consumers with manual offset management, the
+        # persistence might have stored some data that would be reprocessed
+        Karafka::Persistence::Consumers.clear
+        sleep(@consumer_group.reconnect_timeout) && retry
       end
 
-      private
-
-      # @return [Karafka::Connection::MessagesConsumer] wrapped kafka consumer for a given topic
+      # @return [Karafka::Connection::Client] wrapped kafka consuming client for a given topic
       # consumption
-      # @note It adds consumer into Karafka::Server consumers pool for graceful shutdown on exit
-      def messages_consumer
-        @messages_consumer ||= MessagesConsumer.new(consumer_group).tap do |consumer|
-          Karafka::Server.consumers << consumer if Karafka::Server.consumers
-        end
+      def client
+        @client ||= Client.new(@consumer_group)
       end
     end
   end
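The instrumentation keys introduced above ('connection.listener.before_fetch_loop', 'connection.listener.fetch_loop' and 'connection.listener.fetch_loop.error') can be observed through Karafka's monitor. A minimal sketch (not part of the diff) of subscribing to the error event from karafka.rb:

    Karafka.monitor.subscribe('connection.listener.fetch_loop.error') do |event|
      Karafka.logger.error("Listener fetch loop error: #{event[:error]}")
    end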
data/lib/karafka/connection/message_delegator.rb ADDED
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Connection
+    # Class that delegates processing of a single received message for which we listen to
+    # a proper processor
+    module MessageDelegator
+      class << self
+        # Delegates message (does something with it)
+        # It will either schedule or run a proper processor action for the incoming message
+        # @param group_id [String] group_id of a group from which a given message came
+        # @param kafka_message [<Kafka::FetchedMessage>] raw message from kafka
+        # @note This should be looped to obtain a constant delegating of new messages
+        def call(group_id, kafka_message)
+          topic = Persistence::Topics.fetch(group_id, kafka_message.topic)
+          consumer = Persistence::Consumers.fetch(topic, kafka_message.partition)
+
+          Karafka.monitor.instrument(
+            'connection.message_delegator.call',
+            caller: self,
+            consumer: consumer,
+            kafka_message: kafka_message
+          ) do
+            # @note We always get a single message within single delegator, which means that
+            # we don't care if user marked it as a batch consumed or not.
+            consumer.params_batch = Params::Builders::ParamsBatch.from_kafka_messages(
+              [kafka_message],
+              topic
+            )
+            consumer.call
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/consumers/batch_metadata.rb ADDED
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Brings the batch metadata into consumers that support batch_fetching
+    module BatchMetadata
+      attr_accessor :batch_metadata
+    end
+  end
+end
data/lib/karafka/consumers/callbacks.rb ADDED
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Consumers
+    # Additional callbacks that can be used to trigger some actions on certain moments like
+    # manual offset management, committing or anything else outside of a standard messages flow
+    # They are not included by default, as we don't want to provide functionalities that are
+    # not required by users by default
+    # Please refer to the wiki callbacks page for more details on how to use them
+    module Callbacks
+      # Types of events on which we run callbacks
+      TYPES = %i[
+        after_fetch
+        after_poll
+        before_poll
+        before_stop
+      ].freeze
+
+      private_constant :TYPES
+
+      # Class methods needed to make callbacks run
+      module ClassMethods
+        TYPES.each do |type|
+          # Creates a callback wrapper
+          #
+          # @param method_name [Symbol, String] method name or nil if we plan to provide a block
+          # @yield A block with a code that should be executed before scheduling
+          # @note We don't have to optimize the key fetching here as those are class methods that
+          # are evaluated once upon start
+          define_method(type) do |method_name = nil, &block|
+            key = "consumers.#{Helpers::Inflector.map(to_s)}.#{type}"
+            Karafka::App.monitor.register_event(key)
+
+            Karafka::App.monitor.subscribe(key) do |event|
+              context = event[:context]
+
+              if method_name
+                context.send(method_name)
+              else
+                context.instance_eval(&block)
+              end
+            end
+          end
+        end
+      end
+
+      class << self
+        # @param consumer_class [Class] consumer class that we extend with callbacks
+        def included(consumer_class)
+          consumer_class.class_eval do
+            extend ClassMethods
+          end
+        end
+      end
+
+      # Executes the default consumer flow, runs callbacks and if not halted will call process
+      # method of a proper backend. It is here because it interacts with the default Karafka
+      # call flow and needs to be overwritten to support callbacks
+      def call
+        if self.class.respond_to?(:after_fetch)
+          Karafka::App.monitor.instrument(
+            "consumers.#{Helpers::Inflector.map(self.class.to_s)}.after_fetch",
+            context: self
+          )
+        end
+
+        process
+      end
+    end
+  end
+end
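A short usage sketch (not part of the diff; the consumer class and method names are made up) of how these callbacks are attached once the module is included in a consumer:

    class EventsConsumer < ApplicationConsumer
      include Karafka::Consumers::Callbacks

      # Block form - runs after data is fetched, before processing; self is the consumer instance
      after_fetch do
        Karafka.logger.debug("#{self.class} fetched a new batch")
      end

      # Method form - the referenced method is called on the consumer instance
      before_stop :flush_buffers

      def consume
        # regular processing goes here
      end

      private

      def flush_buffers
        # e.g. push whatever was buffered before the process stops
      end
    end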