karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Class used to run the Karafka listeners in separate threads
5
+ class Runner
6
+ # Starts listening on all the listeners asynchronously and handles the jobs queue closing
7
+ # after listeners are done with their work.
8
+ def call
9
+ # Despite possibility of having several independent listeners, we aim to have one queue for
10
+ # jobs across and one workers pool for that
11
+ jobs_queue = Processing::JobsQueue.new
12
+
13
+ workers = Processing::WorkersBatch.new(jobs_queue)
14
+ listeners = Connection::ListenersBatch.new(jobs_queue)
15
+
16
+ workers.each(&:async_call)
17
+ listeners.each(&:async_call)
18
+
19
+ # We aggregate threads here for a supervised shutdown process
20
+ Karafka::Server.workers = workers
21
+ Karafka::Server.listeners = listeners
22
+
23
+ # All the listener threads need to finish
24
+ listeners.each(&:join)
25
+
26
+ # We close the jobs queue only when no listener threads are working.
27
+ # This ensures that everything was closed prior to us not accepting any more jobs and that
28
+ # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
29
+ # are done, we can close.
30
+ jobs_queue.close
31
+
32
+ # All the workers need to stop processing anything before we can stop the runner completely
33
+ # This ensures that even async long-running jobs have time to finish before we are done
34
+ # with everything. One thing worth keeping in mind though: It is the end user's responsibility
35
+ # to handle the shutdown detection in their long-running processes. Otherwise if timeout
36
+ # is exceeded, there will be a forced shutdown.
37
+ workers.each(&:join)
38
+ # If anything crashes here, we need to raise the error and crash the runner because it means
39
+ # that something terrible happened
40
+ rescue StandardError => e
41
+ Karafka.monitor.instrument(
42
+ 'error.occurred',
43
+ caller: self,
44
+ error: e,
45
+ type: 'runner.call.error'
46
+ )
47
+ Karafka::App.stop!
48
+ raise e
49
+ end
50
+ end
51
+ end
@@ -1,25 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- # Module for all supported by default serialization and deserialization ways
4
+ # Module for all supported by default serialization and deserialization ways.
5
5
  module Serialization
6
- # Namespace for json ser/der
6
+ # Namespace for json serializers and deserializers.
7
7
  module Json
8
- # Default Karafka Json deserializer for loading JSON data
8
+ # Default Karafka Json deserializer for loading JSON data.
9
9
  class Deserializer
10
- # @param params [Karafka::Params::Params] Full params object that we want to deserialize
10
+ # @param message [Karafka::Messages::Message] Message object that we want to deserialize
11
11
  # @return [Hash] hash with deserialized JSON data
12
- # @example
13
- # params = {
14
- # 'payload' => "{\"a\":1}",
15
- # 'topic' => 'my-topic',
16
- # 'headers' => { 'message_type' => :test }
17
- # }
18
- # Deserializer.call(params) #=> { 'a' => 1 }
19
- def call(params)
20
- params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
21
- rescue ::JSON::ParserError => e
22
- raise ::Karafka::Errors::DeserializationError, e
12
+ def call(message)
13
+ message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
23
14
  end
24
15
  end
25
16
  end
@@ -3,8 +3,6 @@
3
3
  module Karafka
4
4
  # Karafka consuming server class
5
5
  class Server
6
- @consumer_threads = Concurrent::Array.new
7
-
8
6
  # How long should we sleep between checks on shutting down consumers
9
7
  SUPERVISION_SLEEP = 0.1
10
8
  # What system exit code should we use when we terminated forcefully
@@ -17,55 +15,77 @@ module Karafka
17
15
 
18
16
  class << self
19
17
  # Set of consuming threads. Each consumer thread contains a single consumer
20
- attr_accessor :consumer_threads
18
+ attr_accessor :listeners
19
+
20
+ # Set of workers
21
+ attr_accessor :workers
21
22
 
22
23
  # Writer for list of consumer groups that we want to consume in our current process context
23
24
  attr_writer :consumer_groups
24
25
 
25
26
  # Method which runs app
26
27
  def run
27
- process.on_sigint { stop_supervised }
28
- process.on_sigquit { stop_supervised }
29
- process.on_sigterm { stop_supervised }
30
- run_supervised
28
+ # Since we do a lot of threading and queuing, we don't want to stop from the trap context
29
+ # as some things may not work there as expected, that is why we spawn a separate thread to
30
+ # handle the stopping process
31
+ process.on_sigint { Thread.new { stop } }
32
+ process.on_sigquit { Thread.new { stop } }
33
+ process.on_sigterm { Thread.new { stop } }
34
+
35
+ # Start is blocking until stop is called and when we stop, it will wait until
36
+ # all of the things are ready to stop
37
+ start
38
+
39
+ # We always need to wait for Karafka to stop here since we should wait for the stop running
40
+ # in a separate thread (or trap context) to indicate everything is closed
41
+ # Since `#start` is blocking, we will get here only after the runner is done. This will
42
+ # not add any performance degradation because of that.
43
+ Thread.pass until Karafka::App.stopped?
44
+ # Try its best to shutdown underlying components before re-raising
45
+ # rubocop:disable Lint/RescueException
46
+ rescue Exception => e
47
+ # rubocop:enable Lint/RescueException
48
+ stop
49
+
50
+ raise e
31
51
  end
32
52
 
33
53
  # @return [Array<String>] array with names of consumer groups that should be consumed in a
34
54
  # current server context
35
55
  def consumer_groups
36
- # If not specified, a server will listed on all the topics
56
+ # If not specified, a server will listen on all the topics
37
57
  @consumer_groups ||= Karafka::App.consumer_groups.map(&:name).freeze
38
58
  end
39
59
 
40
- private
41
-
42
- # @return [Karafka::Process] process wrapper instance used to catch system signal calls
43
- def process
44
- Karafka::App.config.internal.process
45
- end
46
-
47
60
  # Starts Karafka with a supervision
48
61
  # @note We don't need to sleep because Karafka::Fetcher is locking and waiting to
49
62
  # finish loop (and it won't happen until we explicitly want to stop)
50
- def run_supervised
63
+ def start
51
64
  process.supervise
52
65
  Karafka::App.run!
53
- Karafka::App.config.internal.fetcher.call
66
+ Karafka::Runner.new.call
54
67
  end
55
68
 
56
69
  # Stops Karafka with a supervision (as long as there is a shutdown timeout)
57
- # If consumers won't stop in a given time frame, it will force them to exit
58
- def stop_supervised
70
+ # If consumers or workers won't stop in a given time frame, it will force them to exit
71
+ #
72
+ # @note This method is not async. It should not be executed from the workers as it will
73
+ # lock them forever. If you need to run Karafka shutdown from within workers threads,
74
+ # please start a separate thread to do so.
75
+ def stop
59
76
  Karafka::App.stop!
60
77
 
61
- # See https://github.com/dry-rb/dry-configurable/issues/93
62
- timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
78
+ timeout = Karafka::App.config.shutdown_timeout
63
79
 
64
80
  # We check from time to time (for the timeout period) if all the threads finished
65
81
  # their work and if so, we can just return and normal shutdown process will take place
66
- (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
67
- if consumer_threads.count(&:alive?).zero?
68
- Thread.new { Karafka.monitor.instrument('app.stopped') }.join
82
+ # We divide it by 1000 because we use time in ms.
83
+ ((timeout / 1_000) * SUPERVISION_CHECK_FACTOR).to_i.times do
84
+ if listeners.count(&:alive?).zero? &&
85
+ workers.count(&:alive?).zero?
86
+
87
+ Karafka::App.producer.close
88
+
69
89
  return
70
90
  end
71
91
 
@@ -74,12 +94,33 @@ module Karafka
74
94
 
75
95
  raise Errors::ForcefulShutdownError
76
96
  rescue Errors::ForcefulShutdownError => e
77
- Thread.new { Karafka.monitor.instrument('app.stopping.error', error: e) }.join
97
+ Karafka.monitor.instrument(
98
+ 'error.occurred',
99
+ caller: self,
100
+ error: e,
101
+ type: 'app.stopping.error'
102
+ )
103
+
78
104
  # We're done waiting, lets kill them!
79
- consumer_threads.each(&:terminate)
105
+ workers.each(&:terminate)
106
+ listeners.each(&:terminate)
107
+ # We always need to shutdown clients to make sure we do not force the GC to close consumer.
108
+ # This can cause memory leaks and crashes.
109
+ listeners.each(&:shutdown)
110
+
111
+ Karafka::App.producer.close
80
112
 
81
113
  # exit! is not within the instrumentation as it would not trigger due to exit
82
114
  Kernel.exit! FORCEFUL_EXIT_CODE
115
+ ensure
116
+ Karafka::App.stopped!
117
+ end
118
+
119
+ private
120
+
121
+ # @return [Karafka::Process] process wrapper instance used to catch system signal calls
122
+ def process
123
+ Karafka::App.config.internal.process
83
124
  end
84
125
  end
85
126
  end
@@ -12,19 +12,46 @@ module Karafka
12
12
  # enough and will still keep the code simple
13
13
  # @see Karafka::Setup::Configurators::Base for more details about configurators api
14
14
  class Config
15
- extend Dry::Configurable
15
+ extend ::Karafka::Core::Configurable
16
16
 
17
- # Contract for checking the config provided by the user
18
- CONTRACT = Karafka::Contracts::Config.new.freeze
17
+ # Defaults for kafka settings, that will be overwritten only if not present already
18
+ KAFKA_DEFAULTS = {
19
+ 'client.id': 'karafka'
20
+ }.freeze
19
21
 
20
- private_constant :CONTRACT
22
+ # Contains settings that should not be used in production but make life easier in dev
23
+ DEV_DEFAULTS = {
24
+ # Will create non-existing topics automatically.
25
+ # Note that the broker needs to be configured with `auto.create.topics.enable=true`
26
+ # While it is not recommended in prod, it simplifies work in dev
27
+ 'allow.auto.create.topics': 'true',
28
+ # We refresh the cluster state often as newly created topics in dev may not be detected
29
+ # fast enough. Fast enough means within reasonable time to provide decent user experience
30
+ # While it's only a one time thing for new topics, it can still be irritating to have to
31
+ # restart the process.
32
+ 'topic.metadata.refresh.interval.ms': 5_000
33
+ }.freeze
34
+
35
+ private_constant :KAFKA_DEFAULTS, :DEV_DEFAULTS
21
36
 
22
37
  # Available settings
38
+
39
+ # Namespace for Pro version related license management. If you use LGPL, no need to worry
40
+ # about any of this
41
+ setting :license do
42
+ # option token [String, false] - license token issued when you acquire a Pro license
43
+ # Leave false if using the LGPL version and all is going to work just fine :)
44
+ #
45
+ # @note By using the commercial components, you accept the LICENSE-COMM commercial license
46
+ # terms and conditions
47
+ setting :token, default: false
48
+ # option entity [String] for whom we did issue the license
49
+ setting :entity, default: ''
50
+ end
51
+
23
52
  # option client_id [String] kafka client_id - used to provide
24
53
  # default Kafka groups namespaces and identify that app in kafka
25
- setting :client_id
26
- # What backend do we want to use to process messages
27
- setting :backend, default: :inline
54
+ setting :client_id, default: 'karafka'
28
55
  # option logger [Instance] logger that we want to use
29
56
  setting :logger, default: ::Karafka::Instrumentation::Logger.new
30
57
  # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
@@ -33,195 +60,140 @@ module Karafka
33
60
  # or they need to maintain their own internal consumer group naming conventions, they
34
61
  # can easily do it, replacing the default client_id + consumer name pattern concept
35
62
  setting :consumer_mapper, default: Routing::ConsumerMapper.new
36
- # Mapper used to remap names of topics, so we can have a clean internal topic naming
37
- # despite using any Kafka provider that uses namespacing, etc
38
- # It needs to implement two methods:
39
- # - #incoming - for remapping from the incoming message to our internal format
40
- # - #outgoing - for remapping from internal topic name into outgoing message
41
- setting :topic_mapper, default: Routing::TopicMapper.new
42
- # Default serializer for converting whatever we want to send to kafka to json
43
- setting :serializer, default: Karafka::Serialization::Json::Serializer.new
63
+ # option [Boolean] should we reload consumers with each incoming batch thus effectively
64
+ # supporting code reload (if someone reloads code) or should we keep the persistence
65
+ setting :consumer_persistence, default: true
44
66
  # Default deserializer for converting incoming data into ruby objects
45
67
  setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
46
- # If batch_fetching is true, we will fetch kafka messages in batches instead of 1 by 1
47
- # @note Fetching does not equal consuming, see batch_consuming description for details
48
- setting :batch_fetching, default: true
49
- # If batch_consuming is true, we will have access to #params_batch instead of #params.
50
- # #params_batch will contain params received from Kafka (may be more than 1) so we can
51
- # process them in batches
52
- setting :batch_consuming, default: false
53
- # option shutdown_timeout [Integer, nil] the number of seconds after which Karafka no
54
- # longer wait for the consumers to stop gracefully but instead we force terminate
68
+ # option [String] should we start with the earliest possible offset or latest
69
+ # This will set the `auto.offset.reset` value unless present in the kafka scope
70
+ setting :initial_offset, default: 'earliest'
71
+ # option [Boolean] should we leave offset management to the user
72
+ setting :manual_offset_management, default: false
73
+ # option max_messages [Integer] how many messages do we want to fetch from Kafka in one go
74
+ setting :max_messages, default: 100
75
+ # option [Integer] number of milliseconds we can wait while fetching data
76
+ setting :max_wait_time, default: 1_000
77
+ # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
78
+ # longer waits for the consumers to stop gracefully but instead we force terminate
55
79
  # everything.
56
- setting :shutdown_timeout, default: 60
57
-
58
- # option kafka [Hash] - optional - kafka configuration options
59
- setting :kafka do
60
- # Array with at least one host
61
- setting :seed_brokers, default: %w[kafka://127.0.0.1:9092]
62
- # option session_timeout [Integer] the number of seconds after which, if a client
63
- # hasn't contacted the Kafka cluster, it will be kicked out of the group.
64
- setting :session_timeout, default: 30
65
- # Time that a given partition will be paused from fetching messages, when message
66
- # consumption fails. It allows us to process other partitions, while the error is being
67
- # resolved and also "slows" things down, so it prevents from "eating" up all messages and
68
- # consuming them with failed code. Use `nil` if you want to pause forever and never retry.
69
- setting :pause_timeout, default: 10
70
- # option pause_max_timeout [Integer, nil] the maximum number of seconds to pause for,
71
- # or `nil` if no maximum should be enforced.
72
- setting :pause_max_timeout, default: nil
73
- # option pause_exponential_backoff [Boolean] whether to enable exponential backoff
74
- setting :pause_exponential_backoff, default: false
75
- # option offset_commit_interval [Integer] the interval between offset commits,
76
- # in seconds.
77
- setting :offset_commit_interval, default: 10
78
- # option offset_commit_threshold [Integer] the number of messages that can be
79
- # processed before their offsets are committed. If zero, offset commits are
80
- # not triggered by message consumption.
81
- setting :offset_commit_threshold, default: 0
82
- # option heartbeat_interval [Integer] the interval between heartbeats; must be less
83
- # than the session window.
84
- setting :heartbeat_interval, default: 10
85
- # option offset_retention_time [Integer] The length of the retention window, known as
86
- # offset retention time
87
- setting :offset_retention_time, default: nil
88
- # option fetcher_max_queue_size [Integer] max number of items in the fetch queue that
89
- # are stored for further processing. Note, that each item in the queue represents a
90
- # response from a single broker
91
- setting :fetcher_max_queue_size, default: 10
92
- # option assignment_strategy [Object] a strategy determining the assignment of
93
- # partitions to the consumers.
94
- setting :assignment_strategy, default: Karafka::AssignmentStrategies::RoundRobin.new
95
- # option max_bytes_per_partition [Integer] the maximum amount of data fetched
96
- # from a single partition at a time.
97
- setting :max_bytes_per_partition, default: 1_048_576
98
- # whether to consume messages starting at the beginning or to just consume new messages
99
- setting :start_from_beginning, default: true
100
- # option resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed
101
- # brokers
102
- setting :resolve_seed_brokers, default: false
103
- # option min_bytes [Integer] the minimum number of bytes to read before
104
- # returning messages from the server; if `max_wait_time` is reached, this
105
- # is ignored.
106
- setting :min_bytes, default: 1
107
- # option max_bytes [Integer] the maximum number of bytes to read before returning messages
108
- # from each broker.
109
- setting :max_bytes, default: 10_485_760
110
- # option max_wait_time [Integer, Float] max_wait_time is the maximum number of seconds to
111
- # wait before returning data from a single message fetch. By setting this high you also
112
- # increase the fetching throughput - and by setting it low you set a bound on latency.
113
- # This configuration overrides `min_bytes`, so you'll _always_ get data back within the
114
- # time specified. The default value is one second. If you want to have at most five
115
- # seconds of latency, set `max_wait_time` to 5. You should make sure
116
- # max_wait_time * num brokers + heartbeat_interval is less than session_timeout.
117
- setting :max_wait_time, default: 1
118
- # option automatically_mark_as_consumed [Boolean] should we automatically mark received
119
- # messages as consumed (processed) after non-error consumption
120
- setting :automatically_mark_as_consumed, default: true
121
- # option reconnect_timeout [Integer] How long should we wait before trying to reconnect to
122
- # Kafka cluster that went down (in seconds)
123
- setting :reconnect_timeout, default: 5
124
- # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
125
- # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
126
- # least one host.
127
- setting :connect_timeout, default: 10
128
- # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
129
- # writing to a socket connection to a broker. After this timeout expires the connection
130
- # will be killed. Note that some Kafka operations are by definition long-running, such as
131
- # waiting for new messages to arrive in a partition, so don't set this value too low
132
- setting :socket_timeout, default: 30
133
- # option partitioner [Object, nil] the partitioner that should be used by the client
134
- setting :partitioner, default: nil
135
-
136
- # SSL authentication related settings
137
- # option ca_cert [String, nil] SSL CA certificate
138
- setting :ssl_ca_cert, default: nil
139
- # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
140
- setting :ssl_ca_cert_file_path, default: nil
141
- # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
142
- # certificate store
143
- setting :ssl_ca_certs_from_system, default: false
144
- # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
145
- setting :ssl_verify_hostname, default: true
146
- # option ssl_client_cert [String, nil] SSL client certificate
147
- setting :ssl_client_cert, default: nil
148
- # option ssl_client_cert_key [String, nil] SSL client certificate password
149
- setting :ssl_client_cert_key, default: nil
150
- # option sasl_gssapi_principal [String, nil] sasl principal
151
- setting :sasl_gssapi_principal, default: nil
152
- # option sasl_gssapi_keytab [String, nil] sasl keytab
153
- setting :sasl_gssapi_keytab, default: nil
154
- # option sasl_plain_authzid [String] The authorization identity to use
155
- setting :sasl_plain_authzid, default: ''
156
- # option sasl_plain_username [String, nil] The username used to authenticate
157
- setting :sasl_plain_username, default: nil
158
- # option sasl_plain_password [String, nil] The password used to authenticate
159
- setting :sasl_plain_password, default: nil
160
- # option sasl_scram_username [String, nil] The username used to authenticate
161
- setting :sasl_scram_username, default: nil
162
- # option sasl_scram_password [String, nil] The password used to authenticate
163
- setting :sasl_scram_password, default: nil
164
- # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
165
- setting :sasl_scram_mechanism, default: nil
166
- # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
167
- setting :sasl_over_ssl, default: true
168
- # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
169
- setting :ssl_client_cert_chain, default: nil
170
- # option ssl_client_cert_key_password [String, nil] the password required to read
171
- # the ssl_client_cert_key
172
- setting :ssl_client_cert_key_password, default: nil
173
- # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
174
- # implements method token.
175
- setting :sasl_oauth_token_provider, default: nil
176
- end
80
+ setting :shutdown_timeout, default: 60_000
81
+ # option [Integer] number of threads in which we want to do parallel processing
82
+ setting :concurrency, default: 5
83
+ # option [Integer] how long should we wait upon processing error
84
+ setting :pause_timeout, default: 1_000
85
+ # option [Integer] what is the max timeout in case of an exponential backoff
86
+ setting :pause_max_timeout, default: 30_000
87
+ # option [Boolean] should we use exponential backoff
88
+ setting :pause_with_exponential_backoff, default: true
89
+ # option [::WaterDrop::Producer, nil]
90
+ # Unless configured, will be created once Karafka is configured based on user Karafka setup
91
+ setting :producer, default: nil
92
+
93
+ # rdkafka default options
94
+ # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
95
+ setting :kafka, default: {}
177
96
 
178
- # Namespace for internal settings that should not be modified
179
- # It's a temporary step to "declassify" several things internally before we move to a
180
- # non global state
97
+ # Namespace for internal settings that should not be modified directly
181
98
  setting :internal do
182
- # option routing_builder [Karafka::Routing::Builder] builder instance
183
- setting :routing_builder, default: Routing::Builder.new
184
99
  # option status [Karafka::Status] app status
185
100
  setting :status, default: Status.new
186
101
  # option process [Karafka::Process] process status
187
102
  # @note In the future, we need to have a single process representation for all the karafka
188
103
  # instances
189
104
  setting :process, default: Process.new
190
- # option fetcher [Karafka::Fetcher] fetcher instance
191
- setting :fetcher, default: Fetcher.new
192
- # option configurators [Array<Object>] all configurators that we want to run after
193
- # the setup
194
- setting :configurators, default: [Configurators::WaterDrop.new]
105
+
106
+ setting :routing do
107
+ # option builder [Karafka::Routing::Builder] builder instance
108
+ setting :builder, default: Routing::Builder.new
109
+ # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
110
+ # group builder
111
+ setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
112
+ end
113
+
114
+ setting :processing do
115
+ # option scheduler [Object] scheduler we will be using
116
+ setting :scheduler, default: Processing::Scheduler.new
117
+ # option jobs_builder [Object] jobs builder we want to use
118
+ setting :jobs_builder, default: Processing::JobsBuilder.new
119
+ # option coordinator_class [Class] work coordinator we want to use for processing coordination
120
+ setting :coordinator_class, default: Processing::Coordinator
121
+ # option partitioner_class [Class] partitioner we use against a batch of data
122
+ setting :partitioner_class, default: Processing::Partitioner
123
+ end
124
+
125
+ # Karafka components for ActiveJob
126
+ setting :active_job do
127
+ # option dispatcher [Karafka::ActiveJob::Dispatcher] default dispatcher for ActiveJob
128
+ setting :dispatcher, default: ActiveJob::Dispatcher.new
129
+ # option job_options_contract [Karafka::ActiveJob::JobOptionsContract] contract for
130
+ # ensuring, that extra job options defined are valid
131
+ setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
132
+ # option consumer_class [Class] consumer class that should be used to consume ActiveJob data
133
+ setting :consumer_class, default: ActiveJob::Consumer
134
+ end
195
135
  end
196
136
 
137
+ # This will load all the defaults that can be later overwritten.
138
+ # Thanks to that we have an initial state out of the box.
139
+ configure
140
+
197
141
  class << self
198
142
  # Configuring method
199
- # @yield Runs a block of code providing a config singleton instance to it
200
- # @yieldparam [Karafka::Setup::Config] Karafka config instance
201
- def setup
202
- configure { |config| yield(config) }
143
+ # @param block [Proc] block we want to execute with the config instance
144
+ def setup(&block)
145
+ configure(&block)
146
+ merge_kafka_defaults!(config)
147
+
148
+ Contracts::Config.new.validate!(config.to_h)
149
+
150
+ licenser = Licenser.new
151
+
152
+ # Tries to load our license gem and if present will try to load the correct license
153
+ licenser.prepare_and_verify(config.license)
154
+
155
+ configure_components
156
+
157
+ Karafka::App.initialized!
203
158
  end
204
159
 
205
- # Everything that should be initialized after the setup
206
- # Components are in karafka/config directory and are all loaded one by one
207
- # If you want to configure a next component, please add a proper file to config dir
208
- def setup_components
209
- config
210
- .internal
211
- .configurators
212
- .each { |configurator| configurator.call(config) }
160
+ private
161
+
162
+ # Propagates the kafka setting defaults unless they are already present
163
+ # This makes it easier to set some values that users usually don't change but still allows
164
+ # them to overwrite the whole hash if they want to
165
+ # @param config [Karafka::Core::Configurable::Node] config whose kafka settings get the defaults
166
+ def merge_kafka_defaults!(config)
167
+ KAFKA_DEFAULTS.each do |key, value|
168
+ next if config.kafka.key?(key)
169
+
170
+ config.kafka[key] = value
171
+ end
172
+
173
+ return if Karafka::App.env.production?
174
+
175
+ DEV_DEFAULTS.each do |key, value|
176
+ next if config.kafka.key?(key)
177
+
178
+ config.kafka[key] = value
179
+ end
213
180
  end
214
181
 
215
- # Validate config based on the config contract
216
- # @return [Boolean] true if configuration is valid
217
- # @raise [Karafka::Errors::InvalidConfigurationError] raised when configuration
218
- # doesn't match with the config contract
219
- def validate!
220
- validation_result = CONTRACT.call(config.to_h)
182
+ # Sets up all the components that are based on the user configuration
183
+ # @note At the moment it is only WaterDrop
184
+ def configure_components
185
+ config.producer ||= ::WaterDrop::Producer.new do |producer_config|
186
+ # In some cases WaterDrop updates the config and we don't want our consumer config to
187
+ # be polluted by those updates, that's why we copy
188
+ producer_config.kafka = config.kafka.dup
189
+ producer_config.logger = config.logger
190
+ end
221
191
 
222
- return true if validation_result.success?
192
+ return unless Karafka.pro?
223
193
 
224
- raise Errors::InvalidConfigurationError, validation_result.errors.to_h
194
+ # Runs the pro loader that includes all the pro components
195
+ require 'karafka/pro/loader'
196
+ Pro::Loader.setup(config)
225
197
  end
226
198
  end
227
199
  end
@@ -3,16 +3,22 @@
3
3
  module Karafka
4
4
  # App status monitor
5
5
  class Status
6
- # Available states and their transitions
6
+ # Available states and their transitions.
7
7
  STATES = {
8
8
  initializing: :initialize!,
9
9
  initialized: :initialized!,
10
10
  running: :run!,
11
- stopping: :stop!
11
+ stopping: :stop!,
12
+ stopped: :stopped!
12
13
  }.freeze
13
14
 
14
15
  private_constant :STATES
15
16
 
17
+ # By default we are in the initializing state
18
+ def initialize
19
+ initialize!
20
+ end
21
+
16
22
  STATES.each do |state, transition|
17
23
  define_method :"#{state}?" do
18
24
  @status == state
@@ -20,9 +26,12 @@ module Karafka
20
26
 
21
27
  define_method transition do
22
28
  @status = state
23
- # Trap context disallows to run certain things that we instrument
24
- # so the state changes are executed from a separate thread
25
- Thread.new { Karafka.monitor.instrument("app.#{state}") }.join
29
+
30
+ # Skip on creation (initializing)
31
+ # We skip as during this state we do not have yet a monitor
32
+ return if initializing?
33
+
34
+ Karafka.monitor.instrument("app.#{state}")
26
35
  end
27
36
  end
28
37
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Example consumer that prints message payloads
4
+ class ExampleConsumer < ApplicationConsumer
5
+ def consume
6
+ messages.each { |message| puts message.payload }
7
+ end
8
+
9
+ # Run anything upon partition being revoked
10
+ # def revoked
11
+ # end
12
+
13
+ # Define here any teardown things you want when Karafka server stops
14
+ # def shutdown
15
+ # end
16
+ end