karafka 1.4.13 → 2.0.0

Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/runner.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+module Karafka
+  # Class used to run the Karafka listeners in separate threads
+  class Runner
+    # Starts listening on all the listeners asynchronously and handles the jobs queue closing
+    # after listeners are done with their work.
+    def call
+      # Despite possibility of having several independent listeners, we aim to have one queue for
+      # jobs across and one workers poll for that
+      jobs_queue = Processing::JobsQueue.new
+
+      workers = Processing::WorkersBatch.new(jobs_queue)
+      listeners = Connection::ListenersBatch.new(jobs_queue)
+
+      workers.each(&:async_call)
+      listeners.each(&:async_call)
+
+      # We aggregate threads here for a supervised shutdown process
+      Karafka::Server.workers = workers
+      Karafka::Server.listeners = listeners
+
+      # All the listener threads need to finish
+      listeners.each(&:join)
+
+      # We close the jobs queue only when no listener threads are working.
+      # This ensures, that everything was closed prior to us not accepting anymore jobs and that
+      # no more jobs will be enqueued. Since each listener waits for jobs to finish, once those
+      # are done, we can close.
+      jobs_queue.close
+
+      # All the workers need to stop processing anything before we can stop the runner completely
+      # This ensures that even async long-running jobs have time to finish before we are done
+      # with everything. One thing worth keeping in mind though: It is the end user responsibility
+      # to handle the shutdown detection in their long-running processes. Otherwise if timeout
+      # is exceeded, there will be a forced shutdown.
+      workers.each(&:join)
+    # If anything crashes here, we need to raise the error and crush the runner because it means
+    # that something terrible happened
+    rescue StandardError => e
+      Karafka.monitor.instrument(
+        'error.occurred',
+        caller: self,
+        error: e,
+        type: 'runner.call.error'
+      )
+      Karafka::App.stop!
+      raise e
+    end
+  end
+end
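The ordering in `Runner#call` is the interesting part: listener threads are joined first, only then is the jobs queue closed, and only then are the workers joined. A minimal standalone sketch of that same producer/queue/worker shutdown ordering with plain Ruby threads (the names here are illustrative stand-ins, not Karafka's JobsQueue/WorkersBatch classes):

# Illustrative stand-ins only: a plain Queue and Threads, not Karafka internals
jobs = Queue.new

# "Listeners" enqueue work and then finish
listeners = 2.times.map do |i|
  Thread.new { 3.times { |n| jobs << "job-#{i}-#{n}" } }
end

# "Workers" process jobs until the queue is closed and drained
workers = 2.times.map do
  Thread.new do
    while (job = jobs.pop)
      puts "processed #{job}"
    end
  end
end

listeners.each(&:join) # 1. all producers of work must finish first
jobs.close             # 2. only then stop accepting jobs; pop returns nil once drained
workers.each(&:join)   # 3. workers drain what is left and exit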
data/lib/karafka/serialization/json/deserializer.rb
@@ -1,25 +1,16 @@
 # frozen_string_literal: true
 
 module Karafka
-  # Module for all supported by default serialization and deserialization ways
+  # Module for all supported by default serialization and deserialization ways.
   module Serialization
-    # Namespace for json ser/der
+    # Namespace for json serializers and deserializers.
     module Json
-      # Default Karafka Json deserializer for loading JSON data
+      # Default Karafka Json deserializer for loading JSON data.
       class Deserializer
-        # @param params [Karafka::Params::Params] Full params object that we want to deserialize
+        # @param message [Karafka::Messages::Message] Message object that we want to deserialize
         # @return [Hash] hash with deserialized JSON data
-        # @example
-        #   params = {
-        #     'payload' => "{\"a\":1}",
-        #     'topic' => 'my-topic',
-        #     'headers' => { 'message_type' => :test }
-        #   }
-        #   Deserializer.call(params) #=> { 'a' => 1 }
-        def call(params)
-          params.raw_payload.nil? ? nil : ::JSON.parse(params.raw_payload)
-        rescue ::JSON::ParserError => e
-          raise ::Karafka::Errors::DeserializationError, e
+        def call(message)
+          message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
         end
       end
     end
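In 2.0 the deserializer receives a message object and reads its `raw_payload` instead of a params object. A small usage sketch; the Struct below is just a stand-in for anything responding to `#raw_payload`:

require 'json'
require 'karafka'

# Stand-in message object; the real Karafka::Messages::Message also responds to #raw_payload
FakeMessage = Struct.new(:raw_payload)

deserializer = Karafka::Serialization::Json::Deserializer.new
deserializer.call(FakeMessage.new('{"a":1}')) # => { "a" => 1 }
deserializer.call(FakeMessage.new(nil))       # => nil (tombstone messages have no payload)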
data/lib/karafka/server.rb
@@ -3,8 +3,6 @@
 module Karafka
   # Karafka consuming server class
   class Server
-    @consumer_threads = Concurrent::Array.new
-
     # How long should we sleep between checks on shutting down consumers
     SUPERVISION_SLEEP = 0.1
     # What system exit code should we use when we terminated forcefully
@@ -17,55 +15,77 @@ module Karafka
 
     class << self
       # Set of consuming threads. Each consumer thread contains a single consumer
-      attr_accessor :consumer_threads
+      attr_accessor :listeners
+
+      # Set of workers
+      attr_accessor :workers
 
       # Writer for list of consumer groups that we want to consume in our current process context
       attr_writer :consumer_groups
 
      # Method which runs app
      def run
-        process.on_sigint { stop_supervised }
-        process.on_sigquit { stop_supervised }
-        process.on_sigterm { stop_supervised }
-        run_supervised
+        # Since we do a lot of threading and queuing, we don't want to stop from the trap context
+        # as some things may not work there as expected, that is why we spawn a separate thread to
+        # handle the stopping process
+        process.on_sigint { Thread.new { stop } }
+        process.on_sigquit { Thread.new { stop } }
+        process.on_sigterm { Thread.new { stop } }
+
+        # Start is blocking until stop is called and when we stop, it will wait until
+        # all of the things are ready to stop
+        start
+
+        # We always need to wait for Karafka to stop here since we should wait for the stop running
+        # in a separate thread (or trap context) to indicate everything is closed
+        # Since `#start` is blocking, we were get here only after the runner is done. This will
+        # not add any performance degradation because of that.
+        Thread.pass until Karafka::App.stopped?
+      # Try its best to shutdown underlying components before re-raising
+      # rubocop:disable Lint/RescueException
+      rescue Exception => e
+        # rubocop:enable Lint/RescueException
+        stop
+
+        raise e
       end
 
      # @return [Array<String>] array with names of consumer groups that should be consumed in a
      #   current server context
      def consumer_groups
-        # If not specified, a server will listed on all the topics
+        # If not specified, a server will listen on all the topics
        @consumer_groups ||= Karafka::App.consumer_groups.map(&:name).freeze
      end
 
-      private
-
-      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
-      def process
-        Karafka::App.config.internal.process
-      end
-
      # Starts Karafka with a supervision
      # @note We don't need to sleep because Karafka::Fetcher is locking and waiting to
      #   finish loop (and it won't happen until we explicitly want to stop)
-      def run_supervised
+      def start
        process.supervise
        Karafka::App.run!
-        Karafka::App.config.internal.fetcher.call
+        Karafka::Runner.new.call
      end
 
      # Stops Karafka with a supervision (as long as there is a shutdown timeout)
-      # If consumers won't stop in a given time frame, it will force them to exit
-      def stop_supervised
+      # If consumers or workers won't stop in a given time frame, it will force them to exit
+      #
+      # @note This method is not async. It should not be executed from the workers as it will
+      #   lock them forever. If you need to run Karafka shutdown from within workers threads,
+      #   please start a separate thread to do so.
+      def stop
        Karafka::App.stop!
 
-        # See https://github.com/dry-rb/dry-configurable/issues/93
-        timeout = Thread.new { Karafka::App.config.shutdown_timeout }.join.value
+        timeout = Karafka::App.config.shutdown_timeout
 
        # We check from time to time (for the timeout period) if all the threads finished
        # their work and if so, we can just return and normal shutdown process will take place
-        (timeout * SUPERVISION_CHECK_FACTOR).to_i.times do
-          if consumer_threads.count(&:alive?).zero?
-            Thread.new { Karafka.monitor.instrument('app.stopped') }.join
+        # We divide it by 1000 because we use time in ms.
+        ((timeout / 1_000) * SUPERVISION_CHECK_FACTOR).to_i.times do
+          if listeners.count(&:alive?).zero? &&
+             workers.count(&:alive?).zero?
+
+            Karafka::App.producer.close
+
            return
          end
 
@@ -74,12 +94,33 @@ module Karafka
 
        raise Errors::ForcefulShutdownError
      rescue Errors::ForcefulShutdownError => e
-        Thread.new { Karafka.monitor.instrument('app.stopping.error', error: e) }.join
+        Karafka.monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          type: 'app.stopping.error'
+        )
+
        # We're done waiting, lets kill them!
-        consumer_threads.each(&:terminate)
+        workers.each(&:terminate)
+        listeners.each(&:terminate)
+        # We always need to shutdown clients to make sure we do not force the GC to close consumer.
+        # This can cause memory leaks and crashes.
+        listeners.each(&:shutdown)
+
+        Karafka::App.producer.close
 
        # exit! is not within the instrumentation as it would not trigger due to exit
        Kernel.exit! FORCEFUL_EXIT_CODE
+      ensure
+        Karafka::App.stopped!
+      end
+
+      private
+
+      # @return [Karafka::Process] process wrapper instance used to catch system signal calls
+      def process
+        Karafka::App.config.internal.process
      end
    end
  end
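Note how `run` wires the signal handlers: each trap block only spawns a thread that calls `stop`, because Ruby signal handlers run in a restricted trap context where heavier work (logging, synchronizing, joining threads) is best avoided. A standalone sketch of that pattern, not Karafka code:

finished = Queue.new

Signal.trap('TERM') do
  # Keep the handler minimal: delegate the real shutdown work to a separate thread
  Thread.new do
    # ...closing listeners, workers and producers would happen here...
    finished << true
  end
end

Process.kill('TERM', Process.pid)
finished.pop # block until the shutdown thread reports completion
puts 'shut down cleanly'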
data/lib/karafka/setup/config.rb
@@ -12,19 +12,46 @@ module Karafka
     # enough and will still keep the code simple
     # @see Karafka::Setup::Configurators::Base for more details about configurators api
     class Config
-      extend Dry::Configurable
+      extend ::Karafka::Core::Configurable
 
-      # Contract for checking the config provided by the user
-      CONTRACT = Karafka::Contracts::Config.new.freeze
+      # Defaults for kafka settings, that will be overwritten only if not present already
+      KAFKA_DEFAULTS = {
+        'client.id': 'karafka'
+      }.freeze
 
-      private_constant :CONTRACT
+      # Contains settings that should not be used in production but make life easier in dev
+      DEV_DEFAULTS = {
+        # Will create non-existing topics automatically.
+        # Note that the broker needs to be configured with `auto.create.topics.enable=true`
+        # While it is not recommended in prod, it simplifies work in dev
+        'allow.auto.create.topics': 'true',
+        # We refresh the cluster state often as newly created topics in dev may not be detected
+        # fast enough. Fast enough means within reasonable time to provide decent user experience
+        # While it's only a one time thing for new topics, it can still be irritating to have to
+        # restart the process.
+        'topic.metadata.refresh.interval.ms': 5_000
+      }.freeze
+
+      private_constant :KAFKA_DEFAULTS, :DEV_DEFAULTS
 
       # Available settings
+
+      # Namespace for Pro version related license management. If you use LGPL, no need to worry
+      # about any of this
+      setting :license do
+        # option token [String, false] - license token issued when you acquire a Pro license
+        # Leave false if using the LGPL version and all is going to work just fine :)
+        #
+        # @note By using the commercial components, you accept the LICENSE-COMM commercial license
+        #   terms and conditions
+        setting :token, default: false
+        # option entity [String] for whom we did issue the license
+        setting :entity, default: ''
+      end
+
       # option client_id [String] kafka client_id - used to provide
       # default Kafka groups namespaces and identify that app in kafka
-      setting :client_id
-      # What backend do we want to use to process messages
-      setting :backend, default: :inline
+      setting :client_id, default: 'karafka'
       # option logger [Instance] logger that we want to use
       setting :logger, default: ::Karafka::Instrumentation::Logger.new
       # option monitor [Instance] monitor that we will to use (defaults to Karafka::Monitor)
@@ -33,195 +60,140 @@ module Karafka
       # or they need to maintain their own internal consumer group naming conventions, they
       # can easily do it, replacing the default client_id + consumer name pattern concept
       setting :consumer_mapper, default: Routing::ConsumerMapper.new
-      # Mapper used to remap names of topics, so we can have a clean internal topic naming
-      # despite using any Kafka provider that uses namespacing, etc
-      # It needs to implement two methods:
-      #   - #incoming - for remapping from the incoming message to our internal format
-      #   - #outgoing - for remapping from internal topic name into outgoing message
-      setting :topic_mapper, default: Routing::TopicMapper.new
-      # Default serializer for converting whatever we want to send to kafka to json
-      setting :serializer, default: Karafka::Serialization::Json::Serializer.new
+      # option [Boolean] should we reload consumers with each incoming batch thus effectively
+      # supporting code reload (if someone reloads code) or should we keep the persistence
+      setting :consumer_persistence, default: true
       # Default deserializer for converting incoming data into ruby objects
       setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
-      # If batch_fetching is true, we will fetch kafka messages in batches instead of 1 by 1
-      # @note Fetching does not equal consuming, see batch_consuming description for details
-      setting :batch_fetching, default: true
-      # If batch_consuming is true, we will have access to #params_batch instead of #params.
-      # #params_batch will contain params received from Kafka (may be more than 1) so we can
-      # process them in batches
-      setting :batch_consuming, default: false
-      # option shutdown_timeout [Integer, nil] the number of seconds after which Karafka no
-      # longer wait for the consumers to stop gracefully but instead we force terminate
+      # option [String] should we start with the earliest possible offset or latest
+      # This will set the `auto.offset.reset` value unless present in the kafka scope
+      setting :initial_offset, default: 'earliest'
+      # option [Boolean] should we leave offset management to the user
+      setting :manual_offset_management, default: false
+      # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
+      setting :max_messages, default: 100
+      # option [Integer] number of milliseconds we can wait while fetching data
+      setting :max_wait_time, default: 1_000
+      # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
+      # longer waits for the consumers to stop gracefully but instead we force terminate
       # everything.
-      setting :shutdown_timeout, default: 60
-
-      # option kafka [Hash] - optional - kafka configuration options
-      setting :kafka do
-        # Array with at least one host
-        setting :seed_brokers, default: %w[kafka://127.0.0.1:9092]
-        # option session_timeout [Integer] the number of seconds after which, if a client
-        # hasn't contacted the Kafka cluster, it will be kicked out of the group.
-        setting :session_timeout, default: 30
-        # Time that a given partition will be paused from fetching messages, when message
-        # consumption fails. It allows us to process other partitions, while the error is being
-        # resolved and also "slows" things down, so it prevents from "eating" up all messages and
-        # consuming them with failed code. Use `nil` if you want to pause forever and never retry.
-        setting :pause_timeout, default: 10
-        # option pause_max_timeout [Integer, nil] the maximum number of seconds to pause for,
-        # or `nil` if no maximum should be enforced.
-        setting :pause_max_timeout, default: nil
-        # option pause_exponential_backoff [Boolean] whether to enable exponential backoff
-        setting :pause_exponential_backoff, default: false
-        # option offset_commit_interval [Integer] the interval between offset commits,
-        # in seconds.
-        setting :offset_commit_interval, default: 10
-        # option offset_commit_threshold [Integer] the number of messages that can be
-        # processed before their offsets are committed. If zero, offset commits are
-        # not triggered by message consumption.
-        setting :offset_commit_threshold, default: 0
-        # option heartbeat_interval [Integer] the interval between heartbeats; must be less
-        # than the session window.
-        setting :heartbeat_interval, default: 10
-        # option offset_retention_time [Integer] The length of the retention window, known as
-        # offset retention time
-        setting :offset_retention_time, default: nil
-        # option fetcher_max_queue_size [Integer] max number of items in the fetch queue that
-        # are stored for further processing. Note, that each item in the queue represents a
-        # response from a single broker
-        setting :fetcher_max_queue_size, default: 10
-        # option assignment_strategy [Object] a strategy determining the assignment of
-        # partitions to the consumers.
-        setting :assignment_strategy, default: Karafka::AssignmentStrategies::RoundRobin.new
-        # option max_bytes_per_partition [Integer] the maximum amount of data fetched
-        # from a single partition at a time.
-        setting :max_bytes_per_partition, default: 1_048_576
-        # whether to consume messages starting at the beginning or to just consume new messages
-        setting :start_from_beginning, default: true
-        # option resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed
-        # brokers
-        setting :resolve_seed_brokers, default: false
-        # option min_bytes [Integer] the minimum number of bytes to read before
-        # returning messages from the server; if `max_wait_time` is reached, this
-        # is ignored.
-        setting :min_bytes, default: 1
-        # option max_bytes [Integer] the maximum number of bytes to read before returning messages
-        # from each broker.
-        setting :max_bytes, default: 10_485_760
-        # option max_wait_time [Integer, Float] max_wait_time is the maximum number of seconds to
-        # wait before returning data from a single message fetch. By setting this high you also
-        # increase the fetching throughput - and by setting it low you set a bound on latency.
-        # This configuration overrides `min_bytes`, so you'll _always_ get data back within the
-        # time specified. The default value is one second. If you want to have at most five
-        # seconds of latency, set `max_wait_time` to 5. You should make sure
-        # max_wait_time * num brokers + heartbeat_interval is less than session_timeout.
-        setting :max_wait_time, default: 1
-        # option automatically_mark_as_consumed [Boolean] should we automatically mark received
-        # messages as consumed (processed) after non-error consumption
-        setting :automatically_mark_as_consumed, default: true
-        # option reconnect_timeout [Integer] How long should we wait before trying to reconnect to
-        # Kafka cluster that went down (in seconds)
-        setting :reconnect_timeout, default: 5
-        # option connect_timeout [Integer] Sets the number of seconds to wait while connecting to
-        # a broker for the first time. When ruby-kafka initializes, it needs to connect to at
-        # least one host.
-        setting :connect_timeout, default: 10
-        # option socket_timeout [Integer] Sets the number of seconds to wait when reading from or
-        # writing to a socket connection to a broker. After this timeout expires the connection
-        # will be killed. Note that some Kafka operations are by definition long-running, such as
-        # waiting for new messages to arrive in a partition, so don't set this value too low
-        setting :socket_timeout, default: 30
-        # option partitioner [Object, nil] the partitioner that should be used by the client
-        setting :partitioner, default: nil
-
-        # SSL authentication related settings
-        # option ca_cert [String, nil] SSL CA certificate
-        setting :ssl_ca_cert, default: nil
-        # option ssl_ca_cert_file_path [String, nil] SSL CA certificate file path
-        setting :ssl_ca_cert_file_path, default: nil
-        # option ssl_ca_certs_from_system [Boolean] Use the CA certs from your system's default
-        # certificate store
-        setting :ssl_ca_certs_from_system, default: false
-        # option ssl_verify_hostname [Boolean] Verify the hostname for client certs
-        setting :ssl_verify_hostname, default: true
-        # option ssl_client_cert [String, nil] SSL client certificate
-        setting :ssl_client_cert, default: nil
-        # option ssl_client_cert_key [String, nil] SSL client certificate password
-        setting :ssl_client_cert_key, default: nil
-        # option sasl_gssapi_principal [String, nil] sasl principal
-        setting :sasl_gssapi_principal, default: nil
-        # option sasl_gssapi_keytab [String, nil] sasl keytab
-        setting :sasl_gssapi_keytab, default: nil
-        # option sasl_plain_authzid [String] The authorization identity to use
-        setting :sasl_plain_authzid, default: ''
-        # option sasl_plain_username [String, nil] The username used to authenticate
-        setting :sasl_plain_username, default: nil
-        # option sasl_plain_password [String, nil] The password used to authenticate
-        setting :sasl_plain_password, default: nil
-        # option sasl_scram_username [String, nil] The username used to authenticate
-        setting :sasl_scram_username, default: nil
-        # option sasl_scram_password [String, nil] The password used to authenticate
-        setting :sasl_scram_password, default: nil
-        # option sasl_scram_mechanism [String, nil] Scram mechanism, either 'sha256' or 'sha512'
-        setting :sasl_scram_mechanism, default: nil
-        # option sasl_over_ssl [Boolean] whether to enforce SSL with SASL
-        setting :sasl_over_ssl, default: true
-        # option ssl_client_cert_chain [String, nil] client cert chain or nil if not used
-        setting :ssl_client_cert_chain, default: nil
-        # option ssl_client_cert_key_password [String, nil] the password required to read
-        # the ssl_client_cert_key
-        setting :ssl_client_cert_key_password, default: nil
-        # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
-        # implements method token.
-        setting :sasl_oauth_token_provider, default: nil
-      end
+      setting :shutdown_timeout, default: 60_000
+      # option [Integer] number of threads in which we want to do parallel processing
+      setting :concurrency, default: 5
+      # option [Integer] how long should we wait upon processing error
+      setting :pause_timeout, default: 1_000
+      # option [Integer] what is the max timeout in case of an exponential backoff
+      setting :pause_max_timeout, default: 30_000
+      # option [Boolean] should we use exponential backoff
+      setting :pause_with_exponential_backoff, default: true
+      # option [::WaterDrop::Producer, nil]
+      # Unless configured, will be created once Karafka is configured based on user Karafka setup
+      setting :producer, default: nil
+
+      # rdkafka default options
+      # @see https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
+      setting :kafka, default: {}
 
-      # Namespace for internal settings that should not be modified
-      # It's a temporary step to "declassify" several things internally before we move to a
-      # non global state
+      # Namespace for internal settings that should not be modified directly
       setting :internal do
-        # option routing_builder [Karafka::Routing::Builder] builder instance
-        setting :routing_builder, default: Routing::Builder.new
        # option status [Karafka::Status] app status
        setting :status, default: Status.new
        # option process [Karafka::Process] process status
        # @note In the future, we need to have a single process representation for all the karafka
        #   instances
        setting :process, default: Process.new
-        # option fetcher [Karafka::Fetcher] fetcher instance
-        setting :fetcher, default: Fetcher.new
-        # option configurators [Array<Object>] all configurators that we want to run after
-        # the setup
-        setting :configurators, default: [Configurators::WaterDrop.new]
+
+        setting :routing do
+          # option builder [Karafka::Routing::Builder] builder instance
+          setting :builder, default: Routing::Builder.new
+          # option subscription_groups_builder [Routing::SubscriptionGroupsBuilder] subscription
+          #   group builder
+          setting :subscription_groups_builder, default: Routing::SubscriptionGroupsBuilder.new
+        end
+
+        setting :processing do
+          # option scheduler [Object] scheduler we will be using
+          setting :scheduler, default: Processing::Scheduler.new
+          # option jobs_builder [Object] jobs builder we want to use
+          setting :jobs_builder, default: Processing::JobsBuilder.new
+          # option coordinator [Class] work coordinator we want to user for processing coordination
+          setting :coordinator_class, default: Processing::Coordinator
+          # option partitioner_class [Class] partitioner we use against a batch of data
+          setting :partitioner_class, default: Processing::Partitioner
+        end
+
+        # Karafka components for ActiveJob
+        setting :active_job do
+          # option dispatcher [Karafka::ActiveJob::Dispatcher] default dispatcher for ActiveJob
+          setting :dispatcher, default: ActiveJob::Dispatcher.new
+          # option job_options_contract [Karafka::Contracts::JobOptionsContract] contract for
+          #   ensuring, that extra job options defined are valid
+          setting :job_options_contract, default: ActiveJob::JobOptionsContract.new
+          # option consumer [Class] consumer class that should be used to consume ActiveJob data
+          setting :consumer_class, default: ActiveJob::Consumer
+        end
      end
 
+      # This will load all the defaults that can be later overwritten.
+      # Thanks to that we have an initial state out of the box.
+      configure
+
      class << self
        # Configuring method
-        # @yield Runs a block of code providing a config singleton instance to it
-        # @yieldparam [Karafka::Setup::Config] Karafka config instance
-        def setup
-          configure { |config| yield(config) }
+        # @param block [Proc] block we want to execute with the config instance
+        def setup(&block)
+          configure(&block)
+          merge_kafka_defaults!(config)
+
+          Contracts::Config.new.validate!(config.to_h)
+
+          licenser = Licenser.new
+
+          # Tries to load our license gem and if present will try to load the correct license
+          licenser.prepare_and_verify(config.license)
+
+          configure_components
+
+          Karafka::App.initialized!
        end
 
-        # Everything that should be initialized after the setup
-        # Components are in karafka/config directory and are all loaded one by one
-        # If you want to configure a next component, please add a proper file to config dir
-        def setup_components
-          config
-            .internal
-            .configurators
-            .each { |configurator| configurator.call(config) }
+        private
+
+        # Propagates the kafka setting defaults unless they are already present
+        # This makes it easier to set some values that users usually don't change but still allows
+        # them to overwrite the whole hash if they want to
+        # @param config [Karafka::Core::Configurable::Node] config of this producer
+        def merge_kafka_defaults!(config)
+          KAFKA_DEFAULTS.each do |key, value|
+            next if config.kafka.key?(key)
+
+            config.kafka[key] = value
+          end
+
+          return if Karafka::App.env.production?
+
+          DEV_DEFAULTS.each do |key, value|
+            next if config.kafka.key?(key)
+
+            config.kafka[key] = value
+          end
        end
 
-        # Validate config based on the config contract
-        # @return [Boolean] true if configuration is valid
-        # @raise [Karafka::Errors::InvalidConfigurationError] raised when configuration
-        #   doesn't match with the config contract
-        def validate!
-          validation_result = CONTRACT.call(config.to_h)
+        # Sets up all the components that are based on the user configuration
+        # @note At the moment it is only WaterDrop
+        def configure_components
+          config.producer ||= ::WaterDrop::Producer.new do |producer_config|
+            # In some cases WaterDrop updates the config and we don't want our consumer config to
+            # be polluted by those updates, that's why we copy
+            producer_config.kafka = config.kafka.dup
+            producer_config.logger = config.logger
+          end
 
-          return true if validation_result.success?
+          return unless Karafka.pro?
 
-          raise Errors::InvalidConfigurationError, validation_result.errors.to_h
+          # Runs the pro loader that includes all the pro components
+          require 'karafka/pro/loader'
+          Pro::Loader.setup(config)
        end
      end
    end
data/lib/karafka/status.rb
@@ -3,16 +3,22 @@
 module Karafka
   # App status monitor
   class Status
-    # Available states and their transitions
+    # Available states and their transitions.
     STATES = {
       initializing: :initialize!,
       initialized: :initialized!,
       running: :run!,
-      stopping: :stop!
+      stopping: :stop!,
+      stopped: :stopped!
     }.freeze
 
     private_constant :STATES
 
+    # By default we are in the initializing state
+    def initialize
+      initialize!
+    end
+
     STATES.each do |state, transition|
       define_method :"#{state}?" do
         @status == state
@@ -20,9 +26,12 @@ module Karafka
 
       define_method transition do
         @status = state
-        # Trap context disallows to run certain things that we instrument
-        # so the state changes are executed from a separate thread
-        Thread.new { Karafka.monitor.instrument("app.#{state}") }.join
+
+        # Skip on creation (initializing)
+        # We skip as during this state we do not have yet a monitor
+        return if initializing?
+
+        Karafka.monitor.instrument("app.#{state}")
      end
    end
  end
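The `STATES.each` block metaprograms one predicate and one bang transition method per state. Roughly how that surface looks in use, assuming a booted Karafka app so the monitor is available (transitions past `:initializing` instrument an `app.*` event):

status = Karafka::Status.new
status.initializing? # => true, set by #initialize calling #initialize!
status.run!          # switches to :running and instruments 'app.running'
status.running?      # => true
status.stopped?      # => false until #stopped! is called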
data/lib/karafka/templates/example_consumer.rb.erb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+# Example consumer that prints messages payloads
+class ExampleConsumer < ApplicationConsumer
+  def consume
+    messages.each { |message| puts message.payload }
+  end
+
+  # Run anything upon partition being revoked
+  # def revoked
+  # end
+
+  # Define here any teardown things you want when Karafka server stops
+  # def shutdown
+  # end
+end
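For context, such a consumer gets wired to a topic through the routing DSL in the generated karafka.rb boot file; a hypothetical sketch with a placeholder topic name:

class KarafkaApp < Karafka::App
  routes.draw do
    topic :example do
      consumer ExampleConsumer
    end
  end
end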