karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on a consumer group and topic levels
16
+ class ConsumerGroup < Base
17
+ virtual do |data, errors|
18
+ next unless errors.empty?
19
+ next unless data.key?(:topics)
20
+
21
+ fetched_errors = []
22
+
23
+ data.fetch(:topics).each do |topic|
24
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
25
+ fetched_errors << [[topic, key].flatten, value]
26
+ end
27
+ end
28
+
29
+ fetched_errors
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on a topic levels
16
+ class ConsumerGroupTopic < Base
17
+ configure do |config|
18
+ config.error_messages = YAML.safe_load(
19
+ File.read(
20
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
21
+ )
22
+ ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
+ end
24
+
25
+ virtual do |data|
26
+ next if data[:consumer] < Karafka::Pro::BaseConsumer
27
+
28
+ [[%i[consumer], :consumer_format]]
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Loader requires and loads all the pro components only when they are needed
15
+ class Loader
16
+ # All the pro components that need to be loaded
17
+ COMPONENTS = %w[
18
+ base_consumer
19
+ performance_tracker
20
+ processing/scheduler
21
+ processing/jobs/consume_non_blocking
22
+ processing/jobs_builder
23
+ processing/coordinator
24
+ processing/partitioner
25
+ contracts/base
26
+ contracts/consumer_group
27
+ contracts/consumer_group_topic
28
+ routing/topic_extensions
29
+ routing/builder_extensions
30
+ active_job/consumer
31
+ active_job/dispatcher
32
+ active_job/job_options_contract
33
+ ].freeze
34
+
35
+ private_constant :COMPONENTS
36
+
37
+ class << self
38
+ # Loads all the pro components and configures them wherever it is expected
39
+ # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
40
+ # components
41
+ def setup(config)
42
+ COMPONENTS.each { |component| require_relative(component) }
43
+
44
+ reconfigure(config)
45
+
46
+ load_routing_extensions
47
+ end
48
+
49
+ private
50
+
51
+ # Sets proper config options to use pro components
52
+ # @param config [WaterDrop::Configurable::Node] root config node
53
+ def reconfigure(config)
54
+ icfg = config.internal
55
+
56
+ icfg.processing.coordinator_class = Processing::Coordinator
57
+ icfg.processing.partitioner_class = Processing::Partitioner
58
+ icfg.processing.scheduler = Processing::Scheduler.new
59
+ icfg.processing.jobs_builder = Processing::JobsBuilder.new
60
+
61
+ icfg.active_job.consumer_class = ActiveJob::Consumer
62
+ icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
63
+ icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
64
+
65
+ config.monitor.subscribe(PerformanceTracker.instance)
66
+ end
67
+
68
+ # Loads routing extensions
69
+ def load_routing_extensions
70
+ ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
71
+ ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Tracker used to keep track of performance metrics
15
+ # It provides insights that can be used to optimize processing flow
16
+ class PerformanceTracker
17
+ include Singleton
18
+
19
+ # How many samples do we collect per topic partition
20
+ SAMPLES_COUNT = 200
21
+
22
+ private_constant :SAMPLES_COUNT
23
+
24
+ # Builds up nested concurrent hash for data tracking
25
+ def initialize
26
+ @processing_times = Concurrent::Hash.new do |topics_hash, topic|
27
+ topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
28
+ # This array does not have to be concurrent because we always access single partition
29
+ # data via instrumentation that operates in a single thread via consumer
30
+ partitions_hash[partition] = []
31
+ end
32
+ end
33
+ end
34
+
35
+ # @param topic [String]
36
+ # @param partition [Integer]
37
+ # @return [Float] p95 processing time of a single message from a single topic partition
38
+ def processing_time_p95(topic, partition)
39
+ values = @processing_times[topic][partition]
40
+
41
+ return 0 if values.empty?
42
+ return values.first if values.size == 1
43
+
44
+ percentile(0.95, values)
45
+ end
46
+
47
+ # @private
48
+ # @param event [Karafka::Core::Monitoring::Event] event details
49
+ # Tracks time taken to process a single message of a given topic partition
50
+ def on_consumer_consumed(event)
51
+ consumer = event[:caller]
52
+ messages = consumer.messages
53
+ topic = messages.metadata.topic
54
+ partition = messages.metadata.partition
55
+
56
+ samples = @processing_times[topic][partition]
57
+ samples << event[:time] / messages.count
58
+
59
+ return unless samples.size > SAMPLES_COUNT
60
+
61
+ samples.shift
62
+ end
63
+
64
+ private
65
+
66
+ # Computers the requested percentile out of provided values
67
+ # @param percentile [Float]
68
+ # @param values [Array<String>] all the values based on which we should
69
+ # @return [Float] computed percentile
70
+ def percentile(percentile, values)
71
+ values_sorted = values.sort
72
+
73
+ floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
74
+ mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
75
+
76
+ values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro coordinator that provides extra orchestration methods useful for parallel processing
16
+ # within the same partition
17
+ class Coordinator < ::Karafka::Processing::Coordinator
18
+ # @param args [Object] anything the base coordinator accepts
19
+ def initialize(*args)
20
+ super
21
+ @on_started_invoked = false
22
+ @on_finished_invoked = false
23
+ @flow_lock = Mutex.new
24
+ end
25
+
26
+ # Starts the coordination process
27
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
28
+ # going to coordinate.
29
+ def start(messages)
30
+ super
31
+
32
+ @mutex.synchronize do
33
+ @on_started_invoked = false
34
+ @on_finished_invoked = false
35
+ @first_message = messages.first
36
+ @last_message = messages.last
37
+ end
38
+ end
39
+
40
+ # @return [Boolean] is the coordinated work finished or not
41
+ def finished?
42
+ @running_jobs.zero?
43
+ end
44
+
45
+ # Runs given code only once per all the coordinated jobs upon starting first of them
46
+ def on_started
47
+ @flow_lock.synchronize do
48
+ return if @on_started_invoked
49
+
50
+ @on_started_invoked = true
51
+
52
+ yield(@first_message, @last_message)
53
+ end
54
+ end
55
+
56
+ # Runs once when all the work that is suppose to be coordinated is finished
57
+ # It runs once per all the coordinated jobs and should be used to run any type of post
58
+ # jobs coordination processing execution
59
+ def on_finished
60
+ @flow_lock.synchronize do
61
+ return unless finished?
62
+ return if @on_finished_invoked
63
+
64
+ @on_finished_invoked = true
65
+
66
+ yield(@first_message, @last_message)
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro components related to processing part of Karafka
15
+ module Processing
16
+ # Pro jobs
17
+ module Jobs
18
+ # The main job type in a non-blocking variant.
19
+ # This variant works "like" the regular consumption but pauses the partition for as long
20
+ # as it is needed until a job is done.
21
+ #
22
+ # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
23
+ # if would block.
24
+ #
25
+ # @note It needs to be working with a proper consumer that will handle the partition
26
+ # management. This layer of the framework knows nothing about Kafka messages consumption.
27
+ class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
28
+ # Releases the blocking lock after it is done with the preparation phase for this job
29
+ def before_call
30
+ super
31
+ @non_blocking = true
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro jobs builder that supports lrj
16
+ class JobsBuilder < ::Karafka::Processing::JobsBuilder
17
+ # @param executor [Karafka::Processing::Executor]
18
+ # @param messages [Karafka::Messages::Messages] messages batch to be consumed
19
+ # @param coordinator [Karafka::Processing::Coordinator]
20
+ # @return [Karafka::Processing::Jobs::Consume] blocking job
21
+ # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
22
+ def consume(executor, messages, coordinator)
23
+ if executor.topic.long_running_job?
24
+ Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
25
+ else
26
+ super
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
16
+ class Partitioner < ::Karafka::Processing::Partitioner
17
+ # @param topic [String] topic name
18
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
19
+ # @yieldparam [Integer] group id
20
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
21
+ def call(topic, messages)
22
+ ktopic = @subscription_group.topics.find(topic)
23
+
24
+ @concurrency ||= ::Karafka::App.config.concurrency
25
+
26
+ # We only partition work if we have a virtual partitioner and more than one thread to
27
+ # process the data. With one thread it is not worth partitioning the work as the work
28
+ # itself will be assigned to one thread (pointless work)
29
+ if ktopic.virtual_partitioner? && @concurrency > 1
30
+ # We need to reduce it to number of threads, so the group_id is not a direct effect
31
+ # of the end user action. Otherwise the persistence layer for consumers would cache
32
+ # it forever and it would cause memory leaks
33
+ groupings = messages
34
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
35
+ .values
36
+
37
+ # Reduce the max concurrency to a size that matches the concurrency
38
+ # As mentioned above we cannot use the partitioning keys directly as it could cause
39
+ # memory leaks
40
+ #
41
+ # The algorithm here is simple, we assume that the most costly in terms of processing,
42
+ # will be processing of the biggest group and we reduce the smallest once to have
43
+ # max of groups equal to concurrency
44
+ while groupings.size > @concurrency
45
+ groupings.sort_by! { |grouping| -grouping.size }
46
+
47
+ # Offset order needs to be maintained for virtual partitions
48
+ groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
49
+ end
50
+
51
+ groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
52
+ else
53
+ # When no virtual partitioner, works as regular one
54
+ yield(0, messages)
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Optimizes scheduler that takes into consideration of execution time needed to process
16
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
17
+ #
18
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
19
+ # as when taking IO into consideration, the can achieve optimized parallel processing.
20
+ #
21
+ # This scheduler can also work with virtual partitions.
22
+ #
23
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
24
+ # default FIFO scheduler from the default Karafka scheduler
25
+ class Scheduler < ::Karafka::Processing::Scheduler
26
+ # Schedules jobs in the LJF order for consumption
27
+ #
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
30
+ #
31
+ def schedule_consumption(queue, jobs_array)
32
+ pt = PerformanceTracker.instance
33
+
34
+ ordered = []
35
+
36
+ jobs_array.each do |job|
37
+ messages = job.messages
38
+ message = messages.first
39
+
40
+ cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
41
+
42
+ ordered << [job, cost]
43
+ end
44
+
45
+ ordered.sort_by!(&:last)
46
+ ordered.reverse!
47
+ ordered.map!(&:first)
48
+
49
+ ordered.each do |job|
50
+ queue << job
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions for builder to be able to validate Pro components correct usage
17
+ module BuilderExtensions
18
+ # Validate consumer groups with pro contracts
19
+ # @param block [Proc] routing defining block
20
+ def draw(&block)
21
+ super
22
+
23
+ each do |consumer_group|
24
+ ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions that allow to configure some extra PRO routing options
17
+ module TopicExtensions
18
+ class << self
19
+ # @param base [Class] class we extend
20
+ def included(base)
21
+ base.attr_accessor :long_running_job
22
+ base.attr_accessor :virtual_partitioner
23
+ end
24
+ end
25
+
26
+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
+ def virtual_partitioner?
28
+ virtual_partitioner != nil
29
+ end
30
+
31
+ # @return [Boolean] is a given job on a topic a long-running one
32
+ def long_running_job?
33
+ @long_running_job || false
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this repository
5
+ # and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ module Karafka
10
+ # Namespace for pro components, licensed under the commercial license agreement.
11
+ module Pro
12
+ end
13
+ end
@@ -9,6 +9,7 @@ module Karafka
9
9
  SIGINT
10
10
  SIGQUIT
11
11
  SIGTERM
12
+ SIGTTIN
12
13
  ].freeze
13
14
 
14
15
  HANDLED_SIGNALS.each do |signal|
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic coordinator that allows us to provide coordination objects into consumers.
6
+ #
7
+ # This is a wrapping layer to simplify management of work to be handled around consumption.
8
+ #
9
+ # @note This coordinator needs to be thread safe. Some operations are performed only in the
10
+ # listener thread, but we go with thread-safe by default for all not to worry about potential
11
+ # future mistakes.
12
+ class Coordinator
13
+ # @return [Karafka::TimeTrackers::Pause]
14
+ attr_reader :pause_tracker
15
+
16
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
17
+ def initialize(pause_tracker)
18
+ @pause_tracker = pause_tracker
19
+ @revoked = false
20
+ @consumptions = {}
21
+ @running_jobs = 0
22
+ @mutex = Mutex.new
23
+ end
24
+
25
+ # Starts the coordinator for given consumption jobs
26
+ # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
27
+ # going to coordinate work. Not used with regular coordinator.
28
+ def start(_messages)
29
+ @mutex.synchronize do
30
+ @running_jobs = 0
31
+ # We need to clear the consumption results hash here, otherwise we could end up storing
32
+ # consumption results of consumer instances we no longer control
33
+ @consumptions.clear
34
+ end
35
+ end
36
+
37
+ # Increases number of jobs that we handle with this coordinator
38
+ def increment
39
+ @mutex.synchronize { @running_jobs += 1 }
40
+ end
41
+
42
+ # Decrements number of jobs we handle at the moment
43
+ def decrement
44
+ @mutex.synchronize do
45
+ @running_jobs -= 1
46
+
47
+ return @running_jobs unless @running_jobs.negative?
48
+
49
+ # This should never happen. If it does, something is heavily out of sync. Please reach
50
+ # out to us if you encounter this
51
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
52
+ end
53
+ end
54
+
55
+ # @param consumer [Object] karafka consumer (normal or pro)
56
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
57
+ # consumption processing state.
58
+ def consumption(consumer)
59
+ @mutex.synchronize do
60
+ @consumptions[consumer] ||= Processing::Result.new
61
+ end
62
+ end
63
+
64
+ # Is all the consumption done and finished successfully for this coordinator
65
+ def success?
66
+ @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
67
+ end
68
+
69
+ # Marks given coordinator for processing group as revoked
70
+ #
71
+ # This is invoked in two places:
72
+ # - from the main listener loop when we detect revoked partitions
73
+ # - from the consumer in case checkpointing fails
74
+ #
75
+ # This means, we can end up having consumer being aware that it was revoked prior to the
76
+ # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
77
+ # processed until revocation jobs are done.
78
+ def revoke
79
+ @mutex.synchronize { @revoked = true }
80
+ end
81
+
82
+ # @return [Boolean] is the partition we are processing revoked or not
83
+ def revoked?
84
+ @revoked
85
+ end
86
+ end
87
+ end
88
+ end