karafka 1.4.13 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on a consumer group and topic levels
16
+ class ConsumerGroup < Base
17
+ virtual do |data, errors|
18
+ next unless errors.empty?
19
+ next unless data.key?(:topics)
20
+
21
+ fetched_errors = []
22
+
23
+ data.fetch(:topics).each do |topic|
24
+ ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
25
+ fetched_errors << [[topic, key].flatten, value]
26
+ end
27
+ end
28
+
29
+ fetched_errors
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Contracts
15
+ # Contract for validating correct Pro components setup on a topic level
16
+ class ConsumerGroupTopic < Base
17
+ configure do |config|
18
+ config.error_messages = YAML.safe_load(
19
+ File.read(
20
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
21
+ )
22
+ ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
23
+ end
24
+
25
+ virtual do |data|
26
+ next if data[:consumer] < Karafka::Pro::BaseConsumer
27
+
28
+ [[%i[consumer], :consumer_format]]
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Loader requires and loads all the pro components only when they are needed
15
+ class Loader
16
+ # All the pro components that need to be loaded
17
+ COMPONENTS = %w[
18
+ base_consumer
19
+ performance_tracker
20
+ processing/scheduler
21
+ processing/jobs/consume_non_blocking
22
+ processing/jobs_builder
23
+ processing/coordinator
24
+ processing/partitioner
25
+ contracts/base
26
+ contracts/consumer_group
27
+ contracts/consumer_group_topic
28
+ routing/topic_extensions
29
+ routing/builder_extensions
30
+ active_job/consumer
31
+ active_job/dispatcher
32
+ active_job/job_options_contract
33
+ ].freeze
34
+
35
+ private_constant :COMPONENTS
36
+
37
+ class << self
38
+ # Loads all the pro components and configures them wherever it is expected
39
+ # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
40
+ # components
41
+ def setup(config)
42
+ COMPONENTS.each { |component| require_relative(component) }
43
+
44
+ reconfigure(config)
45
+
46
+ load_routing_extensions
47
+ end
48
+
49
+ private
50
+
51
+ # Sets proper config options to use pro components
52
+ # @param config [Karafka::Core::Configurable::Node] root config node
53
+ def reconfigure(config)
54
+ icfg = config.internal
55
+
56
+ icfg.processing.coordinator_class = Processing::Coordinator
57
+ icfg.processing.partitioner_class = Processing::Partitioner
58
+ icfg.processing.scheduler = Processing::Scheduler.new
59
+ icfg.processing.jobs_builder = Processing::JobsBuilder.new
60
+
61
+ icfg.active_job.consumer_class = ActiveJob::Consumer
62
+ icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
63
+ icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
64
+
65
+ config.monitor.subscribe(PerformanceTracker.instance)
66
+ end
67
+
68
+ # Loads routing extensions
69
+ def load_routing_extensions
70
+ ::Karafka::Routing::Topic.include(Routing::TopicExtensions)
71
+ ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Tracker used to keep track of performance metrics
15
+ # It provides insights that can be used to optimize processing flow
16
+ class PerformanceTracker
17
+ include Singleton
18
+
19
+ # How many samples do we collect per topic partition
20
+ SAMPLES_COUNT = 200
21
+
22
+ private_constant :SAMPLES_COUNT
23
+
24
+ # Builds up nested concurrent hash for data tracking
25
+ def initialize
26
+ @processing_times = Concurrent::Hash.new do |topics_hash, topic|
27
+ topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
28
+ # This array does not have to be concurrent because we always access single partition
29
+ # data via instrumentation that operates in a single thread via consumer
30
+ partitions_hash[partition] = []
31
+ end
32
+ end
33
+ end
34
+
35
+ # @param topic [String]
36
+ # @param partition [Integer]
37
+ # @return [Float] p95 processing time of a single message from a single topic partition
38
+ def processing_time_p95(topic, partition)
39
+ values = @processing_times[topic][partition]
40
+
41
+ return 0 if values.empty?
42
+ return values.first if values.size == 1
43
+
44
+ percentile(0.95, values)
45
+ end
46
+
47
+ # @private
48
+ # @param event [Karafka::Core::Monitoring::Event] event details
49
+ # Tracks time taken to process a single message of a given topic partition
50
+ def on_consumer_consumed(event)
51
+ consumer = event[:caller]
52
+ messages = consumer.messages
53
+ topic = messages.metadata.topic
54
+ partition = messages.metadata.partition
55
+
56
+ samples = @processing_times[topic][partition]
57
+ samples << event[:time] / messages.count
58
+
59
+ return unless samples.size > SAMPLES_COUNT
60
+
61
+ samples.shift
62
+ end
63
+
64
+ private
65
+
66
+ # Computes the requested percentile out of provided values
67
+ # @param percentile [Float]
68
+ # @param values [Array<Numeric>] all the values used to compute the percentile
69
+ # @return [Float] computed percentile
70
+ def percentile(percentile, values)
71
+ values_sorted = values.sort
72
+
73
+ floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
74
+ mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
75
+
76
+ values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro coordinator that provides extra orchestration methods useful for parallel processing
16
+ # within the same partition
17
+ class Coordinator < ::Karafka::Processing::Coordinator
18
+ # @param args [Object] anything the base coordinator accepts
19
+ def initialize(*args)
20
+ super
21
+ @on_started_invoked = false
22
+ @on_finished_invoked = false
23
+ @flow_lock = Mutex.new
24
+ end
25
+
26
+ # Starts the coordination process
27
+ # @param messages [Array<Karafka::Messages::Message>] messages for which processing we are
28
+ # going to coordinate.
29
+ def start(messages)
30
+ super
31
+
32
+ @mutex.synchronize do
33
+ @on_started_invoked = false
34
+ @on_finished_invoked = false
35
+ @first_message = messages.first
36
+ @last_message = messages.last
37
+ end
38
+ end
39
+
40
+ # @return [Boolean] is the coordinated work finished or not
41
+ def finished?
42
+ @running_jobs.zero?
43
+ end
44
+
45
+ # Runs given code only once per all the coordinated jobs upon starting first of them
46
+ def on_started
47
+ @flow_lock.synchronize do
48
+ return if @on_started_invoked
49
+
50
+ @on_started_invoked = true
51
+
52
+ yield(@first_message, @last_message)
53
+ end
54
+ end
55
+
56
+ # Runs once when all the work that is supposed to be coordinated is finished
57
+ # It runs once per all the coordinated jobs and should be used to run any type of post
58
+ # jobs coordination processing execution
59
+ def on_finished
60
+ @flow_lock.synchronize do
61
+ return unless finished?
62
+ return if @on_finished_invoked
63
+
64
+ @on_finished_invoked = true
65
+
66
+ yield(@first_message, @last_message)
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro components related to processing part of Karafka
15
+ module Processing
16
+ # Pro jobs
17
+ module Jobs
18
+ # The main job type in a non-blocking variant.
19
+ # This variant works "like" the regular consumption but pauses the partition for as long
20
+ # as it is needed until a job is done.
21
+ #
22
+ # It can be useful when having long lasting jobs that would exceed `max.poll.interval`
23
+ # if they would block.
24
+ #
25
+ # @note It needs to be working with a proper consumer that will handle the partition
26
+ # management. This layer of the framework knows nothing about Kafka messages consumption.
27
+ class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
28
+ # Releases the blocking lock after it is done with the preparation phase for this job
29
+ def before_call
30
+ super
31
+ @non_blocking = true
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro jobs builder that supports lrj
16
+ class JobsBuilder < ::Karafka::Processing::JobsBuilder
17
+ # @param executor [Karafka::Processing::Executor]
18
+ # @param messages [Karafka::Messages::Messages] messages batch to be consumed
19
+ # @param coordinator [Karafka::Processing::Coordinator]
20
+ # @return [Karafka::Processing::Jobs::Consume] blocking job
21
+ # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
22
+ def consume(executor, messages, coordinator)
23
+ if executor.topic.long_running_job?
24
+ Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
25
+ else
26
+ super
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Pro partitioner that can distribute work based on the virtual partitioner settings
16
+ class Partitioner < ::Karafka::Processing::Partitioner
17
+ # @param topic [String] topic name
18
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
19
+ # @yieldparam [Integer] group id
20
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
21
+ def call(topic, messages)
22
+ ktopic = @subscription_group.topics.find(topic)
23
+
24
+ @concurrency ||= ::Karafka::App.config.concurrency
25
+
26
+ # We only partition work if we have a virtual partitioner and more than one thread to
27
+ # process the data. With one thread it is not worth partitioning the work as the work
28
+ # itself will be assigned to one thread (pointless work)
29
+ if ktopic.virtual_partitioner? && @concurrency > 1
30
+ # We need to reduce it to number of threads, so the group_id is not a direct effect
31
+ # of the end user action. Otherwise the persistence layer for consumers would cache
32
+ # it forever and it would cause memory leaks
33
+ groupings = messages
34
+ .group_by { |msg| ktopic.virtual_partitioner.call(msg) }
35
+ .values
36
+
37
+ # Reduce the number of groups to a size that matches the concurrency
38
+ # As mentioned above we cannot use the partitioning keys directly as it could cause
39
+ # memory leaks
40
+ #
41
+ # The algorithm here is simple, we assume that the most costly in terms of processing,
42
+ # will be processing of the biggest group and we reduce the smallest ones to have
43
+ # max of groups equal to concurrency
44
+ while groupings.size > @concurrency
45
+ groupings.sort_by! { |grouping| -grouping.size }
46
+
47
+ # Offset order needs to be maintained for virtual partitions
48
+ groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
49
+ end
50
+
51
+ groupings.each_with_index { |messages_group, index| yield(index, messages_group) }
52
+ else
53
+ # When no virtual partitioner, works as regular one
54
+ yield(0, messages)
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module Processing
15
+ # Optimized scheduler that takes into consideration the execution time needed to process
16
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
17
+ #
18
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
19
+ # as when taking IO into consideration, it can achieve optimized parallel processing.
20
+ #
21
+ # This scheduler can also work with virtual partitions.
22
+ #
23
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
24
+ # default FIFO scheduler from the default Karafka scheduler
25
+ class Scheduler < ::Karafka::Processing::Scheduler
26
+ # Schedules jobs in the LJF order for consumption
27
+ #
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
30
+ #
31
+ def schedule_consumption(queue, jobs_array)
32
+ pt = PerformanceTracker.instance
33
+
34
+ ordered = []
35
+
36
+ jobs_array.each do |job|
37
+ messages = job.messages
38
+ message = messages.first
39
+
40
+ cost = pt.processing_time_p95(message.topic, message.partition) * messages.size
41
+
42
+ ordered << [job, cost]
43
+ end
44
+
45
+ ordered.sort_by!(&:last)
46
+ ordered.reverse!
47
+ ordered.map!(&:first)
48
+
49
+ ordered.each do |job|
50
+ queue << job
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions for builder to be able to validate Pro components correct usage
17
+ module BuilderExtensions
18
+ # Validate consumer groups with pro contracts
19
+ # @param block [Proc] routing defining block
20
+ def draw(&block)
21
+ super
22
+
23
+ each do |consumer_group|
24
+ ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(consumer_group.to_h)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions that allow to configure some extra PRO routing options
17
+ module TopicExtensions
18
+ class << self
19
+ # @param base [Class] class we extend
20
+ def included(base)
21
+ base.attr_accessor :long_running_job
22
+ base.attr_accessor :virtual_partitioner
23
+ end
24
+ end
25
+
26
+ # @return [Boolean] true if virtual partitioner is defined, false otherwise
27
+ def virtual_partitioner?
28
+ virtual_partitioner != nil
29
+ end
30
+
31
+ # @return [Boolean] is a given job on a topic a long-running one
32
+ def long_running_job?
33
+ @long_running_job || false
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this repository
5
+ # and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ module Karafka
10
+ # Namespace for pro components, licensed under the commercial license agreement.
11
+ module Pro
12
+ end
13
+ end
@@ -9,6 +9,7 @@ module Karafka
9
9
  SIGINT
10
10
  SIGQUIT
11
11
  SIGTERM
12
+ SIGTTIN
12
13
  ].freeze
13
14
 
14
15
  HANDLED_SIGNALS.each do |signal|
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic coordinator that allows us to provide coordination objects into consumers.
6
+ #
7
+ # This is a wrapping layer to simplify management of work to be handled around consumption.
8
+ #
9
+ # @note This coordinator needs to be thread safe. Some operations are performed only in the
10
+ # listener thread, but we go with thread-safe by default for all not to worry about potential
11
+ # future mistakes.
12
+ class Coordinator
13
+ # @return [Karafka::TimeTrackers::Pause]
14
+ attr_reader :pause_tracker
15
+
16
+ # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
17
+ def initialize(pause_tracker)
18
+ @pause_tracker = pause_tracker
19
+ @revoked = false
20
+ @consumptions = {}
21
+ @running_jobs = 0
22
+ @mutex = Mutex.new
23
+ end
24
+
25
+ # Starts the coordinator for given consumption jobs
26
+ # @param _messages [Array<Karafka::Messages::Message>] batch of messages for which we are
27
+ # going to coordinate work. Not used with regular coordinator.
28
+ def start(_messages)
29
+ @mutex.synchronize do
30
+ @running_jobs = 0
31
+ # We need to clear the consumption results hash here, otherwise we could end up storing
32
+ # consumption results of consumer instances we no longer control
33
+ @consumptions.clear
34
+ end
35
+ end
36
+
37
+ # Increases number of jobs that we handle with this coordinator
38
+ def increment
39
+ @mutex.synchronize { @running_jobs += 1 }
40
+ end
41
+
42
+ # Decrements number of jobs we handle at the moment
43
+ def decrement
44
+ @mutex.synchronize do
45
+ @running_jobs -= 1
46
+
47
+ return @running_jobs unless @running_jobs.negative?
48
+
49
+ # This should never happen. If it does, something is heavily out of sync. Please reach
50
+ # out to us if you encounter this
51
+ raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
52
+ end
53
+ end
54
+
55
+ # @param consumer [Object] karafka consumer (normal or pro)
56
+ # @return [Karafka::Processing::Result] result object which we can use to indicate
57
+ # consumption processing state.
58
+ def consumption(consumer)
59
+ @mutex.synchronize do
60
+ @consumptions[consumer] ||= Processing::Result.new
61
+ end
62
+ end
63
+
64
+ # Is all the consumption done and finished successfully for this coordinator
65
+ def success?
66
+ @mutex.synchronize { @running_jobs.zero? && @consumptions.values.all?(&:success?) }
67
+ end
68
+
69
+ # Marks given coordinator for processing group as revoked
70
+ #
71
+ # This is invoked in two places:
72
+ # - from the main listener loop when we detect revoked partitions
73
+ # - from the consumer in case checkpointing fails
74
+ #
75
+ # This means, we can end up having consumer being aware that it was revoked prior to the
76
+ # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
77
+ # processed until revocation jobs are done.
78
+ def revoke
79
+ @mutex.synchronize { @revoked = true }
80
+ end
81
+
82
+ # @return [Boolean] is the partition we are processing revoked or not
83
+ def revoked?
84
+ @revoked
85
+ end
86
+ end
87
+ end
88
+ end