karafka 1.4.0 → 2.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,58 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Pro partitioner that can distribute work based on the virtual partitioner settings
      class Partitioner < ::Karafka::Processing::Partitioner
        # @param topic [String] topic name
        # @param messages [Array<Karafka::Messages::Message>] karafka messages
        # @yieldparam [Integer] group id
        # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
        def call(topic, messages)
          ktopic = @subscription_group.topics.find(topic)

          # We only partition work if we have a virtual partitioner and more than one thread to
          # process the data. With one thread it is not worth partitioning the work as the work
          # itself will be assigned to one thread (pointless work)
          unless ktopic.virtual_partitions? && ktopic.virtual_partitions.max_partitions > 1
            # When no virtual partitioner, works as a regular one
            return yield(0, messages)
          end

          # We need to reduce it to the number of threads, so the group_id is not a direct
          # effect of the end user action. Otherwise the persistence layer for consumers would
          # cache it forever and it would cause memory leaks
          partitioner = ktopic.virtual_partitions.partitioner
          groupings = messages.group_by { |message| partitioner.call(message) }.values

          # Reduce the number of virtual partitions to a size that matches the max_partitions
          # As mentioned above we cannot use the partitioning keys directly as it could cause
          # memory leaks
          #
          # The algorithm here is simple: we assume that the biggest group will be the most
          # costly to process, so we keep merging the smallest groups together until there are
          # at most max_partitions of them
          while groupings.size > ktopic.virtual_partitions.max_partitions
            groupings.sort_by! { |grouping| -grouping.size }

            # Offset order needs to be maintained for virtual partitions
            groupings << (groupings.pop + groupings.pop).sort_by!(&:offset)
          end

          groupings.each_with_index { |group, index| yield(index, group) }
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    module Processing
      # Optimizing scheduler that takes into consideration the execution time needed to process
      # messages from given topics partitions. It uses the non-preemptive LJF algorithm
      #
      # This scheduler is designed to optimize execution times on jobs that perform IO
      # operations, as when taking IO into consideration, it can achieve optimized parallel
      # processing.
      #
      # This scheduler can also work with virtual partitions.
      #
      # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
      # the default FIFO scheduler from the default Karafka scheduler
      class Scheduler < ::Karafka::Processing::Scheduler
        # Schedules jobs in the LJF order for consumption
        #
        # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
        def schedule_consumption(queue, jobs_array)
          tracker = PerformanceTracker.instance

          # Cost of a job is estimated as the p95 processing time of a single message from its
          # topic partition multiplied by the size of the batch
          costed = jobs_array.map do |job|
            batch = job.messages
            sample = batch.first

            [job, tracker.processing_time_p95(sample.topic, sample.partition) * batch.size]
          end

          # Cheapest jobs at the front, then enqueue starting from the most expensive one (LJF)
          costed.sort_by!(&:last)
          costed.reverse_each { |job, _cost| queue << job }
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this
# repository and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
# your code to Maciej Mensfeld.

module Karafka
  module Pro
    # Pro routing components
    module Routing
      # Routing extensions for builder to be able to validate Pro components correct usage
      module BuilderExtensions
        # Runs the regular routing flow and then validates each of the consumer groups with the
        # pro contracts
        #
        # @param block [Proc] routing defining block
        def draw(&block)
          super

          each { |group| ::Karafka::Pro::Contracts::ConsumerGroup.new.validate!(group.to_h) }
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Pro routing components
15
+ module Routing
16
+ # Routing extensions that allow to configure some extra PRO routing options
17
+ module TopicExtensions
18
+ # Internal representation of the virtual partitions settings and configuration
19
+ # This allows us to abstract away things in a nice manner
20
+ #
21
+ # For features with more options than just on/off we use this approach as it simplifies
22
+ # the code. We do not use it for all not to create unneeded complexity
23
+ VirtualPartitions = Struct.new(
24
+ :active,
25
+ :partitioner,
26
+ :max_partitions,
27
+ keyword_init: true
28
+ ) { alias_method :active?, :active }
29
+
30
+ class << self
31
+ # @param base [Class] class we extend
32
+ def prepended(base)
33
+ base.attr_accessor :long_running_job
34
+ end
35
+ end
36
+
37
+ # @param max_partitions [Integer] max number of virtual partitions that can come out of the
38
+ # single distribution flow. When set to more than the Karafka threading, will create
39
+ # more work than workers. When less, can ensure we have spare resources to process other
40
+ # things in parallel.
41
+ # @param partitioner [nil, #call] nil or callable partitioner
42
+ # @return [VirtualPartitions] method that allows to set the virtual partitions details
43
+ # during the routing configuration and then allows to retrieve it
44
+ def virtual_partitions(
45
+ max_partitions: Karafka::App.config.concurrency,
46
+ partitioner: nil
47
+ )
48
+ @virtual_partitions ||= VirtualPartitions.new(
49
+ active: !partitioner.nil?,
50
+ max_partitions: max_partitions,
51
+ partitioner: partitioner
52
+ )
53
+ end
54
+
55
+ # @return [Boolean] are virtual partitions enabled for given topic
56
+ def virtual_partitions?
57
+ virtual_partitions.active?
58
+ end
59
+
60
+ # @return [Boolean] is a given job on a topic a long-running one
61
+ def long_running_job?
62
+ @long_running_job || false
63
+ end
64
+
65
+ # @return [Hash] hash with topic details and the extensions details
66
+ def to_h
67
+ super.merge(
68
+ virtual_partitions: virtual_partitions.to_h
69
+ )
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,13 @@
1
# frozen_string_literal: true

# This Karafka component is a Pro component.
# All of the commercial components are present in the lib/karafka/pro directory of this repository
# and their usage requires commercial license agreement.
#
# Karafka has also commercial-friendly license, commercial support and commercial components.
#
module Karafka
  # Namespace for pro components, licensed under the commercial license agreement.
  module Pro
  end
end
@@ -9,6 +9,7 @@ module Karafka
9
9
  SIGINT
10
10
  SIGQUIT
11
11
  SIGTERM
12
+ SIGTTIN
12
13
  ].freeze
13
14
 
14
15
  HANDLED_SIGNALS.each do |signal|
@@ -0,0 +1,103 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Basic coordinator that allows us to provide coordination objects into consumers.
    #
    # This is a wrapping layer to simplify management of work to be handled around consumption.
    #
    # @note This coordinator needs to be thread safe. Some operations are performed only in the
    #   listener thread, but we go with thread-safe by default for all not to worry about
    #   potential future mistakes.
    class Coordinator
      # @return [Karafka::TimeTrackers::Pause] pause tracker of this topic partition
      attr_reader :pause_tracker

      # @return [Integer, nil] offset we should seek back to on a rollback
      attr_reader :seek_offset

      # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic
      #   partition
      def initialize(pause_tracker)
        @pause_tracker = pause_tracker
        @revoked = false
        @consumption_results = {}
        @active_jobs = 0
        @lock = Mutex.new
      end

      # Starts the coordinator for given consumption jobs
      #
      # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
      #   going to coordinate work. Not used with regular coordinator.
      def start(messages)
        @lock.synchronize do
          @active_jobs = 0

          # Consumption results need to be dropped here, otherwise we could end up storing
          # results of consumer instances we no longer control
          @consumption_results.clear

          # We set it on the first encounter and never again, because then the offset setting
          # should be up to the consumers logic (our or the end user)
          # Seek offset needs to be always initialized as for case where manual offset
          # management is turned on, we need to have reference to the first offset even in case
          # of running multiple batches without marking any messages as consumed. Rollback needs
          # to happen to the last place we know of or the last message + 1 that was marked
          @seek_offset ||= messages.first.offset
        end
      end

      # @param offset [Integer] message offset
      def seek_offset=(offset)
        @lock.synchronize { @seek_offset = offset }
      end

      # Increases number of jobs that we handle with this coordinator
      def increment
        @lock.synchronize { @active_jobs += 1 }
      end

      # Decrements number of jobs we handle at the moment
      def decrement
        @lock.synchronize do
          @active_jobs -= 1

          if @active_jobs.negative?
            # This should never happen. If it does, something is heavily out of sync. Please
            # reach out to us if you encounter this
            raise Karafka::Errors::InvalidCoordinatorState, 'Was zero before decrementation'
          end

          @active_jobs
        end
      end

      # @param consumer [Object] karafka consumer (normal or pro)
      # @return [Karafka::Processing::Result] result object which we can use to indicate
      #   consumption processing state.
      def consumption(consumer)
        @lock.synchronize { @consumption_results[consumer] ||= Processing::Result.new }
      end

      # Is all the consumption done and finished successfully for this coordinator
      def success?
        @lock.synchronize do
          @active_jobs.zero? && @consumption_results.values.all?(&:success?)
        end
      end

      # Marks given coordinator for processing group as revoked
      #
      # This is invoked in two places:
      #   - from the main listener loop when we detect revoked partitions
      #   - from the consumer in case checkpointing fails
      #
      # This means, we can end up having consumer being aware that it was revoked prior to the
      # listener loop dispatching the revocation job. It is ok, as effectively nothing will be
      # processed until revocation jobs are done.
      def revoke
        @lock.synchronize { @revoked = true }
      end

      # @return [Boolean] is the partition we are processing revoked or not
      def revoked?
        @revoked
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Coordinators builder used to build coordinators per topic partition
    #
    # It provides direct pauses access for revocation
    #
    # @note This buffer operates only from the listener loop, thus we do not have to make it
    #   thread-safe.
    class CoordinatorsBuffer
      def initialize
        @pauses_manager = Connection::PausesManager.new
        @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
        @coordinators = Hash.new { |hash, topic| hash[topic] = {} }
      end

      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @return [Object] existing or newly built coordinator for given topic partition
      def find_or_create(topic, partition)
        @coordinators[topic][partition] ||= @coordinator_class.new(
          @pauses_manager.fetch(topic, partition)
        )
      end

      # Resumes processing of partitions for which pause time has ended.
      #
      # @param block we want to run for resumed topic partitions
      # @yieldparam [String] topic name
      # @yieldparam [Integer] partition number
      def resume(&block)
        @pauses_manager.resume(&block)
      end

      # @param topic [String] topic name
      # @param partition [Integer] partition number
      def revoke(topic, partition)
        partitions = @coordinators[topic]

        return unless partitions.key?(partition)

        # The fact that we delete here does not change the fact that the executor still holds
        # the reference to this coordinator. We delete it here, as we will no longer process any
        # new stuff with it and we may need a new coordinator if we regain this partition, but
        # the coordinator may still be in use
        partitions.delete(partition).revoke
      end

      # Clears coordinators and re-creates the pauses manager
      # This should be used only for critical errors recovery
      def reset
        @pauses_manager = Connection::PausesManager.new
        @coordinators.clear
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
# frozen_string_literal: true

module Karafka
  # Namespace that encapsulates all the logic related to processing data.
  module Processing
    # Executors:
    # - run consumers code (for `#call`) or run given preparation / teardown operations when
    #   needed from separate threads.
    # - they re-create consumer instances in case of partitions that were revoked and assigned
    #   back.
    #
    # @note Executors are not removed after partition is revoked. They are not that big and will
    #   be re-used in case of a re-claim
    class Executor
      # @return [String] unique id that we use for state tracking
      attr_reader :id

      # @return [String] subscription group id to which a given executor belongs
      attr_reader :group_id

      # @return [Karafka::Messages::Messages] messages batch
      attr_reader :messages

      # Topic accessibility may be needed for the jobs builder to be able to build a proper job
      # based on the topic settings defined by the end user
      #
      # @return [Karafka::Routing::Topic] topic of this executor
      attr_reader :topic

      # @param group_id [String] id of the subscription group to which the executor belongs
      # @param client [Karafka::Connection::Client] kafka client
      # @param topic [Karafka::Routing::Topic] topic for which this executor will run
      def initialize(group_id, client, topic)
        @id = SecureRandom.uuid
        @group_id = group_id
        @client = client
        @topic = topic
      end

      # Allows us to prepare the consumer in the listener thread prior to the job being send to
      # the queue. It also allows to run some code that is time sensitive and cannot wait in the
      # queue as it could cause starvation.
      #
      # @param messages [Array<Karafka::Messages::Message>]
      # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing
      #   management
      def before_enqueue(messages, coordinator)
        # the moment we've received the batch or actually the moment we've enqueued it, but
        # good enough
        @enqueued_at = Time.now

        # Recreate consumer with each batch if persistence is not enabled
        # We reload the consumers with each batch instead of relying on some external signals
        # when needed for consistency. That way devs may have it on or off and not in this
        # middle state, where re-creation of a consumer instance would occur only sometimes
        @consumer = nil unless ::Karafka::App.config.consumer_persistence

        consumer.coordinator = coordinator

        # First we build messages batch...
        consumer.messages = Messages::Builders::Messages.call(messages, @topic, @enqueued_at)

        consumer.on_before_enqueue
      end

      # Runs setup and warm-up code in the worker prior to running the consumption
      def before_consume
        consumer.on_before_consume
      end

      # Runs consumer data processing against given batch and handles failures and errors.
      def consume
        # We run the consumer client logic...
        consumer.on_consume
      end

      # Runs consumer after consumption code
      def after_consume
        consumer.on_after_consume
      end

      # Runs the controller `#revoked` method that should be triggered when a given consumer is
      # no longer needed due to partitions reassignment.
      #
      # @note Clearing the consumer will ensure, that if we get the partition back, it will be
      #   handled with a consumer with a clean state.
      #
      # @note We run it only when consumer was present, because presence indicates, that at
      #   least a single message has been consumed.
      #
      # @note We do not reset the consumer but we indicate need for recreation instead, because
      #   after the revocation, there still may be `#after_consume` running that needs a given
      #   consumer instance.
      def revoked
        consumer.on_revoked if @consumer
      end

      # Runs the controller `#shutdown` method that should be triggered when a given consumer is
      # no longer needed as we're closing the process.
      #
      # @note While we do not need to clear the consumer here, it's a good habit to clean after
      #   work is done.
      def shutdown
        # There is a case, where the consumer no longer exists because it was revoked, in case
        # like that we do not build a new instance and shutdown should not be triggered.
        consumer.on_shutdown if @consumer
      end

      private

      # @return [Object] cached consumer instance
      def consumer
        @consumer ||= @topic.consumer_class.new.tap do |instance|
          instance.topic = @topic
          instance.client = @client
          instance.producer = ::Karafka::App.producer
        end
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Buffer for executors of a given subscription group. It wraps around the concept of
    # building and caching them, so we can re-use them instead of creating new each time.
    class ExecutorsBuffer
      # @param client [Connection::Client]
      # @param subscription_group [Routing::SubscriptionGroup]
      # @return [ExecutorsBuffer]
      def initialize(client, subscription_group)
        @subscription_group = subscription_group
        @client = client
        # Two nested layers keep track of topics, partitions and processing groups
        @buffer = Hash.new do |topics, ktopic|
          topics[ktopic] = Hash.new { |partitions, partition| partitions[partition] = {} }
        end
      end

      # Finds or creates an executor based on the provided details
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @param parallel_key [String] parallel group key
      # @return [Executor] consumer executor
      def find_or_create(topic, partition, parallel_key)
        ktopic = find_topic(topic)

        @buffer[ktopic][partition][parallel_key] ||= Executor.new(
          @subscription_group.id,
          @client,
          ktopic
        )
      end

      # Revokes executors of a given topic partition, so they won't be used anymore for incoming
      # messages
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      def revoke(topic, partition)
        @buffer[find_topic(topic)][partition].clear
      end

      # Finds all the executors available for a given topic partition
      #
      # @param topic [String] topic name
      # @param partition [Integer] partition number
      # @return [Array<Executor>] executors in use for this topic + partition
      def find_all(topic, partition)
        @buffer[find_topic(topic)][partition].values
      end

      # Iterates over all available executors and yields them together with topic and partition
      # info
      #
      # @yieldparam [Routing::Topic] karafka routing topic object
      # @yieldparam [Integer] partition number
      # @yieldparam [Executor] given executor
      def each
        @buffer.each do |ktopic, by_partition|
          by_partition.each do |partition, executors|
            # The parallel key does not serve any value when iterating, hence each_value
            executors.each_value do |executor|
              yield(ktopic, partition, executor)
            end
          end
        end
      end

      # Clears the executors buffer. Useful for critical errors recovery.
      def clear
        @buffer.clear
      end

      private

      # Finds topic based on its name
      #
      # @param topic [String] topic we're looking for
      # @return [Karafka::Routing::Topic] topic we're interested in
      def find_topic(topic)
        @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
# frozen_string_literal: true

module Karafka
  module Processing
    # Namespace for all the jobs that are supposed to run in workers.
    module Jobs
      # Base class for all the job types that are supposed to run in worker threads.
      # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
      # Only `#call` is required.
      class Base
        extend Forwardable

        # @note Since one job has always one executor, we use the jobs id and group id as
        #   reference
        def_delegators :executor, :id, :group_id

        # @return [Karafka::Processing::Executor, nil] executor assigned to this job
        attr_reader :executor

        # Creates a new job instance
        def initialize
          # All jobs are blocking by default and they can release the lock when blocking
          # operations are done (if needed)
          @non_blocking = false
        end

        # When redefined can run any code prior to the job being enqueued
        # @note This will run in the listener thread and not in the worker
        def before_enqueue; end

        # When redefined can run any code that should run before executing the proper code
        def before_call; end

        # The main entry-point of a job
        def call
          raise NotImplementedError, 'Please implement in a subclass'
        end

        # When redefined can run any code that should run after executing the proper code
        def after_call; end

        # @return [Boolean] is this a non-blocking job
        #
        # @note Blocking job is a job, that will cause the job queue to wait until it is
        #   finished before removing the lock on new jobs being added
        #
        # @note All the jobs are blocking by default
        #
        # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
        #   the blocking things (pausing partition, etc).
        def non_blocking?
          @non_blocking
        end
      end
    end
  end
end