karafka 1.4.0 → 2.0.10

Files changed (172)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
data/lib/karafka/pro/active_job/consumer.rb
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ActiveJob
+       # Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
+       # running jobs
+       #
+       # When in LRJ, it will pause a given partition forever and will resume its processing only
+       # when all the jobs are done processing.
+       #
+       # It provides slightly better revocation guarantees than the regular blocking consumer, as
+       # it can stop processing a batch of jobs in the middle after the revocation.
+       class Consumer < Karafka::Pro::BaseConsumer
+         # Runs ActiveJob jobs processing and handles LRJ if needed
+         def consume
+           messages.each do |message|
+             # If for any reason we've lost this partition, it is not worth iterating over new
+             # messages as they are no longer ours
+             break if revoked?
+             break if Karafka::App.stopping?
+
+             ::ActiveJob::Base.execute(
+               ::ActiveSupport::JSON.decode(message.raw_payload)
+             )
+
+             # We cannot mark jobs as done after each one if there are virtual partitions.
+             # Otherwise this could create random markings
+             next if topic.virtual_partitions?
+
+             mark_as_consumed(message)
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/active_job/dispatcher.rb
@@ -0,0 +1,61 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Karafka Pro ActiveJob components
+     module ActiveJob
+       # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
+       # and that allows injecting additional options into the producer, effectively allowing for
+       # much better and more granular control over the dispatch and consumption process.
+       class Dispatcher < ::Karafka::ActiveJob::Dispatcher
+         # Defaults for dispatching
+         # They can be updated by using `#karafka_options` on the job
+         DEFAULTS = {
+           dispatch_method: :produce_async,
+           # We don't create a dummy proc based partitioner as we would have to evaluate it with
+           # each job.
+           partitioner: nil,
+           # Allows for usage of `:key` or `:partition_key`
+           partition_key_type: :key
+         }.freeze
+
+         private_constant :DEFAULTS
+
+         # @param job [ActiveJob::Base] job
+         def call(job)
+           ::Karafka.producer.public_send(
+             fetch_option(job, :dispatch_method, DEFAULTS),
+             dispatch_details(job).merge!(
+               topic: job.queue_name,
+               payload: ::ActiveSupport::JSON.encode(job.serialize)
+             )
+           )
+         end
+
+         private
+
+         # @param job [ActiveJob::Base] job instance
+         # @return [Hash] hash with dispatch details to which we merge topic and payload
+         def dispatch_details(job)
+           partitioner = fetch_option(job, :partitioner, DEFAULTS)
+           key_type = fetch_option(job, :partition_key_type, DEFAULTS)
+
+           return {} unless partitioner
+
+           {
+             key_type => partitioner.call(job)
+           }
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/active_job/job_options_contract.rb
@@ -0,0 +1,32 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module ActiveJob
+       # Contract for validating the options that can be altered with `#karafka_options` per job
+       # class that works with Pro features.
+       class JobOptionsContract < Contracts::Base
+         configure do |config|
+           config.error_messages = YAML.safe_load(
+             File.read(
+               File.join(Karafka.gem_root, 'config', 'errors.yml')
+             )
+           ).fetch('en').fetch('validations').fetch('job_options')
+         end
+
+         optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
+         optional(:partitioner) { |val| val.respond_to?(:call) }
+         optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
+       end
+     end
+   end
+ end
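
The Pro dispatcher above reads these per-job options and the contract validates them. Below is a hedged sketch of how that wiring might look from the application side; it assumes the `karafka_options` class-level API added by `data/lib/karafka/active_job/job_extensions.rb` (listed in the file list but not shown in this diff), and the job class, queue name, and partitioner are hypothetical.

```ruby
# Hypothetical job; `karafka_options` is assumed to come from Karafka's ActiveJob extensions.
class RefundJob < ActiveJob::Base
  queue_as :orders_refunds # dispatched by the Pro dispatcher to the `orders_refunds` topic

  karafka_options(
    # must be :produce_async or :produce_sync per the JobOptionsContract above
    dispatch_method: :produce_sync,
    # must be :key or :partition_key
    partition_key_type: :partition_key,
    # must respond to #call; receives the job and returns the partitioning value
    partitioner: ->(job) { job.arguments.first.to_s }
  )
end
```

With these options, `Dispatcher#call` would invoke `Karafka.producer.produce_sync` and merge `partition_key: partitioner.call(job)` into the dispatch hash alongside `topic:` and `payload:`.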
data/lib/karafka/pro/base_consumer.rb
@@ -0,0 +1,107 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Karafka PRO consumer.
+     #
+     # If you use PRO, all your consumers should inherit (indirectly) from it.
+     #
+     # @note When using LRJ, manual pausing may not be the best idea as resume needs to happen
+     #   after each batch is processed.
+     class BaseConsumer < Karafka::BaseConsumer
+       # Pause for at most 31 years
+       MAX_PAUSE_TIME = 1_000_000_000_000
+
+       private_constant :MAX_PAUSE_TIME
+
+       # Pauses processing of a given partition until we're done with the processing.
+       # This ensures that we can keep polling without reaching the `max.poll.interval`
+       # @note This needs to happen in the listener thread, because we cannot wait on this being
+       #   executed in the workers. Workers may be already running some LRJ jobs that are blocking
+       #   all the threads until finished, yet unless we pause the incoming partitions information,
+       #   we may be kicked out of the consumer group due to not polling often enough
+       def on_before_enqueue
+         return unless topic.long_running_job?
+
+         # This ensures that when running LRJ with VP, things operate as expected and this runs
+         # only once for all the virtual partitions collectively
+         coordinator.on_enqueued do
+           # Pause at the first message in a batch. That way in case of a crash, we will not lose
+           # any messages.
+           #
+           # For VP it applies the same way and since VP cannot be used with MOM we should not have
+           # any edge cases here.
+           pause(coordinator.seek_offset, MAX_PAUSE_TIME)
+         end
+       end
+
+       # Runs extra logic after consumption that is related to handling long-running jobs
+       # @note This overwrites the '#on_after_consume' from the base consumer
+       def on_after_consume
+         coordinator.on_finished do |last_group_message|
+           on_after_consume_regular(last_group_message)
+         end
+       end
+
+       # Trigger method for running on partition revocation.
+       #
+       # @private
+       def on_revoked
+         # We do not want to resume on revocation in case of an LRJ.
+         # For LRJ we resume after the successful processing or do a backoff pause in case of a
+         # failure. Double non-blocking resume could cause problems in coordination.
+         resume unless topic.long_running_job?
+
+         coordinator.revoke
+
+         Karafka.monitor.instrument('consumer.revoked', caller: self) do
+           revoked
+         end
+       rescue StandardError => e
+         Karafka.monitor.instrument(
+           'error.occurred',
+           error: e,
+           caller: self,
+           type: 'consumer.revoked.error'
+         )
+       end
+
+       private
+
+       # Handles the post-consumption flow depending on topic settings
+       #
+       # @param last_group_message [Karafka::Messages::Message]
+       def on_after_consume_regular(last_group_message)
+         if coordinator.success?
+           coordinator.pause_tracker.reset
+
+           # We use the non-blocking one here. Anyone who needs the blocking one can implement it
+           # with manual offset management
+           # Mark as consumed only if manual offset management is not on
+           mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
+
+           # If this is not a long-running job there is nothing for us to do here
+           return unless topic.long_running_job?
+
+           seek(coordinator.seek_offset) unless revoked?
+
+           resume
+         else
+           # If processing failed, we need to pause
+           # For a long-running job this will overwrite the default never-ending pause and will
+           # cause the processing to keep going after the error backoff
+           pause(coordinator.seek_offset)
+         end
+       end
+     end
+   end
+ end
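
For context, here is a hedged sketch of what a consumer built on top of this class might look like. The `long_running_job true` routing flag is assumed to come from the Pro routing extensions (`data/lib/karafka/pro/routing/topic_extensions.rb`, listed above but not shown in this diff); the topic, consumer class, and `generate_report` method are hypothetical.

```ruby
# Hypothetical LRJ consumer; the pausing/resuming implemented in Karafka::Pro::BaseConsumer
# above keeps the partition paused while this work runs, so `max.poll.interval.ms` is not hit.
class ReportsConsumer < Karafka::Pro::BaseConsumer
  def consume
    messages.each do |message|
      generate_report(message.payload) # may take minutes
      mark_as_consumed(message)
    end
  end

  private

  # Placeholder for the actual long-running work
  def generate_report(_payload)
    sleep(60)
  end
end

# Hypothetical routing, assuming the Pro `long_running_job` topic flag:
#
#   topic :reports do
#     consumer ReportsConsumer
#     long_running_job true
#   end
```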
data/lib/karafka/pro/contracts/base.rb
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Namespace for Karafka Pro related contracts
+     module Contracts
+       # Base contract for Pro components contracts
+       class Base < ::Karafka::Contracts::Base
+       end
+     end
+   end
+ end
data/lib/karafka/pro/contracts/consumer_group.rb
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Contracts
+       # Contract for validating correct Pro components setup on the consumer group and topic levels
+       class ConsumerGroup < Base
+         virtual do |data, errors|
+           next unless errors.empty?
+           next unless data.key?(:topics)
+
+           fetched_errors = []
+
+           data.fetch(:topics).each do |topic|
+             ConsumerGroupTopic.new.call(topic).errors.each do |key, value|
+               fetched_errors << [[topic, key].flatten, value]
+             end
+           end
+
+           fetched_errors
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/contracts/consumer_group_topic.rb
@@ -0,0 +1,69 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Contracts
+       # Contract for validating correct Pro components setup on a topic level
+       class ConsumerGroupTopic < Base
+         configure do |config|
+           config.error_messages = YAML.safe_load(
+             File.read(
+               File.join(Karafka.gem_root, 'config', 'errors.yml')
+             )
+           ).fetch('en').fetch('validations').fetch('pro_consumer_group_topic')
+         end
+
+         nested(:virtual_partitions) do
+           required(:active) { |val| [true, false].include?(val) }
+           required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
+           required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
+         end
+
+         virtual do |data, errors|
+           next unless errors.empty?
+           next if data[:consumer] < Karafka::Pro::BaseConsumer
+
+           [[%i[consumer], :consumer_format]]
+         end
+
+         # When virtual partitions are defined, partitioner needs to respond to `#call` and it
+         # cannot be nil
+         virtual do |data, errors|
+           next unless errors.empty?
+
+           virtual_partitions = data[:virtual_partitions]
+
+           next unless virtual_partitions[:active]
+           next if virtual_partitions[:partitioner].respond_to?(:call)
+
+           [[%i[virtual_partitions partitioner], :respond_to_call]]
+         end
+
+         # Make sure that manual offset management is not used together with Virtual Partitions
+         # This would not make any sense as there would be edge cases related to skipping
+         # messages even if there were errors.
+         virtual do |data, errors|
+           next unless errors.empty?
+
+           virtual_partitions = data[:virtual_partitions]
+           manual_offset_management = data[:manual_offset_management]
+
+           next unless virtual_partitions[:active]
+           next unless manual_offset_management
+           next if data[:tags].include?(:active_job)
+
+           [[%i[manual_offset_management], :not_with_virtual_partitions]]
+         end
+       end
+     end
+   end
+ end
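
Since the `ConsumerGroup` contract above calls `ConsumerGroupTopic.new.call(topic).errors`, the topic contract can also be exercised directly. Below is a hedged, illustrative check using a hash that carries only the keys this contract inspects; real topic configs built by the routing layer contain more attributes, so treat this as a sketch rather than a reference.

```ruby
# Illustrative topic config only; values are hypothetical.
topic_config = {
  consumer: Class.new(Karafka::Pro::BaseConsumer),
  tags: [],
  manual_offset_management: true,
  virtual_partitions: {
    active: true,
    partitioner: ->(message) { message.key },
    max_partitions: 5
  }
}

result = Karafka::Pro::Contracts::ConsumerGroupTopic.new.call(topic_config)

# Expected to report the :not_with_virtual_partitions error on manual_offset_management,
# since VP + MOM is only allowed for topics tagged :active_job.
result.errors.each { |key, value| puts "#{key.inspect}: #{value}" }
```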
data/lib/karafka/pro/loader.rb
@@ -0,0 +1,76 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Loader requires and loads all the pro components only when they are needed
+     class Loader
+       # All the pro components that need to be loaded
+       COMPONENTS = %w[
+         base_consumer
+         performance_tracker
+         processing/scheduler
+         processing/jobs/consume_non_blocking
+         processing/jobs_builder
+         processing/coordinator
+         processing/partitioner
+         contracts/base
+         contracts/consumer_group
+         contracts/consumer_group_topic
+         routing/topic_extensions
+         routing/builder_extensions
+         active_job/consumer
+         active_job/dispatcher
+         active_job/job_options_contract
+       ].freeze
+
+       private_constant :COMPONENTS
+
+       class << self
+         # Loads all the pro components and configures them wherever it is expected
+         # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
+         #   components
+         def setup(config)
+           COMPONENTS.each { |component| require_relative(component) }
+
+           reconfigure(config)
+
+           load_routing_extensions
+         end
+
+         private
+
+         # Sets proper config options to use pro components
+         # @param config [WaterDrop::Configurable::Node] root config node
+         def reconfigure(config)
+           icfg = config.internal
+
+           icfg.processing.coordinator_class = Processing::Coordinator
+           icfg.processing.partitioner_class = Processing::Partitioner
+           icfg.processing.scheduler = Processing::Scheduler.new
+           icfg.processing.jobs_builder = Processing::JobsBuilder.new
+
+           icfg.active_job.consumer_class = ActiveJob::Consumer
+           icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
+           icfg.active_job.job_options_contract = ActiveJob::JobOptionsContract.new
+
+           config.monitor.subscribe(PerformanceTracker.instance)
+         end
+
+         # Loads routing extensions
+         def load_routing_extensions
+           ::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
+           ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/performance_tracker.rb
@@ -0,0 +1,80 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Tracker used to keep track of performance metrics
+     # It provides insights that can be used to optimize processing flow
+     class PerformanceTracker
+       include Singleton
+
+       # How many samples do we collect per topic partition
+       SAMPLES_COUNT = 200
+
+       private_constant :SAMPLES_COUNT
+
+       # Builds up nested concurrent hash for data tracking
+       def initialize
+         @processing_times = Concurrent::Hash.new do |topics_hash, topic|
+           topics_hash[topic] = Concurrent::Hash.new do |partitions_hash, partition|
+             # This array does not have to be concurrent because we always access single partition
+             # data via instrumentation that operates in a single thread via consumer
+             partitions_hash[partition] = []
+           end
+         end
+       end
+
+       # @param topic [String]
+       # @param partition [Integer]
+       # @return [Float] p95 processing time of a single message from a single topic partition
+       def processing_time_p95(topic, partition)
+         values = @processing_times[topic][partition]
+
+         return 0 if values.empty?
+         return values.first if values.size == 1
+
+         percentile(0.95, values)
+       end
+
+       # @private
+       # @param event [Karafka::Core::Monitoring::Event] event details
+       # Tracks time taken to process a single message of a given topic partition
+       def on_consumer_consumed(event)
+         consumer = event[:caller]
+         messages = consumer.messages
+         topic = messages.metadata.topic
+         partition = messages.metadata.partition
+
+         samples = @processing_times[topic][partition]
+         samples << event[:time] / messages.count
+
+         return unless samples.size > SAMPLES_COUNT
+
+         samples.shift
+       end
+
+       private
+
+       # Computes the requested percentile out of the provided values
+       # @param percentile [Float]
+       # @param values [Array<Numeric>] all the values based on which we compute the percentile
+       # @return [Float] computed percentile
+       def percentile(percentile, values)
+         values_sorted = values.sort
+
+         floor = (percentile * (values_sorted.length - 1) + 1).floor - 1
+         mod = (percentile * (values_sorted.length - 1) + 1).modulo(1)
+
+         values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
+       end
+     end
+   end
+ end
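
The percentile helper uses linear interpolation between the two nearest ranks. A small standalone walk-through of that arithmetic (not touching the tracker itself), using five made-up samples:

```ruby
values_sorted = [10.0, 20.0, 30.0, 40.0, 50.0]
percentile = 0.95

rank  = percentile * (values_sorted.length - 1) + 1 # => 4.8
floor = rank.floor - 1                              # => 3 (zero-based lower index)
mod   = rank.modulo(1)                              # => 0.8 (distance towards the next rank)

p95 = values_sorted[floor] + (mod * (values_sorted[floor + 1] - values_sorted[floor]))
# => 40.0 + 0.8 * (50.0 - 40.0) = 48.0
```

Each sample fed into the tracker is the average per-message time of a batch (`event[:time] / messages.count`), and only the last 200 samples per topic partition are retained.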
data/lib/karafka/pro/processing/coordinator.rb
@@ -0,0 +1,85 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       # Pro coordinator that provides extra orchestration methods useful for parallel processing
+       # within the same partition
+       class Coordinator < ::Karafka::Processing::Coordinator
+         # @param args [Object] anything the base coordinator accepts
+         def initialize(*args)
+           super
+           @on_enqueued_invoked = false
+           @on_started_invoked = false
+           @on_finished_invoked = false
+           @flow_lock = Mutex.new
+         end
+
+         # Starts the coordination process
+         # @param messages [Array<Karafka::Messages::Message>] messages for which we are going to
+         #   coordinate processing.
+         def start(messages)
+           super
+
+           @mutex.synchronize do
+             @on_enqueued_invoked = false
+             @on_started_invoked = false
+             @on_finished_invoked = false
+             @last_message = messages.last
+           end
+         end
+
+         # @return [Boolean] is the coordinated work finished or not
+         def finished?
+           @running_jobs.zero?
+         end
+
+         # Runs synchronized code once for a collective of virtual partitions prior to work being
+         # enqueued
+         def on_enqueued
+           @flow_lock.synchronize do
+             return if @on_enqueued_invoked
+
+             @on_enqueued_invoked = true
+
+             yield(@last_message)
+           end
+         end
+
+         # Runs the given code only once for all the coordinated jobs, upon starting the first of them
+         def on_started
+           @flow_lock.synchronize do
+             return if @on_started_invoked
+
+             @on_started_invoked = true
+
+             yield(@last_message)
+           end
+         end
+
+         # Runs once when all the work that is supposed to be coordinated is finished
+         # It runs once for all the coordinated jobs and should be used to run any type of
+         # post-coordination processing
+         def on_finished
+           @flow_lock.synchronize do
+             return unless finished?
+             return if @on_finished_invoked
+
+             @on_finished_invoked = true
+
+             yield(@last_message)
+           end
+         end
+       end
+     end
+   end
+ end
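
Each of the three hooks above runs its block at most once per batch, no matter how many virtual-partition jobs share the coordinator. A simplified, standalone sketch of that Mutex-plus-flag guard follows; it is not the real class (whose constructor arguments come from the base coordinator outside this diff), just an illustration of the pattern.

```ruby
# Demonstrates the run-once guard pattern used by on_enqueued/on_started/on_finished.
class RunOnceGuard
  def initialize
    @lock = Mutex.new
    @invoked = false
  end

  def call
    @lock.synchronize do
      return if @invoked

      @invoked = true
      yield
    end
  end
end

guard = RunOnceGuard.new

# Even when several jobs hit this concurrently, the block executes exactly once:
5.times.map { Thread.new { guard.call { puts 'pause partition once' } } }.each(&:join)
```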
data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     # Pro components related to processing part of Karafka
+     module Processing
+       # Pro jobs
+       module Jobs
+         # The main job type in a non-blocking variant.
+         # This variant works "like" the regular consumption but pauses the partition for as long
+         # as it is needed until a job is done.
+         #
+         # It can be useful for long-lasting jobs that would exceed `max.poll.interval`
+         # if they were blocking.
+         #
+         # @note It needs to work with a proper consumer that will handle the partition
+         #   management. This layer of the framework knows nothing about Kafka messages consumption.
+         class ConsumeNonBlocking < ::Karafka::Processing::Jobs::Consume
+           # Makes this job non-blocking from the start
+           # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Consume`
+           def initialize(*args)
+             super
+             @non_blocking = true
+           end
+         end
+       end
+     end
+   end
+ end
data/lib/karafka/pro/processing/jobs_builder.rb
@@ -0,0 +1,32 @@
+ # frozen_string_literal: true
+
+ # This Karafka component is a Pro component.
+ # All of the commercial components are present in the lib/karafka/pro directory of this
+ # repository and their usage requires commercial license agreement.
+ #
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
+ #
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+ # your code to Maciej Mensfeld.
+
+ module Karafka
+   module Pro
+     module Processing
+       # Pro jobs builder that supports LRJ
+       class JobsBuilder < ::Karafka::Processing::JobsBuilder
+         # @param executor [Karafka::Processing::Executor]
+         # @param messages [Karafka::Messages::Messages] messages batch to be consumed
+         # @param coordinator [Karafka::Processing::Coordinator]
+         # @return [Karafka::Processing::Jobs::Consume] blocking job
+         # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non-blocking for LRJ
+         def consume(executor, messages, coordinator)
+           if executor.topic.long_running_job?
+             Jobs::ConsumeNonBlocking.new(executor, messages, coordinator)
+           else
+             super
+           end
+         end
+       end
+     end
+   end
+ end