karafka 1.4.13 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Checks the license presence for pro and loads pro components when needed (if any)
5
+ class Licenser
6
+ # Location in the gem where we store the public key
7
+ PUBLIC_KEY_LOCATION = File.join(Karafka.gem_root, 'certs', 'karafka-pro.pem')
8
+
9
+ private_constant :PUBLIC_KEY_LOCATION
10
+
11
+ # Tries to prepare license and verifies it
12
+ #
13
+ # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
14
+ def prepare_and_verify(license_config)
15
+ prepare(license_config)
16
+ verify(license_config)
17
+ end
18
+
19
+ private
20
+
21
+ # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
22
+ def prepare(license_config)
23
+ # If there is token, no action needed
24
+ # We support a case where someone would put the token in instead of using one from the
25
+ # license. That's in case there are limitations to using external package sources, etc
26
+ return if license_config.token
27
+
28
+ begin
29
+ license_config.token || require('karafka-license')
30
+ rescue LoadError
31
+ return
32
+ end
33
+
34
+ license_config.token = Karafka::License.token
35
+ end
36
+
37
+ # Check license and setup license details (if needed)
38
+ # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
39
+ def verify(license_config)
40
+ # If no license, it will just run LGPL components without anything extra
41
+ return unless license_config.token
42
+
43
+ public_key = OpenSSL::PKey::RSA.new(File.read(PUBLIC_KEY_LOCATION))
44
+
45
+ # We gsub and strip in case someone copy-pasted it as a multi line string
46
+ formatted_token = license_config.token.strip.delete("\n").delete(' ')
47
+ decoded_token = Base64.decode64(formatted_token)
48
+
49
+ begin
50
+ data = public_key.public_decrypt(decoded_token)
51
+ rescue OpenSSL::OpenSSLError
52
+ data = nil
53
+ end
54
+
55
+ details = data ? JSON.parse(data) : raise_invalid_license_token(license_config)
56
+
57
+ license_config.entity = details.fetch('entity')
58
+ end
59
+
60
+ # Raises an error with info, that used token is invalid
61
+ # @param license_config [Karafka::Core::Configurable::Node]
62
+ def raise_invalid_license_token(license_config)
63
+ # We set it to false so `Karafka.pro?` method behaves as expected
64
+ license_config.token = false
65
+
66
+ raise(
67
+ Errors::InvalidLicenseTokenError,
68
+ <<~MSG.tr("\n", ' ')
69
+ License key you provided is invalid.
70
+ Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
71
+ MSG
72
+ )
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ # Simple batch metadata object that stores all non-message information received from Kafka
6
+ # cluster while fetching the data.
7
+ #
8
+ # @note This metadata object refers to per batch metadata, not `#message.metadata`
9
+ BatchMetadata = Struct.new(
10
+ :size,
11
+ :first_offset,
12
+ :last_offset,
13
+ :deserializer,
14
+ :partition,
15
+ :topic,
16
+ :created_at,
17
+ :scheduled_at,
18
+ :processed_at,
19
+ keyword_init: true
20
+ ) do
21
+ # This lag describes how long it took for a message to be consumed from the moment it was
22
+ # created
23
+ def consumption_lag
24
+ time_distance_in_ms(processed_at, created_at)
25
+ end
26
+
27
+ # This lag describes how long a batch had to wait before it was picked up by one of the
28
+ # workers
29
+ def processing_lag
30
+ time_distance_in_ms(processed_at, scheduled_at)
31
+ end
32
+
33
+ private
34
+
35
+ # Computes time distance in between two times in ms
36
+ #
37
+ # @param time1 [Time]
38
+ # @param time2 [Time]
39
+ # @return [Integer] distance in between two times in ms
40
+ def time_distance_in_ms(time1, time2)
41
+ ((time1 - time2) * 1_000).round
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ module Builders
6
+ # Builder for creating batch metadata object based on the batch information.
7
+ module BatchMetadata
8
+ class << self
9
+ # Creates metadata based on the kafka batch data.
10
+ #
11
+ # @param messages [Array<Karafka::Messages::Message>] messages array
12
+ # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
13
+ # @param scheduled_at [Time] moment when the batch was scheduled for processing
14
+ # @return [Karafka::Messages::BatchMetadata] batch metadata object
15
+ #
16
+ # @note We do not set `processed_at` as this needs to be assigned when the batch is
17
+ # picked up for processing.
18
+ def call(messages, topic, scheduled_at)
19
+ Karafka::Messages::BatchMetadata.new(
20
+ size: messages.count,
21
+ first_offset: messages.first.offset,
22
+ last_offset: messages.last.offset,
23
+ deserializer: topic.deserializer,
24
+ partition: messages.first.partition,
25
+ topic: topic.name,
26
+ # We go with the assumption that the creation of the whole batch is the last message
27
+ # creation time
28
+ created_at: messages.last.timestamp,
29
+ # When this batch was built and scheduled for execution
30
+ scheduled_at: scheduled_at,
31
+ # We build the batch metadata when we pick up the job in the worker, thus we can use
32
+ # current time here
33
+ processed_at: Time.now
34
+ )
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ # Builders encapsulate logic related to creating message-related objects.
6
+ module Builders
7
+ # Builder of a single message based on raw rdkafka message.
8
+ module Message
9
+ class << self
10
+ # @param kafka_message [Rdkafka::Consumer::Message] raw fetched message
11
+ # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
12
+ # @param received_at [Time] moment when we've received the message
13
+ # @return [Karafka::Messages::Message] message object with payload and metadata
14
+ def call(kafka_message, topic, received_at)
15
+ # @see https://github.com/appsignal/rdkafka-ruby/issues/168
16
+ kafka_message.headers.transform_keys!(&:to_s)
17
+
18
+ metadata = Karafka::Messages::Metadata.new(
19
+ timestamp: kafka_message.timestamp,
20
+ headers: kafka_message.headers,
21
+ key: kafka_message.key,
22
+ offset: kafka_message.offset,
23
+ deserializer: topic.deserializer,
24
+ partition: kafka_message.partition,
25
+ topic: topic.name,
26
+ received_at: received_at
27
+ ).freeze
28
+
29
+ # Karafka messages cannot be frozen because of the lazy deserialization feature
30
+ Karafka::Messages::Message.new(
31
+ kafka_message.payload,
32
+ metadata
33
+ )
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ module Builders
6
+ # Builder for creating message batch instances.
7
+ module Messages
8
+ class << self
9
+ # Creates messages batch with messages inside based on the incoming messages and the
10
+ # topic from which it comes.
11
+ #
12
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages array
13
+ # @param topic [Karafka::Routing::Topic] topic for which we've received messages
14
+ # @param received_at [Time] moment in time when the messages were received
15
+ # @return [Karafka::Messages::Messages] messages batch object
16
+ def call(messages, topic, received_at)
17
+ metadata = BatchMetadata.call(
18
+ messages,
19
+ topic,
20
+ received_at
21
+ ).freeze
22
+
23
+ Karafka::Messages::Messages.new(
24
+ messages,
25
+ metadata
26
+ ).freeze
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- # Params namespace encapsulating all the logic that is directly related to params handling
5
- module Params
4
+ # Messages namespace encapsulating all the logic that is directly related to messages handling
5
+ module Messages
6
6
  # It provides lazy loading not only until the first usage, but also allows us to skip
7
7
  # using deserializer until we execute our logic. That way we can operate with
8
8
  # heavy-deserialization data without slowing down the whole application.
9
- class Params
9
+ class Message
10
10
  extend Forwardable
11
11
 
12
12
  attr_reader :raw_payload, :metadata
@@ -14,7 +14,7 @@ module Karafka
14
14
  def_delegators :metadata, *Metadata.members
15
15
 
16
16
  # @param raw_payload [Object] incoming payload before deserialization
17
- # @param metadata [Karafka::Params::Metadata] message metadata object
17
+ # @param metadata [Karafka::Messages::Metadata] message metadata object
18
18
  def initialize(raw_payload, metadata)
19
19
  @raw_payload = raw_payload
20
20
  @metadata = metadata
@@ -33,21 +33,16 @@ module Karafka
33
33
  @payload
34
34
  end
35
35
 
36
- # @return [Boolean] did given params payload were deserialized already
36
+ # @return [Boolean] did we deserialize payload already
37
37
  def deserialized?
38
38
  @deserialized
39
39
  end
40
40
 
41
41
  private
42
42
 
43
- # @return [Object] tries de-serializes data
43
+ # @return [Object] deserialized data
44
44
  def deserialize
45
- Karafka.monitor.instrument('params.params.deserialize', caller: self) do
46
- metadata.deserializer.call(self)
47
- end
48
- rescue ::StandardError => e
49
- Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
50
- raise e
45
+ metadata.deserializer.call(self)
51
46
  end
52
47
  end
53
48
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ # Messages batch represents a set of messages received from Kafka of a single topic partition.
6
+ class Messages
7
+ include Enumerable
8
+
9
+ attr_reader :metadata
10
+
11
+ # @param messages_array [Array<Karafka::Messages::Message>] array with karafka messages
12
+ # @param metadata [Karafka::Messages::BatchMetadata]
13
+ # @return [Karafka::Messages::Messages] lazy evaluated messages batch object
14
+ def initialize(messages_array, metadata)
15
+ @messages_array = messages_array
16
+ @metadata = metadata
17
+ end
18
+
19
+ # @param block [Proc] block we want to execute per each message
20
+ # @note Invocation of this method will not cause loading and deserializing of messages.
21
+ def each(&block)
22
+ @messages_array.each(&block)
23
+ end
24
+
25
+ # Runs deserialization of all the messages and returns them
26
+ # @return [Array<Karafka::Messages::Message>]
27
+ def deserialize!
28
+ each(&:payload)
29
+ end
30
+
31
+ # @return [Array<Object>] array with deserialized payloads. This method can be useful when
32
+ # we don't care about metadata and just want to extract all the data payloads from the
33
+ # batch
34
+ def payloads
35
+ map(&:payload)
36
+ end
37
+
38
+ # @return [Array<String>] array with raw, not deserialized payloads
39
+ def raw_payloads
40
+ map(&:raw_payload)
41
+ end
42
+
43
+ # @return [Karafka::Messages::Message] first message
44
+ def first
45
+ @messages_array.first
46
+ end
47
+
48
+ # @return [Karafka::Messages::Message] last message
49
+ def last
50
+ @messages_array.last
51
+ end
52
+
53
+ # @return [Integer] number of messages in the batch
54
+ def size
55
+ @messages_array.size
56
+ end
57
+
58
+ # @return [Array<Karafka::Messages::Message>] pure array with messages
59
+ def to_a
60
+ @messages_array
61
+ end
62
+ end
63
+ end
64
+ end
@@ -1,18 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- module Params
5
- # Single message / params metadata details that can be accessed without the need for the
6
- # payload deserialization
4
+ module Messages
5
+ # Single message metadata details that can be accessed without the need of deserialization.
7
6
  Metadata = Struct.new(
8
- :create_time,
7
+ :timestamp,
9
8
  :headers,
10
- :is_control_record,
11
9
  :key,
12
10
  :offset,
13
11
  :deserializer,
14
12
  :partition,
15
- :receive_time,
13
+ :received_at,
16
14
  :topic,
17
15
  keyword_init: true
18
16
  )
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Messages
5
+ # "Fake" message that we use as an abstraction layer when seeking back.
6
+ # This allows us to encapsulate a seek with a simple abstraction
7
+ Seek = Struct.new(:topic, :partition, :offset)
8
+ end
9
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Patches to external components
5
+ module Patches
6
+ # Rdkafka related patches
7
+ module Rdkafka
8
+ # Rdkafka::Consumer patches
9
+ module Consumer
10
+ # A method that allows us to get the native kafka consumer name
11
+ # @return [String] consumer instance name
12
+ # @note We need this to make sure that we allocate proper dispatched events only to
13
+ # callback listeners that should publish them
14
+ def name
15
+ @name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+
22
+ ::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module ActiveJob
15
+ # Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
16
+ # running jobs
17
+ #
18
+ # When in LRJ, it will pause a given partition forever and will resume its processing only
19
+ # when all the jobs are done processing.
20
+ #
21
+ # It contains slightly better revocation guarantees than the regular blocking consumer as
22
+ # it can stop processing batch of jobs in the middle after the revocation.
23
+ class Consumer < Karafka::Pro::BaseConsumer
24
+ # Runs ActiveJob jobs processing and handles lrj if needed
25
+ def consume
26
+ messages.each do |message|
27
+ # If for any reason we've lost this partition, not worth iterating over new messages
28
+ # as they are no longer ours
29
+ break if revoked?
30
+ break if Karafka::App.stopping?
31
+
32
+ ::ActiveJob::Base.execute(
33
+ ::ActiveSupport::JSON.decode(message.raw_payload)
34
+ )
35
+
36
+ # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
37
+ # this could create random markings
38
+ next if topic.virtual_partitioner?
39
+
40
+ mark_as_consumed(message)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Karafka Pro ActiveJob components
15
+ module ActiveJob
16
+ # Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
17
+ # and that allows to inject additional options into the producer, effectively allowing for a
18
+ # much better and more granular control over the dispatch and consumption process.
19
+ class Dispatcher < ::Karafka::ActiveJob::Dispatcher
20
+ # Defaults for dispatching
21
+ # They can be updated by using `#karafka_options` on the job
22
+ DEFAULTS = {
23
+ dispatch_method: :produce_async,
24
+ # We don't create a dummy proc based partitioner as we would have to evaluate it with
25
+ # each job.
26
+ partitioner: nil,
27
+ # Allows for usage of `:key` or `:partition_key`
28
+ partition_key_type: :key
29
+ }.freeze
30
+
31
+ private_constant :DEFAULTS
32
+
33
+ # @param job [ActiveJob::Base] job
34
+ def call(job)
35
+ ::Karafka.producer.public_send(
36
+ fetch_option(job, :dispatch_method, DEFAULTS),
37
+ dispatch_details(job).merge!(
38
+ topic: job.queue_name,
39
+ payload: ::ActiveSupport::JSON.encode(job.serialize)
40
+ )
41
+ )
42
+ end
43
+
44
+ private
45
+
46
+ # @param job [ActiveJob::Base] job instance
47
+ # @return [Hash] hash with dispatch details to which we merge topic and payload
48
+ def dispatch_details(job)
49
+ partitioner = fetch_option(job, :partitioner, DEFAULTS)
50
+ key_type = fetch_option(job, :partition_key_type, DEFAULTS)
51
+
52
+ return {} unless partitioner
53
+
54
+ {
55
+ key_type => partitioner.call(job)
56
+ }
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ module ActiveJob
15
+ # Contract for validating the options that can be altered with `#karafka_options` per job
16
+ # class that works with Pro features.
17
+ class JobOptionsContract < Contracts::Base
18
+ configure do |config|
19
+ config.error_messages = YAML.safe_load(
20
+ File.read(
21
+ File.join(Karafka.gem_root, 'config', 'errors.yml')
22
+ )
23
+ ).fetch('en').fetch('validations').fetch('job_options')
24
+ end
25
+
26
+ optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
27
+ optional(:partitioner) { |val| val.respond_to?(:call) }
28
+ optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Karafka PRO consumer.
15
+ #
16
+ # If you use PRO, all your consumers should inherit (indirectly) from it.
17
+ #
18
+ # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
19
+ # after each batch is processed.
20
+ class BaseConsumer < Karafka::BaseConsumer
21
+ # Pause for up to 31 years
22
+ MAX_PAUSE_TIME = 1_000_000_000_000
23
+
24
+ private_constant :MAX_PAUSE_TIME
25
+
26
+ # Pauses processing of a given partition until we're done with the processing
27
+ # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
+ def on_before_consume
29
+ return unless topic.long_running_job?
30
+
31
+ # This ensures, that when running LRJ with VP, things operate as expected
32
+ coordinator.on_started do |first_group_message|
33
+ # Pause at the first message in a batch. That way in case of a crash, we will not loose
34
+ # any messages
35
+ pause(first_group_message.offset, MAX_PAUSE_TIME)
36
+ end
37
+ end
38
+
39
+ # Runs extra logic after consumption that is related to handling long-running jobs
40
+ # @note This overwrites the '#on_after_consume' from the base consumer
41
+ def on_after_consume
42
+ coordinator.on_finished do |first_group_message, last_group_message|
43
+ on_after_consume_regular(first_group_message, last_group_message)
44
+ end
45
+ end
46
+
47
+ private
48
+
49
+ # Handles the post-consumption flow depending on topic settings
50
+ #
51
+ # @param first_message [Karafka::Messages::Message]
52
+ # @param last_message [Karafka::Messages::Message]
53
+ def on_after_consume_regular(first_message, last_message)
54
+ if coordinator.success?
55
+ coordinator.pause_tracker.reset
56
+
57
+ # We use the non-blocking one here. If someone needs the blocking one, can implement it
58
+ # with manual offset management
59
+ # Mark as consumed only if manual offset management is not on
60
+ mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
61
+
62
+ # If this is not a long-running job there is nothing for us to do here
63
+ return unless topic.long_running_job?
64
+
65
+ # Once processing is done, we move to the new offset based on commits
66
+ # Here, in case manual offset management is off, we have the new proper offset of a
67
+ # first message from another batch from `@seek_offset`. If manual offset management
68
+ # is on, we move to place where the user indicated it was finished. This can create an
69
+ # interesting (yet valid) corner case, where with manual offset management on and no
70
+ # marking as consumed, we end up with an infinite loop processing same messages over and
71
+ # over again
72
+ seek(@seek_offset || first_message.offset)
73
+
74
+ resume
75
+ else
76
+ # If processing failed, we need to pause
77
+ pause(@seek_offset || first_message.offset)
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
+ module Karafka
13
+ module Pro
14
+ # Namespace for Karafka Pro related contracts
15
+ module Contracts
16
+ # Base contract for Pro components contracts
17
+ class Base < ::Karafka::Contracts::Base
18
+ end
19
+ end
20
+ end
21
+ end