karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Resolves the Karafka Pro license token (when one is available) and verifies it against the
  # public key stored under `certs/karafka-pro.pem` in the gem.
  class Licenser
    # Location in the gem where we store the public key
    PUBLIC_KEY_LOCATION = File.join(Karafka.gem_root, 'certs', 'karafka-pro.pem')

    private_constant :PUBLIC_KEY_LOCATION

    # Tries to prepare license and verifies it
    #
    # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
    # @raise [Errors::InvalidLicenseTokenError] when a token is present but cannot be decrypted
    #   with the public key
    def prepare_and_verify(license_config)
      prepare(license_config)
      verify(license_config)
    end

    private

    # Assigns the token from the `karafka-license` package unless one is already configured
    #
    # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
    def prepare(license_config)
      # If there is token, no action needed
      # We support a case where someone would put the token in instead of using one from the
      # license. That's in case there are limitations to using external package sources, etc
      return if license_config.token

      begin
        # The guard above already returned when a token was present, so the package is always
        # required here
        require('karafka-license')
      rescue LoadError
        # The license package is not available, so there is no token to assign
        return
      end

      license_config.token = Karafka::License.token
    end

    # Check license and setup license details (if needed)
    #
    # @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
    # @raise [Errors::InvalidLicenseTokenError] when the token cannot be decrypted
    def verify(license_config)
      # If no license, it will just run LGPL components without anything extra
      return unless license_config.token

      public_key = OpenSSL::PKey::RSA.new(File.read(PUBLIC_KEY_LOCATION))

      # We strip the token and delete inner newlines and spaces in case someone copy-pasted it
      # as a multi line string
      formatted_token = license_config.token.strip.delete("\n ")
      decoded_token = Base64.decode64(formatted_token)

      begin
        data = public_key.public_decrypt(decoded_token)
      rescue OpenSSL::OpenSSLError
        data = nil
      end

      raise_invalid_license_token(license_config) unless data

      license_config.entity = JSON.parse(data).fetch('entity')
    end

    # Raises an error with info, that used token is invalid
    #
    # @param license_config [Karafka::Core::Configurable::Node]
    # @raise [Errors::InvalidLicenseTokenError] always
    def raise_invalid_license_token(license_config)
      # We set it to false so `Karafka.pro?` method behaves as expected
      license_config.token = false

      raise(
        Errors::InvalidLicenseTokenError,
        <<~MSG.tr("\n", ' ')
          License key you provided is invalid.
          Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
        MSG
      )
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    # Value object with the per batch details (everything that is not a message itself) that
    # describe a batch of messages fetched from the Kafka cluster.
    #
    # @note This is the metadata of a whole batch, not of a single message (`#message.metadata`)
    BatchMetadata = Struct.new(
      :size,
      :first_offset,
      :last_offset,
      :deserializer,
      :partition,
      :topic,
      :created_at,
      :scheduled_at,
      :processed_at,
      keyword_init: true
    ) do
      # @return [Integer] number of milliseconds in between `created_at` and `processed_at`
      def consumption_lag
        time_distance_in_ms(processed_at, created_at)
      end

      # @return [Integer] number of milliseconds in between `scheduled_at` and `processed_at`
      def processing_lag
        time_distance_in_ms(processed_at, scheduled_at)
      end

      private

      # Computes how far apart two moments in time are
      #
      # @param later [Time] the moment we subtract from
      # @param earlier [Time] the moment that is subtracted
      # @return [Integer] rounded distance in milliseconds
      def time_distance_in_ms(later, earlier)
        seconds = later - earlier
        (seconds * 1_000).round
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    module Builders
      # Builder for creating batch metadata object based on the batch information.
      module BatchMetadata
        class << self
          # Creates metadata based on the kafka batch data.
          #
          # @param messages [Array<Karafka::Messages::Message>] messages array
          # @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
          # @param scheduled_at [Time] moment when the batch was scheduled for processing
          # @return [Karafka::Messages::BatchMetadata] batch metadata object
          #
          # @note `processed_at` is assigned here and is set to the current time (`Time.now`) at
          #   the moment this method is invoked.
          # @note `messages` is expected to contain at least one message, as the offsets, the
          #   partition and the creation time are read from its first and last elements.
          def call(messages, topic, scheduled_at)
            Karafka::Messages::BatchMetadata.new(
              size: messages.size,
              first_offset: messages.first.offset,
              last_offset: messages.last.offset,
              deserializer: topic.deserializer,
              partition: messages.first.partition,
              topic: topic.name,
              # We go with the assumption that the creation of the whole batch is the last message
              # creation time
              created_at: messages.last.timestamp,
              # When this batch was built and scheduled for execution
              scheduled_at: scheduled_at,
              # We build the batch metadata when we pick up the job in the worker, thus we can use
              # current time here
              processed_at: Time.now
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    # Builders encapsulate logic related to creating messages related objects.
    module Builders
      # Builds a single Karafka message out of a raw rdkafka message.
      module Message
        class << self
          # @param kafka_message [Rdkafka::Consumer::Message] raw fetched message
          # @param topic [Karafka::Routing::Topic] topic for which this message was fetched
          # @param received_at [Time] moment when we've received the message
          # @return [Karafka::Messages::Message] message object with payload and metadata
          #
          # @note The headers hash of the raw message is modified in place: its keys are
          #   converted to strings.
          def call(kafka_message, topic, received_at)
            # @see https://github.com/appsignal/rdkafka-ruby/issues/168
            kafka_message.headers.transform_keys!(&:to_s)

            attributes = {
              timestamp: kafka_message.timestamp,
              headers: kafka_message.headers,
              key: kafka_message.key,
              offset: kafka_message.offset,
              deserializer: topic.deserializer,
              partition: kafka_message.partition,
              topic: topic.name,
              received_at: received_at
            }

            # Only the metadata gets frozen. The message itself has to stay unfrozen because of
            # the lazy deserialization feature
            frozen_metadata = Karafka::Messages::Metadata.new(**attributes).freeze

            Karafka::Messages::Message.new(kafka_message.payload, frozen_metadata)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    module Builders
      # Builder for creating message batch instances.
      module Messages
        class << self
          # Wraps the given messages together with their batch metadata into a frozen batch
          # object.
          #
          # @param messages [Array<Karafka::Messages::Message>] karafka messages array
          # @param topic [Karafka::Routing::Topic] topic for which we've received messages
          # @param received_at [Time] moment in time when the messages were received. It is
          #   forwarded as the third argument of `BatchMetadata.call`
          # @return [Karafka::Messages::Messages] messages batch object
          def call(messages, topic, received_at)
            batch_metadata = BatchMetadata.call(messages, topic, received_at)
            batch_metadata.freeze

            batch = Karafka::Messages::Messages.new(messages, batch_metadata)
            batch.freeze
          end
        end
      end
    end
  end
end
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- # Params namespace encapsulating all the logic that is directly related to params handling
5
- module Params
4
+ # Messages namespace encapsulating all the logic that is directly related to messages handling
5
+ module Messages
6
6
  # It provides lazy loading not only until the first usage, but also allows us to skip
7
7
  # using deserializer until we execute our logic. That way we can operate with
8
8
  # heavy-deserialization data without slowing down the whole application.
9
- class Params
9
+ class Message
10
10
  extend Forwardable
11
11
 
12
12
  attr_reader :raw_payload, :metadata
@@ -14,7 +14,7 @@ module Karafka
14
14
  def_delegators :metadata, *Metadata.members
15
15
 
16
16
  # @param raw_payload [Object] incoming payload before deserialization
17
- # @param metadata [Karafka::Params::Metadata] message metadata object
17
+ # @param metadata [Karafka::Messages::Metadata] message metadata object
18
18
  def initialize(raw_payload, metadata)
19
19
  @raw_payload = raw_payload
20
20
  @metadata = metadata
@@ -33,21 +33,16 @@ module Karafka
33
33
  @payload
34
34
  end
35
35
 
36
- # @return [Boolean] did given params payload were deserialized already
36
+ # @return [Boolean] did we deserialize payload already
37
37
  def deserialized?
38
38
  @deserialized
39
39
  end
40
40
 
41
41
  private
42
42
 
43
- # @return [Object] tries de-serializes data
43
+ # @return [Object] deserialized data
44
44
  def deserialize
45
- Karafka.monitor.instrument('params.params.deserialize', caller: self) do
46
- metadata.deserializer.call(self)
47
- end
48
- rescue ::StandardError => e
49
- Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
50
- raise e
45
+ metadata.deserializer.call(self)
51
46
  end
52
47
  end
53
48
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    # Messages batch represents a set of messages received from Kafka of a single topic partition.
    class Messages
      include Enumerable

      attr_reader :metadata

      # @param messages_array [Array<Karafka::Messages::Message>] array with karafka messages
      # @param metadata [Karafka::Messages::BatchMetadata]
      # @return [Karafka::Messages::Messages] lazy evaluated messages batch object
      def initialize(messages_array, metadata)
        @messages_array = messages_array
        @metadata = metadata
      end

      # @param block [Proc] block we want to execute per each message
      # @return [Array<Karafka::Messages::Message>, Enumerator] the underlying array when a
      #   block is given, an enumerator otherwise
      # @note Invocation of this method will not cause loading and deserializing of messages.
      def each(&block)
        @messages_array.each(&block)
      end

      # Runs deserialization of all the messages and returns them
      # @return [Array<Karafka::Messages::Message>]
      def deserialize!
        each(&:payload)
      end

      # @return [Array<Object>] array with deserialized payloads. This method can be useful when
      #   we don't care about metadata and just want to extract all the data payloads from the
      #   batch
      def payloads
        map(&:payload)
      end

      # @return [Array<String>] array with raw, not deserialized payloads
      def raw_payloads
        map(&:raw_payload)
      end

      # @param args [Array<Integer>] optional number of messages to take, same as in
      #   `Array#first`. Accepted so this override honors the `Enumerable#first(n)` contract.
      # @return [Karafka::Messages::Message, Array<Karafka::Messages::Message>, nil] first message
      #   when invoked without arguments, first `n` messages otherwise
      def first(*args)
        @messages_array.first(*args)
      end

      # @param args [Array<Integer>] optional number of messages to take, same as in
      #   `Array#last`
      # @return [Karafka::Messages::Message, Array<Karafka::Messages::Message>, nil] last message
      #   when invoked without arguments, last `n` messages otherwise
      def last(*args)
        @messages_array.last(*args)
      end

      # @return [Integer] number of messages in the batch
      def size
        @messages_array.size
      end

      # @return [Array<Karafka::Messages::Message>] pure array with messages. This is the same
      #   array instance the batch was built with, not a copy
      def to_a
        @messages_array
      end
    end
  end
end
@@ -1,18 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- module Params
5
- # Single message / params metadata details that can be accessed without the need for the
6
- # payload deserialization
4
+ module Messages
5
+ # Single message metadata details that can be accessed without the need of deserialization.
7
6
  Metadata = Struct.new(
8
- :create_time,
7
+ :timestamp,
9
8
  :headers,
10
- :is_control_record,
11
9
  :key,
12
10
  :offset,
13
11
  :deserializer,
14
12
  :partition,
15
- :receive_time,
13
+ :received_at,
16
14
  :topic,
17
15
  keyword_init: true
18
16
  )
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  module Messages
    # Lightweight stand-in for a message, used as an abstraction layer when seeking back.
    # It carries only the topic, the partition and the offset.
    Seek = Struct.new(*%i[topic partition offset])
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Karafka
  # Patches to external components
  module Patches
    # Rdkafka related patches
    module Rdkafka
      # Rdkafka::Consumer patches
      module Consumer
        # Gives access to the name of the native kafka handle, as reported by
        # `Rdkafka::Bindings.rd_kafka_name`. The value is memoized after the first call.
        #
        # @return [String] native kafka instance name
        # @note We need this to make sure that we allocate proper dispatched events only to
        #   callback listeners that should publish them
        def name
          return @name if @name

          @name = ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
        end
      end
    end
  end
end
21
+
22
# Mix the `#name` patch into the rdkafka consumer class
::Rdkafka::Consumer.include(::Karafka::Patches::Rdkafka::Consumer)
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
module Karafka
  module Pro
    module ActiveJob
      # Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
      # running jobs.
      #
      # Each message of the batch is decoded and executed as an ActiveJob job. The iteration
      # stops in the middle of a batch once the partition gets revoked or the application is
      # stopping.
      #
      # The long-running jobs pausing itself is not implemented here. It comes from
      # `Karafka::Pro::BaseConsumer`, from which this consumer inherits.
      class Consumer < Karafka::Pro::BaseConsumer
        # Runs ActiveJob jobs processing and handles lrj if needed
        def consume
          messages.each do |message|
            # Once the partition is lost, the remaining messages are no longer ours, so there is
            # no point in iterating over them. Same applies when the app is stopping.
            break if revoked?
            break if Karafka::App.stopping?

            job_data = ::ActiveSupport::JSON.decode(message.raw_payload)
            ::ActiveJob::Base.execute(job_data)

            # We cannot mark jobs as done after each if there are virtual partitions. Otherwise
            # this could create random markings
            mark_as_consumed(message) unless topic.virtual_partitioner?
          end
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
module Karafka
  module Pro
    # Karafka Pro ActiveJob components
    module ActiveJob
      # Pro dispatcher that sends the ActiveJob job to a topic named after the job queue and that
      # allows to inject additional, per job options into the produced message.
      class Dispatcher < ::Karafka::ActiveJob::Dispatcher
        # Defaults for dispatching
        # They can be updated by using `#karafka_options` on the job
        DEFAULTS = {
          dispatch_method: :produce_async,
          # We don't create a dummy proc based partitioner as we would have to evaluate it with
          # each job.
          partitioner: nil,
          # Allows for usage of `:key` or `:partition_key`
          partition_key_type: :key
        }.freeze

        private_constant :DEFAULTS

        # Serializes the job and hands it over to the Karafka producer
        #
        # @param job [ActiveJob::Base] job
        def call(job)
          producer = ::Karafka.producer
          dispatch_method = fetch_option(job, :dispatch_method, DEFAULTS)

          message = dispatch_details(job)
          message[:topic] = job.queue_name
          message[:payload] = ::ActiveSupport::JSON.encode(job.serialize)

          producer.public_send(dispatch_method, message)
        end

        private

        # @param job [ActiveJob::Base] job instance
        # @return [Hash] hash with dispatch details to which we merge topic and payload. It is
        #   empty when no partitioner is set, otherwise it maps the partition key type to the
        #   value returned by the partitioner for this job
        def dispatch_details(job)
          partitioner = fetch_option(job, :partitioner, DEFAULTS)
          key_type = fetch_option(job, :partition_key_type, DEFAULTS)

          partitioner ? { key_type => partitioner.call(job) } : {}
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
module Karafka
  module Pro
    module ActiveJob
      # Contract for validating the options that can be altered with `#karafka_options` per job
      # class that works with Pro features.
      class JobOptionsContract < Contracts::Base
        configure do |config|
          errors_file = File.join(Karafka.gem_root, 'config', 'errors.yml')
          translations = YAML.safe_load(File.read(errors_file))

          # Error messages live under the en -> validations -> job_options path. Each level is
          # fetched strictly, so a missing key raises instead of returning nil
          config.error_messages = %w[en validations job_options].reduce(translations) do |node, key|
            node.fetch(key)
          end
        end

        # Only the two listed producer dispatch methods are accepted
        optional(:dispatch_method) { |value| %i[produce_async produce_sync].include?(value) }
        # Partitioner has to be callable
        optional(:partitioner) { |value| value.respond_to?(:call) }
        # Only `:key` and `:partition_key` are accepted
        optional(:partition_key_type) { |value| %i[key partition_key].include?(value) }
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
module Karafka
  module Pro
    # Karafka PRO consumer.
    #
    # If you use PRO, all your consumers should inherit (indirectly) from it.
    #
    # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
    #   after each batch is processed.
    class BaseConsumer < Karafka::BaseConsumer
      # Pause for tops 31 years
      # NOTE(review): the 31 years figure holds if the pause time is in milliseconds - confirm
      MAX_PAUSE_TIME = 1_000_000_000_000

      private_constant :MAX_PAUSE_TIME

      # Pauses processing of a given partition until we're done with the processing
      # This ensures, that we can easily poll not reaching the `max.poll.interval`
      #
      # Does nothing for topics that are not long-running job topics.
      def on_before_consume
        return unless topic.long_running_job?

        # This ensures, that when running LRJ with VP, things operate as expected
        #
        # We pause at the first message in a batch. That way in case of a crash, we will not
        # lose any messages
        coordinator.on_started { |first| pause(first.offset, MAX_PAUSE_TIME) }
      end

      # Runs extra logic after consumption that is related to handling long-running jobs
      # @note This overwrites the '#on_after_consume' from the base consumer
      def on_after_consume
        coordinator.on_finished do |first, last|
          on_after_consume_regular(first, last)
        end
      end

      private

      # Handles the post-consumption flow depending on topic settings
      #
      # @param first_message [Karafka::Messages::Message]
      # @param last_message [Karafka::Messages::Message]
      def on_after_consume_regular(first_message, last_message)
        # If processing failed, we need to pause
        return pause(@seek_offset || first_message.offset) unless coordinator.success?

        coordinator.pause_tracker.reset

        # We use the non-blocking one here. If someone needs the blocking one, can implement it
        # with manual offset management
        # Mark as consumed only if manual offset management is not on and the partition was not
        # revoked
        mark_as_consumed(last_message) if !topic.manual_offset_management? && !revoked?

        # If this is not a long-running job there is nothing for us to do here
        return unless topic.long_running_job?

        # Once processing is done, we move to the new offset based on commits
        # Here, in case manual offset management is off, we have the new proper offset of a
        # first message from another batch from `@seek_offset`. If manual offset management
        # is on, we move to place where the user indicated it was finished. This can create an
        # interesting (yet valid) corner case, where with manual offset management on and no
        # marking as consumed, we end up with an infinite loop processing same messages over and
        # over again
        seek(@seek_offset || first_message.offset)

        resume
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component.
4
+ # All of the commercial components are present in the lib/karafka/pro directory of this
5
+ # repository and their usage requires commercial license agreement.
6
+ #
7
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
8
+ #
9
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
10
+ # your code to Maciej Mensfeld.
11
+
12
module Karafka
  module Pro
    # Namespace for Karafka Pro related contracts
    module Contracts
      # Common parent of the Pro contracts. It adds nothing on top of the regular base contract.
      class Base < ::Karafka::Contracts::Base; end
    end
  end
end