karafka 1.4.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -3
- data/.github/workflows/ci.yml +85 -30
- data/.ruby-version +1 -1
- data/CHANGELOG.md +268 -7
- data/CONTRIBUTING.md +10 -19
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -87
- data/LICENSE +17 -0
- data/LICENSE-COMM +89 -0
- data/LICENSE-LGPL +165 -0
- data/README.md +44 -48
- data/bin/benchmarks +85 -0
- data/bin/create_token +22 -0
- data/bin/integrations +237 -0
- data/bin/karafka +4 -0
- data/bin/scenario +29 -0
- data/bin/stress_many +13 -0
- data/bin/stress_one +13 -0
- data/bin/wait_for_kafka +20 -0
- data/certs/karafka-pro.pem +11 -0
- data/config/errors.yml +55 -40
- data/docker-compose.yml +39 -3
- data/karafka.gemspec +11 -17
- data/lib/active_job/karafka.rb +21 -0
- data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
- data/lib/karafka/active_job/consumer.rb +26 -0
- data/lib/karafka/active_job/dispatcher.rb +38 -0
- data/lib/karafka/active_job/job_extensions.rb +34 -0
- data/lib/karafka/active_job/job_options_contract.rb +21 -0
- data/lib/karafka/active_job/routing/extensions.rb +31 -0
- data/lib/karafka/app.rb +15 -20
- data/lib/karafka/base_consumer.rb +181 -31
- data/lib/karafka/cli/base.rb +4 -4
- data/lib/karafka/cli/info.rb +43 -9
- data/lib/karafka/cli/install.rb +19 -10
- data/lib/karafka/cli/server.rb +17 -42
- data/lib/karafka/cli.rb +4 -11
- data/lib/karafka/connection/client.rb +385 -90
- data/lib/karafka/connection/listener.rb +246 -38
- data/lib/karafka/connection/listeners_batch.rb +24 -0
- data/lib/karafka/connection/messages_buffer.rb +84 -0
- data/lib/karafka/connection/pauses_manager.rb +46 -0
- data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
- data/lib/karafka/connection/rebalance_manager.rb +78 -0
- data/lib/karafka/contracts/base.rb +17 -0
- data/lib/karafka/contracts/config.rb +88 -11
- data/lib/karafka/contracts/consumer_group.rb +21 -189
- data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
- data/lib/karafka/contracts/server_cli_options.rb +19 -18
- data/lib/karafka/contracts.rb +1 -1
- data/lib/karafka/env.rb +46 -0
- data/lib/karafka/errors.rb +21 -21
- data/lib/karafka/helpers/async.rb +33 -0
- data/lib/karafka/helpers/colorize.rb +20 -0
- data/lib/karafka/helpers/multi_delegator.rb +2 -2
- data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
- data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
- data/lib/karafka/instrumentation/logger_listener.rb +164 -0
- data/lib/karafka/instrumentation/monitor.rb +13 -61
- data/lib/karafka/instrumentation/notifications.rb +52 -0
- data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
- data/lib/karafka/instrumentation.rb +21 -0
- data/lib/karafka/licenser.rb +75 -0
- data/lib/karafka/messages/batch_metadata.rb +45 -0
- data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
- data/lib/karafka/messages/builders/message.rb +39 -0
- data/lib/karafka/messages/builders/messages.rb +32 -0
- data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
- data/lib/karafka/messages/messages.rb +64 -0
- data/lib/karafka/{params → messages}/metadata.rb +4 -6
- data/lib/karafka/messages/seek.rb +9 -0
- data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
- data/lib/karafka/pro/active_job/consumer.rb +46 -0
- data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
- data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
- data/lib/karafka/pro/base_consumer.rb +82 -0
- data/lib/karafka/pro/contracts/base.rb +21 -0
- data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
- data/lib/karafka/pro/loader.rb +76 -0
- data/lib/karafka/pro/performance_tracker.rb +80 -0
- data/lib/karafka/pro/processing/coordinator.rb +72 -0
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
- data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
- data/lib/karafka/pro/processing/partitioner.rb +60 -0
- data/lib/karafka/pro/processing/scheduler.rb +56 -0
- data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
- data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
- data/lib/karafka/pro.rb +13 -0
- data/lib/karafka/process.rb +1 -0
- data/lib/karafka/processing/coordinator.rb +88 -0
- data/lib/karafka/processing/coordinators_buffer.rb +54 -0
- data/lib/karafka/processing/executor.rb +118 -0
- data/lib/karafka/processing/executors_buffer.rb +88 -0
- data/lib/karafka/processing/jobs/base.rb +51 -0
- data/lib/karafka/processing/jobs/consume.rb +42 -0
- data/lib/karafka/processing/jobs/revoked.rb +22 -0
- data/lib/karafka/processing/jobs/shutdown.rb +23 -0
- data/lib/karafka/processing/jobs_builder.rb +29 -0
- data/lib/karafka/processing/jobs_queue.rb +144 -0
- data/lib/karafka/processing/partitioner.rb +22 -0
- data/lib/karafka/processing/result.rb +29 -0
- data/lib/karafka/processing/scheduler.rb +22 -0
- data/lib/karafka/processing/worker.rb +88 -0
- data/lib/karafka/processing/workers_batch.rb +27 -0
- data/lib/karafka/railtie.rb +113 -0
- data/lib/karafka/routing/builder.rb +15 -24
- data/lib/karafka/routing/consumer_group.rb +11 -19
- data/lib/karafka/routing/consumer_mapper.rb +1 -2
- data/lib/karafka/routing/router.rb +1 -1
- data/lib/karafka/routing/subscription_group.rb +53 -0
- data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
- data/lib/karafka/routing/topic.rb +61 -24
- data/lib/karafka/routing/topics.rb +38 -0
- data/lib/karafka/runner.rb +51 -0
- data/lib/karafka/serialization/json/deserializer.rb +6 -15
- data/lib/karafka/server.rb +67 -26
- data/lib/karafka/setup/config.rb +147 -175
- data/lib/karafka/status.rb +14 -5
- data/lib/karafka/templates/example_consumer.rb.erb +16 -0
- data/lib/karafka/templates/karafka.rb.erb +15 -51
- data/lib/karafka/time_trackers/base.rb +19 -0
- data/lib/karafka/time_trackers/pause.rb +92 -0
- data/lib/karafka/time_trackers/poll.rb +65 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +38 -17
- data.tar.gz.sig +0 -0
- metadata +118 -120
- metadata.gz.sig +0 -0
- data/MIT-LICENCE +0 -18
- data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
- data/lib/karafka/attributes_map.rb +0 -63
- data/lib/karafka/backends/inline.rb +0 -16
- data/lib/karafka/base_responder.rb +0 -226
- data/lib/karafka/cli/flow.rb +0 -48
- data/lib/karafka/cli/missingno.rb +0 -19
- data/lib/karafka/code_reloader.rb +0 -67
- data/lib/karafka/connection/api_adapter.rb +0 -158
- data/lib/karafka/connection/batch_delegator.rb +0 -55
- data/lib/karafka/connection/builder.rb +0 -23
- data/lib/karafka/connection/message_delegator.rb +0 -36
- data/lib/karafka/consumers/batch_metadata.rb +0 -10
- data/lib/karafka/consumers/callbacks.rb +0 -71
- data/lib/karafka/consumers/includer.rb +0 -64
- data/lib/karafka/consumers/responders.rb +0 -24
- data/lib/karafka/consumers/single_params.rb +0 -15
- data/lib/karafka/contracts/responder_usage.rb +0 -54
- data/lib/karafka/fetcher.rb +0 -42
- data/lib/karafka/helpers/class_matcher.rb +0 -88
- data/lib/karafka/helpers/config_retriever.rb +0 -46
- data/lib/karafka/helpers/inflector.rb +0 -26
- data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
- data/lib/karafka/params/batch_metadata.rb +0 -26
- data/lib/karafka/params/builders/batch_metadata.rb +0 -30
- data/lib/karafka/params/builders/params.rb +0 -38
- data/lib/karafka/params/builders/params_batch.rb +0 -25
- data/lib/karafka/params/params_batch.rb +0 -60
- data/lib/karafka/patches/ruby_kafka.rb +0 -47
- data/lib/karafka/persistence/client.rb +0 -29
- data/lib/karafka/persistence/consumers.rb +0 -45
- data/lib/karafka/persistence/topics.rb +0 -48
- data/lib/karafka/responders/builder.rb +0 -36
- data/lib/karafka/responders/topic.rb +0 -55
- data/lib/karafka/routing/topic_mapper.rb +0 -53
- data/lib/karafka/serialization/json/serializer.rb +0 -31
- data/lib/karafka/setup/configurators/water_drop.rb +0 -36
- data/lib/karafka/templates/application_responder.rb.erb +0 -11
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
# Checks the license presence for pro and loads pro components when needed (if any)
|
|
5
|
+
class Licenser
|
|
6
|
+
# Location in the gem where we store the public key
|
|
7
|
+
PUBLIC_KEY_LOCATION = File.join(Karafka.gem_root, 'certs', 'karafka-pro.pem')
|
|
8
|
+
|
|
9
|
+
private_constant :PUBLIC_KEY_LOCATION
|
|
10
|
+
|
|
11
|
+
# Tries to prepare license and verifies it
|
|
12
|
+
#
|
|
13
|
+
# @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
|
|
14
|
+
def prepare_and_verify(license_config)
|
|
15
|
+
prepare(license_config)
|
|
16
|
+
verify(license_config)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
# @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
|
|
22
|
+
def prepare(license_config)
|
|
23
|
+
# If there is a token, no action is needed
|
|
24
|
+
# We support a case where someone would put the token in instead of using one from the
|
|
25
|
+
# license. That's in case there are limitations to using external package sources, etc
|
|
26
|
+
return if license_config.token
|
|
27
|
+
|
|
28
|
+
begin
|
|
29
|
+
license_config.token || require('karafka-license')
|
|
30
|
+
rescue LoadError
|
|
31
|
+
return
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
license_config.token = Karafka::License.token
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Check license and setup license details (if needed)
|
|
38
|
+
# @param license_config [Karafka::Core::Configurable::Node] config related to the licensing
|
|
39
|
+
def verify(license_config)
|
|
40
|
+
# If no license, it will just run LGPL components without anything extra
|
|
41
|
+
return unless license_config.token
|
|
42
|
+
|
|
43
|
+
public_key = OpenSSL::PKey::RSA.new(File.read(PUBLIC_KEY_LOCATION))
|
|
44
|
+
|
|
45
|
+
# We strip and remove newlines and spaces in case someone copy-pasted it as a multi line string
|
|
46
|
+
formatted_token = license_config.token.strip.delete("\n").delete(' ')
|
|
47
|
+
decoded_token = Base64.decode64(formatted_token)
|
|
48
|
+
|
|
49
|
+
begin
|
|
50
|
+
data = public_key.public_decrypt(decoded_token)
|
|
51
|
+
rescue OpenSSL::OpenSSLError
|
|
52
|
+
data = nil
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
details = data ? JSON.parse(data) : raise_invalid_license_token(license_config)
|
|
56
|
+
|
|
57
|
+
license_config.entity = details.fetch('entity')
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Raises an error with info, that used token is invalid
|
|
61
|
+
# @param license_config [Karafka::Core::Configurable::Node]
|
|
62
|
+
def raise_invalid_license_token(license_config)
|
|
63
|
+
# We set it to false so `Karafka.pro?` method behaves as expected
|
|
64
|
+
license_config.token = false
|
|
65
|
+
|
|
66
|
+
raise(
|
|
67
|
+
Errors::InvalidLicenseTokenError,
|
|
68
|
+
<<~MSG.tr("\n", ' ')
|
|
69
|
+
License key you provided is invalid.
|
|
70
|
+
Please reach us at contact@karafka.io or visit https://karafka.io to obtain a valid one.
|
|
71
|
+
MSG
|
|
72
|
+
)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
# Simple batch metadata object that stores all non-message information received from Kafka
|
|
6
|
+
# cluster while fetching the data.
|
|
7
|
+
#
|
|
8
|
+
# @note This metadata object refers to per batch metadata, not `#message.metadata`
|
|
9
|
+
BatchMetadata = Struct.new(
|
|
10
|
+
:size,
|
|
11
|
+
:first_offset,
|
|
12
|
+
:last_offset,
|
|
13
|
+
:deserializer,
|
|
14
|
+
:partition,
|
|
15
|
+
:topic,
|
|
16
|
+
:created_at,
|
|
17
|
+
:scheduled_at,
|
|
18
|
+
:processed_at,
|
|
19
|
+
keyword_init: true
|
|
20
|
+
) do
|
|
21
|
+
# This lag describes how long it took for a message to be consumed from the moment it was
|
|
22
|
+
# created
|
|
23
|
+
def consumption_lag
|
|
24
|
+
time_distance_in_ms(processed_at, created_at)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# This lag describes how long a batch had to wait before it was picked up by one of the
|
|
28
|
+
# workers
|
|
29
|
+
def processing_lag
|
|
30
|
+
time_distance_in_ms(processed_at, scheduled_at)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
# Computes time distance in between two times in ms
|
|
36
|
+
#
|
|
37
|
+
# @param time1 [Time]
|
|
38
|
+
# @param time2 [Time]
|
|
39
|
+
# @return [Integer] distance in between two times in ms
|
|
40
|
+
def time_distance_in_ms(time1, time2)
|
|
41
|
+
((time1 - time2) * 1_000).round
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
module Builders
|
|
6
|
+
# Builder for creating batch metadata object based on the batch information.
|
|
7
|
+
module BatchMetadata
|
|
8
|
+
class << self
|
|
9
|
+
# Creates metadata based on the kafka batch data.
|
|
10
|
+
#
|
|
11
|
+
# @param messages [Array<Karafka::Messages::Message>] messages array
|
|
12
|
+
# @param topic [Karafka::Routing::Topic] topic for which we've fetched the batch
|
|
13
|
+
# @param scheduled_at [Time] moment when the batch was scheduled for processing
|
|
14
|
+
# @return [Karafka::Messages::BatchMetadata] batch metadata object
|
|
15
|
+
#
|
|
16
|
+
# @note `processed_at` is set to the current time here, as the batch metadata is built when
|
|
17
|
+
# the job is picked up for processing in the worker.
|
|
18
|
+
def call(messages, topic, scheduled_at)
|
|
19
|
+
Karafka::Messages::BatchMetadata.new(
|
|
20
|
+
size: messages.count,
|
|
21
|
+
first_offset: messages.first.offset,
|
|
22
|
+
last_offset: messages.last.offset,
|
|
23
|
+
deserializer: topic.deserializer,
|
|
24
|
+
partition: messages.first.partition,
|
|
25
|
+
topic: topic.name,
|
|
26
|
+
# We go with the assumption that the creation of the whole batch is the last message
|
|
27
|
+
# creation time
|
|
28
|
+
created_at: messages.last.timestamp,
|
|
29
|
+
# When this batch was built and scheduled for execution
|
|
30
|
+
scheduled_at: scheduled_at,
|
|
31
|
+
# We build the batch metadata when we pick up the job in the worker, thus we can use
|
|
32
|
+
# current time here
|
|
33
|
+
processed_at: Time.now
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
# Builders encapsulate logic related to creating messages related objects.
|
|
6
|
+
module Builders
|
|
7
|
+
# Builder of a single message based on raw rdkafka message.
|
|
8
|
+
module Message
|
|
9
|
+
class << self
|
|
10
|
+
# @param kafka_message [Rdkafka::Consumer::Message] raw fetched message
|
|
11
|
+
# @param topic [Karafka::Routing::Topic] topic for which this message was fetched
|
|
12
|
+
# @param received_at [Time] moment when we've received the message
|
|
13
|
+
# @return [Karafka::Messages::Message] message object with payload and metadata
|
|
14
|
+
def call(kafka_message, topic, received_at)
|
|
15
|
+
# @see https://github.com/appsignal/rdkafka-ruby/issues/168
|
|
16
|
+
kafka_message.headers.transform_keys!(&:to_s)
|
|
17
|
+
|
|
18
|
+
metadata = Karafka::Messages::Metadata.new(
|
|
19
|
+
timestamp: kafka_message.timestamp,
|
|
20
|
+
headers: kafka_message.headers,
|
|
21
|
+
key: kafka_message.key,
|
|
22
|
+
offset: kafka_message.offset,
|
|
23
|
+
deserializer: topic.deserializer,
|
|
24
|
+
partition: kafka_message.partition,
|
|
25
|
+
topic: topic.name,
|
|
26
|
+
received_at: received_at
|
|
27
|
+
).freeze
|
|
28
|
+
|
|
29
|
+
# Karafka messages cannot be frozen because of the lazy deserialization feature
|
|
30
|
+
Karafka::Messages::Message.new(
|
|
31
|
+
kafka_message.payload,
|
|
32
|
+
metadata
|
|
33
|
+
)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
module Builders
|
|
6
|
+
# Builder for creating message batch instances.
|
|
7
|
+
module Messages
|
|
8
|
+
class << self
|
|
9
|
+
# Creates messages batch with messages inside based on the incoming messages and the
|
|
10
|
+
# topic from which it comes.
|
|
11
|
+
#
|
|
12
|
+
# @param messages [Array<Karafka::Messages::Message>] karafka messages array
|
|
13
|
+
# @param topic [Karafka::Routing::Topic] topic for which we've received messages
|
|
14
|
+
# @param received_at [Time] moment in time when the messages were received
|
|
15
|
+
# @return [Karafka::Messages::Messages] messages batch object
|
|
16
|
+
def call(messages, topic, received_at)
|
|
17
|
+
metadata = BatchMetadata.call(
|
|
18
|
+
messages,
|
|
19
|
+
topic,
|
|
20
|
+
received_at
|
|
21
|
+
).freeze
|
|
22
|
+
|
|
23
|
+
Karafka::Messages::Messages.new(
|
|
24
|
+
messages,
|
|
25
|
+
metadata
|
|
26
|
+
).freeze
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Karafka
|
|
4
|
-
#
|
|
5
|
-
module Params
|
|
4
|
+
# Messages namespace encapsulating all the logic that is directly related to messages handling
|
|
5
|
+
module Messages
|
|
6
6
|
# It provides lazy loading not only until the first usage, but also allows us to skip
|
|
7
7
|
# using deserializer until we execute our logic. That way we can operate with
|
|
8
8
|
# heavy-deserialization data without slowing down the whole application.
|
|
9
|
-
class Params
|
|
9
|
+
class Message
|
|
10
10
|
extend Forwardable
|
|
11
11
|
|
|
12
12
|
attr_reader :raw_payload, :metadata
|
|
@@ -14,7 +14,7 @@ module Karafka
|
|
|
14
14
|
def_delegators :metadata, *Metadata.members
|
|
15
15
|
|
|
16
16
|
# @param raw_payload [Object] incoming payload before deserialization
|
|
17
|
-
# @param metadata [Karafka::
|
|
17
|
+
# @param metadata [Karafka::Messages::Metadata] message metadata object
|
|
18
18
|
def initialize(raw_payload, metadata)
|
|
19
19
|
@raw_payload = raw_payload
|
|
20
20
|
@metadata = metadata
|
|
@@ -33,21 +33,16 @@ module Karafka
|
|
|
33
33
|
@payload
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
# @return [Boolean] did
|
|
36
|
+
# @return [Boolean] did we deserialize payload already
|
|
37
37
|
def deserialized?
|
|
38
38
|
@deserialized
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
private
|
|
42
42
|
|
|
43
|
-
# @return [Object]
|
|
43
|
+
# @return [Object] deserialized data
|
|
44
44
|
def deserialize
|
|
45
|
-
|
|
46
|
-
metadata.deserializer.call(self)
|
|
47
|
-
end
|
|
48
|
-
rescue ::StandardError => e
|
|
49
|
-
Karafka.monitor.instrument('params.params.deserialize.error', caller: self, error: e)
|
|
50
|
-
raise e
|
|
45
|
+
metadata.deserializer.call(self)
|
|
51
46
|
end
|
|
52
47
|
end
|
|
53
48
|
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
# Messages batch represents a set of messages received from Kafka of a single topic partition.
|
|
6
|
+
class Messages
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
9
|
+
attr_reader :metadata
|
|
10
|
+
|
|
11
|
+
# @param messages_array [Array<Karafka::Messages::Message>] array with karafka messages
|
|
12
|
+
# @param metadata [Karafka::Messages::BatchMetadata]
|
|
13
|
+
# @return [Karafka::Messages::Messages] lazy evaluated messages batch object
|
|
14
|
+
def initialize(messages_array, metadata)
|
|
15
|
+
@messages_array = messages_array
|
|
16
|
+
@metadata = metadata
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# @param block [Proc] block we want to execute per each message
|
|
20
|
+
# @note Invocation of this method will not cause loading and deserializing of messages.
|
|
21
|
+
def each(&block)
|
|
22
|
+
@messages_array.each(&block)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Runs deserialization of all the messages and returns them
|
|
26
|
+
# @return [Array<Karafka::Messages::Message>]
|
|
27
|
+
def deserialize!
|
|
28
|
+
each(&:payload)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# @return [Array<Object>] array with deserialized payloads. This method can be useful when
|
|
32
|
+
# we don't care about metadata and just want to extract all the data payloads from the
|
|
33
|
+
# batch
|
|
34
|
+
def payloads
|
|
35
|
+
map(&:payload)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# @return [Array<String>] array with raw, not deserialized payloads
|
|
39
|
+
def raw_payloads
|
|
40
|
+
map(&:raw_payload)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @return [Karafka::Messages::Message] first message
|
|
44
|
+
def first
|
|
45
|
+
@messages_array.first
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# @return [Karafka::Messages::Message] last message
|
|
49
|
+
def last
|
|
50
|
+
@messages_array.last
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# @return [Integer] number of messages in the batch
|
|
54
|
+
def size
|
|
55
|
+
@messages_array.size
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# @return [Array<Karafka::Messages::Message>] pure array with messages
|
|
59
|
+
def to_a
|
|
60
|
+
@messages_array
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Karafka
|
|
4
|
-
module Params
|
|
5
|
-
# Single message
|
|
6
|
-
# payload deserialization
|
|
4
|
+
module Messages
|
|
5
|
+
# Single message metadata details that can be accessed without the need of deserialization.
|
|
7
6
|
Metadata = Struct.new(
|
|
8
|
-
:
|
|
7
|
+
:timestamp,
|
|
9
8
|
:headers,
|
|
10
|
-
:is_control_record,
|
|
11
9
|
:key,
|
|
12
10
|
:offset,
|
|
13
11
|
:deserializer,
|
|
14
12
|
:partition,
|
|
15
|
-
:
|
|
13
|
+
:received_at,
|
|
16
14
|
:topic,
|
|
17
15
|
keyword_init: true
|
|
18
16
|
)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Messages
|
|
5
|
+
# "Fake" message that we use as an abstraction layer when seeking back.
|
|
6
|
+
# This allows us to encapsulate a seek with a simple abstraction
|
|
7
|
+
Seek = Struct.new(:topic, :partition, :offset)
|
|
8
|
+
end
|
|
9
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
# Patches to external components
|
|
5
|
+
module Patches
|
|
6
|
+
# Rdkafka related patches
|
|
7
|
+
module Rdkafka
|
|
8
|
+
# Rdkafka::Consumer patches
|
|
9
|
+
module Consumer
|
|
10
|
+
# A method that allows us to get the native kafka consumer name
|
|
11
|
+
# @return [String] consumer instance name
|
|
12
|
+
# @note We need this to make sure that we allocate proper dispatched events only to
|
|
13
|
+
# callback listeners that should publish them
|
|
14
|
+
def name
|
|
15
|
+
@name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
|
|
13
|
+
module Pro
|
|
14
|
+
module ActiveJob
|
|
15
|
+
# Pro ActiveJob consumer that is supposed to handle long-running jobs as well as short
|
|
16
|
+
# running jobs
|
|
17
|
+
#
|
|
18
|
+
# When in LRJ, it will pause a given partition forever and will resume its processing only
|
|
19
|
+
# when all the jobs are done processing.
|
|
20
|
+
#
|
|
21
|
+
# It contains slightly better revocation warranties than the regular blocking consumer as
|
|
22
|
+
# it can stop processing batch of jobs in the middle after the revocation.
|
|
23
|
+
class Consumer < Karafka::Pro::BaseConsumer
|
|
24
|
+
# Runs ActiveJob jobs processing and handles lrj if needed
|
|
25
|
+
def consume
|
|
26
|
+
messages.each do |message|
|
|
27
|
+
# If for any reason we've lost this partition, not worth iterating over new messages
|
|
28
|
+
# as they are no longer ours
|
|
29
|
+
break if revoked?
|
|
30
|
+
break if Karafka::App.stopping?
|
|
31
|
+
|
|
32
|
+
::ActiveJob::Base.execute(
|
|
33
|
+
::ActiveSupport::JSON.decode(message.raw_payload)
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# We cannot mark jobs as done after each if there are virtual partitions. Otherwise
|
|
37
|
+
# this could create random markings
|
|
38
|
+
next if topic.virtual_partitioner?
|
|
39
|
+
|
|
40
|
+
mark_as_consumed(message)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
|
|
13
|
+
module Pro
|
|
14
|
+
# Karafka Pro ActiveJob components
|
|
15
|
+
module ActiveJob
|
|
16
|
+
# Pro dispatcher that sends the ActiveJob job to a proper topic based on the queue name
|
|
17
|
+
# and that allows to inject additional options into the producer, effectively allowing for a
|
|
18
|
+
# much better and more granular control over the dispatch and consumption process.
|
|
19
|
+
class Dispatcher < ::Karafka::ActiveJob::Dispatcher
|
|
20
|
+
# Defaults for dispatching
|
|
21
|
+
# They can be updated by using `#karafka_options` on the job
|
|
22
|
+
DEFAULTS = {
|
|
23
|
+
dispatch_method: :produce_async,
|
|
24
|
+
# We don't create a dummy proc based partitioner as we would have to evaluate it with
|
|
25
|
+
# each job.
|
|
26
|
+
partitioner: nil,
|
|
27
|
+
# Allows for usage of `:key` or `:partition_key`
|
|
28
|
+
partition_key_type: :key
|
|
29
|
+
}.freeze
|
|
30
|
+
|
|
31
|
+
private_constant :DEFAULTS
|
|
32
|
+
|
|
33
|
+
# @param job [ActiveJob::Base] job
|
|
34
|
+
def call(job)
|
|
35
|
+
::Karafka.producer.public_send(
|
|
36
|
+
fetch_option(job, :dispatch_method, DEFAULTS),
|
|
37
|
+
dispatch_details(job).merge!(
|
|
38
|
+
topic: job.queue_name,
|
|
39
|
+
payload: ::ActiveSupport::JSON.encode(job.serialize)
|
|
40
|
+
)
|
|
41
|
+
)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
# @param job [ActiveJob::Base] job instance
|
|
47
|
+
# @return [Hash] hash with dispatch details to which we merge topic and payload
|
|
48
|
+
def dispatch_details(job)
|
|
49
|
+
partitioner = fetch_option(job, :partitioner, DEFAULTS)
|
|
50
|
+
key_type = fetch_option(job, :partition_key_type, DEFAULTS)
|
|
51
|
+
|
|
52
|
+
return {} unless partitioner
|
|
53
|
+
|
|
54
|
+
{
|
|
55
|
+
key_type => partitioner.call(job)
|
|
56
|
+
}
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
|
|
13
|
+
module Pro
|
|
14
|
+
module ActiveJob
|
|
15
|
+
# Contract for validating the options that can be altered with `#karafka_options` per job
|
|
16
|
+
# class that works with Pro features.
|
|
17
|
+
class JobOptionsContract < Contracts::Base
|
|
18
|
+
configure do |config|
|
|
19
|
+
config.error_messages = YAML.safe_load(
|
|
20
|
+
File.read(
|
|
21
|
+
File.join(Karafka.gem_root, 'config', 'errors.yml')
|
|
22
|
+
)
|
|
23
|
+
).fetch('en').fetch('validations').fetch('job_options')
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
optional(:dispatch_method) { |val| %i[produce_async produce_sync].include?(val) }
|
|
27
|
+
optional(:partitioner) { |val| val.respond_to?(:call) }
|
|
28
|
+
optional(:partition_key_type) { |val| %i[key partition_key].include?(val) }
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
|
|
13
|
+
module Pro
|
|
14
|
+
# Karafka PRO consumer.
|
|
15
|
+
#
|
|
16
|
+
# If you use PRO, all your consumers should inherit (indirectly) from it.
|
|
17
|
+
#
|
|
18
|
+
# @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
|
|
19
|
+
# after each batch is processed.
|
|
20
|
+
class BaseConsumer < Karafka::BaseConsumer
|
|
21
|
+
# Pause for up to 31 years
|
|
22
|
+
MAX_PAUSE_TIME = 1_000_000_000_000
|
|
23
|
+
|
|
24
|
+
private_constant :MAX_PAUSE_TIME
|
|
25
|
+
|
|
26
|
+
# Pauses processing of a given partition until we're done with the processing
|
|
27
|
+
# This ensures, that we can easily poll not reaching the `max.poll.interval`
|
|
28
|
+
def on_before_consume
|
|
29
|
+
return unless topic.long_running_job?
|
|
30
|
+
|
|
31
|
+
# This ensures, that when running LRJ with VP, things operate as expected
|
|
32
|
+
coordinator.on_started do |first_group_message|
|
|
33
|
+
# Pause at the first message in a batch. That way in case of a crash, we will not lose
|
|
34
|
+
# any messages
|
|
35
|
+
pause(first_group_message.offset, MAX_PAUSE_TIME)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Runs extra logic after consumption that is related to handling long-running jobs
|
|
40
|
+
# @note This overwrites the '#on_after_consume' from the base consumer
|
|
41
|
+
def on_after_consume
|
|
42
|
+
coordinator.on_finished do |first_group_message, last_group_message|
|
|
43
|
+
on_after_consume_regular(first_group_message, last_group_message)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
# Handles the post-consumption flow depending on topic settings
|
|
50
|
+
#
|
|
51
|
+
# @param first_message [Karafka::Messages::Message]
|
|
52
|
+
# @param last_message [Karafka::Messages::Message]
|
|
53
|
+
def on_after_consume_regular(first_message, last_message)
|
|
54
|
+
if coordinator.success?
|
|
55
|
+
coordinator.pause_tracker.reset
|
|
56
|
+
|
|
57
|
+
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
|
58
|
+
# with manual offset management
|
|
59
|
+
# Mark as consumed only if manual offset management is not on
|
|
60
|
+
mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
|
|
61
|
+
|
|
62
|
+
# If this is not a long-running job there is nothing for us to do here
|
|
63
|
+
return unless topic.long_running_job?
|
|
64
|
+
|
|
65
|
+
# Once processing is done, we move to the new offset based on commits
|
|
66
|
+
# Here, in case manual offset management is off, we have the new proper offset of a
|
|
67
|
+
# first message from another batch from `@seek_offset`. If manual offset management
|
|
68
|
+
# is on, we move to place where the user indicated it was finished. This can create an
|
|
69
|
+
# interesting (yet valid) corner case, where with manual offset management on and no
|
|
70
|
+
# marking as consumed, we end up with an infinite loop processing same messages over and
|
|
71
|
+
# over again
|
|
72
|
+
seek(@seek_offset || first_message.offset)
|
|
73
|
+
|
|
74
|
+
resume
|
|
75
|
+
else
|
|
76
|
+
# If processing failed, we need to pause
|
|
77
|
+
pause(@seek_offset || first_message.offset)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This Karafka component is a Pro component.
|
|
4
|
+
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
|
+
# repository and their usage requires commercial license agreement.
|
|
6
|
+
#
|
|
7
|
+
# Karafka has also commercial-friendly license, commercial support and commercial components.
|
|
8
|
+
#
|
|
9
|
+
# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
|
|
10
|
+
# your code to Maciej Mensfeld.
|
|
11
|
+
|
|
12
|
+
module Karafka
|
|
13
|
+
module Pro
|
|
14
|
+
# Namespace for Karafka Pro related contracts
|
|
15
|
+
module Contracts
|
|
16
|
+
# Base contract for Pro components contracts
|
|
17
|
+
class Base < ::Karafka::Contracts::Base
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|