karafka 2.2.14 → 2.3.0.alpha2

Files changed (107)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +16 -16
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/bin/integrations +1 -1
  10. data/config/locales/errors.yml +7 -1
  11. data/config/locales/pro_errors.yml +22 -0
  12. data/docker-compose.yml +1 -1
  13. data/karafka.gemspec +2 -2
  14. data/lib/karafka/admin/acl.rb +287 -0
  15. data/lib/karafka/admin.rb +9 -13
  16. data/lib/karafka/app.rb +5 -3
  17. data/lib/karafka/base_consumer.rb +9 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/connection/client.rb +83 -76
  20. data/lib/karafka/connection/conductor.rb +28 -0
  21. data/lib/karafka/connection/listener.rb +159 -42
  22. data/lib/karafka/connection/listeners_batch.rb +5 -11
  23. data/lib/karafka/connection/manager.rb +72 -0
  24. data/lib/karafka/connection/messages_buffer.rb +12 -0
  25. data/lib/karafka/connection/proxy.rb +17 -0
  26. data/lib/karafka/connection/status.rb +75 -0
  27. data/lib/karafka/contracts/config.rb +14 -10
  28. data/lib/karafka/contracts/consumer_group.rb +9 -1
  29. data/lib/karafka/contracts/topic.rb +3 -1
  30. data/lib/karafka/errors.rb +17 -0
  31. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  32. data/lib/karafka/instrumentation/notifications.rb +13 -5
  33. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  34. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
  35. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  36. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  37. data/lib/karafka/pro/base_consumer.rb +47 -0
  38. data/lib/karafka/pro/connection/manager.rb +269 -0
  39. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  40. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  41. data/lib/karafka/pro/iterator.rb +1 -6
  42. data/lib/karafka/pro/loader.rb +14 -0
  43. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  44. data/lib/karafka/pro/processing/executor.rb +37 -0
  45. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  46. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  47. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  49. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  50. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  51. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  52. data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
  53. data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
  54. data/lib/karafka/pro/processing/strategies/default.rb +154 -1
  55. data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
  56. data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
  57. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  60. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  61. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  62. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  63. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  65. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  66. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  67. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  68. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  69. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  70. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  71. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  72. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  73. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  74. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  75. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  76. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  77. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  79. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  80. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  81. data/lib/karafka/process.rb +5 -3
  82. data/lib/karafka/processing/coordinator.rb +5 -1
  83. data/lib/karafka/processing/executor.rb +16 -10
  84. data/lib/karafka/processing/executors_buffer.rb +19 -4
  85. data/lib/karafka/processing/schedulers/default.rb +3 -2
  86. data/lib/karafka/processing/strategies/default.rb +6 -0
  87. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  88. data/lib/karafka/routing/builder.rb +12 -2
  89. data/lib/karafka/routing/consumer_group.rb +5 -5
  90. data/lib/karafka/routing/features/base.rb +44 -8
  91. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  93. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  94. data/lib/karafka/routing/subscription_group.rb +2 -2
  95. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  96. data/lib/karafka/routing/topic.rb +8 -10
  97. data/lib/karafka/runner.rb +13 -3
  98. data/lib/karafka/server.rb +5 -9
  99. data/lib/karafka/setup/config.rb +17 -0
  100. data/lib/karafka/status.rb +23 -14
  101. data/lib/karafka/templates/karafka.rb.erb +7 -0
  102. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  103. data/lib/karafka/version.rb +1 -1
  104. data.tar.gz.sig +0 -0
  105. metadata +42 -10
  106. metadata.gz.sig +0 -0
  107. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
data/lib/karafka/pro/iterator/tpl_builder.rb
@@ -25,7 +25,7 @@ module Karafka
       # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
       # @param expanded_topics [Hash] hash with expanded and normalized topics data
       def initialize(consumer, expanded_topics)
-        @consumer = Connection::Proxy.new(consumer)
+        @consumer = ::Karafka::Connection::Proxy.new(consumer)
         @expanded_topics = expanded_topics
        @mapped_topics = Hash.new { |h, k| h[k] = {} }
       end
data/lib/karafka/pro/iterator.rb
@@ -22,11 +22,6 @@ module Karafka
     #
     # It does **not** create a consumer group and does not have any offset management.
     class Iterator
-      # Local partition reference for librdkafka
-      Partition = Struct.new(:partition, :offset)
-
-      private_constant :Partition
-
       # A simple API allowing to iterate over topic/partition data, without having to subscribe
       # and deal with rebalances. This API allows for multi-partition streaming and is optimized
       # for data lookups. It allows for explicit stopping iteration over any partition during
@@ -127,7 +122,7 @@ module Karafka
 
         @current_consumer.pause(
           Rdkafka::Consumer::TopicPartitionList.new(
-            name => [Partition.new(partition, 0)]
+            name => [Rdkafka::Consumer::Partition.new(partition, 0)]
           )
         )
       end
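
These hunks drop the Iterator's local partition struct in favor of rdkafka-ruby's own Rdkafka::Consumer::Partition when pausing a partition. For context, a minimal usage sketch of the Iterator API this code backs (topic name and stop condition are illustrative):

    # Iterate over a topic without forming a consumer group and stop one
    # partition early once enough data has been seen
    iterator = Karafka::Pro::Iterator.new('events')

    iterator.each do |message|
      puts message.payload
      # Stops iterating over the current partition only; others continue
      iterator.stop_current_partition if message.offset >= 100
    end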
data/lib/karafka/pro/loader.rb
@@ -53,6 +53,7 @@ module Karafka
         features.each { |feature| feature.pre_setup(config) }
 
         reconfigure(config)
+        expand
 
         load_topic_features
       end
@@ -82,12 +83,17 @@ module Karafka
 
         icfg.cli.contract = Contracts::ServerCliOptions.new
 
+        # Use manager that supports multiplexing
+        icfg.connection.manager = Connection::Manager.new
+
         icfg.processing.coordinator_class = Processing::Coordinator
         icfg.processing.partitioner_class = Processing::Partitioner
         icfg.processing.scheduler_class = Processing::Schedulers::Default
         icfg.processing.jobs_queue_class = Processing::JobsQueue
+        icfg.processing.executor_class = Processing::Executor
         icfg.processing.jobs_builder = Processing::JobsBuilder.new
         icfg.processing.strategy_selector = Processing::StrategySelector.new
+        icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
 
         icfg.active_job.consumer_class = ActiveJob::Consumer
         icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
@@ -96,6 +102,14 @@ module Karafka
         config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
       end
 
+      # Adds extra modules to certain classes
+      # This expands their functionalities with things that are needed when operating in Pro
+      # It is used only when given class is part of the end user API and cannot be swapped by
+      # a pluggable component
+      def expand
+        Karafka::BaseConsumer.include Pro::BaseConsumer
+      end
+
       # Loads the Pro features of Karafka
       # @note Object space lookup is not the fastest but we do it once during boot, so it's ok
       def load_topic_features
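
The new `expand` step relies on plain module inclusion: `Karafka::BaseConsumer` is end-user API and cannot be swapped like the pluggable components configured above, so Pro capabilities are mixed in at boot instead. A self-contained illustration of the pattern (the classes here are stand-ins, not Karafka's own):

    class BaseConsumer; end

    module ProExpansion
      def tick; end
    end

    # Inclusion at boot: every already-defined and future consumer subclass
    # instantly gains the extra API without any class swapping
    BaseConsumer.include(ProExpansion)
    BaseConsumer.new.respond_to?(:tick) # => true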
data/lib/karafka/pro/processing/coordinator.rb
@@ -42,7 +42,8 @@ module Karafka
 
         @virtual_offset_manager = VirtualOffsetManager.new(
           topic.name,
-          partition
+          partition,
+          topic.virtual_partitions.offset_metadata_strategy
         )
 
         # We register our own "internal" filter to support filtering of messages that were marked
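
The extra constructor argument ties Virtual Partitions to the new Offset Metadata feature: the offset manager now knows which strategy to apply when choosing the metadata that accompanies materialized virtual offsets. A hedged routing sketch, inside `routes.draw` (consumer class, topic, partitioner and the `:exact` strategy value are illustrative assumptions):

    topic :orders do
      consumer OrdersConsumer
      virtual_partitions(
        partitioner: ->(message) { message.headers['order-key'] },
        # Assumed setting name per this diff; selects how offset metadata of
        # materialized virtual offsets is resolved
        offset_metadata_strategy: :exact
      )
    end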
data/lib/karafka/pro/processing/executor.rb (new file)
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro executor that supports periodic jobs
+      class Executor < Karafka::Processing::Executor
+        # Runs the code that should happen before periodic job is scheduled
+        #
+        # @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
+        #   This name was taken for a reason: we may want to introduce periodic ticking also not
+        #   only during polling but for example on wait and a name "poll" would not align well.
+        #   A name "periodic" is not a verb and our other consumer actions are verbs like:
+        #   consume or revoked. So for the sake of consistency we have ticking here.
+        def before_schedule_periodic
+          consumer.on_before_schedule_tick
+        end
+
+        # Triggers consumer ticking
+        def periodic
+          consumer.on_tick
+        end
+      end
+    end
+  end
+end
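
From the consumer's perspective, periodic jobs surface as ticking: the executor calls `#on_before_schedule_tick` and `#on_tick`, which dispatch to a user-defined `#tick`. A hedged sketch of a consumer relying on this (class name and body are illustrative; routing is assumed to enable the feature via a `periodic` flag per the Periodic Job feature files listed above):

    class HousekeepingConsumer < Karafka::BaseConsumer
      def consume
        # regular message processing
      end

      # Invoked via #on_tick when the periodic job runs, even when no new
      # messages arrived for this topic partition
      def tick
        # periodic housekeeping, e.g. flushing buffered state
      end
    end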
data/lib/karafka/pro/processing/expansions_selector.rb (new file)
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Pro selector of appropriate topic setup based features enhancements.
+      class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
+        # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
+        #   expansions
+        # @return [Array<Module>] modules with proper expansions we're suppose to use to enhance
+        #   the consumer
+        def find(topic)
+          # Start with the non-pro expansions
+          expansions = super
+          expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
+          expansions
+        end
+      end
+    end
+  end
+end
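
A rough sketch of how such a selector's result could be applied; the exact wiring lives in the executor and may differ, so treat this as an assumption:

    # Assumed wiring: fetch per-topic expansion modules and mix them into a
    # freshly built consumer instance only
    selector = Karafka::App.config.internal.processing.expansions_selector
    selector.find(topic).each do |expansion|
      consumer.singleton_class.include(expansion)
    end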
data/lib/karafka/pro/processing/jobs/periodic.rb (new file)
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Jobs
+        # Job that represents a "ticking" work. Work that we run periodically for the Periodics
+        # enabled topics.
+        class Periodic < ::Karafka::Processing::Jobs::Base
+          # @param executor [Karafka::Pro::Processing::Executor] pro executor that is suppose to
+          #   run a given job
+          def initialize(executor)
+            @executor = executor
+            super()
+          end
+
+          # Code executed before we schedule this job
+          def before_schedule
+            executor.before_schedule_periodic
+          end
+
+          # Runs the executor periodic action
+          def call
+            executor.periodic
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb (new file)
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module Jobs
+        # Non-Blocking version of the Periodic job
+        # We use this version for LRJ topics for cases where saturated resources would not allow
+        # to run this job for extended period of time. Under such scenarios, if we would not use
+        # a non-blocking one, we would reach max.poll.interval.ms.
+        class PeriodicNonBlocking < Periodic
+          # @param args [Array] any arguments accepted by `::Karafka::Processing::Jobs::Periodic`
+          def initialize(*args)
+            super
+            @non_blocking = true
+          end
+        end
+      end
+    end
+  end
+end
data/lib/karafka/pro/processing/jobs_builder.rb
@@ -16,12 +16,12 @@ module Karafka
     module Processing
       # Pro jobs builder that supports lrj
       class JobsBuilder < ::Karafka::Processing::JobsBuilder
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         def idle(executor)
           Karafka::Processing::Jobs::Idle.new(executor)
         end
 
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @param messages [Karafka::Messages::Messages] messages batch to be consumed
         # @return [Karafka::Processing::Jobs::Consume] blocking job
         # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
@@ -33,7 +33,7 @@ module Karafka
           end
         end
 
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
         # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
         #   non-blocking, so when revocation job is scheduled for LRJ it also will not block
@@ -44,6 +44,17 @@ module Karafka
             super
           end
         end
+
+        # @param executor [Karafka::Pro::Processing::Executor]
+        # @return [Jobs::Periodic] Periodic job
+        # @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
+        def periodic(executor)
+          if executor.topic.long_running_job?
+            Jobs::PeriodicNonBlocking.new(executor)
+          else
+            Jobs::Periodic.new(executor)
+          end
+        end
       end
     end
   end
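
In routing terms, a topic that combines Long-Running Jobs with periodics gets the non-blocking variant automatically, so ticking cannot exhaust `max.poll.interval.ms`. A hedged example inside `routes.draw` (names and the `periodic` flag are illustrative):

    topic :reports do
      consumer ReportsConsumer
      long_running_job true
      # With LRJ enabled, the builder above picks Jobs::PeriodicNonBlocking
      periodic true
    end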
data/lib/karafka/pro/processing/offset_metadata/consumer.rb (new file)
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # Extra API methods for offset metadata fetching
+        # @note Part of this feature API is embedded directly into the strategies because it alters
+        #   how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
+        #   of that, they had to be embedded into the strategies.
+        module Consumer
+          # @param cache [Boolean] should we use cached result if present (true by default)
+          # @return [false, Object] false in case we do not own the partition anymore or
+          #   deserialized metadata based on the deserializer
+          # @note Caching is on as the assumption here is, that most of the time user will be
+          #   interested only in the offset metadata that "came" from the time prior to the
+          #   rebalance. That is because the rest of the metadata (current) is created and
+          #   controlled by the user himself, thus there is no need to retrieve it. In case this
+          #   is not true and user wants to always get the Kafka metadata, `cache` value of this
+          #   feature can be set to false.
+          def offset_metadata(cache: true)
+            return false if revoked?
+
+            Fetcher.find(topic, partition, cache: cache)
+          end
+
+          alias committed_offset_metadata offset_metadata
+        end
+      end
+    end
+  end
+end
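
Since the marking methods accept metadata alongside the message (per the note above), a consumer can persist a small cursor with each commit and read it back after a rebalance. A hedged usage sketch (payload shape and the `process` helper are illustrative):

    def consume
      # Restore state stored next to the committed offset (cached by default)
      checkpoint = offset_metadata

      messages.each do |message|
        process(message) # hypothetical application helper
        # Metadata travels with the offset commit
        mark_as_consumed(message, { 'cursor' => message.offset }.to_json)
      end
    end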
data/lib/karafka/pro/processing/offset_metadata/fetcher.rb (new file)
@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # This fetcher is responsible for fetching and caching committed offsets metadata
+        # information.
+        #
+        # By design we fetch all information for a requested topic assignments. Not all topics from
+        # the same subscription group may need metadata and even if, we can run the few smaller
+        # queries. This approach prevents us from querying all assigned topics data in one go
+        # preventing excessive queries.
+        #
+        # Since the assumption is, that user will not have to reach out for the later metadata
+        # since it is produced in the context of a given consumer assignment, we can cache the
+        # initial result and only allow users for explicit invalidation.
+        class Fetcher
+          include Singleton
+
+          class << self
+            extend Forwardable
+
+            def_delegators :instance, :register, :clear, :find
+          end
+
+          def initialize
+            @mutexes = {}
+            @clients = {}
+            @tpls = {}
+          end
+
+          # Registers a client of a given subscription group, so we can use it for queries later on
+          # @param client [Karafka::Connection::Client]
+          # @note Since we store the client reference and not the underlying rdkafka consumer
+          #   instance, we do not have to deal with the recovery as it is abstracted away
+          def register(client)
+            @clients[client.subscription_group] = client
+            # We use one mutex per SG because independent SGs can query in parallel
+            @mutexes[client.subscription_group] = Mutex.new
+            @tpls[client.subscription_group] = {}
+          end
+
+          # Queries or retrieves from cache the given offset metadata for the selected partition
+          #
+          # @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
+          # @param partition [Integer] partition for which we want to get stored offset metadata
+          # @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
+          #   By default we use the setting from the topic level but this can be overwritten on
+          #   a per request basis if needed.
+          # @return [Object, false] deserialized metadata (string deserializer by default) or
+          #   false in case we were not able to obtain the details because we have lost the
+          #   assignment
+          def find(topic, partition, cache: true)
+            cache = topic.offset_metadata.cache? && cache
+
+            tpls = fetch(topic, cache)
+
+            return false unless tpls
+
+            t_partitions = tpls.fetch(topic.name, [])
+            t_partition = t_partitions.find { |t_p| t_p.partition == partition }
+
+            # If we do not have given topic partition here, it means it is no longer part of our
+            # assignment and we should return false
+            return false unless t_partition
+
+            topic.offset_metadata.deserializer.call(t_partition.metadata)
+          end
+
+          # Clears cache of a given subscription group. It is triggered on assignment changes.
+          #
+          # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
+          #   we want to clear.
+          def clear(subscription_group)
+            @mutexes.fetch(subscription_group).synchronize do
+              @tpls[subscription_group].clear
+            end
+          end
+
+          private
+
+          # Fetches from Kafka all committed offsets for the given topic partitions that are
+          # assigned to this process.
+          #
+          # We fetch all because in majority of the cases, the behavior of the end user code is
+          # not specific to a given partition both same for all. In such cases we save on
+          # querying as we get all data for all partitions in one go.
+          #
+          # @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
+          # @param cache [Boolean] should we return cached data if present
+          def fetch(topic, cache)
+            subscription_group = topic.subscription_group
+            t_tpls = @tpls.fetch(subscription_group, false)
+            t_tpl = t_tpls[topic]
+
+            return t_tpl if t_tpl && cache
+
+            assigned_tpls = @clients.fetch(subscription_group).assignment
+            t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
+
+            # May be false in case we lost given assignment but still run LRJ
+            return false unless t_tpl
+            return false if t_tpl.empty?
+
+            @mutexes.fetch(subscription_group).synchronize do
+              rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
+
+              # While in theory we could lost assignment while being here, this will work and will
+              # return us proper tpl, we do not deal with this case on this layer and report anyhow
+              # There will not be any exception and this will operate correctly
+              t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
+            end
+          end
+        end
+      end
+    end
+  end
+end
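
On the routing side, the fetcher's behavior is driven by the topic-level feature config it references (`offset_metadata.cache?` and `offset_metadata.deserializer`). A hedged configuration sketch inside `routes.draw` (keyword names assumed to mirror that config):

    topic :orders do
      consumer OrdersConsumer
      offset_metadata(
        # Serve cached tpls until explicitly invalidated or bypassed per call
        cache: true,
        # Raw metadata arrives as a string; deserialize it however needed
        deserializer: ->(raw) { JSON.parse(raw) }
      )
    end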
data/lib/karafka/pro/processing/offset_metadata/listener.rb (new file)
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module OffsetMetadata
+        # Keeps track of rebalances and updates the fetcher
+        # Since we cache the tpls with metadata, we need to invalidate them on events that would
+        # cause changes in the assignments
+        class Listener
+          # When we start listening we need to register this client in the metadata fetcher, so
+          # we have the client related to a given subscription group that we can use in fetcher
+          # since fetcher may be used in filtering API and other places outside of the standard
+          # consumer flow
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_before_fetch_loop(event)
+            Fetcher.register event[:client]
+          end
+
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_assigned(event)
+            Fetcher.clear event[:subscription_group]
+          end
+
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_revoked(event)
+            Fetcher.clear event[:subscription_group]
+          end
+        end
+      end
+    end
+  end
+end
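
The listener is a plain instrumentation subscriber; the Pro loader wires it up itself, but a manual subscription would look like this sketch, shown only to illustrate which events drive the fetcher cache lifecycle:

    Karafka.monitor.subscribe(
      Karafka::Pro::Processing::OffsetMetadata::Listener.new
    )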
data/lib/karafka/pro/processing/schedulers/base.rb
@@ -31,9 +31,19 @@ module Karafka
           @mutex = Mutex.new
         end
 
+        # Schedules any jobs provided in a fifo order
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+        def schedule_fifo(jobs_array)
+          jobs_array.each do |job|
+            @queue << job
+          end
+        end
+
         # Runs the consumption jobs scheduling flow under a mutex
         #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
+        # @param jobs_array
+        #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+        #   jobs for scheduling
         def on_schedule_consumption(jobs_array)
           @mutex.synchronize do
             schedule_consumption(jobs_array)
@@ -42,53 +52,59 @@ module Karafka
 
         # Should schedule the consumption jobs
         #
-        # @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
+        # @param _jobs_array
+        #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+        #   jobs for scheduling
         def schedule_consumption(_jobs_array)
           raise NotImplementedError, 'Implement in a subclass'
         end
 
         # Runs the revocation jobs scheduling flow under a mutex
         #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
+        # @param jobs_array
+        #   [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
+        #   jobs for scheduling
         def on_schedule_revocation(jobs_array)
           @mutex.synchronize do
             schedule_revocation(jobs_array)
           end
         end
 
-        # Schedules the revocation jobs.
-        #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
-        #
-        # @note We provide a default scheduler logic here because by default revocation jobs
-        #   should be scheduled as fast as possible.
-        def schedule_revocation(jobs_array)
-          jobs_array.each do |job|
-            @queue << job
-          end
-        end
-
         # Runs the shutdown jobs scheduling flow under a mutex
         #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
         def on_schedule_shutdown(jobs_array)
           @mutex.synchronize do
             schedule_shutdown(jobs_array)
           end
         end
 
-        # Schedules the shutdown jobs.
+        # Runs the idle jobs scheduling flow under a mutex
         #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
+        def on_schedule_idle(jobs_array)
+          @mutex.synchronize do
+            schedule_idle(jobs_array)
+          end
+        end
+
+        # Runs the periodic jobs scheduling flow under a mutex
         #
-        # @note We provide a default scheduler logic here because by default revocation jobs
-        #   should be scheduled as fast as possible.
-        def schedule_shutdown(jobs_array)
-          jobs_array.each do |job|
-            @queue << job
+        # @param jobs_array
+        #   [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
+        #   jobs for scheduling
+        def on_schedule_periodic(jobs_array)
+          @mutex.synchronize do
+            schedule_periodic(jobs_array)
           end
         end
 
+        # Schedule by default all except consumption as fifo
+        alias schedule_revocation schedule_fifo
+        alias schedule_shutdown schedule_fifo
+        alias schedule_idle schedule_fifo
+        alias schedule_periodic schedule_fifo
+
        # Runs the manage tick under mutex
        def on_manage
          @mutex.synchronize { manage }
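
With the FIFO aliases in place, a custom scheduler only has to implement `#schedule_consumption`; revocation, shutdown, idle and periodic jobs inherit the FIFO behavior. A hedged sketch of such a subclass (the priority rule and topic name are illustrative):

    class PriorityScheduler < Karafka::Pro::Processing::Schedulers::Base
      # Push jobs from a hypothetical "critical" topic into the queue first
      def schedule_consumption(jobs_array)
        jobs_array
          .sort_by { |job| job.executor.topic.name == 'critical' ? 0 : 1 }
          .each { |job| @queue << job }
      end
    end

    # Assumed activation via the pluggable component shown in the loader hunk:
    # config.internal.processing.scheduler_class = PriorityScheduler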
data/lib/karafka/pro/processing/schedulers/default.rb
@@ -31,7 +31,9 @@ module Karafka
       class Default < Base
         # Schedules jobs in the LJF order for consumption
         #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
+        # @param jobs_array
+        #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+        #   jobs for scheduling
         def on_schedule_consumption(jobs_array)
           perf_tracker = Instrumentation::PerformanceTracker.instance
 
@@ -53,23 +55,19 @@ module Karafka
           end
         end
 
-        # Schedules jobs in the fifo order
-        #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-        def on_schedule_revocation(jobs_array)
+        # Schedules any jobs provided in a fifo order
+        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+        def schedule_fifo(jobs_array)
           jobs_array.each do |job|
             @queue << job
           end
         end
 
-        # Schedules jobs in the fifo order
-        #
-        # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
-        def on_schedule_shutdown(jobs_array)
-          jobs_array.each do |job|
-            @queue << job
-          end
-        end
+        # By default all non-consumption work is scheduled in a fifo order
+        alias on_schedule_revocation schedule_fifo
+        alias on_schedule_shutdown schedule_fifo
+        alias on_schedule_idle schedule_fifo
+        alias on_schedule_periodic schedule_fifo
 
         # This scheduler does not have anything to manage as it is a pass through and has no
         #   state
@@ -87,7 +85,7 @@ module Karafka
         private
 
         # @param perf_tracker [PerformanceTracker]
-        # @param job [Karafka::Processing::Jobs::Base] job we will be processing
+        # @param job [Karafka::Processing::Jobs::Consume] job we will be processing
         # @return [Numeric] estimated cost of processing this job
         def processing_cost(perf_tracker, job)
           if job.is_a?(::Karafka::Processing::Jobs::Consume)
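
The cost estimate feeds the LJF (longest job first) ordering: consumption jobs predicted to take the longest are enqueued first so they do not straggle at the end of a batch. The core idea, reduced to a sketch (illustrative only; the real method also prices non-consume jobs):

    jobs_array
      .sort_by { |job| -processing_cost(perf_tracker, job) }
      .each { |job| @queue << job }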