karafka 2.2.14 → 2.3.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +16 -16
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/bin/integrations +1 -1
  10. data/config/locales/errors.yml +7 -1
  11. data/config/locales/pro_errors.yml +22 -0
  12. data/docker-compose.yml +1 -1
  13. data/karafka.gemspec +2 -2
  14. data/lib/karafka/admin/acl.rb +287 -0
  15. data/lib/karafka/admin.rb +9 -13
  16. data/lib/karafka/app.rb +5 -3
  17. data/lib/karafka/base_consumer.rb +9 -1
  18. data/lib/karafka/cli/base.rb +1 -1
  19. data/lib/karafka/connection/client.rb +83 -76
  20. data/lib/karafka/connection/conductor.rb +28 -0
  21. data/lib/karafka/connection/listener.rb +159 -42
  22. data/lib/karafka/connection/listeners_batch.rb +5 -11
  23. data/lib/karafka/connection/manager.rb +72 -0
  24. data/lib/karafka/connection/messages_buffer.rb +12 -0
  25. data/lib/karafka/connection/proxy.rb +17 -0
  26. data/lib/karafka/connection/status.rb +75 -0
  27. data/lib/karafka/contracts/config.rb +14 -10
  28. data/lib/karafka/contracts/consumer_group.rb +9 -1
  29. data/lib/karafka/contracts/topic.rb +3 -1
  30. data/lib/karafka/errors.rb +17 -0
  31. data/lib/karafka/instrumentation/logger_listener.rb +3 -0
  32. data/lib/karafka/instrumentation/notifications.rb +13 -5
  33. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  34. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +20 -1
  35. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  36. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  37. data/lib/karafka/pro/base_consumer.rb +47 -0
  38. data/lib/karafka/pro/connection/manager.rb +269 -0
  39. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  40. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  41. data/lib/karafka/pro/iterator.rb +1 -6
  42. data/lib/karafka/pro/loader.rb +14 -0
  43. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  44. data/lib/karafka/pro/processing/executor.rb +37 -0
  45. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  46. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  47. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  49. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  50. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  51. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  52. data/lib/karafka/pro/processing/schedulers/base.rb +39 -23
  53. data/lib/karafka/pro/processing/schedulers/default.rb +12 -14
  54. data/lib/karafka/pro/processing/strategies/default.rb +154 -1
  55. data/lib/karafka/pro/processing/strategies/dlq/default.rb +39 -0
  56. data/lib/karafka/pro/processing/strategies/vp/default.rb +65 -25
  57. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  58. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  59. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  60. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  61. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  62. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  63. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  65. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  66. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  67. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  68. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  69. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  70. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  71. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  72. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  73. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  74. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  75. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  76. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  77. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  78. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  79. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  80. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  81. data/lib/karafka/process.rb +5 -3
  82. data/lib/karafka/processing/coordinator.rb +5 -1
  83. data/lib/karafka/processing/executor.rb +16 -10
  84. data/lib/karafka/processing/executors_buffer.rb +19 -4
  85. data/lib/karafka/processing/schedulers/default.rb +3 -2
  86. data/lib/karafka/processing/strategies/default.rb +6 -0
  87. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  88. data/lib/karafka/routing/builder.rb +12 -2
  89. data/lib/karafka/routing/consumer_group.rb +5 -5
  90. data/lib/karafka/routing/features/base.rb +44 -8
  91. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  92. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  93. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  94. data/lib/karafka/routing/subscription_group.rb +2 -2
  95. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  96. data/lib/karafka/routing/topic.rb +8 -10
  97. data/lib/karafka/runner.rb +13 -3
  98. data/lib/karafka/server.rb +5 -9
  99. data/lib/karafka/setup/config.rb +17 -0
  100. data/lib/karafka/status.rb +23 -14
  101. data/lib/karafka/templates/karafka.rb.erb +7 -0
  102. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  103. data/lib/karafka/version.rb +1 -1
  104. data.tar.gz.sig +0 -0
  105. metadata +42 -10
  106. metadata.gz.sig +0 -0
  107. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
@@ -25,7 +25,7 @@ module Karafka
25
25
  # @param consumer [::Rdkafka::Consumer] consumer instance needed to talk with Kafka
26
26
  # @param expanded_topics [Hash] hash with expanded and normalized topics data
27
27
  def initialize(consumer, expanded_topics)
28
- @consumer = Connection::Proxy.new(consumer)
28
+ @consumer = ::Karafka::Connection::Proxy.new(consumer)
29
29
  @expanded_topics = expanded_topics
30
30
  @mapped_topics = Hash.new { |h, k| h[k] = {} }
31
31
  end
@@ -22,11 +22,6 @@ module Karafka
22
22
  #
23
23
  # It does **not** create a consumer group and does not have any offset management.
24
24
  class Iterator
25
- # Local partition reference for librdkafka
26
- Partition = Struct.new(:partition, :offset)
27
-
28
- private_constant :Partition
29
-
30
25
  # A simple API allowing to iterate over topic/partition data, without having to subscribe
31
26
  # and deal with rebalances. This API allows for multi-partition streaming and is optimized
32
27
  # for data lookups. It allows for explicit stopping iteration over any partition during
@@ -127,7 +122,7 @@ module Karafka
127
122
 
128
123
  @current_consumer.pause(
129
124
  Rdkafka::Consumer::TopicPartitionList.new(
130
- name => [Partition.new(partition, 0)]
125
+ name => [Rdkafka::Consumer::Partition.new(partition, 0)]
131
126
  )
132
127
  )
133
128
  end
@@ -53,6 +53,7 @@ module Karafka
53
53
  features.each { |feature| feature.pre_setup(config) }
54
54
 
55
55
  reconfigure(config)
56
+ expand
56
57
 
57
58
  load_topic_features
58
59
  end
@@ -82,12 +83,17 @@ module Karafka
82
83
 
83
84
  icfg.cli.contract = Contracts::ServerCliOptions.new
84
85
 
86
+ # Use manager that supports multiplexing
87
+ icfg.connection.manager = Connection::Manager.new
88
+
85
89
  icfg.processing.coordinator_class = Processing::Coordinator
86
90
  icfg.processing.partitioner_class = Processing::Partitioner
87
91
  icfg.processing.scheduler_class = Processing::Schedulers::Default
88
92
  icfg.processing.jobs_queue_class = Processing::JobsQueue
93
+ icfg.processing.executor_class = Processing::Executor
89
94
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
90
95
  icfg.processing.strategy_selector = Processing::StrategySelector.new
96
+ icfg.processing.expansions_selector = Processing::ExpansionsSelector.new
91
97
 
92
98
  icfg.active_job.consumer_class = ActiveJob::Consumer
93
99
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
@@ -96,6 +102,14 @@ module Karafka
96
102
  config.monitor.subscribe(Instrumentation::PerformanceTracker.instance)
97
103
  end
98
104
 
105
+ # Adds extra modules to certain classes
106
+ # This expands their functionalities with things that are needed when operating in Pro
107
+ # It is used only when given class is part of the end user API and cannot be swapped by
108
+ # a pluggable component
109
+ def expand
110
+ Karafka::BaseConsumer.include Pro::BaseConsumer
111
+ end
112
+
99
113
  # Loads the Pro features of Karafka
100
114
  # @note Object space lookup is not the fastest but we do it once during boot, so it's ok
101
115
  def load_topic_features
@@ -42,7 +42,8 @@ module Karafka
42
42
 
43
43
  @virtual_offset_manager = VirtualOffsetManager.new(
44
44
  topic.name,
45
- partition
45
+ partition,
46
+ topic.virtual_partitions.offset_metadata_strategy
46
47
  )
47
48
 
48
49
  # We register our own "internal" filter to support filtering of messages that were marked
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Pro executor that supports periodic jobs
18
+ class Executor < Karafka::Processing::Executor
19
+ # Runs the code that should happen before periodic job is scheduled
20
+ #
21
+ # @note While jobs are called `Periodic`, from the consumer perspective it is "ticking".
22
+ # This name was taken for a reason: we may want to introduce periodic ticking also not
23
+ # only during polling but for example on wait and a name "poll" would not align well.
24
+ # A name "periodic" is not a verb and our other consumer actions are verbs like:
25
+ # consume or revoked. So for the sake of consistency we have ticking here.
26
+ def before_schedule_periodic
27
+ consumer.on_before_schedule_tick
28
+ end
29
+
30
+ # Triggers consumer ticking
31
+ def periodic
32
+ consumer.on_tick
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Pro selector of appropriate topic setup based features enhancements.
18
+ class ExpansionsSelector < Karafka::Processing::ExpansionsSelector
19
+ # @param topic [Karafka::Routing::Topic] topic with settings based on which we find
20
+ # expansions
21
+ # @return [Array<Module>] modules with proper expansions we're supposed to use to enhance
22
+ # the consumer
23
+ def find(topic)
24
+ # Start with the non-pro expansions
25
+ expansions = super
26
+ expansions << Pro::Processing::OffsetMetadata::Consumer if topic.offset_metadata?
27
+ expansions
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Job that represents a "ticking" work. Work that we run periodically for the Periodics
19
+ # enabled topics.
20
+ class Periodic < ::Karafka::Processing::Jobs::Base
21
+ # @param executor [Karafka::Pro::Processing::Executor] pro executor that is supposed to
22
+ # run a given job
23
+ def initialize(executor)
24
+ @executor = executor
25
+ super()
26
+ end
27
+
28
+ # Code executed before we schedule this job
29
+ def before_schedule
30
+ executor.before_schedule_periodic
31
+ end
32
+
33
+ # Runs the executor periodic action
34
+ def call
35
+ executor.periodic
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module Jobs
18
+ # Non-Blocking version of the Periodic job
19
+ # We use this version for LRJ topics for cases where saturated resources would not allow
20
+ # to run this job for extended period of time. Under such scenarios, if we would not use
21
+ # a non-blocking one, we would reach max.poll.interval.ms.
22
+ class PeriodicNonBlocking < Periodic
23
+ # @param args [Array] any arguments accepted by `::Karafka::Pro::Processing::Jobs::Periodic`
24
+ def initialize(*args)
25
+ super
26
+ @non_blocking = true
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -16,12 +16,12 @@ module Karafka
16
16
  module Processing
17
17
  # Pro jobs builder that supports lrj
18
18
  class JobsBuilder < ::Karafka::Processing::JobsBuilder
19
- # @param executor [Karafka::Processing::Executor]
19
+ # @param executor [Karafka::Pro::Processing::Executor]
20
20
  def idle(executor)
21
21
  Karafka::Processing::Jobs::Idle.new(executor)
22
22
  end
23
23
 
24
- # @param executor [Karafka::Processing::Executor]
24
+ # @param executor [Karafka::Pro::Processing::Executor]
25
25
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
26
26
  # @return [Karafka::Processing::Jobs::Consume] blocking job
27
27
  # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
@@ -33,7 +33,7 @@ module Karafka
33
33
  end
34
34
  end
35
35
 
36
- # @param executor [Karafka::Processing::Executor]
36
+ # @param executor [Karafka::Pro::Processing::Executor]
37
37
  # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
38
38
  # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
39
39
  # non-blocking, so when revocation job is scheduled for LRJ it also will not block
@@ -44,6 +44,17 @@ module Karafka
44
44
  super
45
45
  end
46
46
  end
47
+
48
+ # @param executor [Karafka::Pro::Processing::Executor]
49
+ # @return [Jobs::Periodic] Periodic job
50
+ # @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
51
+ def periodic(executor)
52
+ if executor.topic.long_running_job?
53
+ Jobs::PeriodicNonBlocking.new(executor)
54
+ else
55
+ Jobs::Periodic.new(executor)
56
+ end
57
+ end
47
58
  end
48
59
  end
49
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Offset Metadata support on the processing side
18
+ module OffsetMetadata
19
+ # Extra API methods for offset metadata fetching
20
+ # @note Part of this feature API is embedded directly into the strategies because it alters
21
+ # how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
22
+ # of that, they had to be embedded into the strategies.
23
+ module Consumer
24
+ # @param cache [Boolean] should we use cached result if present (true by default)
25
+ # @return [false, Object] false in case we do not own the partition anymore or
26
+ # deserialized metadata based on the deserializer
27
+ # @note Caching is on as the assumption here is, that most of the time user will be
28
+ # interested only in the offset metadata that "came" from the time prior to the
29
+ # rebalance. That is because the rest of the metadata (current) is created and
30
+ # controlled by the user himself, thus there is no need to retrieve it. In case this
31
+ # is not true and user wants to always get the Kafka metadata, `cache` value of this
32
+ # feature can be set to false.
33
+ def offset_metadata(cache: true)
34
+ return false if revoked?
35
+
36
+ Fetcher.find(topic, partition, cache: cache)
37
+ end
38
+
39
+ alias committed_offset_metadata offset_metadata
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Offset Metadata support on the processing side
18
+ module OffsetMetadata
19
+ # This fetcher is responsible for fetching and caching committed offsets metadata
20
+ # information.
21
+ #
22
+ # By design we fetch all information for a requested topic's assignments. Not all topics from
23
+ # the same subscription group may need metadata and even if, we can run the few smaller
24
+ # queries. This approach prevents us from querying all assigned topics data in one go
25
+ # preventing excessive queries.
26
+ #
27
+ # Since the assumption is, that user will not have to reach out for the later metadata
28
+ # since it is produced in the context of a given consumer assignment, we can cache the
29
+ # initial result and only allow users for explicit invalidation.
30
+ class Fetcher
31
+ include Singleton
32
+
33
+ class << self
34
+ extend Forwardable
35
+
36
+ def_delegators :instance, :register, :clear, :find
37
+ end
38
+
39
+ def initialize
40
+ @mutexes = {}
41
+ @clients = {}
42
+ @tpls = {}
43
+ end
44
+
45
+ # Registers a client of a given subscription group, so we can use it for queries later on
46
+ # @param client [Karafka::Connection::Client]
47
+ # @note Since we store the client reference and not the underlying rdkafka consumer
48
+ # instance, we do not have to deal with the recovery as it is abstracted away
49
+ def register(client)
50
+ @clients[client.subscription_group] = client
51
+ # We use one mutex per SG because independent SGs can query in parallel
52
+ @mutexes[client.subscription_group] = Mutex.new
53
+ @tpls[client.subscription_group] = {}
54
+ end
55
+
56
+ # Queries or retrieves from cache the given offset metadata for the selected partition
57
+ #
58
+ # @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
59
+ # @param partition [Integer] partition for which we want to get stored offset metadata
60
+ # @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
61
+ # By default we use the setting from the topic level but this can be overwritten on
62
+ # a per request basis if needed.
63
+ # @return [Object, false] deserialized metadata (string deserializer by default) or
64
+ # false in case we were not able to obtain the details because we have lost the
65
+ # assignment
66
+ def find(topic, partition, cache: true)
67
+ cache = topic.offset_metadata.cache? && cache
68
+
69
+ tpls = fetch(topic, cache)
70
+
71
+ return false unless tpls
72
+
73
+ t_partitions = tpls.fetch(topic.name, [])
74
+ t_partition = t_partitions.find { |t_p| t_p.partition == partition }
75
+
76
+ # If we do not have given topic partition here, it means it is no longer part of our
77
+ # assignment and we should return false
78
+ return false unless t_partition
79
+
80
+ topic.offset_metadata.deserializer.call(t_partition.metadata)
81
+ end
82
+
83
+ # Clears cache of a given subscription group. It is triggered on assignment changes.
84
+ #
85
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
86
+ # we want to clear.
87
+ def clear(subscription_group)
88
+ @mutexes.fetch(subscription_group).synchronize do
89
+ @tpls[subscription_group].clear
90
+ end
91
+ end
92
+
93
+ private
94
+
95
+ # Fetches from Kafka all committed offsets for the given topic partitions that are
96
+ # assigned to this process.
97
+ #
98
+ # We fetch all because in majority of the cases, the behavior of the end user code is
99
+ # not specific to a given partition but the same for all. In such cases we save on
100
+ # querying as we get all data for all partitions in one go.
101
+ #
102
+ # @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
103
+ # @param cache [Boolean] should we return cached data if present
104
+ def fetch(topic, cache)
105
+ subscription_group = topic.subscription_group
106
+ t_tpls = @tpls.fetch(subscription_group, false)
107
+ t_tpl = t_tpls[topic]
108
+
109
+ return t_tpl if t_tpl && cache
110
+
111
+ assigned_tpls = @clients.fetch(subscription_group).assignment
112
+ t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
113
+
114
+ # May be false in case we lost given assignment but still run LRJ
115
+ return false unless t_tpl
116
+ return false if t_tpl.empty?
117
+
118
+ @mutexes.fetch(subscription_group).synchronize do
119
+ rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
120
+
121
+ # While in theory we could lose the assignment while being here, this will work and will
122
+ # return us proper tpl, we do not deal with this case on this layer and report anyhow
123
+ # There will not be any exception and this will operate correctly
124
+ t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module OffsetMetadata
18
+ # Keeps track of rebalances and updates the fetcher
19
+ # Since we cache the tpls with metadata, we need to invalidate them on events that would
20
+ # cause changes in the assignments
21
+ class Listener
22
+ # When we start listening we need to register this client in the metadata fetcher, so
23
+ # we have the client related to a given subscription group that we can use in fetcher
24
+ # since fetcher may be used in filtering API and other places outside of the standard
25
+ # consumer flow
26
+ # @param event [Karafka::Core::Monitoring::Event]
27
+ def on_connection_listener_before_fetch_loop(event)
28
+ Fetcher.register event[:client]
29
+ end
30
+
31
+ # Invalidates internal cache when assignments change so we can get correct metadata
32
+ # @param event [Karafka::Core::Monitoring::Event]
33
+ def on_rebalance_partitions_assigned(event)
34
+ Fetcher.clear event[:subscription_group]
35
+ end
36
+
37
+ # Invalidates internal cache when assignments change so we can get correct metadata
38
+ # @param event [Karafka::Core::Monitoring::Event]
39
+ def on_rebalance_partitions_revoked(event)
40
+ Fetcher.clear event[:subscription_group]
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -31,9 +31,19 @@ module Karafka
31
31
  @mutex = Mutex.new
32
32
  end
33
33
 
34
+ # Schedules any jobs provided in a fifo order
35
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
36
+ def schedule_fifo(jobs_array)
37
+ jobs_array.each do |job|
38
+ @queue << job
39
+ end
40
+ end
41
+
34
42
  # Runs the consumption jobs scheduling flow under a mutex
35
43
  #
36
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
44
+ # @param jobs_array
45
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
46
+ # jobs for scheduling
37
47
  def on_schedule_consumption(jobs_array)
38
48
  @mutex.synchronize do
39
49
  schedule_consumption(jobs_array)
@@ -42,53 +52,59 @@ module Karafka
42
52
 
43
53
  # Should schedule the consumption jobs
44
54
  #
45
- # @param _jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
55
+ # @param _jobs_array
56
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
57
+ # jobs for scheduling
46
58
  def schedule_consumption(_jobs_array)
47
59
  raise NotImplementedError, 'Implement in a subclass'
48
60
  end
49
61
 
50
62
  # Runs the revocation jobs scheduling flow under a mutex
51
63
  #
52
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
64
+ # @param jobs_array
65
+ # [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
66
+ # jobs for scheduling
53
67
  def on_schedule_revocation(jobs_array)
54
68
  @mutex.synchronize do
55
69
  schedule_revocation(jobs_array)
56
70
  end
57
71
  end
58
72
 
59
- # Schedules the revocation jobs.
60
- #
61
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
62
- #
63
- # @note We provide a default scheduler logic here because by default revocation jobs
64
- # should be scheduled as fast as possible.
65
- def schedule_revocation(jobs_array)
66
- jobs_array.each do |job|
67
- @queue << job
68
- end
69
- end
70
-
71
73
  # Runs the shutdown jobs scheduling flow under a mutex
72
74
  #
73
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
75
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
74
76
  def on_schedule_shutdown(jobs_array)
75
77
  @mutex.synchronize do
76
78
  schedule_shutdown(jobs_array)
77
79
  end
78
80
  end
79
81
 
80
- # Schedules the shutdown jobs.
82
+ # Runs the idle jobs scheduling flow under a mutex
81
83
  #
82
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs for scheduling
84
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
85
+ def on_schedule_idle(jobs_array)
86
+ @mutex.synchronize do
87
+ schedule_idle(jobs_array)
88
+ end
89
+ end
90
+
91
+ # Runs the periodic jobs scheduling flow under a mutex
83
92
  #
84
- # @note We provide a default scheduler logic here because by default revocation jobs
85
- # should be scheduled as fast as possible.
86
- def schedule_shutdown(jobs_array)
87
- jobs_array.each do |job|
88
- @queue << job
93
+ # @param jobs_array
94
+ # [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
95
+ # jobs for scheduling
96
+ def on_schedule_periodic(jobs_array)
97
+ @mutex.synchronize do
98
+ schedule_periodic(jobs_array)
89
99
  end
90
100
  end
91
101
 
102
+ # Schedule by default all except consumption as fifo
103
+ alias schedule_revocation schedule_fifo
104
+ alias schedule_shutdown schedule_fifo
105
+ alias schedule_idle schedule_fifo
106
+ alias schedule_periodic schedule_fifo
107
+
92
108
  # Runs the manage tick under mutex
93
109
  def on_manage
94
110
  @mutex.synchronize { manage }
@@ -31,7 +31,9 @@ module Karafka
31
31
  class Default < Base
32
32
  # Schedules jobs in the LJF order for consumption
33
33
  #
34
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
34
+ # @param jobs_array
35
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
36
+ # jobs for scheduling
35
37
  def on_schedule_consumption(jobs_array)
36
38
  perf_tracker = Instrumentation::PerformanceTracker.instance
37
39
 
@@ -53,23 +55,19 @@ module Karafka
53
55
  end
54
56
  end
55
57
 
56
- # Schedules jobs in the fifo order
57
- #
58
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
59
- def on_schedule_revocation(jobs_array)
58
+ # Schedules any jobs provided in a fifo order
59
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
60
+ def schedule_fifo(jobs_array)
60
61
  jobs_array.each do |job|
61
62
  @queue << job
62
63
  end
63
64
  end
64
65
 
65
- # Schedules jobs in the fifo order
66
- #
67
- # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
68
- def on_schedule_shutdown(jobs_array)
69
- jobs_array.each do |job|
70
- @queue << job
71
- end
72
- end
66
+ # By default all non-consumption work is scheduled in a fifo order
67
+ alias on_schedule_revocation schedule_fifo
68
+ alias on_schedule_shutdown schedule_fifo
69
+ alias on_schedule_idle schedule_fifo
70
+ alias on_schedule_periodic schedule_fifo
73
71
 
74
72
  # This scheduler does not have anything to manage as it is a pass through and has no
75
73
  # state
@@ -87,7 +85,7 @@ module Karafka
87
85
  private
88
86
 
89
87
  # @param perf_tracker [PerformanceTracker]
90
- # @param job [Karafka::Processing::Jobs::Base] job we will be processing
88
+ # @param job [Karafka::Processing::Jobs::Consume] job we will be processing
91
89
  # @return [Numeric] estimated cost of processing this job
92
90
  def processing_cost(perf_tracker, job)
93
91
  if job.is_a?(::Karafka::Processing::Jobs::Consume)