karafka 2.2.13 → 2.3.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -16,12 +16,12 @@ module Karafka
16
16
  module Processing
17
17
  # Pro jobs builder that supports lrj
18
18
  class JobsBuilder < ::Karafka::Processing::JobsBuilder
19
- # @param executor [Karafka::Processing::Executor]
19
+ # @param executor [Karafka::Pro::Processing::Executor]
20
20
  def idle(executor)
21
21
  Karafka::Processing::Jobs::Idle.new(executor)
22
22
  end
23
23
 
24
- # @param executor [Karafka::Processing::Executor]
24
+ # @param executor [Karafka::Pro::Processing::Executor]
25
25
  # @param messages [Karafka::Messages::Messages] messages batch to be consumed
26
26
  # @return [Karafka::Processing::Jobs::Consume] blocking job
27
27
  # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
@@ -33,7 +33,7 @@ module Karafka
33
33
  end
34
34
  end
35
35
 
36
- # @param executor [Karafka::Processing::Executor]
36
+ # @param executor [Karafka::Pro::Processing::Executor]
37
37
  # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
38
38
  # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
39
39
  # non-blocking, so when revocation job is scheduled for LRJ it also will not block
@@ -44,6 +44,17 @@ module Karafka
44
44
  super
45
45
  end
46
46
  end
47
+
48
+ # @param executor [Karafka::Pro::Processing::Executor]
49
+ # @return [Jobs::Periodic] Periodic job
50
+ # @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
51
+ def periodic(executor)
52
+ if executor.topic.long_running_job?
53
+ Jobs::PeriodicNonBlocking.new(executor)
54
+ else
55
+ Jobs::Periodic.new(executor)
56
+ end
57
+ end
47
58
  end
48
59
  end
49
60
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Offset Metadata support on the processing side
18
+ module OffsetMetadata
19
+ # Extra API methods for offset metadata fetching
20
+ # @note Part of this feature API is embedded directly into the strategies because it alters
21
+ # how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
22
+ # of that, they had to be embedded into the strategies.
23
+ module Consumer
24
+ # @param cache [Boolean] should we use cached result if present (true by default)
25
+ # @return [false, Object] false in case we do not own the partition anymore or
26
+ # deserialized metadata based on the deserializer
27
+ # @note Caching is on as the assumption here is, that most of the time user will be
28
+ # interested only in the offset metadata that "came" from the time prior to the
29
+ # rebalance. That is because the rest of the metadata (current) is created and
30
+ # controlled by the user himself, thus there is no need to retrieve it. In case this
31
+ # is not true and user wants to always get the Kafka metadata, `cache` value of this
32
+ # feature can be set to false.
33
+ def offset_metadata(cache: true)
34
+ return false if revoked?
35
+
36
+ Fetcher.find(topic, partition, cache: cache)
37
+ end
38
+
39
+ alias committed_offset_metadata offset_metadata
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Offset Metadata support on the processing side
18
+ module OffsetMetadata
19
+ # This fetcher is responsible for fetching and caching committed offsets metadata
20
+ # information.
21
+ #
22
+ # By design we fetch all information for a requested topic's assignments. Not all topics from
23
+ # the same subscription group may need metadata and even if they do, we can run a few smaller
24
+ # queries. This approach prevents us from querying all assigned topics data in one go
25
+ # preventing excessive queries.
26
+ #
27
+ # Since the assumption is, that user will not have to reach out for the later metadata
28
+ # since it is produced in the context of a given consumer assignment, we can cache the
29
+ # initial result and only allow users for explicit invalidation.
30
+ class Fetcher
31
+ include Singleton
32
+
33
+ class << self
34
+ extend Forwardable
35
+
36
+ def_delegators :instance, :register, :clear, :find
37
+ end
38
+
39
+ def initialize
40
+ @mutexes = {}
41
+ @clients = {}
42
+ @tpls = {}
43
+ end
44
+
45
+ # Registers a client of a given subscription group, so we can use it for queries later on
46
+ # @param client [Karafka::Connection::Client]
47
+ # @note Since we store the client reference and not the underlying rdkafka consumer
48
+ # instance, we do not have to deal with the recovery as it is abstracted away
49
+ def register(client)
50
+ @clients[client.subscription_group] = client
51
+ # We use one mutex per SG because independent SGs can query in parallel
52
+ @mutexes[client.subscription_group] = Mutex.new
53
+ @tpls[client.subscription_group] = {}
54
+ end
55
+
56
+ # Queries or retrieves from cache the given offset metadata for the selected partition
57
+ #
58
+ # @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
59
+ # @param partition [Integer] partition for which we want to get stored offset metadata
60
+ # @param cache [Boolean] forces explicit query to Kafka when false and cache refresh.
61
+ # By default we use the setting from the topic level but this can be overwritten on
62
+ # a per request basis if needed.
63
+ # @return [Object, false] deserialized metadata (string deserializer by default) or
64
+ # false in case we were not able to obtain the details because we have lost the
65
+ # assignment
66
+ def find(topic, partition, cache: true)
67
+ cache = topic.offset_metadata.cache? && cache
68
+
69
+ tpls = fetch(topic, cache)
70
+
71
+ return false unless tpls
72
+
73
+ t_partitions = tpls.fetch(topic.name, [])
74
+ t_partition = t_partitions.find { |t_p| t_p.partition == partition }
75
+
76
+ # If we do not have given topic partition here, it means it is no longer part of our
77
+ # assignment and we should return false
78
+ return false unless t_partition
79
+
80
+ topic.offset_metadata.deserializer.call(t_partition.metadata)
81
+ end
82
+
83
+ # Clears cache of a given subscription group. It is triggered on assignment changes.
84
+ #
85
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group that
86
+ # we want to clear.
87
+ def clear(subscription_group)
88
+ @mutexes.fetch(subscription_group).synchronize do
89
+ @tpls[subscription_group].clear
90
+ end
91
+ end
92
+
93
+ private
94
+
95
+ # Fetches from Kafka all committed offsets for the given topic partitions that are
96
+ # assigned to this process.
97
+ #
98
+ # We fetch all because in majority of the cases, the behavior of the end user code is
99
+ # not specific to a given partition but the same for all. In such cases we save on
100
+ # querying as we get all data for all partitions in one go.
101
+ #
102
+ # @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
103
+ # @param cache [Boolean] should we return cached data if present
104
+ def fetch(topic, cache)
105
+ subscription_group = topic.subscription_group
106
+ t_tpls = @tpls.fetch(subscription_group, false)
107
+ t_tpl = t_tpls[topic]
108
+
109
+ return t_tpl if t_tpl && cache
110
+
111
+ assigned_tpls = @clients.fetch(subscription_group).assignment
112
+ t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
113
+
114
+ # May be false in case we lost given assignment but still run LRJ
115
+ return false unless t_tpl
116
+ return false if t_tpl.empty?
117
+
118
+ @mutexes.fetch(subscription_group).synchronize do
119
+ rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
120
+
121
+ # While in theory we could lose the assignment while being here, this will work and will
122
+ # return us proper tpl, we do not deal with this case on this layer and report anyhow
123
+ # There will not be any exception and this will operate correctly
124
+ t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ module OffsetMetadata
18
+ # Keeps track of rebalances and updates the fetcher
19
+ # Since we cache the tpls with metadata, we need to invalidate them on events that would
20
+ # cause changes in the assignments
21
+ class Listener
22
+ # When we start listening we need to register this client in the metadata fetcher, so
23
+ # we have the client related to a given subscription group that we can use in fetcher
24
+ # since fetcher may be used in filtering API and other places outside of the standard
25
+ # consumer flow
26
+ # @param event [Karafka::Core::Monitoring::Event]
27
+ def on_connection_listener_before_fetch_loop(event)
28
+ Fetcher.register event[:client]
29
+ end
30
+
31
+ # Invalidates internal cache when assignments change so we can get correct metadata
32
+ # @param event [Karafka::Core::Monitoring::Event]
33
+ def on_rebalance_partitions_assigned(event)
34
+ Fetcher.clear event[:subscription_group]
35
+ end
36
+
37
+ # Invalidates internal cache when assignments change so we can get correct metadata
38
+ # @param event [Karafka::Core::Monitoring::Event]
39
+ def on_rebalance_partitions_revoked(event)
40
+ Fetcher.clear event[:subscription_group]
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers related components
18
+ module Schedulers
19
+ # Base for all the Pro custom schedulers
20
+ #
21
+ # It wraps the Scheduler API with mutex to ensure, that during scheduling we do not start
22
+ # scheduling other work that could impact the decision making in between multiple
23
+ # subscription groups running in separate threads.
24
+ #
25
+ # @note All the `on_` methods can be redefined with non-thread-safe versions without
26
+ # locks if needed, however when doing so, ensure that your scheduler is stateless.
27
+ class Base
28
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
29
+ def initialize(queue)
30
+ @queue = queue
31
+ @mutex = Mutex.new
32
+ end
33
+
34
+ # Schedules any jobs provided in a fifo order
35
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
36
+ def schedule_fifo(jobs_array)
37
+ jobs_array.each do |job|
38
+ @queue << job
39
+ end
40
+ end
41
+
42
+ # Runs the consumption jobs scheduling flow under a mutex
43
+ #
44
+ # @param jobs_array
45
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
46
+ # jobs for scheduling
47
+ def on_schedule_consumption(jobs_array)
48
+ @mutex.synchronize do
49
+ schedule_consumption(jobs_array)
50
+ end
51
+ end
52
+
53
+ # Should schedule the consumption jobs
54
+ #
55
+ # @param _jobs_array
56
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
57
+ # jobs for scheduling
58
+ def schedule_consumption(_jobs_array)
59
+ raise NotImplementedError, 'Implement in a subclass'
60
+ end
61
+
62
+ # Runs the revocation jobs scheduling flow under a mutex
63
+ #
64
+ # @param jobs_array
65
+ # [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
66
+ # jobs for scheduling
67
+ def on_schedule_revocation(jobs_array)
68
+ @mutex.synchronize do
69
+ schedule_revocation(jobs_array)
70
+ end
71
+ end
72
+
73
+ # Runs the shutdown jobs scheduling flow under a mutex
74
+ #
75
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
76
+ def on_schedule_shutdown(jobs_array)
77
+ @mutex.synchronize do
78
+ schedule_shutdown(jobs_array)
79
+ end
80
+ end
81
+
82
+ # Runs the idle jobs scheduling flow under a mutex
83
+ #
84
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
85
+ def on_schedule_idle(jobs_array)
86
+ @mutex.synchronize do
87
+ schedule_idle(jobs_array)
88
+ end
89
+ end
90
+
91
+ # Runs the periodic jobs scheduling flow under a mutex
92
+ #
93
+ # @param jobs_array
94
+ # [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
95
+ # jobs for scheduling
96
+ def on_schedule_periodic(jobs_array)
97
+ @mutex.synchronize do
98
+ schedule_periodic(jobs_array)
99
+ end
100
+ end
101
+
102
+ # Schedule by default all except consumption as fifo
103
+ alias schedule_revocation schedule_fifo
104
+ alias schedule_shutdown schedule_fifo
105
+ alias schedule_idle schedule_fifo
106
+ alias schedule_periodic schedule_fifo
107
+
108
+ # Runs the manage tick under mutex
109
+ def on_manage
110
+ @mutex.synchronize { manage }
111
+ end
112
+
113
+ # Should manage scheduling on jobs state changes
114
+ #
115
+ # By default does nothing as default schedulers are stateless
116
+ def manage
117
+ nil
118
+ end
119
+
120
+ # Runs clearing under mutex
121
+ #
122
+ # @param group_id [String] Subscription group id
123
+ def on_clear(group_id)
124
+ @mutex.synchronize { clear(group_id) }
125
+ end
126
+
127
+ # By default schedulers are stateless, so nothing to clear.
128
+ #
129
+ # @param _group_id [String] Subscription group id
130
+ def clear(_group_id)
131
+ nil
132
+ end
133
+
134
+ private
135
+
136
+ # @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
137
+ # inside of the scheduler
138
+ attr_reader :queue
139
+ end
140
+ end
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Processing
17
+ # Namespace for Pro schedulers
18
+ module Schedulers
19
+ # Optimized scheduler that takes into consideration the execution time needed to process
20
+ # messages from given topics partitions. It uses the non-preemptive LJF algorithm
21
+ #
22
+ # This scheduler is designed to optimize execution times on jobs that perform IO operations
23
+ # as when taking IO into consideration, it can achieve optimized parallel processing.
24
+ #
25
+ # This scheduler can also work with virtual partitions.
26
+ #
27
+ # Aside from consumption jobs, other jobs do not run often, thus we can leave them with
28
+ # default FIFO scheduler from the default Karafka scheduler
29
+ #
30
+ # @note This is a stateless scheduler, thus we can override the `#on_` API.
31
+ class Default < Base
32
+ # Schedules jobs in the LJF order for consumption
33
+ #
34
+ # @param jobs_array
35
+ # [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
36
+ # jobs for scheduling
37
+ def on_schedule_consumption(jobs_array)
38
+ perf_tracker = Instrumentation::PerformanceTracker.instance
39
+
40
+ ordered = []
41
+
42
+ jobs_array.each do |job|
43
+ ordered << [
44
+ job,
45
+ processing_cost(perf_tracker, job)
46
+ ]
47
+ end
48
+
49
+ ordered.sort_by!(&:last)
50
+ ordered.reverse!
51
+ ordered.map!(&:first)
52
+
53
+ ordered.each do |job|
54
+ @queue << job
55
+ end
56
+ end
57
+
58
+ # Schedules any jobs provided in a fifo order
59
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
60
+ def schedule_fifo(jobs_array)
61
+ jobs_array.each do |job|
62
+ @queue << job
63
+ end
64
+ end
65
+
66
+ # By default all non-consumption work is scheduled in a fifo order
67
+ alias on_schedule_revocation schedule_fifo
68
+ alias on_schedule_shutdown schedule_fifo
69
+ alias on_schedule_idle schedule_fifo
70
+ alias on_schedule_periodic schedule_fifo
71
+
72
+ # This scheduler does not have anything to manage as it is a pass through and has no
73
+ # state
74
+ def on_manage
75
+ nil
76
+ end
77
+
78
+ # This scheduler does not need to be cleared because it is stateless
79
+ #
80
+ # @param _group_id [String] Subscription group id
81
+ def on_clear(_group_id)
82
+ nil
83
+ end
84
+
85
+ private
86
+
87
+ # @param perf_tracker [PerformanceTracker]
88
+ # @param job [Karafka::Processing::Jobs::Consume] job we will be processing
89
+ # @return [Numeric] estimated cost of processing this job
90
+ def processing_cost(perf_tracker, job)
91
+ if job.is_a?(::Karafka::Processing::Jobs::Consume)
92
+ messages = job.messages
93
+ message = messages.first
94
+
95
+ perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
96
+ else
97
+ # LJF will set first the most expensive, but we want to run the zero cost jobs
98
+ # related to the lifecycle always first. That is why we "emulate" that they are
99
+ # the longest possible jobs that anyone can run
100
+ Float::INFINITY
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
@@ -33,7 +33,7 @@ module Karafka
33
33
  ].freeze
34
34
 
35
35
  # No actions needed for the standard flow here
36
- def handle_before_enqueue
36
+ def handle_before_schedule_consume
37
37
  super
38
38
 
39
39
  coordinator.on_enqueued do
@@ -27,9 +27,127 @@ module Karafka
27
27
  # Apply strategy for a non-feature based flow
28
28
  FEATURES = %i[].freeze
29
29
 
30
+ # Marks message as consumed in an async way.
31
+ #
32
+ # @param message [Messages::Message] last successfully processed message.
33
+ # @param offset_metadata [String, nil] offset metadata string or nil if nothing
34
+ # @return [Boolean] true if we were able to mark the offset, false otherwise.
35
+ # False indicates that we were not able and that we have lost the partition.
36
+ #
37
+ # @note We keep track of this offset in case we would mark as consumed and got error when
38
+ # processing another message. In case like this we do not pause on the message we've
39
+ # already processed but rather at the next one. This applies to both sync and async
40
+ # versions of this method.
41
+ def mark_as_consumed(message, offset_metadata = nil)
42
+ if @_in_transaction
43
+ mark_in_transaction(message, offset_metadata, true)
44
+ else
45
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
46
+ # In case like this we ignore marking
47
+ return true if coordinator.seek_offset.nil?
48
+ # Ignore earlier offsets than the one we already committed
49
+ return true if coordinator.seek_offset > message.offset
50
+ return false if revoked?
51
+ return revoked? unless client.mark_as_consumed(message, offset_metadata)
52
+
53
+ coordinator.seek_offset = message.offset + 1
54
+ end
55
+
56
+ true
57
+ end
58
+
59
+ # Marks message as consumed in a sync way.
60
+ #
61
+ # @param message [Messages::Message] last successfully processed message.
62
+ # @param offset_metadata [String, nil] offset metadata string or nil if nothing
63
+ # @return [Boolean] true if we were able to mark the offset, false otherwise.
64
+ # False indicates that we were not able and that we have lost the partition.
65
+ def mark_as_consumed!(message, offset_metadata = nil)
66
+ if @_in_transaction
67
+ mark_in_transaction(message, offset_metadata, false)
68
+ else
69
+ # seek offset can be nil only in case `#seek` was invoked with offset reset request
70
+ # In case like this we ignore marking
71
+ return true if coordinator.seek_offset.nil?
72
+ # Ignore earlier offsets than the one we already committed
73
+ return true if coordinator.seek_offset > message.offset
74
+ return false if revoked?
75
+
76
+ return revoked? unless client.mark_as_consumed!(message, offset_metadata)
77
+
78
+ coordinator.seek_offset = message.offset + 1
79
+ end
80
+
81
+ true
82
+ end
83
+
84
+ # Starts producer transaction, saves the transaction context for transactional marking
85
+ # and runs user code in this context
86
+ #
87
+ # Transactions on a consumer level differ from those initiated by the producer as they
88
+ # allow to mark offsets inside of the transaction. If the transaction is initialized
89
+ # only from the consumer, the offset will be stored in a regular fashion.
90
+ #
91
+ # @param block [Proc] code that we want to run in a transaction
92
+ def transaction(&block)
93
+ transaction_started = false
94
+
95
+ # Prevent nested transactions. It would not make any sense
96
+ raise Errors::TransactionAlreadyInitializedError if @_in_transaction
97
+
98
+ transaction_started = true
99
+ @_transaction_marked = []
100
+ @_in_transaction = true
101
+
102
+ producer.transaction(&block)
103
+
104
+ @_in_transaction = false
105
+
106
+ # This offset is already stored in transaction but we set it here anyhow because we
107
+ # want to make sure our internal in-memory state is aligned with the transaction
108
+ #
109
+ # @note We never need to use the blocking `#mark_as_consumed!` here because the offset
110
+ # anyhow was already stored during the transaction
111
+ #
112
+ # @note In theory we could only keep reference to the most recent marking and reject
113
+ # others. We however do not do it for two reasons:
114
+ # - User may have non standard flow relying on some alternative order and we want to
115
+ # mimic this
116
+ # - Complex strategies like VPs can use this in VPs to mark in parallel without
117
+ # having to redefine the transactional flow completely
118
+ @_transaction_marked.each do |marking|
119
+ marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
120
+ end
121
+ ensure
122
+ if transaction_started
123
+ @_transaction_marked.clear
124
+ @_in_transaction = false
125
+ end
126
+ end
127
+
128
+ # Stores the next offset for processing inside of the transaction and stores it in a
129
+ # local accumulator for post-transaction status update
130
+ #
131
+ # @param message [Messages::Message] message we want to commit inside of a transaction
132
+ # @param offset_metadata [String, nil] offset metadata or nil if none
133
+ # @param async [Boolean] should we mark in async or sync way (applicable only to post
134
+ # transaction state synchronization usage as within transaction it is always sync)
135
+ def mark_in_transaction(message, offset_metadata, async)
136
+ raise Errors::TransactionRequiredError unless @_in_transaction
137
+
138
+ producer.transaction_mark_as_consumed(
139
+ client,
140
+ message,
141
+ offset_metadata
142
+ )
143
+
144
+ @_transaction_marked ||= []
145
+ @_transaction_marked << [message, offset_metadata, async]
146
+ end
147
+
30
148
  # No actions needed for the standard flow here
31
- def handle_before_enqueue
32
- Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
149
+ def handle_before_schedule_consume
150
+ Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
33
151
 
34
152
  nil
35
153
  end
@@ -87,7 +205,7 @@ module Karafka
87
205
  end
88
206
  end
89
207
 
90
- # Standard
208
+ # Standard flow for revocation
91
209
  def handle_revoked
92
210
  coordinator.on_revoked do
93
211
  resume
@@ -100,6 +218,21 @@ module Karafka
100
218
  revoked
101
219
  end
102
220
  end
221
+
222
+ # No action needed for the tick standard flow
223
+ def handle_before_schedule_tick
224
+ Karafka.monitor.instrument('consumer.before_schedule_tick', caller: self)
225
+
226
+ nil
227
+ end
228
+
229
+ # Runs the consumer `#tick` method with reporting
230
+ def handle_tick
231
+ Karafka.monitor.instrument('consumer.tick', caller: self)
232
+ Karafka.monitor.instrument('consumer.ticked', caller: self) do
233
+ tick
234
+ end
235
+ end
103
236
  end
104
237
  end
105
238
  end