karafka 2.2.13 → 2.3.0.alpha1

Files changed (125)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +38 -12
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +161 -125
  6. data/Gemfile.lock +12 -12
  7. data/README.md +0 -2
  8. data/SECURITY.md +23 -0
  9. data/config/locales/errors.yml +7 -1
  10. data/config/locales/pro_errors.yml +22 -0
  11. data/docker-compose.yml +3 -1
  12. data/karafka.gemspec +2 -2
  13. data/lib/karafka/admin/acl.rb +287 -0
  14. data/lib/karafka/admin.rb +118 -16
  15. data/lib/karafka/app.rb +12 -3
  16. data/lib/karafka/base_consumer.rb +32 -31
  17. data/lib/karafka/cli/base.rb +1 -1
  18. data/lib/karafka/connection/client.rb +94 -84
  19. data/lib/karafka/connection/conductor.rb +28 -0
  20. data/lib/karafka/connection/listener.rb +165 -46
  21. data/lib/karafka/connection/listeners_batch.rb +5 -11
  22. data/lib/karafka/connection/manager.rb +72 -0
  23. data/lib/karafka/connection/messages_buffer.rb +12 -0
  24. data/lib/karafka/connection/proxy.rb +17 -0
  25. data/lib/karafka/connection/status.rb +75 -0
  26. data/lib/karafka/contracts/config.rb +14 -10
  27. data/lib/karafka/contracts/consumer_group.rb +9 -1
  28. data/lib/karafka/contracts/topic.rb +3 -1
  29. data/lib/karafka/errors.rb +13 -0
  30. data/lib/karafka/instrumentation/assignments_tracker.rb +96 -0
  31. data/lib/karafka/instrumentation/callbacks/rebalance.rb +10 -7
  32. data/lib/karafka/instrumentation/logger_listener.rb +3 -9
  33. data/lib/karafka/instrumentation/notifications.rb +19 -9
  34. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +31 -28
  35. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +22 -3
  36. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +15 -12
  37. data/lib/karafka/instrumentation/vendors/kubernetes/liveness_listener.rb +39 -36
  38. data/lib/karafka/pro/base_consumer.rb +47 -0
  39. data/lib/karafka/pro/connection/manager.rb +300 -0
  40. data/lib/karafka/pro/connection/multiplexing/listener.rb +40 -0
  41. data/lib/karafka/pro/instrumentation/performance_tracker.rb +85 -0
  42. data/lib/karafka/pro/iterator/tpl_builder.rb +1 -1
  43. data/lib/karafka/pro/iterator.rb +1 -6
  44. data/lib/karafka/pro/loader.rb +16 -2
  45. data/lib/karafka/pro/processing/coordinator.rb +2 -1
  46. data/lib/karafka/pro/processing/executor.rb +37 -0
  47. data/lib/karafka/pro/processing/expansions_selector.rb +32 -0
  48. data/lib/karafka/pro/processing/jobs/periodic.rb +41 -0
  49. data/lib/karafka/pro/processing/jobs/periodic_non_blocking.rb +32 -0
  50. data/lib/karafka/pro/processing/jobs_builder.rb +14 -3
  51. data/lib/karafka/pro/processing/offset_metadata/consumer.rb +44 -0
  52. data/lib/karafka/pro/processing/offset_metadata/fetcher.rb +131 -0
  53. data/lib/karafka/pro/processing/offset_metadata/listener.rb +46 -0
  54. data/lib/karafka/pro/processing/schedulers/base.rb +143 -0
  55. data/lib/karafka/pro/processing/schedulers/default.rb +107 -0
  56. data/lib/karafka/pro/processing/strategies/aj/lrj_mom_vp.rb +1 -1
  57. data/lib/karafka/pro/processing/strategies/default.rb +136 -3
  58. data/lib/karafka/pro/processing/strategies/dlq/default.rb +35 -0
  59. data/lib/karafka/pro/processing/strategies/lrj/default.rb +1 -1
  60. data/lib/karafka/pro/processing/strategies/lrj/mom.rb +1 -1
  61. data/lib/karafka/pro/processing/strategies/vp/default.rb +60 -26
  62. data/lib/karafka/pro/processing/virtual_offset_manager.rb +41 -11
  63. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +2 -0
  64. data/lib/karafka/pro/routing/features/multiplexing/config.rb +38 -0
  65. data/lib/karafka/pro/routing/features/multiplexing/contracts/topic.rb +114 -0
  66. data/lib/karafka/pro/routing/features/multiplexing/patches/contracts/consumer_group.rb +42 -0
  67. data/lib/karafka/pro/routing/features/multiplexing/proxy.rb +38 -0
  68. data/lib/karafka/pro/routing/features/multiplexing/subscription_group.rb +42 -0
  69. data/lib/karafka/pro/routing/features/multiplexing/subscription_groups_builder.rb +40 -0
  70. data/lib/karafka/pro/routing/features/multiplexing.rb +59 -0
  71. data/lib/karafka/pro/routing/features/non_blocking_job/topic.rb +32 -0
  72. data/lib/karafka/pro/routing/features/non_blocking_job.rb +37 -0
  73. data/lib/karafka/pro/routing/features/offset_metadata/config.rb +33 -0
  74. data/lib/karafka/pro/routing/features/offset_metadata/contracts/topic.rb +42 -0
  75. data/lib/karafka/pro/routing/features/offset_metadata/topic.rb +65 -0
  76. data/lib/karafka/pro/routing/features/offset_metadata.rb +40 -0
  77. data/lib/karafka/pro/routing/features/patterns/contracts/consumer_group.rb +4 -0
  78. data/lib/karafka/pro/routing/features/patterns/detector.rb +18 -10
  79. data/lib/karafka/pro/routing/features/periodic_job/config.rb +37 -0
  80. data/lib/karafka/pro/routing/features/periodic_job/contracts/topic.rb +44 -0
  81. data/lib/karafka/pro/routing/features/periodic_job/topic.rb +94 -0
  82. data/lib/karafka/pro/routing/features/periodic_job.rb +27 -0
  83. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +1 -0
  84. data/lib/karafka/pro/routing/features/virtual_partitions/contracts/topic.rb +1 -0
  85. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +7 -2
  86. data/lib/karafka/process.rb +5 -3
  87. data/lib/karafka/processing/coordinator.rb +5 -1
  88. data/lib/karafka/processing/executor.rb +43 -13
  89. data/lib/karafka/processing/executors_buffer.rb +22 -7
  90. data/lib/karafka/processing/jobs/base.rb +19 -2
  91. data/lib/karafka/processing/jobs/consume.rb +3 -3
  92. data/lib/karafka/processing/jobs/idle.rb +5 -0
  93. data/lib/karafka/processing/jobs/revoked.rb +5 -0
  94. data/lib/karafka/processing/jobs/shutdown.rb +5 -0
  95. data/lib/karafka/processing/jobs_queue.rb +19 -8
  96. data/lib/karafka/processing/schedulers/default.rb +42 -0
  97. data/lib/karafka/processing/strategies/base.rb +13 -4
  98. data/lib/karafka/processing/strategies/default.rb +23 -7
  99. data/lib/karafka/processing/strategies/dlq.rb +36 -0
  100. data/lib/karafka/processing/worker.rb +4 -1
  101. data/lib/karafka/routing/builder.rb +12 -2
  102. data/lib/karafka/routing/consumer_group.rb +5 -5
  103. data/lib/karafka/routing/features/base.rb +44 -8
  104. data/lib/karafka/routing/features/dead_letter_queue/config.rb +6 -1
  105. data/lib/karafka/routing/features/dead_letter_queue/contracts/topic.rb +1 -0
  106. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +9 -2
  107. data/lib/karafka/routing/proxy.rb +4 -3
  108. data/lib/karafka/routing/subscription_group.rb +2 -2
  109. data/lib/karafka/routing/subscription_groups_builder.rb +11 -2
  110. data/lib/karafka/routing/topic.rb +8 -10
  111. data/lib/karafka/routing/topics.rb +1 -1
  112. data/lib/karafka/runner.rb +13 -3
  113. data/lib/karafka/server.rb +5 -9
  114. data/lib/karafka/setup/config.rb +21 -1
  115. data/lib/karafka/status.rb +23 -14
  116. data/lib/karafka/templates/karafka.rb.erb +7 -0
  117. data/lib/karafka/time_trackers/partition_usage.rb +56 -0
  118. data/lib/karafka/version.rb +1 -1
  119. data.tar.gz.sig +0 -0
  120. metadata +47 -13
  121. metadata.gz.sig +0 -0
  122. data/lib/karafka/connection/consumer_group_coordinator.rb +0 -48
  123. data/lib/karafka/pro/performance_tracker.rb +0 -84
  124. data/lib/karafka/pro/processing/scheduler.rb +0 -74
  125. data/lib/karafka/processing/scheduler.rb +0 -38
@@ -16,12 +16,12 @@ module Karafka
     module Processing
       # Pro jobs builder that supports lrj
       class JobsBuilder < ::Karafka::Processing::JobsBuilder
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         def idle(executor)
           Karafka::Processing::Jobs::Idle.new(executor)
         end
 
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @param messages [Karafka::Messages::Messages] messages batch to be consumed
         # @return [Karafka::Processing::Jobs::Consume] blocking job
         # @return [Karafka::Pro::Processing::Jobs::ConsumeNonBlocking] non blocking for lrj
@@ -33,7 +33,7 @@ module Karafka
           end
         end
 
-        # @param executor [Karafka::Processing::Executor]
+        # @param executor [Karafka::Pro::Processing::Executor]
         # @return [Karafka::Processing::Jobs::Revoked] revocation job for non LRJ
         # @return [Karafka::Processing::Jobs::RevokedNonBlocking] revocation job that is
         #   non-blocking, so when revocation job is scheduled for LRJ it also will not block
@@ -44,6 +44,17 @@ module Karafka
             super
           end
         end
+
+        # @param executor [Karafka::Pro::Processing::Executor]
+        # @return [Jobs::Periodic] Periodic job
+        # @return [Jobs::PeriodicNonBlocking] Periodic non-blocking job
+        def periodic(executor)
+          if executor.topic.long_running_job?
+            Jobs::PeriodicNonBlocking.new(executor)
+          else
+            Jobs::Periodic.new(executor)
+          end
+        end
       end
     end
   end
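The new `#periodic` builder mirrors the existing consume/revoke dispatch: the topic's LRJ flag alone decides whether the blocking or non-blocking periodic job variant is used. A minimal routing sketch of a topic that would exercise the non-blocking path; the `periodic true` flag is assumed from this release's new `periodic_job` routing feature, and the consumer class is illustrative:

```ruby
class ReportsConsumer < Karafka::BaseConsumer
  def consume; end

  # Required by periodic jobs; invoked on the tick cadence
  def tick; end
end

class KarafkaApp < Karafka::App
  routes.draw do
    topic :reports do
      consumer ReportsConsumer
      # With LRJ enabled, the builder above wraps ticks in
      # Jobs::PeriodicNonBlocking so they do not block the workers
      long_running_job true
      # Assumed flag enabling periodic (tick) jobs for this topic
      periodic true
    end
  end
end
```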
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # Extra API methods for offset metadata fetching
+        # @note Part of this feature API is embedded directly into the strategies because it alters
+        #   how marking methods (`#mark_as_consumed` and `#mark_as_consumed!`) operate. Because
+        #   of that, they had to be embedded into the strategies.
+        module Consumer
+          # @param cache [Boolean] should we use cached result if present (true by default)
+          # @return [false, Object] false in case we do not own the partition anymore or
+          #   deserialized metadata based on the deserializer
+          # @note Caching is on as the assumption here is, that most of the time user will be
+          #   interested only in the offset metadata that "came" from the time prior to the
+          #   rebalance. That is because the rest of the metadata (current) is created and
+          #   controlled by the user himself, thus there is no need to retrieve it. In case this
+          #   is not true and user wants to always get the Kafka metadata, `cache` value of this
+          #   feature can be set to false.
+          def offset_metadata(cache: true)
+            return false if revoked?
+
+            Fetcher.find(topic, partition, cache: cache)
+          end
+
+          alias committed_offset_metadata offset_metadata
+        end
+      end
+    end
+  end
+end
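Combined with the metadata-aware `#mark_as_consumed` added later in this diff, the consumer-facing flow looks roughly like this (the usual `ApplicationConsumer` base and the metadata format are illustrative):

```ruby
class EventsConsumer < ApplicationConsumer
  def consume
    # Deserialized metadata stored alongside the last committed offset,
    # or false when this process no longer owns the partition
    checkpoint = offset_metadata
    return if checkpoint == false

    messages.each do |message|
      # Illustrative domain logic
      Karafka.logger.info("Consuming #{message.offset} (resumed from: #{checkpoint})")
    end

    # Store fresh metadata together with the offset for whoever gets
    # this partition after the next rebalance
    mark_as_consumed(messages.last, "cursor:#{messages.last.offset}")
  end
end
```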
@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Offset Metadata support on the processing side
+      module OffsetMetadata
+        # This fetcher is responsible for fetching and caching committed offsets metadata
+        # information.
+        #
+        # By design we fetch all information for a requested topic's assignments. Not all
+        # topics from the same subscription group may need metadata, and even if they do, we
+        # can run a few smaller queries. This approach avoids querying data for all assigned
+        # topics in one go and prevents excessive queries.
+        #
+        # Since the assumption is that the user will not have to reach for the later metadata
+        # (it is produced in the context of a given consumer assignment), we can cache the
+        # initial result and only allow explicit invalidation by the user.
+        class Fetcher
+          include Singleton
+
+          class << self
+            extend Forwardable
+
+            def_delegators :instance, :register, :clear, :find
+          end
+
+          def initialize
+            @mutexes = {}
+            @clients = {}
+            @tpls = {}
+          end
+
+          # Registers a client of a given subscription group, so we can use it for queries later on
+          # @param client [Karafka::Connection::Client]
+          # @note Since we store the client reference and not the underlying rdkafka consumer
+          #   instance, we do not have to deal with the recovery as it is abstracted away
+          def register(client)
+            @clients[client.subscription_group] = client
+            # We use one mutex per SG because independent SGs can query in parallel
+            @mutexes[client.subscription_group] = Mutex.new
+            @tpls[client.subscription_group] = {}
+          end
+
+          # Queries or retrieves from cache the given offset metadata for the selected partition
+          #
+          # @param topic [Karafka::Routing::Topic] routing topic with subscription group reference
+          # @param partition [Integer] partition for which we want to get stored offset metadata
+          # @param cache [Boolean] forces an explicit query to Kafka and a cache refresh when
+          #   false. By default we use the setting from the topic level but this can be
+          #   overwritten on a per request basis if needed.
+          # @return [Object, false] deserialized metadata (string deserializer by default) or
+          #   false in case we were not able to obtain the details because we have lost the
+          #   assignment
+          def find(topic, partition, cache: true)
+            cache = topic.offset_metadata.cache? && cache
+
+            tpls = fetch(topic, cache)
+
+            return false unless tpls
+
+            t_partitions = tpls.fetch(topic.name, [])
+            t_partition = t_partitions.find { |t_p| t_p.partition == partition }
+
+            # If we do not have a given topic partition here, it means it is no longer part
+            # of our assignment and we should return false
+            return false unless t_partition
+
+            topic.offset_metadata.deserializer.call(t_partition.metadata)
+          end
+
+          # Clears cache of a given subscription group. It is triggered on assignment changes.
+          #
+          # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
+          #   that we want to clear.
+          def clear(subscription_group)
+            @mutexes.fetch(subscription_group).synchronize do
+              @tpls[subscription_group].clear
+            end
+          end
+
+          private
+
+          # Fetches from Kafka all committed offsets for the given topic partitions that are
+          # assigned to this process.
+          #
+          # We fetch all because in the majority of cases the behavior of the end user code
+          # is not specific to a given partition but the same for all. In such cases we save
+          # on querying as we get all data for all partitions in one go.
+          #
+          # @param topic [Karafka::Routing::Topic] topic for which we want to fetch tpls data
+          # @param cache [Boolean] should we return cached data if present
+          def fetch(topic, cache)
+            subscription_group = topic.subscription_group
+            t_tpls = @tpls.fetch(subscription_group, false)
+            t_tpl = t_tpls[topic]
+
+            return t_tpl if t_tpl && cache
+
+            assigned_tpls = @clients.fetch(subscription_group).assignment
+            t_tpl = assigned_tpls.to_h.fetch(topic.name, false)
+
+            # May be false in case we lost the given assignment but still run LRJ
+            return false unless t_tpl
+            return false if t_tpl.empty?
+
+            @mutexes.fetch(subscription_group).synchronize do
+              rd_tpl = Rdkafka::Consumer::TopicPartitionList.new(topic.name => t_tpl)
+
+              # While in theory we could lose the assignment while being here, this will
+              # still work and return a proper tpl; we do not deal with this case on this
+              # layer. There will not be any exception and this will operate correctly.
+              t_tpls[topic] = @clients.fetch(subscription_group).committed(rd_tpl).to_h
+            end
+          end
+        end
+      end
+    end
+  end
+end
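Note how `#find` ANDs the per-call flag with the topic-level setting: `cache: false` always forces a round trip to Kafka, while `cache: true` is honored only when caching is also enabled on the topic. From within a consumer this surfaces as (sketch):

```ruby
# Bypass the cached tpl data and query Kafka again; still returns false
# if the assignment was lost in the meantime
fresh_metadata = offset_metadata(cache: false)
```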
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      module OffsetMetadata
+        # Keeps track of rebalances and updates the fetcher.
+        # Since we cache the tpls with metadata, we need to invalidate them on events that
+        # would cause changes in the assignments.
+        class Listener
+          # When we start listening, we need to register this client in the metadata fetcher,
+          # so the client related to a given subscription group is available to the fetcher.
+          # The fetcher may be used in the filtering API and other places outside of the
+          # standard consumer flow.
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_connection_listener_before_fetch_loop(event)
+            Fetcher.register event[:client]
+          end
+
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_assigned(event)
+            Fetcher.clear event[:subscription_group]
+          end
+
+          # Invalidates internal cache when assignments change so we can get correct metadata
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_rebalance_partitions_revoked(event)
+            Fetcher.clear event[:subscription_group]
+          end
+        end
+      end
+    end
+  end
+end
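This listener follows the standard karafka-core notifications convention, where an object's `on_<event>` methods are matched against event names; the Pro loader presumably wires this one up, but the same pattern works for custom listeners. A hypothetical example:

```ruby
class AssignmentLogger
  # Invoked for every `rebalance.partitions_assigned` event
  def on_rebalance_partitions_assigned(event)
    Karafka.logger.info("Assignment changed for SG: #{event[:subscription_group].id}")
  end
end

Karafka.monitor.subscribe(AssignmentLogger.new)
```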
@@ -0,0 +1,143 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Namespace for Pro schedulers related components
+      module Schedulers
+        # Base for all the Pro custom schedulers
+        #
+        # It wraps the scheduler API with a mutex to ensure that during scheduling we do not
+        # start scheduling other work that could impact the decision making across multiple
+        # subscription groups running in separate threads.
+        #
+        # @note All the `on_` methods can be redefined with non-thread-safe versions without
+        #   locks if needed; however, when doing so, ensure that your scheduler is stateless.
+        class Base
+          # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
+          def initialize(queue)
+            @queue = queue
+            @mutex = Mutex.new
+          end
+
+          # Schedules any jobs provided in a fifo order
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+          def schedule_fifo(jobs_array)
+            jobs_array.each do |job|
+              @queue << job
+            end
+          end
+
+          # Runs the consumption jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_consumption(jobs_array)
+            @mutex.synchronize do
+              schedule_consumption(jobs_array)
+            end
+          end
+
+          # Should schedule the consumption jobs
+          #
+          # @param _jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def schedule_consumption(_jobs_array)
+            raise NotImplementedError, 'Implement in a subclass'
+          end
+
+          # Runs the revocation jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Revoked, Processing::Jobs::RevokedNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_revocation(jobs_array)
+            @mutex.synchronize do
+              schedule_revocation(jobs_array)
+            end
+          end
+
+          # Runs the shutdown jobs scheduling flow under a mutex
+          #
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Shutdown>] jobs for scheduling
+          def on_schedule_shutdown(jobs_array)
+            @mutex.synchronize do
+              schedule_shutdown(jobs_array)
+            end
+          end
+
+          # Runs the idle jobs scheduling flow under a mutex
+          #
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Idle>] jobs for scheduling
+          def on_schedule_idle(jobs_array)
+            @mutex.synchronize do
+              schedule_idle(jobs_array)
+            end
+          end
+
+          # Runs the periodic jobs scheduling flow under a mutex
+          #
+          # @param jobs_array
+          #   [Array<Processing::Jobs::Periodic, Processing::Jobs::PeriodicNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_periodic(jobs_array)
+            @mutex.synchronize do
+              schedule_periodic(jobs_array)
+            end
+          end
+
+          # Schedule by default all except consumption as fifo
+          alias schedule_revocation schedule_fifo
+          alias schedule_shutdown schedule_fifo
+          alias schedule_idle schedule_fifo
+          alias schedule_periodic schedule_fifo
+
+          # Runs the manage tick under mutex
+          def on_manage
+            @mutex.synchronize { manage }
+          end
+
+          # Should manage scheduling on jobs state changes
+          #
+          # By default does nothing as default schedulers are stateless
+          def manage
+            nil
+          end
+
+          # Runs clearing under mutex
+          #
+          # @param group_id [String] Subscription group id
+          def on_clear(group_id)
+            @mutex.synchronize { clear(group_id) }
+          end
+
+          # By default schedulers are stateless, so nothing to clear.
+          #
+          # @param _group_id [String] Subscription group id
+          def clear(_group_id)
+            nil
+          end
+
+          private
+
+          # @return [Karafka::Processing::JobsQueue] jobs queue reference for internal usage
+          #   inside of the scheduler
+          attr_reader :queue
+        end
+      end
+    end
+  end
+end
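The split between the mutex-guarded `on_*` entry points and the overridable `schedule_*` / `manage` / `clear` bodies keeps custom schedulers small. A minimal sketch of a shortest-job-first variant; the class is hypothetical and the `scheduler_class` setting name is assumed from this release's config changes:

```ruby
class ShortestJobFirstScheduler < Karafka::Pro::Processing::Schedulers::Base
  # Invoked via #on_schedule_consumption, so it already runs under the mutex
  def schedule_consumption(jobs_array)
    jobs_array
      .sort_by { |job| job.messages.count }
      .each { |job| queue << job }
  end
end

# Registration sketch (setting name assumed):
# config.internal.processing.scheduler_class = ShortestJobFirstScheduler
```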
@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+
+# This Karafka component is a Pro component under a commercial license.
+# This Karafka component is NOT licensed under LGPL.
+#
+# All of the commercial components are present in the lib/karafka/pro directory of this
+# repository and their usage requires commercial license agreement.
+#
+# Karafka has also commercial-friendly license, commercial support and commercial components.
+#
+# By sending a pull request to the pro components, you are agreeing to transfer the copyright of
+# your code to Maciej Mensfeld.
+
+module Karafka
+  module Pro
+    module Processing
+      # Namespace for Pro schedulers
+      module Schedulers
+        # An optimizing scheduler that takes into consideration the execution time needed to
+        # process messages from given topic partitions. It uses the non-preemptive LJF
+        # (Longest Job First) algorithm.
+        #
+        # This scheduler is designed to optimize execution times on jobs that perform IO
+        # operations, as taking IO into consideration allows it to achieve optimized parallel
+        # processing.
+        #
+        # This scheduler can also work with virtual partitions.
+        #
+        # Aside from consumption jobs, other jobs do not run often, thus we can leave them
+        # with the default FIFO behavior of the standard Karafka scheduler.
+        #
+        # @note This is a stateless scheduler, thus we can override the `#on_` API.
+        class Default < Base
+          # Schedules jobs in the LJF order for consumption
+          #
+          # @param jobs_array
+          #   [Array<Karafka::Processing::Jobs::Consume, Processing::Jobs::ConsumeNonBlocking>]
+          #   jobs for scheduling
+          def on_schedule_consumption(jobs_array)
+            perf_tracker = Instrumentation::PerformanceTracker.instance
+
+            ordered = []
+
+            jobs_array.each do |job|
+              ordered << [
+                job,
+                processing_cost(perf_tracker, job)
+              ]
+            end
+
+            ordered.sort_by!(&:last)
+            ordered.reverse!
+            ordered.map!(&:first)
+
+            ordered.each do |job|
+              @queue << job
+            end
+          end
+
+          # Schedules any jobs provided in a fifo order
+          # @param jobs_array [Array<Karafka::Processing::Jobs::Base>]
+          def schedule_fifo(jobs_array)
+            jobs_array.each do |job|
+              @queue << job
+            end
+          end
+
+          # By default all non-consumption work is scheduled in a fifo order
+          alias on_schedule_revocation schedule_fifo
+          alias on_schedule_shutdown schedule_fifo
+          alias on_schedule_idle schedule_fifo
+          alias on_schedule_periodic schedule_fifo
+
+          # This scheduler does not have anything to manage as it is a pass through and has
+          # no state
+          def on_manage
+            nil
+          end
+
+          # This scheduler does not need to be cleared because it is stateless
+          #
+          # @param _group_id [String] Subscription group id
+          def on_clear(_group_id)
+            nil
+          end
+
+          private
+
+          # @param perf_tracker [PerformanceTracker]
+          # @param job [Karafka::Processing::Jobs::Consume] job we will be processing
+          # @return [Numeric] estimated cost of processing this job
+          def processing_cost(perf_tracker, job)
+            if job.is_a?(::Karafka::Processing::Jobs::Consume)
+              messages = job.messages
+              message = messages.first
+
+              perf_tracker.processing_time_p95(message.topic, message.partition) * messages.size
+            else
+              # LJF schedules the most expensive job first, but we want the zero-cost jobs
+              # related to the lifecycle to always run first. That is why we "emulate" that
+              # they are the longest possible jobs that anyone can run
+              Float::INFINITY
+            end
+          end
+        end
+      end
+    end
+  end
+end
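A quick worked example of the ordering above, with illustrative p95 numbers:

```ruby
# Estimated cost = p95 processing time per message * batch size
jobs = {
  partition_a: 100 * 4,  # 100 messages at 4ms p95  => 400
  partition_b: 30 * 20   # 30 messages at 20ms p95  => 600
}

# Mirrors the sort_by + reverse above: highest estimated cost first
jobs.sort_by(&:last).reverse.map(&:first)
# => [:partition_b, :partition_a]
```

Starting the longest job first keeps the workers busy and minimizes the total wall-clock time of the batch under parallel execution.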
@@ -33,7 +33,7 @@ module Karafka
         ].freeze
 
         # No actions needed for the standard flow here
-        def handle_before_enqueue
+        def handle_before_schedule_consume
           super
 
           coordinator.on_enqueued do
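The same rename recurs in the default strategy below, where the instrumentation event string changes too; code subscribed to the old `consumer.before_enqueue` notification needs an update. A migration sketch:

```ruby
# Before (karafka 2.2.x):
# Karafka.monitor.subscribe('consumer.before_enqueue') { |event| ... }

# After (2.3.0), matching the renamed hook:
Karafka.monitor.subscribe('consumer.before_schedule_consume') do |event|
  Karafka.logger.debug("Scheduling consume for #{event[:caller].topic.name}")
end
```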
@@ -27,9 +27,127 @@ module Karafka
         # Apply strategy for a non-feature based flow
         FEATURES = %i[].freeze
 
+        # Marks message as consumed in an async way.
+        #
+        # @param message [Messages::Message] last successfully processed message.
+        # @param offset_metadata [String, nil] offset metadata string or nil if nothing
+        # @return [Boolean] true if we were able to mark the offset, false otherwise.
+        #   False indicates that we were not able and that we have lost the partition.
+        #
+        # @note We keep track of this offset in case we mark as consumed and then get an
+        #   error when processing another message. In a case like this we do not pause on
+        #   the message we've already processed but rather on the next one. This applies to
+        #   both sync and async versions of this method.
+        def mark_as_consumed(message, offset_metadata = nil)
+          if @_in_transaction
+            mark_in_transaction(message, offset_metadata, true)
+          else
+            # seek offset can be nil only in case `#seek` was invoked with offset reset request
+            # In case like this we ignore marking
+            return true if coordinator.seek_offset.nil?
+            # Ignore earlier offsets than the one we already committed
+            return true if coordinator.seek_offset > message.offset
+            return false if revoked?
+            return revoked? unless client.mark_as_consumed(message, offset_metadata)
+
+            coordinator.seek_offset = message.offset + 1
+          end
+
+          true
+        end
+
+        # Marks message as consumed in a sync way.
+        #
+        # @param message [Messages::Message] last successfully processed message.
+        # @param offset_metadata [String, nil] offset metadata string or nil if nothing
+        # @return [Boolean] true if we were able to mark the offset, false otherwise.
+        #   False indicates that we were not able and that we have lost the partition.
+        def mark_as_consumed!(message, offset_metadata = nil)
+          if @_in_transaction
+            mark_in_transaction(message, offset_metadata, false)
+          else
+            # seek offset can be nil only in case `#seek` was invoked with offset reset request
+            # In case like this we ignore marking
+            return true if coordinator.seek_offset.nil?
+            # Ignore earlier offsets than the one we already committed
+            return true if coordinator.seek_offset > message.offset
+            return false if revoked?
+
+            return revoked? unless client.mark_as_consumed!(message, offset_metadata)
+
+            coordinator.seek_offset = message.offset + 1
+          end
+
+          true
+        end
+
+        # Starts a producer transaction, saves the transaction context for transactional
+        # marking and runs user code in this context
+        #
+        # Transactions on a consumer level differ from those initiated by the producer as
+        # they allow marking offsets inside of the transaction. If the transaction is
+        # initialized only from the consumer, the offset will be stored in a regular fashion.
+        #
+        # @param block [Proc] code that we want to run in a transaction
+        def transaction(&block)
+          transaction_started = false
+
+          # Prevent nested transactions. They would not make any sense
+          raise Errors::TransactionAlreadyInitializedError if @_in_transaction
+
+          transaction_started = true
+          @_transaction_marked = []
+          @_in_transaction = true
+
+          producer.transaction(&block)
+
+          @_in_transaction = false
+
+          # This offset is already stored in the transaction but we set it here anyhow
+          # because we want to make sure our internal in-memory state is aligned with the
+          # transaction
+          #
+          # @note We never need to use the blocking `#mark_as_consumed!` here because the
+          #   offset anyhow was already stored during the transaction
+          #
+          # @note In theory we could only keep reference to the most recent marking and
+          #   reject others. We however do not do it for two reasons:
+          #   - User may have a non standard flow relying on some alternative order and we
+          #     want to mimic this
+          #   - Complex strategies like VPs can use this to mark in parallel without having
+          #     to redefine the transactional flow completely
+          @_transaction_marked.each do |marking|
+            marking.pop ? mark_as_consumed(*marking) : mark_as_consumed!(*marking)
+          end
+        ensure
+          if transaction_started
+            @_transaction_marked.clear
+            @_in_transaction = false
+          end
+        end
+
+        # Stores the next offset for processing inside of the transaction and stores it in
+        # a local accumulator for post-transaction status update
+        #
+        # @param message [Messages::Message] message we want to commit inside of a transaction
+        # @param offset_metadata [String, nil] offset metadata or nil if none
+        # @param async [Boolean] should we mark in an async or sync way (applicable only to
+        #   post transaction state synchronization usage as within the transaction it is
+        #   always sync)
+        def mark_in_transaction(message, offset_metadata, async)
+          raise Errors::TransactionRequiredError unless @_in_transaction
+
+          producer.transaction_mark_as_consumed(
+            client,
+            message,
+            offset_metadata
+          )
+
+          @_transaction_marked ||= []
+          @_transaction_marked << [message, offset_metadata, async]
+        end
+
         # No actions needed for the standard flow here
-        def handle_before_enqueue
-          Karafka.monitor.instrument('consumer.before_enqueue', caller: self)
+        def handle_before_schedule_consume
+          Karafka.monitor.instrument('consumer.before_schedule_consume', caller: self)
 
           nil
         end
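In practice, the consumer-level `#transaction` wraps produce calls and offset marking into one atomic unit; marking inside the block routes through `#mark_in_transaction` and is committed together with the produced messages. A usage sketch (topic name and payload handling are illustrative; the producer must be configured as transactional):

```ruby
class EnrichmentConsumer < ApplicationConsumer
  def consume
    transaction do
      messages.each do |message|
        producer.produce_async(topic: 'events.enriched', payload: message.raw_payload.upcase)
        # Offset is stored atomically together with the message produced above
        mark_as_consumed(message)
      end
    end
  end
end
```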
@@ -87,7 +205,7 @@ module Karafka
           end
         end
 
-        # Standard
+        # Standard flow for revocation
         def handle_revoked
           coordinator.on_revoked do
             resume
@@ -100,6 +218,21 @@ module Karafka
            revoked
          end
        end
+
+        # No action needed for the tick standard flow
+        def handle_before_schedule_tick
+          Karafka.monitor.instrument('consumer.before_schedule_tick', caller: self)
+
+          nil
+        end
+
+        # Runs the consumer `#tick` method with reporting
+        def handle_tick
+          Karafka.monitor.instrument('consumer.tick', caller: self)
+          Karafka.monitor.instrument('consumer.ticked', caller: self) do
+            tick
+          end
+        end
       end
     end
   end
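On the consumer side, `handle_tick` surfaces as a `#tick` method that runs on the periodic jobs cadence even when no new messages arrive, assuming periodic jobs are enabled for the topic in routing. A sketch (class name and state handling are illustrative):

```ruby
class WindowedStatsConsumer < ApplicationConsumer
  def consume
    @seen = (@seen || 0) + messages.count
  end

  # Invoked via handle_tick, wrapped in the consumer.tick / consumer.ticked
  # instrumentation shown above
  def tick
    Karafka.logger.info("Messages seen so far: #{@seen || 0}")
  end
end
```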