karafka 1.4.13 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -3
  3. data/.github/workflows/ci.yml +85 -30
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +268 -7
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +44 -87
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +44 -48
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +237 -0
  16. data/bin/karafka +4 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/bin/wait_for_kafka +20 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +55 -40
  23. data/docker-compose.yml +39 -3
  24. data/karafka.gemspec +11 -17
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +31 -0
  32. data/lib/karafka/app.rb +15 -20
  33. data/lib/karafka/base_consumer.rb +181 -31
  34. data/lib/karafka/cli/base.rb +4 -4
  35. data/lib/karafka/cli/info.rb +43 -9
  36. data/lib/karafka/cli/install.rb +19 -10
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -11
  39. data/lib/karafka/connection/client.rb +385 -90
  40. data/lib/karafka/connection/listener.rb +246 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -189
  49. data/lib/karafka/contracts/consumer_group_topic.rb +34 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger_listener.rb +164 -0
  60. data/lib/karafka/instrumentation/monitor.rb +13 -61
  61. data/lib/karafka/instrumentation/notifications.rb +52 -0
  62. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  63. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  64. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  65. data/lib/karafka/instrumentation.rb +21 -0
  66. data/lib/karafka/licenser.rb +75 -0
  67. data/lib/karafka/messages/batch_metadata.rb +45 -0
  68. data/lib/karafka/messages/builders/batch_metadata.rb +40 -0
  69. data/lib/karafka/messages/builders/message.rb +39 -0
  70. data/lib/karafka/messages/builders/messages.rb +32 -0
  71. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  72. data/lib/karafka/messages/messages.rb +64 -0
  73. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  74. data/lib/karafka/messages/seek.rb +9 -0
  75. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  76. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  77. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  78. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  79. data/lib/karafka/pro/base_consumer.rb +82 -0
  80. data/lib/karafka/pro/contracts/base.rb +21 -0
  81. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  82. data/lib/karafka/pro/contracts/consumer_group_topic.rb +33 -0
  83. data/lib/karafka/pro/loader.rb +76 -0
  84. data/lib/karafka/pro/performance_tracker.rb +80 -0
  85. data/lib/karafka/pro/processing/coordinator.rb +72 -0
  86. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +37 -0
  87. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  88. data/lib/karafka/pro/processing/partitioner.rb +60 -0
  89. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  90. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  91. data/lib/karafka/pro/routing/topic_extensions.rb +38 -0
  92. data/lib/karafka/pro.rb +13 -0
  93. data/lib/karafka/process.rb +1 -0
  94. data/lib/karafka/processing/coordinator.rb +88 -0
  95. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  96. data/lib/karafka/processing/executor.rb +118 -0
  97. data/lib/karafka/processing/executors_buffer.rb +88 -0
  98. data/lib/karafka/processing/jobs/base.rb +51 -0
  99. data/lib/karafka/processing/jobs/consume.rb +42 -0
  100. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  101. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  102. data/lib/karafka/processing/jobs_builder.rb +29 -0
  103. data/lib/karafka/processing/jobs_queue.rb +144 -0
  104. data/lib/karafka/processing/partitioner.rb +22 -0
  105. data/lib/karafka/processing/result.rb +29 -0
  106. data/lib/karafka/processing/scheduler.rb +22 -0
  107. data/lib/karafka/processing/worker.rb +88 -0
  108. data/lib/karafka/processing/workers_batch.rb +27 -0
  109. data/lib/karafka/railtie.rb +113 -0
  110. data/lib/karafka/routing/builder.rb +15 -24
  111. data/lib/karafka/routing/consumer_group.rb +11 -19
  112. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  113. data/lib/karafka/routing/router.rb +1 -1
  114. data/lib/karafka/routing/subscription_group.rb +53 -0
  115. data/lib/karafka/routing/subscription_groups_builder.rb +53 -0
  116. data/lib/karafka/routing/topic.rb +61 -24
  117. data/lib/karafka/routing/topics.rb +38 -0
  118. data/lib/karafka/runner.rb +51 -0
  119. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  120. data/lib/karafka/server.rb +67 -26
  121. data/lib/karafka/setup/config.rb +147 -175
  122. data/lib/karafka/status.rb +14 -5
  123. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  124. data/lib/karafka/templates/karafka.rb.erb +15 -51
  125. data/lib/karafka/time_trackers/base.rb +19 -0
  126. data/lib/karafka/time_trackers/pause.rb +92 -0
  127. data/lib/karafka/time_trackers/poll.rb +65 -0
  128. data/lib/karafka/version.rb +1 -1
  129. data/lib/karafka.rb +38 -17
  130. data.tar.gz.sig +0 -0
  131. metadata +118 -120
  132. metadata.gz.sig +0 -0
  133. data/MIT-LICENCE +0 -18
  134. data/lib/karafka/assignment_strategies/round_robin.rb +0 -13
  135. data/lib/karafka/attributes_map.rb +0 -63
  136. data/lib/karafka/backends/inline.rb +0 -16
  137. data/lib/karafka/base_responder.rb +0 -226
  138. data/lib/karafka/cli/flow.rb +0 -48
  139. data/lib/karafka/cli/missingno.rb +0 -19
  140. data/lib/karafka/code_reloader.rb +0 -67
  141. data/lib/karafka/connection/api_adapter.rb +0 -158
  142. data/lib/karafka/connection/batch_delegator.rb +0 -55
  143. data/lib/karafka/connection/builder.rb +0 -23
  144. data/lib/karafka/connection/message_delegator.rb +0 -36
  145. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  146. data/lib/karafka/consumers/callbacks.rb +0 -71
  147. data/lib/karafka/consumers/includer.rb +0 -64
  148. data/lib/karafka/consumers/responders.rb +0 -24
  149. data/lib/karafka/consumers/single_params.rb +0 -15
  150. data/lib/karafka/contracts/responder_usage.rb +0 -54
  151. data/lib/karafka/fetcher.rb +0 -42
  152. data/lib/karafka/helpers/class_matcher.rb +0 -88
  153. data/lib/karafka/helpers/config_retriever.rb +0 -46
  154. data/lib/karafka/helpers/inflector.rb +0 -26
  155. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  156. data/lib/karafka/params/batch_metadata.rb +0 -26
  157. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  158. data/lib/karafka/params/builders/params.rb +0 -38
  159. data/lib/karafka/params/builders/params_batch.rb +0 -25
  160. data/lib/karafka/params/params_batch.rb +0 -60
  161. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  162. data/lib/karafka/persistence/client.rb +0 -29
  163. data/lib/karafka/persistence/consumers.rb +0 -45
  164. data/lib/karafka/persistence/topics.rb +0 -48
  165. data/lib/karafka/responders/builder.rb +0 -36
  166. data/lib/karafka/responders/topic.rb +0 -55
  167. data/lib/karafka/routing/topic_mapper.rb +0 -53
  168. data/lib/karafka/serialization/json/serializer.rb +0 -31
  169. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  170. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Buffer that builds and caches one coordinator per topic partition
6
+ #
7
+ # It also resumes paused partitions (via its pauses manager) and handles partition revocation
8
+ #
9
+ # @note This buffer operates only from the listener loop, thus we do not have to make it
10
+ # thread-safe.
11
+ class CoordinatorsBuffer
12
+ def initialize
13
+ @pauses_manager = Connection::PausesManager.new
14
+ @coordinator_class = ::Karafka::App.config.internal.processing.coordinator_class
15
+ @coordinators = Hash.new { |h, k| h[k] = {} }
16
+ end
17
+
18
+ # @param topic [String] topic name
19
+ # @param partition [Integer] partition number
20
+ def find_or_create(topic, partition)
21
+ @coordinators[topic][partition] ||= @coordinator_class.new(
22
+ @pauses_manager.fetch(topic, partition)
23
+ )
24
+ end
25
+
26
+ # Resumes processing of partitions for which pause time has ended.
27
+ # @param block [Proc] block we want to run for resumed topic partitions
28
+ # @yieldparam [String] topic name
29
+ # @yieldparam [Integer] partition number
30
+ def resume(&block)
31
+ @pauses_manager.resume(&block)
32
+ end
33
+
34
+ # @param topic [String] topic name
35
+ # @param partition [Integer] partition number
36
+ def revoke(topic, partition)
37
+ return unless @coordinators[topic].key?(partition)
38
+
39
+ # The fact that we delete here does not change the fact that the executor still holds the
40
+ # reference to this coordinator. We delete it here, as we will no longer process any
41
+ # new stuff with it and we may need a new coordinator if we regain this partition, but the
42
+ # coordinator may still be in use
43
+ @coordinators[topic].delete(partition).revoke
44
+ end
45
+
46
+ # Clears coordinators and re-creates the pauses manager
47
+ # This should be used only for critical errors recovery
48
+ def reset
49
+ @pauses_manager = Connection::PausesManager.new
50
+ @coordinators.clear
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Namespace that encapsulates all the logic related to processing data.
5
+ module Processing
6
+ # Executors:
7
+ # - run consumers code (for `#call`) or run given preparation / teardown operations when needed
8
+ # from separate threads.
9
+ # - they re-create consumer instances in case of partitions that were revoked and assigned
10
+ # back.
11
+ #
12
+ # @note Executors are not removed after partition is revoked. They are not that big and will
13
+ # be re-used in case of a re-claim
14
+ class Executor
15
+ # @return [String] unique id of this executor that we use for state tracking
16
+ attr_reader :id
17
+
18
+ # @return [String] subscription group id to which a given executor belongs
19
+ attr_reader :group_id
20
+
21
+ # @return [Karafka::Messages::Messages, nil] messages batch; NOTE(review): never assigned here
22
+ attr_reader :messages
23
+
24
+ # Topic accessibility may be needed for the jobs builder to be able to build a proper job
25
+ # based on the topic settings defined by the end user
26
+ #
27
+ # @return [Karafka::Routing::Topic] topic of this executor
28
+ attr_reader :topic
29
+
30
+ # @param group_id [String] id of the subscription group to which the executor belongs
31
+ # @param client [Karafka::Connection::Client] kafka client
32
+ # @param topic [Karafka::Routing::Topic] topic for which this executor will run
33
+ def initialize(group_id, client, topic)
34
+ @id = SecureRandom.uuid
35
+ @group_id = group_id
36
+ @client = client
37
+ @topic = topic
38
+ end
39
+
40
+ # Builds the consumer instance, builds messages batch and sets all that is needed to run the
41
+ # user consumption logic
42
+ #
43
+ # @param messages [Array<Karafka::Messages::Message>]
44
+ # @param received_at [Time] the moment we've received the batch (actually the moment we've
45
+ # enqueued it, but good enough)
46
+ # @param coordinator [Karafka::Processing::Coordinator] coordinator for processing management
47
+ def before_consume(messages, received_at, coordinator)
48
+ # Recreate consumer with each batch if persistence is not enabled
49
+ # We reload the consumers with each batch instead of relying on some external signals
50
+ # when needed for consistency. That way devs may have it on or off and not in this
51
+ # middle state, where re-creation of a consumer instance would occur only sometimes
52
+ @consumer = nil unless ::Karafka::App.config.consumer_persistence
53
+
54
+ consumer.coordinator = coordinator
55
+
56
+ # First we build messages batch...
57
+ consumer.messages = Messages::Builders::Messages.call(
58
+ messages,
59
+ @topic,
60
+ received_at
61
+ )
62
+
63
+ consumer.on_before_consume
64
+ end
65
+
66
+ # Runs consumer data processing against given batch and handles failures and errors.
67
+ def consume
68
+ # We run the consumer client logic...
69
+ consumer.on_consume
70
+ end
71
+
72
+ # Runs consumer after consumption code
73
+ def after_consume
74
+ consumer.on_after_consume
75
+ end
76
+
77
+ # Runs the consumer `#on_revoked` method that should be triggered when a given consumer is
78
+ # no longer needed due to partitions reassignment.
79
+ #
80
+ # @note Clearing the consumer will ensure, that if we get the partition back, it will be
81
+ # handled with a consumer with a clean state.
82
+ #
83
+ # @note We run it only when consumer was present, because presence indicates, that at least
84
+ # a single message has been consumed.
85
+ #
86
+ # @note We do not reset the consumer but we indicate need for recreation instead, because
87
+ # after the revocation, there still may be `#after_consume` running that needs a given
88
+ # consumer instance.
89
+ def revoked
90
+ consumer.on_revoked if @consumer
91
+ end
92
+
93
+ # Runs the consumer `#on_shutdown` method that should be triggered when a given consumer is
94
+ # no longer needed as we're closing the process.
95
+ #
96
+ # @note While we do not need to clear the consumer here, it's a good habit to clean after
97
+ # work is done.
98
+ def shutdown
99
+ # There is a case, where the consumer no longer exists because it was revoked, in case like
100
+ # that we do not build a new instance and shutdown should not be triggered.
101
+ consumer.on_shutdown if @consumer
102
+ end
103
+
104
+ private
105
+
106
+ # @return [Object] cached consumer instance (built from the topic consumer class on first use)
107
+ def consumer
108
+ @consumer ||= begin
109
+ consumer = @topic.consumer_class.new
110
+ consumer.topic = @topic
111
+ consumer.client = @client
112
+ consumer.producer = ::Karafka::App.producer
113
+ consumer
114
+ end
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Buffer for executors of a given subscription group. It wraps around the concept of building
6
+ # and caching them, so we can re-use them instead of creating new each time.
7
+ class ExecutorsBuffer
8
+ # @param client [Connection::Client]
9
+ # @param subscription_group [Routing::SubscriptionGroup]
10
+ # @return [ExecutorsBuffer]
11
+ def initialize(client, subscription_group)
12
+ @subscription_group = subscription_group
13
+ @client = client
14
+ # We need nested hashes here to keep track of topics, partitions and parallel keys
15
+ @buffer = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = {} } }
16
+ end
17
+
18
+ # Finds or creates an executor based on the provided details
19
+ #
20
+ # @param topic [String] topic name
21
+ # @param partition [Integer] partition number
22
+ # @param parallel_key [String] parallel group key
23
+ # @return [Executor] consumer executor
24
+ def find_or_create(topic, partition, parallel_key)
25
+ ktopic = find_topic(topic)
26
+
27
+ @buffer[ktopic][partition][parallel_key] ||= Executor.new(
28
+ @subscription_group.id,
29
+ @client,
30
+ ktopic
31
+ )
32
+ end
33
+
34
+ # Revokes executors of a given topic partition, so they won't be used anymore for incoming
35
+ # messages
36
+ #
37
+ # @param topic [String] topic name
38
+ # @param partition [Integer] partition number
39
+ def revoke(topic, partition)
40
+ ktopic = find_topic(topic)
41
+
42
+ @buffer[ktopic][partition].clear
43
+ end
44
+
45
+ # Finds all the executors available for a given topic partition
46
+ #
47
+ # @param topic [String] topic name
48
+ # @param partition [Integer] partition number
49
+ # @return [Array<Executor>] executors in use for this topic + partition
50
+ def find_all(topic, partition)
51
+ ktopic = find_topic(topic)
52
+
53
+ @buffer[ktopic][partition].values
54
+ end
55
+
56
+ # Iterates over all available executors and yields them together with topic and partition
57
+ # info
58
+ # @yieldparam [Routing::Topic] karafka routing topic object
59
+ # @yieldparam [Integer] partition number
60
+ # @yieldparam [Executor] given executor
61
+ def each
62
+ @buffer.each do |ktopic, partitions|
63
+ partitions.each do |partition, executors|
64
+ executors.each do |_parallel_key, executor|
65
+ # We skip the parallel key here as it does not serve any value when iterating
66
+ yield(ktopic, partition, executor)
67
+ end
68
+ end
69
+ end
70
+ end
71
+
72
+ # Clears the executors buffer. Useful for critical errors recovery.
73
+ def clear
74
+ @buffer.clear
75
+ end
76
+
77
+ private
78
+
79
+ # Finds topic based on its name (raises `Errors::TopicNotFoundError` when nothing is found)
80
+ #
81
+ # @param topic [String] topic we're looking for
82
+ # @return [Karafka::Routing::Topic] topic we're interested in
83
+ def find_topic(topic)
84
+ @subscription_group.topics.find(topic) || raise(Errors::TopicNotFoundError, topic)
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Namespace for all the jobs that are supposed to run in workers.
6
+ module Jobs
7
+ # Base class for all the jobs types that are supposed to run in workers threads.
8
+ # Each job can have 3 main entry-points: `#before_call`, `#call` and `#after_call`
9
+ # Only `#call` is required.
10
+ class Base
11
+ extend Forwardable
12
+
13
+ # @note Since one job has always one executor, we use the executor id and group id as reference
14
+ def_delegators :executor, :id, :group_id
15
+
16
+ attr_reader :executor
17
+
18
+ # Creates a new job instance
19
+ def initialize
20
+ # All jobs are blocking by default and they can release the lock when blocking operations
21
+ # are done (if needed)
22
+ @non_blocking = false
23
+ end
24
+
25
+ # When redefined can run any code that should run before executing the proper code
26
+ def before_call; end
27
+
28
+ # The main entry-point of a job
29
+ def call
30
+ raise NotImplementedError, 'Please implement in a subclass'
31
+ end
32
+
33
+ # When redefined can run any code that should run after executing the proper code
34
+ def after_call; end
35
+
36
+ # @return [Boolean] is this a non-blocking job
37
+ #
38
+ # @note Blocking job is a job, that will cause the job queue to wait until it is finished
39
+ # before removing the lock on new jobs being added
40
+ #
41
+ # @note All the jobs are blocking by default
42
+ #
43
+ # @note Job **needs** to mark itself as non-blocking only **after** it is done with all
44
+ # the blocking things (pausing partition, etc).
45
+ def non_blocking?
46
+ @non_blocking
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # The main job type. It runs the executor that triggers given topic partition messages
7
+ # processing in an underlying consumer instance.
8
+ class Consume < Base
9
+ # @return [Array<Rdkafka::Consumer::Message>] messages; NOTE(review): type differs from @param
10
+ attr_reader :messages
11
+
12
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
13
+ # job
14
+ # @param messages [Karafka::Messages::Messages] karafka messages batch
15
+ # @param coordinator [Karafka::Processing::Coordinator] processing coordinator
16
+ # @return [Consume]
17
+ def initialize(executor, messages, coordinator)
18
+ @executor = executor
19
+ @messages = messages
20
+ @coordinator = coordinator
21
+ @created_at = Time.now
22
+ super()
23
+ end
24
+
25
+ # Runs the before consumption preparations on the executor
26
+ def before_call
27
+ executor.before_consume(@messages, @created_at, @coordinator)
28
+ end
29
+
30
+ # Runs the given executor
31
+ def call
32
+ executor.consume
33
+ end
34
+
35
+ # Runs any error handling and other post-consumption stuff on the executor
36
+ def after_call
37
+ executor.after_consume
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # Job that runs the revoked operation when we lose a partition on a consumer that lost it.
7
+ class Revoked < Base
8
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run the job
9
+ # @return [Revoked]
10
+ def initialize(executor)
11
+ @executor = executor
12
+ super()
13
+ end
14
+
15
+ # Runs the revoking job via an executor.
16
+ def call
17
+ executor.revoked
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ module Jobs
6
+ # Job that runs on each active consumer upon process shutdown (one job per consumer).
7
+ class Shutdown < Base
8
+ # @param executor [Karafka::Processing::Executor] executor that is supposed to run a given
9
+ # job on an active consumer
10
+ # @return [Shutdown]
11
+ def initialize(executor)
12
+ @executor = executor
13
+ super()
14
+ end
15
+
16
+ # Runs the shutdown job via an executor.
17
+ def call
18
+ executor.shutdown
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Class responsible for deciding what type of job we should build to run a given command and
6
+ # for building a proper job for it.
7
+ class JobsBuilder
8
+ # @param executor [Karafka::Processing::Executor]
9
+ # @param messages [Karafka::Messages::Messages] messages batch to be consumed
10
+ # @param coordinator [Karafka::Processing::Coordinator]
11
+ # @return [Karafka::Processing::Jobs::Consume] consumption job
12
+ def consume(executor, messages, coordinator)
13
+ Jobs::Consume.new(executor, messages, coordinator)
14
+ end
15
+
16
+ # @param executor [Karafka::Processing::Executor]
17
+ # @return [Karafka::Processing::Jobs::Revoked] revocation job
18
+ def revoked(executor)
19
+ Jobs::Revoked.new(executor)
20
+ end
21
+
22
+ # @param executor [Karafka::Processing::Executor]
23
+ # @return [Karafka::Processing::Jobs::Shutdown] shutdown job
24
+ def shutdown(executor)
25
+ Jobs::Shutdown.new(executor)
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # This is the key work component for Karafka jobs distribution. It provides API for running
6
+ # jobs in parallel while operating within more than one subscription group.
7
+ #
8
+ # We need to take into consideration fact, that more than one subscription group can operate
9
+ # on this queue, that's why internally we keep track of processing per group.
10
+ #
11
+ # We work with the assumption, that partitions data is evenly distributed.
12
+ class JobsQueue
13
+ # @return [Karafka::Processing::JobsQueue]
14
+ def initialize
15
+ @queue = Queue.new
16
+ # Those queues will act as semaphores internally. Since we need an indicator for waiting
17
+ # we could use Thread.pass but this is expensive. Instead we can just lock until any
18
+ # of the workers finishes their work and we can re-check. This means that in the worst
19
+ # scenario, we will context switch 10 times per poll instead of getting this thread
20
+ # scheduled by Ruby hundreds of thousands of times per group.
21
+ # We cannot use a single semaphore as it could potentially block in listeners that should
22
+ # process with their data and also could unlock when a given group needs to remain locked
23
+ @semaphores = Hash.new { |h, k| h[k] = Queue.new }
24
+ @in_processing = Hash.new { |h, k| h[k] = [] }
25
+ @mutex = Mutex.new
26
+ end
27
+
28
+ # Returns number of jobs that are either enqueued or in processing (but not finished)
29
+ # @return [Integer] number of elements in the queue
30
+ # @note Using `#pop` won't decrease this number as only marking job as completed does this
31
+ def size
32
+ @in_processing.values.map(&:size).sum
33
+ end
34
+
35
+ # Adds the job to the internal main queue, scheduling it for execution in a worker and marks
36
+ # this job as in processing pipeline.
37
+ #
38
+ # @param job [Jobs::Base] job that we want to run
39
+ def <<(job)
40
+ # We do not push the job if the queue is closed as it means that it would anyhow not be
41
+ # executed
42
+ return if @queue.closed?
43
+
44
+ @mutex.synchronize do
45
+ group = @in_processing[job.group_id]
46
+
47
+ raise(Errors::JobsQueueSynchronizationError, job.group_id) if group.include?(job)
48
+
49
+ group << job
50
+ end
51
+
52
+ @queue << job
53
+ end
54
+
55
+ # @return [Jobs::Base, nil] waits for a job from the main queue and returns it once available
56
+ # or returns nil if the queue has been stopped and there won't be anything more to process
57
+ # ever.
58
+ # @note This command is blocking and will wait until any job is available on the main queue
59
+ def pop
60
+ @queue.pop
61
+ end
62
+
63
+ # Causes the wait lock to re-check the lock conditions and potential unlock.
64
+ # @param group_id [String] id of the group we want to unlock for one tick
65
+ # @note This does not release the wait lock. It just causes a conditions recheck
66
+ def tick(group_id)
67
+ @semaphores[group_id] << true
68
+ end
69
+
70
+ # Marks a given job from a given group as completed. When there are no more jobs from a given
71
+ # group to be executed, we won't wait.
72
+ #
73
+ # @param job [Jobs::Base] job that was completed
74
+ def complete(job)
75
+ @mutex.synchronize do
76
+ @in_processing[job.group_id].delete(job)
77
+ tick(job.group_id)
78
+ end
79
+ end
80
+
81
+ # Clears the processing states for a provided group. Useful when a recovery happens and we
82
+ # need to clean up state but only for a given subscription group.
83
+ #
84
+ # @param group_id [String]
85
+ def clear(group_id)
86
+ @mutex.synchronize do
87
+ @in_processing[group_id].clear
88
+ # We unlock it just in case it was blocked when clearing started
89
+ tick(group_id)
90
+ end
91
+ end
92
+
93
+ # Stops the whole processing queue.
94
+ def close
95
+ @mutex.synchronize do
96
+ return if @queue.closed?
97
+
98
+ @queue.close
99
+ @semaphores.values.each(&:close)
100
+ end
101
+ end
102
+
103
+ # @param group_id [String]
104
+ #
105
+ # @return [Boolean] tells us if we have anything in the processing (or for processing) from
106
+ # a given group.
107
+ def empty?(group_id)
108
+ @in_processing[group_id].empty?
109
+ end
110
+
111
+ # Blocks when there are things in the queue in a given group and waits until all the blocking
112
+ # jobs from a given group are completed
113
+ #
114
+ # @param group_id [String] id of the group in which jobs we're interested.
115
+ # @note This method is blocking.
116
+ def wait(group_id)
117
+ # Go doing other things while we cannot process and wait for anyone to finish their work
118
+ # and re-check the wait status
119
+ @semaphores[group_id].pop while wait?(group_id)
120
+ end
121
+
122
+ # - `processing` - number of jobs that are currently being processed (active work)
123
+ # - `enqueued` - number of jobs in the queue that are waiting to be picked up by a worker
124
+ #
125
+ # @return [Hash] hash with basic usage statistics of this queue.
126
+ def statistics
127
+ {
128
+ processing: size - @queue.size,
129
+ enqueued: @queue.size
130
+ }.freeze
131
+ end
132
+
133
+ private
134
+
135
+ # @param group_id [String] id of the group in which jobs we're interested.
136
+ # @return [Boolean] should we keep waiting or not
137
+ # @note We do not wait for non-blocking jobs. Their flow should allow for `poll` running
138
+ # as they may exceed `max.poll.interval`
139
+ def wait?(group_id)
140
+ !@in_processing[group_id].all?(&:non_blocking?)
141
+ end
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # Basic partitioner for work division
6
+ # It does not divide any work: all messages are yielded as a single group.
7
+ class Partitioner
8
+ # @param subscription_group [Karafka::Routing::SubscriptionGroup] subscription group
9
+ def initialize(subscription_group)
10
+ @subscription_group = subscription_group
11
+ end
12
+
13
+ # @param _topic [String] topic name (unused by this partitioner)
14
+ # @param messages [Array<Karafka::Messages::Message>] karafka messages
15
+ # @yieldparam [Integer] group id (always 0 here)
16
+ # @yieldparam [Array<Karafka::Messages::Message>] karafka messages
17
+ def call(_topic, messages)
18
+ yield(0, messages)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # A simple object that allows us to keep track of processing state (successful by default).
6
+ # It allows us to indicate if a given thing moved from success to a failure or the other way around
7
+ # Useful for tracking consumption state
8
+ class Result
9
+ def initialize
10
+ @success = true
11
+ end
12
+
13
+ # @return [Boolean] true if the current state is successful
14
+ def success?
15
+ @success
16
+ end
17
+
18
+ # Marks state as successful
19
+ def success!
20
+ @success = true
21
+ end
22
+
23
+ # Marks state as failure
24
+ def failure!
25
+ @success = false
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Processing
5
+ # FIFO scheduler for messages coming from various topics and partitions
6
+ class Scheduler
7
+ # Schedules jobs in the FIFO order (each job is pushed to the queue in the order given)
8
+ #
9
+ # @param queue [Karafka::Processing::JobsQueue] queue where we want to put the jobs
10
+ # @param jobs_array [Array<Karafka::Processing::Jobs::Base>] jobs we want to schedule
11
+ def schedule_consumption(queue, jobs_array)
12
+ jobs_array.each do |job|
13
+ queue << job
14
+ end
15
+ end
16
+
17
+ # Both revocation and shutdown jobs can also run in FIFO by default (same method, aliased)
18
+ alias schedule_revocation schedule_consumption
19
+ alias schedule_shutdown schedule_consumption
20
+ end
21
+ end
22
+ end