karafka 1.4.0 → 2.0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +89 -18
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +365 -1
  6. data/CONTRIBUTING.md +10 -19
  7. data/Gemfile +6 -0
  8. data/Gemfile.lock +56 -112
  9. data/LICENSE +17 -0
  10. data/LICENSE-COMM +89 -0
  11. data/LICENSE-LGPL +165 -0
  12. data/README.md +61 -68
  13. data/bin/benchmarks +85 -0
  14. data/bin/create_token +22 -0
  15. data/bin/integrations +272 -0
  16. data/bin/karafka +10 -0
  17. data/bin/scenario +29 -0
  18. data/bin/stress_many +13 -0
  19. data/bin/stress_one +13 -0
  20. data/certs/cert_chain.pem +26 -0
  21. data/certs/karafka-pro.pem +11 -0
  22. data/config/errors.yml +59 -38
  23. data/docker-compose.yml +10 -3
  24. data/karafka.gemspec +18 -21
  25. data/lib/active_job/karafka.rb +21 -0
  26. data/lib/active_job/queue_adapters/karafka_adapter.rb +26 -0
  27. data/lib/karafka/active_job/consumer.rb +26 -0
  28. data/lib/karafka/active_job/dispatcher.rb +38 -0
  29. data/lib/karafka/active_job/job_extensions.rb +34 -0
  30. data/lib/karafka/active_job/job_options_contract.rb +21 -0
  31. data/lib/karafka/active_job/routing/extensions.rb +33 -0
  32. data/lib/karafka/admin.rb +63 -0
  33. data/lib/karafka/app.rb +15 -20
  34. data/lib/karafka/base_consumer.rb +197 -31
  35. data/lib/karafka/cli/info.rb +44 -10
  36. data/lib/karafka/cli/install.rb +22 -12
  37. data/lib/karafka/cli/server.rb +17 -42
  38. data/lib/karafka/cli.rb +4 -3
  39. data/lib/karafka/connection/client.rb +379 -89
  40. data/lib/karafka/connection/listener.rb +250 -38
  41. data/lib/karafka/connection/listeners_batch.rb +24 -0
  42. data/lib/karafka/connection/messages_buffer.rb +84 -0
  43. data/lib/karafka/connection/pauses_manager.rb +46 -0
  44. data/lib/karafka/connection/raw_messages_buffer.rb +101 -0
  45. data/lib/karafka/connection/rebalance_manager.rb +78 -0
  46. data/lib/karafka/contracts/base.rb +17 -0
  47. data/lib/karafka/contracts/config.rb +88 -11
  48. data/lib/karafka/contracts/consumer_group.rb +21 -184
  49. data/lib/karafka/contracts/consumer_group_topic.rb +35 -11
  50. data/lib/karafka/contracts/server_cli_options.rb +19 -18
  51. data/lib/karafka/contracts.rb +1 -1
  52. data/lib/karafka/env.rb +46 -0
  53. data/lib/karafka/errors.rb +21 -21
  54. data/lib/karafka/helpers/async.rb +33 -0
  55. data/lib/karafka/helpers/colorize.rb +20 -0
  56. data/lib/karafka/helpers/multi_delegator.rb +2 -2
  57. data/lib/karafka/instrumentation/callbacks/error.rb +40 -0
  58. data/lib/karafka/instrumentation/callbacks/statistics.rb +41 -0
  59. data/lib/karafka/instrumentation/logger.rb +6 -10
  60. data/lib/karafka/instrumentation/logger_listener.rb +174 -0
  61. data/lib/karafka/instrumentation/monitor.rb +13 -61
  62. data/lib/karafka/instrumentation/notifications.rb +53 -0
  63. data/lib/karafka/instrumentation/proctitle_listener.rb +3 -3
  64. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -0
  65. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +232 -0
  66. data/lib/karafka/instrumentation.rb +21 -0
  67. data/lib/karafka/licenser.rb +75 -0
  68. data/lib/karafka/messages/batch_metadata.rb +45 -0
  69. data/lib/karafka/messages/builders/batch_metadata.rb +39 -0
  70. data/lib/karafka/messages/builders/message.rb +39 -0
  71. data/lib/karafka/messages/builders/messages.rb +34 -0
  72. data/lib/karafka/{params/params.rb → messages/message.rb} +7 -12
  73. data/lib/karafka/messages/messages.rb +64 -0
  74. data/lib/karafka/{params → messages}/metadata.rb +4 -6
  75. data/lib/karafka/messages/seek.rb +9 -0
  76. data/lib/karafka/patches/rdkafka/consumer.rb +22 -0
  77. data/lib/karafka/pro/active_job/consumer.rb +46 -0
  78. data/lib/karafka/pro/active_job/dispatcher.rb +61 -0
  79. data/lib/karafka/pro/active_job/job_options_contract.rb +32 -0
  80. data/lib/karafka/pro/base_consumer.rb +107 -0
  81. data/lib/karafka/pro/contracts/base.rb +21 -0
  82. data/lib/karafka/pro/contracts/consumer_group.rb +34 -0
  83. data/lib/karafka/pro/contracts/consumer_group_topic.rb +69 -0
  84. data/lib/karafka/pro/loader.rb +76 -0
  85. data/lib/karafka/pro/performance_tracker.rb +80 -0
  86. data/lib/karafka/pro/processing/coordinator.rb +85 -0
  87. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +38 -0
  88. data/lib/karafka/pro/processing/jobs_builder.rb +32 -0
  89. data/lib/karafka/pro/processing/partitioner.rb +58 -0
  90. data/lib/karafka/pro/processing/scheduler.rb +56 -0
  91. data/lib/karafka/pro/routing/builder_extensions.rb +30 -0
  92. data/lib/karafka/pro/routing/topic_extensions.rb +74 -0
  93. data/lib/karafka/pro.rb +13 -0
  94. data/lib/karafka/process.rb +1 -0
  95. data/lib/karafka/processing/coordinator.rb +103 -0
  96. data/lib/karafka/processing/coordinators_buffer.rb +54 -0
  97. data/lib/karafka/processing/executor.rb +126 -0
  98. data/lib/karafka/processing/executors_buffer.rb +88 -0
  99. data/lib/karafka/processing/jobs/base.rb +55 -0
  100. data/lib/karafka/processing/jobs/consume.rb +47 -0
  101. data/lib/karafka/processing/jobs/revoked.rb +22 -0
  102. data/lib/karafka/processing/jobs/shutdown.rb +23 -0
  103. data/lib/karafka/processing/jobs_builder.rb +29 -0
  104. data/lib/karafka/processing/jobs_queue.rb +144 -0
  105. data/lib/karafka/processing/partitioner.rb +22 -0
  106. data/lib/karafka/processing/result.rb +37 -0
  107. data/lib/karafka/processing/scheduler.rb +22 -0
  108. data/lib/karafka/processing/worker.rb +91 -0
  109. data/lib/karafka/processing/workers_batch.rb +27 -0
  110. data/lib/karafka/railtie.rb +127 -0
  111. data/lib/karafka/routing/builder.rb +26 -23
  112. data/lib/karafka/routing/consumer_group.rb +37 -17
  113. data/lib/karafka/routing/consumer_mapper.rb +1 -2
  114. data/lib/karafka/routing/proxy.rb +9 -16
  115. data/lib/karafka/routing/router.rb +1 -1
  116. data/lib/karafka/routing/subscription_group.rb +53 -0
  117. data/lib/karafka/routing/subscription_groups_builder.rb +54 -0
  118. data/lib/karafka/routing/topic.rb +65 -24
  119. data/lib/karafka/routing/topics.rb +38 -0
  120. data/lib/karafka/runner.rb +51 -0
  121. data/lib/karafka/serialization/json/deserializer.rb +6 -15
  122. data/lib/karafka/server.rb +67 -26
  123. data/lib/karafka/setup/config.rb +153 -175
  124. data/lib/karafka/status.rb +14 -5
  125. data/lib/karafka/templates/example_consumer.rb.erb +16 -0
  126. data/lib/karafka/templates/karafka.rb.erb +17 -55
  127. data/lib/karafka/time_trackers/base.rb +19 -0
  128. data/lib/karafka/time_trackers/pause.rb +92 -0
  129. data/lib/karafka/time_trackers/poll.rb +65 -0
  130. data/lib/karafka/version.rb +1 -1
  131. data/lib/karafka.rb +46 -16
  132. data.tar.gz.sig +0 -0
  133. metadata +145 -171
  134. metadata.gz.sig +0 -0
  135. data/.github/FUNDING.yml +0 -3
  136. data/MIT-LICENCE +0 -18
  137. data/certs/mensfeld.pem +0 -25
  138. data/lib/karafka/attributes_map.rb +0 -62
  139. data/lib/karafka/backends/inline.rb +0 -16
  140. data/lib/karafka/base_responder.rb +0 -226
  141. data/lib/karafka/cli/flow.rb +0 -48
  142. data/lib/karafka/code_reloader.rb +0 -67
  143. data/lib/karafka/connection/api_adapter.rb +0 -161
  144. data/lib/karafka/connection/batch_delegator.rb +0 -55
  145. data/lib/karafka/connection/builder.rb +0 -18
  146. data/lib/karafka/connection/message_delegator.rb +0 -36
  147. data/lib/karafka/consumers/batch_metadata.rb +0 -10
  148. data/lib/karafka/consumers/callbacks.rb +0 -71
  149. data/lib/karafka/consumers/includer.rb +0 -64
  150. data/lib/karafka/consumers/responders.rb +0 -24
  151. data/lib/karafka/consumers/single_params.rb +0 -15
  152. data/lib/karafka/contracts/responder_usage.rb +0 -54
  153. data/lib/karafka/fetcher.rb +0 -42
  154. data/lib/karafka/helpers/class_matcher.rb +0 -88
  155. data/lib/karafka/helpers/config_retriever.rb +0 -46
  156. data/lib/karafka/helpers/inflector.rb +0 -26
  157. data/lib/karafka/instrumentation/stdout_listener.rb +0 -140
  158. data/lib/karafka/params/batch_metadata.rb +0 -26
  159. data/lib/karafka/params/builders/batch_metadata.rb +0 -30
  160. data/lib/karafka/params/builders/params.rb +0 -38
  161. data/lib/karafka/params/builders/params_batch.rb +0 -25
  162. data/lib/karafka/params/params_batch.rb +0 -60
  163. data/lib/karafka/patches/ruby_kafka.rb +0 -47
  164. data/lib/karafka/persistence/client.rb +0 -29
  165. data/lib/karafka/persistence/consumers.rb +0 -45
  166. data/lib/karafka/persistence/topics.rb +0 -48
  167. data/lib/karafka/responders/builder.rb +0 -36
  168. data/lib/karafka/responders/topic.rb +0 -55
  169. data/lib/karafka/routing/topic_mapper.rb +0 -53
  170. data/lib/karafka/serialization/json/serializer.rb +0 -31
  171. data/lib/karafka/setup/configurators/water_drop.rb +0 -36
  172. data/lib/karafka/templates/application_responder.rb.erb +0 -11
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ module Callbacks
6
+ # Statistics callback handler
7
+ # @see `WaterDrop::Instrumentation::Callbacks::Statistics` for details on why we decorate
8
+ # those statistics
9
+ class Statistics
10
+ # @param subscription_group_id [String] id of the current subscription group
11
+ # @param consumer_group_id [String] id of the current consumer group
12
+ # @param client_name [String] rdkafka client name
13
+ # @param monitor [WaterDrop::Instrumentation::Monitor] monitor we are using
14
+ def initialize(subscription_group_id, consumer_group_id, client_name, monitor)
15
+ @subscription_group_id = subscription_group_id
16
+ @consumer_group_id = consumer_group_id
17
+ @client_name = client_name
18
+ @monitor = monitor
19
+ @statistics_decorator = ::Karafka::Core::Monitoring::StatisticsDecorator.new
20
+ end
21
+
22
+ # Emits decorated statistics to the monitor
23
+ # @param statistics [Hash] rdkafka statistics
24
+ def call(statistics)
25
+ # Emit only statistics related to our client
26
+ # rdkafka does not have per-instance statistics hook, thus we need to make sure that we
27
+ # emit only stats that are related to current producer. Otherwise we would emit all of
28
+ # all the time.
29
+ return unless @client_name == statistics['name']
30
+
31
+ @monitor.instrument(
32
+ 'statistics.emitted',
33
+ subscription_group_id: @subscription_group_id,
34
+ consumer_group_id: @consumer_group_id,
35
+ statistics: @statistics_decorator.call(statistics)
36
+ )
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -20,7 +20,6 @@ module Karafka
20
20
  # @param _args Any arguments that we don't care about but that are needed in order to
21
21
  # make this logger compatible with the default Ruby one
22
22
  def initialize(*_args)
23
- ensure_dir_exists
24
23
  super(target)
25
24
  self.level = ENV_MAP[Karafka.env] || ENV_MAP['default']
26
25
  end
@@ -29,18 +28,11 @@ module Karafka
29
28
 
30
29
  # @return [Karafka::Helpers::MultiDelegator] multi delegator instance
31
30
  # to which we will be writing logs
32
- # We use this approach to log stuff to file and to the STDOUT at the same time
31
+ # We use this approach to log stuff to file and to the $stdout at the same time
33
32
  def target
34
33
  Karafka::Helpers::MultiDelegator
35
34
  .delegate(:write, :close)
36
- .to(STDOUT, file)
37
- end
38
-
39
- # Makes sure the log directory exists as long as we can write to it
40
- def ensure_dir_exists
41
- FileUtils.mkdir_p(File.dirname(log_path))
42
- rescue Errno::EACCES
43
- nil
35
+ .to(*[$stdout, file].compact)
44
36
  end
45
37
 
46
38
  # @return [Pathname] Path to a file to which we should log
@@ -51,7 +43,11 @@ module Karafka
51
43
  # @return [File] file to which we want to write our logs
52
44
  # @note File is being opened in append mode ('a')
53
45
  def file
46
+ FileUtils.mkdir_p(File.dirname(log_path))
47
+
54
48
  @file ||= File.open(log_path, 'a')
49
+ rescue Errno::EACCES, Errno::EROFS
50
+ nil
55
51
  end
56
52
  end
57
53
  end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Default listener that hooks up to our instrumentation and uses its events for logging
6
+ # It can be removed/replaced or anything without any harm to the Karafka app flow.
7
+ class LoggerListener
8
+ # Log levels that we use in this particular listener
9
+ USED_LOG_LEVELS = %i[
10
+ debug
11
+ info
12
+ warn
13
+ error
14
+ fatal
15
+ ].freeze
16
+
17
+ # Logs each messages fetching attempt
18
+ #
19
+ # @param event [Dry::Events::Event] event details including payload
20
+ def on_connection_listener_fetch_loop(event)
21
+ listener = event[:caller]
22
+ debug "[#{listener.id}] Polling messages..."
23
+ end
24
+
25
+ # Logs about messages that we've received from Kafka
26
+ #
27
+ # @param event [Dry::Events::Event] event details including payload
28
+ def on_connection_listener_fetch_loop_received(event)
29
+ listener = event[:caller]
30
+ time = event[:time]
31
+ messages_count = event[:messages_buffer].size
32
+
33
+ message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
34
+
35
+ # We don't want the "polled 0" in dev as it would spam the log
36
+ # Instead we publish only info when there was anything we could poll and fail over to the
37
+ # zero notifications when in debug mode
38
+ messages_count.zero? ? debug(message) : info(message)
39
+ end
40
+
41
+ # Prints info about the fact that a given job has started
42
+ #
43
+ # @param event [Dry::Events::Event] event details including payload
44
+ def on_worker_process(event)
45
+ job = event[:job]
46
+ job_type = job.class.to_s.split('::').last
47
+ consumer = job.executor.topic.consumer
48
+ topic = job.executor.topic.name
49
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
50
+ end
51
+
52
+ # Prints info about the fact that a given job has finished
53
+ #
54
+ # @param event [Dry::Events::Event] event details including payload
55
+ def on_worker_processed(event)
56
+ job = event[:job]
57
+ time = event[:time]
58
+ job_type = job.class.to_s.split('::').last
59
+ consumer = job.executor.topic.consumer
60
+ topic = job.executor.topic.name
61
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
62
+ end
63
+
64
+ # Logs info about system signals that Karafka received and prints backtrace for threads in
65
+ # case of ttin
66
+ #
67
+ # @param event [Dry::Events::Event] event details including payload
68
+ def on_process_notice_signal(event)
69
+ info "Received #{event[:signal]} system signal"
70
+
71
+ # We print backtrace only for ttin
72
+ return unless event[:signal] == :SIGTTIN
73
+
74
+ # Inspired by Sidekiq
75
+ Thread.list.each do |thread|
76
+ tid = (thread.object_id ^ ::Process.pid).to_s(36)
77
+
78
+ warn "Thread TID-#{tid} #{thread['label']}"
79
+
80
+ if thread.backtrace
81
+ warn thread.backtrace.join("\n")
82
+ else
83
+ warn '<no backtrace available>'
84
+ end
85
+ end
86
+ end
87
+
88
+ # Logs info that we're initializing Karafka app.
89
+ #
90
+ # @param _event [Dry::Events::Event] event details including payload
91
+ def on_app_initializing(_event)
92
+ info 'Initializing Karafka framework'
93
+ end
94
+
95
+ # Logs info that we're running Karafka app.
96
+ #
97
+ # @param _event [Dry::Events::Event] event details including payload
98
+ def on_app_running(_event)
99
+ info "Running in #{RUBY_DESCRIPTION}"
100
+ info "Running Karafka #{Karafka::VERSION} server"
101
+
102
+ return if Karafka.pro?
103
+
104
+ info 'See LICENSE and the LGPL-3.0 for licensing details.'
105
+ end
106
+
107
+ # Logs info that we're going to stop the Karafka server.
108
+ #
109
+ # @param _event [Dry::Events::Event] event details including payload
110
+ def on_app_stopping(_event)
111
+ info 'Stopping Karafka server'
112
+ end
113
+
114
+ # Logs info that we stopped the Karafka server.
115
+ #
116
+ # @param _event [Dry::Events::Event] event details including payload
117
+ def on_app_stopped(_event)
118
+ info 'Stopped Karafka server'
119
+ end
120
+
121
+ # There are many types of errors that can occur in many places, but we provide a single
122
+ # handler for all of them to simplify error instrumentation.
123
+ # @param event [Dry::Events::Event] event details including payload
124
+ def on_error_occurred(event)
125
+ type = event[:type]
126
+ error = event[:error]
127
+ details = (error.backtrace || []).join("\n")
128
+
129
+ case type
130
+ when 'consumer.consume.error'
131
+ error "Consumer consuming error: #{error}"
132
+ error details
133
+ when 'consumer.revoked.error'
134
+ error "Consumer on revoked failed due to an error: #{error}"
135
+ error details
136
+ when 'consumer.shutdown.error'
137
+ error "Consumer on shutdown failed due to an error: #{error}"
138
+ error details
139
+ when 'worker.process.error'
140
+ fatal "Worker processing failed due to an error: #{error}"
141
+ fatal details
142
+ when 'connection.listener.fetch_loop.error'
143
+ error "Listener fetch loop error: #{error}"
144
+ error details
145
+ when 'licenser.expired'
146
+ error error
147
+ error details
148
+ when 'runner.call.error'
149
+ fatal "Runner crashed due to an error: #{error}"
150
+ fatal details
151
+ when 'app.stopping.error'
152
+ error 'Forceful Karafka server stop'
153
+ when 'librdkafka.error'
154
+ error "librdkafka internal error occurred: #{error}"
155
+ error details
156
+ # Those will only occur when retries in the client fail and when they did not stop after
157
+ # backoffs
158
+ when 'connection.client.poll.error'
159
+ error "Data polling error occurred: #{error}"
160
+ error details
161
+ else
162
+ # This should never happen. Please contact the maintainers
163
+ raise Errors::UnsupportedCaseError, event
164
+ end
165
+ end
166
+
167
+ USED_LOG_LEVELS.each do |log_level|
168
+ define_method log_level do |*args|
169
+ Karafka.logger.send(log_level, *args)
170
+ end
171
+ end
172
+ end
173
+ end
174
+ end
@@ -1,69 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Karafka
4
- # Namespace for all the things related with Karafka instrumentation process
5
4
  module Instrumentation
6
- # Monitor is used to hookup external monitoring services to monitor how Karafka works
7
- # It provides a standardized API for checking incoming messages/enqueueing etc
8
- # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
9
- # same time, which means that you might have for example file logging and NewRelic at the same
10
- # time
11
- # @note This class acts as a singleton because we are only permitted to have single monitor
12
- # per running process (just as logger)
13
- class Monitor < Dry::Monitor::Notifications
14
- # List of events that we support in the system and to which a monitor client can hook up
15
- # @note The non-error once support timestamp benchmarking
16
- # @note Depending on Karafka extensions and additional engines, this might not be the
17
- # complete list of all the events. Please use the #available_events on fully loaded
18
- # Karafka system to determine all of the events you can use.
19
- # Last 4 events are from WaterDrop but for convenience we use the same monitor for the
20
- # whole karafka ecosystem
21
- BASE_EVENTS = %w[
22
- params.params.deserialize
23
- params.params.deserialize.error
24
- connection.listener.before_fetch_loop
25
- connection.listener.fetch_loop
26
- connection.listener.fetch_loop.error
27
- connection.client.fetch_loop.error
28
- connection.batch_delegator.call
29
- connection.message_delegator.call
30
- fetcher.call.error
31
- backends.inline.process
32
- process.notice_signal
33
- consumers.responders.respond_with
34
- async_producer.call.error
35
- async_producer.call.retry
36
- sync_producer.call.error
37
- sync_producer.call.retry
38
- app.initializing
39
- app.initialized
40
- app.running
41
- app.stopping
42
- app.stopping.error
43
- app.stopped
44
- ].freeze
5
+ # Karafka instrumentation monitor that we use to publish events
6
+ # By default uses our internal notifications bus but can be used with
7
+ # `ActiveSupport::Notifications` as well
8
+ class Monitor < ::Karafka::Core::Monitoring::Monitor
9
+ attr_reader :notifications_bus
45
10
 
46
- private_constant :BASE_EVENTS
47
-
48
- # @return [Karafka::Instrumentation::Monitor] monitor instance for system instrumentation
49
- def initialize
50
- super(:karafka)
51
- BASE_EVENTS.each(&method(:register_event))
52
- end
53
-
54
- # Allows us to subscribe to events with a code that will be yielded upon events
55
- # @param event_name_or_listener [String, Object] name of the event we want to subscribe to
56
- # or a listener if we decide to go with object listener
57
- def subscribe(event_name_or_listener)
58
- return super unless event_name_or_listener.is_a?(String)
59
- return super if available_events.include?(event_name_or_listener)
60
-
61
- raise Errors::UnregisteredMonitorEventError, event_name_or_listener
62
- end
63
-
64
- # @return [Array<String>] names of available events to which we can subscribe
65
- def available_events
66
- __bus__.events.keys
11
+ # @param notifications_bus [Object] either our internal notifications bus or
12
+ # `ActiveSupport::Notifications`
13
+ # @param namespace [String, nil] namespace for events or nil if no namespace
14
+ def initialize(
15
+ notifications_bus = ::Karafka::Instrumentation::Notifications.new,
16
+ namespace = nil
17
+ )
18
+ super(notifications_bus, namespace)
67
19
  end
68
20
  end
69
21
  end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ # Namespace for all the things related with Karafka instrumentation process
5
+ module Instrumentation
6
+ # Monitor is used to hookup external monitoring services to monitor how Karafka works
7
+ # It provides a standardized API for checking incoming messages/enqueueing etc
8
+ # Since it is a pub-sub based on dry-monitor, you can use as many subscribers/loggers at the
9
+ # same time, which means that you might have for example file logging and NewRelic at the same
10
+ # time
11
+ # @note This class acts as a singleton because we are only permitted to have single monitor
12
+ # per running process (just as logger)
13
+ class Notifications < Karafka::Core::Monitoring::Notifications
14
+ # List of events that we support in the system and to which a monitor client can hook up
15
+ # @note The non-error once support timestamp benchmarking
16
+ # @note Depending on Karafka extensions and additional engines, this might not be the
17
+ # complete list of all the events. Please use the #available_events on fully loaded
18
+ # Karafka system to determine all of the events you can use.
19
+ EVENTS = %w[
20
+ app.initialized
21
+ app.running
22
+ app.stopping
23
+ app.stopped
24
+
25
+ consumer.consumed
26
+ consumer.revoked
27
+ consumer.shutdown
28
+
29
+ process.notice_signal
30
+
31
+ connection.listener.before_fetch_loop
32
+ connection.listener.fetch_loop
33
+ connection.listener.fetch_loop.received
34
+
35
+ worker.process
36
+ worker.processed
37
+ worker.completed
38
+
39
+ statistics.emitted
40
+
41
+ error.occurred
42
+ ].freeze
43
+
44
+ private_constant :EVENTS
45
+
46
+ # @return [Karafka::Instrumentation::Monitor] monitor instance for system instrumentation
47
+ def initialize
48
+ super
49
+ EVENTS.each { |event| register_event(event) }
50
+ end
51
+ end
52
+ end
53
+ end
@@ -5,19 +5,19 @@ module Karafka
5
5
  # Listener that sets a proc title with a nice descriptive value
6
6
  class ProctitleListener
7
7
  # Updates proc title to an initializing one
8
- # @param _event [Dry::Events::Event] event details including payload
8
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
9
9
  def on_app_initializing(_event)
10
10
  setproctitle('initializing')
11
11
  end
12
12
 
13
13
  # Updates proc title to a running one
14
- # @param _event [Dry::Events::Event] event details including payload
14
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
15
15
  def on_app_running(_event)
16
16
  setproctitle('running')
17
17
  end
18
18
 
19
19
  # Updates proc title to a stopping one
20
- # @param _event [Dry::Events::Event] event details including payload
20
+ # @param _event [Karafka::Core::Monitoring::Event] event details including payload
21
21
  def on_app_stopping(_event)
22
22
  setproctitle('stopping')
23
23
  end
@@ -0,0 +1 @@
1
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}