karafka 2.2.7 → 2.2.8.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +12 -0
  4. data/Gemfile.lock +3 -5
  5. data/bin/karafka +2 -3
  6. data/docker-compose.yml +3 -1
  7. data/karafka.gemspec +1 -2
  8. data/lib/karafka/base_consumer.rb +1 -0
  9. data/lib/karafka/cli/base.rb +45 -34
  10. data/lib/karafka/cli/console.rb +5 -4
  11. data/lib/karafka/cli/help.rb +24 -0
  12. data/lib/karafka/cli/info.rb +2 -2
  13. data/lib/karafka/cli/install.rb +4 -4
  14. data/lib/karafka/cli/server.rb +68 -33
  15. data/lib/karafka/cli/topics.rb +1 -1
  16. data/lib/karafka/cli.rb +23 -19
  17. data/lib/karafka/connection/client.rb +9 -4
  18. data/lib/karafka/connection/rebalance_manager.rb +36 -21
  19. data/lib/karafka/errors.rb +3 -0
  20. data/lib/karafka/instrumentation/callbacks/rebalance.rb +64 -0
  21. data/lib/karafka/instrumentation/notifications.rb +5 -1
  22. data/lib/karafka/instrumentation/vendors/appsignal/base.rb +30 -0
  23. data/lib/karafka/instrumentation/vendors/appsignal/client.rb +122 -0
  24. data/lib/karafka/instrumentation/vendors/appsignal/dashboard.json +222 -0
  25. data/lib/karafka/instrumentation/vendors/appsignal/errors_listener.rb +30 -0
  26. data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +331 -0
  27. data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
  28. data/lib/karafka/patches/rdkafka/bindings.rb +22 -39
  29. data/lib/karafka/patches/rdkafka/opaque.rb +36 -0
  30. data/lib/karafka/pro/processing/coordinator.rb +6 -7
  31. data/lib/karafka/pro/processing/strategies/vp/default.rb +20 -0
  32. data/lib/karafka/version.rb +1 -1
  33. data/lib/karafka.rb +1 -1
  34. data.tar.gz.sig +0 -0
  35. metadata +14 -20
  36. metadata.gz.sig +0 -0
@@ -60,5 +60,8 @@ module Karafka
60
60
  # Raised when we run operations that require certain result but despite successfully finishing
61
61
  # it is not yet available due to some synchronization mechanisms and caches
62
62
  ResultNotVisibleError = Class.new(BaseError)
63
+
64
+ # Raised when there is an attempt to run an unrecognized CLI command
65
+ UnrecognizedCommandError = Class.new(BaseError)
63
66
  end
64
67
  end
@@ -0,0 +1,64 @@
# frozen_string_literal: true

module Karafka
  module Instrumentation
    module Callbacks
      # Translates librdkafka rebalance callbacks into Karafka's own
      # instrumentation events published via the monitor
      class Rebalance
        # @param subscription_group_id [String] id of the current subscription group instance
        # @param consumer_group_id [String] id of the current consumer group
        def initialize(subscription_group_id, consumer_group_id)
          @subscription_group_id = subscription_group_id
          @consumer_group_id = consumer_group_id
        end

        # Each callback below re-publishes the matching "rebalance.*" event:
        #
        # - partitions_revoke: partitions are going to be revoked; at this
        #   stage offsets can still be committed, etc.
        # - partitions_assign: partitions are going to be assigned
        # - partitions_revoked: partitions were already revoked (after we've
        #   lost them), so there is no option to commit offsets
        # - partitions_assigned: partitions were assigned
        %w[
          partitions_revoke
          partitions_assign
          partitions_revoked
          partitions_assigned
        ].each do |event_name|
          # @param tpl [Rdkafka::Consumer::TopicPartitionList]
          define_method(:"on_#{event_name}") do |tpl|
            publish(event_name, tpl)
          end
        end

        private

        # Publishes info that a rebalance event of a given type has happened
        #
        # @param name [String] name of the event
        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
        def publish(name, tpl)
          ::Karafka.monitor.instrument(
            "rebalance.#{name}",
            caller: self,
            subscription_group_id: @subscription_group_id,
            consumer_group_id: @consumer_group_id,
            tpl: tpl
          )
        end
      end
    end
  end
end
@@ -35,10 +35,14 @@ module Karafka
35
35
  connection.listener.fetch_loop
36
36
  connection.listener.fetch_loop.received
37
37
 
38
- connection.client.rebalance_callback
39
38
  connection.client.poll.error
40
39
  connection.client.unsubscribe.error
41
40
 
41
+ rebalance.partitions_assign
42
+ rebalance.partitions_assigned
43
+ rebalance.partitions_revoke
44
+ rebalance.partitions_revoked
45
+
42
46
  consumer.consume
43
47
  consumer.consumed
44
48
  consumer.consuming.pause
@@ -0,0 +1,30 @@
# frozen_string_literal: true

require_relative 'client'

module Karafka
  module Instrumentation
    module Vendors
      # Namespace for Appsignal instrumentation
      module Appsignal
        # Common foundation for all the Appsignal instrumentation listeners.
        # Provides configuration handling plus delegation capabilities.
        class Base
          include ::Karafka::Core::Configurable
          extend Forwardable

          # Builds the listener, applying the optional configuration block
          #
          # @param block [Proc] configuration block
          def initialize(&block)
            configure

            return unless block

            setup(&block)
          end

          # Runs the configuration block against this listener
          #
          # @param block [Proc] configuration block
          # @note Named `setup` to be consistent with `Karafka#setup`
          def setup(&block)
            configure(&block)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
# frozen_string_literal: true

module Karafka
  module Instrumentation
    module Vendors
      module Appsignal
        # Thin wrapper around the native Appsignal client.
        # Wrapping it lets us inject our own stub in specs when needed.
        #
        # It also abstracts away the notion of transactions and their
        # management.
        #
        # @note This client is abstract, it has no notion of Karafka whatsoever
        class Client
          # Opens an Appsignal background-job transaction under a given action
          #
          # @param action_name [String] action name. For processing this should be equal to
          #   consumer class + method name
          def start_transaction(action_name)
            transaction = ::Appsignal::Transaction.create(
              SecureRandom.uuid,
              ::Appsignal::Transaction::BACKGROUND_JOB,
              ::Appsignal::Transaction::GenericRequest.new({})
            )

            transaction.set_action_if_nil(action_name)
          end

          # Completes the ongoing transaction. No-op when none is active.
          def stop_transaction
            ::Appsignal::Transaction.complete_current! if transaction?
          end

          # Attaches metadata to the ongoing transaction (no-op when none)
          #
          # @param metadata_hash [Hash] hash with metadata we want to set
          def metadata=(metadata_hash)
            return unless transaction?

            current = ::Appsignal::Transaction.current

            stringify_hash(metadata_hash).each_pair do |key, value|
              current.set_metadata(key, value)
            end
          end

          # Increments counter with the given value and tags
          #
          # @param key [String] key we want to use
          # @param value [Integer] increment value
          # @param tags [Hash] additional extra tags
          def count(key, value, tags)
            ::Appsignal.increment_counter(key, value, stringify_hash(tags))
          end

          # Sets gauge with the given value and tags
          #
          # @param key [String] key we want to use
          # @param value [Integer] gauge value
          # @param tags [Hash] additional extra tags
          def gauge(key, value, tags)
            ::Appsignal.set_gauge(key, value, stringify_hash(tags))
          end

          # Sends the error that occurred to Appsignal
          #
          # @param error [Object] error we want to ship to Appsignal
          def send_error(error)
            # In case there is no active transaction, ship the error inside a
            # fresh generic background job one
            unless transaction?
              return ::Appsignal.send_error(error) do |transaction|
                transaction.set_namespace(::Appsignal::Transaction::BACKGROUND_JOB)
              end
            end

            # When we have an active transaction we should use it instead of
            # creating a generic one. That way proper namespace and other data
            # may be transferred.
            transaction.set_error(error)
          end

          # Registers the probe under a given name
          # @param name [Symbol] probe name
          # @param probe [Proc] code to run every minute
          def register_probe(name, probe)
            ::Appsignal::Minutely.probes.register(name, probe)
          end

          private

          # @return [Boolean] do we have a transaction
          def transaction?
            ::Appsignal::Transaction.current?
          end

          # @return [::Appsignal::Transaction, nil] transaction or nil if not started
          def transaction
            ::Appsignal::Transaction.current
          end

          # Builds a copy of the hash with both keys and values stringified
          # @param hash [Hash]
          # @return [Hash]
          def stringify_hash(hash)
            hash.to_h { |key, value| [key.to_s, value.to_s] }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,222 @@
1
+ {
2
+ "title": "Karafka",
3
+ "description": "This dashboard gives an overview of the overall Karafka status.\nCheckout topics lag, throughput/performance per consumer,\nprocess/worker counts, and many other things.\n",
4
+ "visuals": [
5
+ {
6
+ "title": "Consumed messages",
7
+ "description": "",
8
+ "line_label": "%topic%[%consumer_group%]",
9
+ "display": "LINE",
10
+ "format": "number",
11
+ "draw_null_as_zero": true,
12
+ "metrics": [
13
+ {
14
+ "name": "karafka_consumer_messages",
15
+ "fields": [
16
+ {
17
+ "field": "COUNTER"
18
+ }
19
+ ],
20
+ "tags": [
21
+ {
22
+ "key": "consumer_group",
23
+ "value": "*"
24
+ },
25
+ {
26
+ "key": "topic",
27
+ "value": "*"
28
+ }
29
+ ]
30
+ }
31
+ ],
32
+ "type": "timeseries"
33
+ },
34
+ {
35
+ "title": "Consumed batches",
36
+ "line_label": "%topic%[%consumer_group%]",
37
+ "display": "LINE",
38
+ "format": "number",
39
+ "draw_null_as_zero": true,
40
+ "metrics": [
41
+ {
42
+ "name": "karafka_consumer_batches",
43
+ "fields": [
44
+ {
45
+ "field": "COUNTER"
46
+ }
47
+ ],
48
+ "tags": [
49
+ {
50
+ "key": "consumer_group",
51
+ "value": "*"
52
+ },
53
+ {
54
+ "key": "topic",
55
+ "value": "*"
56
+ }
57
+ ]
58
+ }
59
+ ],
60
+ "type": "timeseries"
61
+ },
62
+ {
63
+ "title": "Lags",
64
+ "line_label": "%topic%[%consumer_group%]",
65
+ "display": "LINE",
66
+ "format": "number",
67
+ "draw_null_as_zero": true,
68
+ "metrics": [
69
+ {
70
+ "name": "karafka_consumer_aggregated_lag",
71
+ "fields": [
72
+ {
73
+ "field": "GAUGE"
74
+ }
75
+ ],
76
+ "tags": [
77
+ {
78
+ "key": "consumer_group",
79
+ "value": "*"
80
+ },
81
+ {
82
+ "key": "topic",
83
+ "value": "*"
84
+ }
85
+ ]
86
+ }
87
+ ],
88
+ "type": "timeseries"
89
+ },
90
+ {
91
+ "title": "Errors and DLQ",
92
+ "description": "",
93
+ "line_label": "%name% %topic%[%consumer_group%]",
94
+ "display": "LINE",
95
+ "format": "number",
96
+ "draw_null_as_zero": true,
97
+ "metrics": [
98
+ {
99
+ "name": "karafka_consumer_errors",
100
+ "fields": [
101
+ {
102
+ "field": "COUNTER"
103
+ }
104
+ ],
105
+ "tags": [
106
+ {
107
+ "key": "consumer_group",
108
+ "value": "*"
109
+ },
110
+ {
111
+ "key": "topic",
112
+ "value": "*"
113
+ }
114
+ ]
115
+ },
116
+ {
117
+ "name": "karafka_consumer_dead",
118
+ "fields": [
119
+ {
120
+ "field": "COUNTER"
121
+ }
122
+ ],
123
+ "tags": [
124
+ {
125
+ "key": "consumer_group",
126
+ "value": "*"
127
+ },
128
+ {
129
+ "key": "topic",
130
+ "value": "*"
131
+ }
132
+ ]
133
+ }
134
+ ],
135
+ "type": "timeseries"
136
+ },
137
+ {
138
+ "title": "Connection stability",
139
+ "description": "",
140
+ "line_label": "%name%",
141
+ "display": "LINE",
142
+ "format": "number",
143
+ "draw_null_as_zero": true,
144
+ "metrics": [
145
+ {
146
+ "name": "karafka_connection_disconnects",
147
+ "fields": [
148
+ {
149
+ "field": "COUNTER"
150
+ }
151
+ ],
152
+ "tags": []
153
+ },
154
+ {
155
+ "name": "karafka_connection_connects",
156
+ "fields": [
157
+ {
158
+ "field": "COUNTER"
159
+ }
160
+ ],
161
+ "tags": []
162
+ },
163
+ {
164
+ "name": "karafka_requests_retries",
165
+ "fields": [
166
+ {
167
+ "field": "COUNTER"
168
+ }
169
+ ],
170
+ "tags": []
171
+ },
172
+ {
173
+ "name": "karafka_transmission_errors",
174
+ "fields": [
175
+ {
176
+ "field": "COUNTER"
177
+ }
178
+ ],
179
+ "tags": []
180
+ },
181
+ {
182
+ "name": "karafka_receive_errors",
183
+ "fields": [
184
+ {
185
+ "field": "COUNTER"
186
+ }
187
+ ],
188
+ "tags": []
189
+ }
190
+ ],
191
+ "type": "timeseries"
192
+ },
193
+ {
194
+ "title": "Concurrency",
195
+ "line_label": "%name%",
196
+ "display": "LINE",
197
+ "format": "number",
198
+ "draw_null_as_zero": true,
199
+ "metrics": [
200
+ {
201
+ "name": "karafka_processes_count",
202
+ "fields": [
203
+ {
204
+ "field": "COUNTER"
205
+ }
206
+ ],
207
+ "tags": []
208
+ },
209
+ {
210
+ "name": "karafka_threads_count",
211
+ "fields": [
212
+ {
213
+ "field": "COUNTER"
214
+ }
215
+ ],
216
+ "tags": []
217
+ }
218
+ ],
219
+ "type": "timeseries"
220
+ }
221
+ ]
222
+ }
@@ -0,0 +1,30 @@
# frozen_string_literal: true

require_relative 'base'

module Karafka
  module Instrumentation
    module Vendors
      # Namespace for Appsignal instrumentation
      module Appsignal
        # Reports errors coming from both consumers and producers to Appsignal.
        # Karafka and WaterDrop share the same notifications API, so separate
        # instances of this single listener class can serve both.
        class ErrorsListener < Base
          def_delegators :config, :client

          setting :client, default: Client.new

          configure

          # Ships the error from the event payload to Appsignal
          #
          # @param event [Karafka::Core::Monitoring::Event]
          def on_error_occurred(event)
            error = event[:error]

            client.send_error(error)
          end
        end
      end
    end
  end
end