karafka 2.2.7 → 2.2.8.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile.lock +3 -5
- data/bin/karafka +2 -3
- data/docker-compose.yml +3 -1
- data/karafka.gemspec +1 -2
- data/lib/karafka/base_consumer.rb +1 -0
- data/lib/karafka/cli/base.rb +45 -34
- data/lib/karafka/cli/console.rb +5 -4
- data/lib/karafka/cli/help.rb +24 -0
- data/lib/karafka/cli/info.rb +2 -2
- data/lib/karafka/cli/install.rb +4 -4
- data/lib/karafka/cli/server.rb +68 -33
- data/lib/karafka/cli/topics.rb +1 -1
- data/lib/karafka/cli.rb +23 -19
- data/lib/karafka/connection/client.rb +9 -4
- data/lib/karafka/connection/rebalance_manager.rb +36 -21
- data/lib/karafka/errors.rb +3 -0
- data/lib/karafka/instrumentation/callbacks/rebalance.rb +64 -0
- data/lib/karafka/instrumentation/notifications.rb +5 -1
- data/lib/karafka/instrumentation/vendors/appsignal/base.rb +30 -0
- data/lib/karafka/instrumentation/vendors/appsignal/client.rb +122 -0
- data/lib/karafka/instrumentation/vendors/appsignal/dashboard.json +222 -0
- data/lib/karafka/instrumentation/vendors/appsignal/errors_listener.rb +30 -0
- data/lib/karafka/instrumentation/vendors/appsignal/metrics_listener.rb +331 -0
- data/lib/karafka/instrumentation/vendors/datadog/metrics_listener.rb +2 -2
- data/lib/karafka/patches/rdkafka/bindings.rb +22 -39
- data/lib/karafka/patches/rdkafka/opaque.rb +36 -0
- data/lib/karafka/pro/processing/coordinator.rb +6 -7
- data/lib/karafka/pro/processing/strategies/vp/default.rb +20 -0
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +14 -20
- metadata.gz.sig +0 -0
data/lib/karafka/errors.rb
CHANGED
@@ -60,5 +60,8 @@ module Karafka
     # Raised when we run operations that require certain result but despite successfully finishing
     # it is not yet available due to some synchronization mechanisms and caches
     ResultNotVisibleError = Class.new(BaseError)
+
+    # Raised when there is an attempt to run an unrecognized CLI command
+    UnrecognizedCommandError = Class.new(BaseError)
   end
 end
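For illustration, a minimal, hypothetical sketch of how a CLI dispatcher might use the new error class (the actual dispatch logic lives in the reworked `data/lib/karafka/cli.rb` and `data/lib/karafka/cli/base.rb` listed above but not shown here; command names and dispatch style below are assumptions):

```ruby
# Hypothetical example only; not the actual Karafka CLI code.
registered_commands = %w[console help info install server topics]

command = ARGV.first.to_s

unless registered_commands.include?(command)
  raise Karafka::Errors::UnrecognizedCommandError, "Unknown command: #{command}"
end
```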
data/lib/karafka/instrumentation/callbacks/rebalance.rb
ADDED
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    module Callbacks
+      # Callback that connects to the librdkafka rebalance callback and converts those events into
+      # our internal events
+      class Rebalance
+        # @param subscription_group_id [String] id of the current subscription group instance
+        # @param consumer_group_id [String] id of the current consumer group
+        def initialize(subscription_group_id, consumer_group_id)
+          @subscription_group_id = subscription_group_id
+          @consumer_group_id = consumer_group_id
+        end
+
+        # Publishes an event that partitions are going to be revoked.
+        # At this stage we can still commit offsets, etc.
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def on_partitions_revoke(tpl)
+          instrument('partitions_revoke', tpl)
+        end
+
+        # Publishes an event that partitions are going to be assigned
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def on_partitions_assign(tpl)
+          instrument('partitions_assign', tpl)
+        end
+
+        # Publishes an event that partitions were revoked. This is after we've lost them, so no
+        # option to commit offsets.
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def on_partitions_revoked(tpl)
+          instrument('partitions_revoked', tpl)
+        end
+
+        # Publishes an event that partitions were assigned.
+        #
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def on_partitions_assigned(tpl)
+          instrument('partitions_assigned', tpl)
+        end
+
+        private
+
+        # Publishes info that a rebalance event of a given type has happened
+        #
+        # @param name [String] name of the event
+        # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+        def instrument(name, tpl)
+          ::Karafka.monitor.instrument(
+            "rebalance.#{name}",
+            caller: self,
+            subscription_group_id: @subscription_group_id,
+            consumer_group_id: @consumer_group_id,
+            tpl: tpl
+          )
+        end
+      end
+    end
+  end
+end
data/lib/karafka/instrumentation/notifications.rb
CHANGED
@@ -35,10 +35,14 @@ module Karafka
         connection.listener.fetch_loop
         connection.listener.fetch_loop.received
 
-        connection.client.rebalance_callback
         connection.client.poll.error
         connection.client.unsubscribe.error
 
+        rebalance.partitions_assign
+        rebalance.partitions_assigned
+        rebalance.partitions_revoke
+        rebalance.partitions_revoked
+
         consumer.consume
         consumer.consumed
         consumer.consuming.pause
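Since these events are registered on the standard Karafka notifications bus, application code can subscribe to them like any other event. A minimal sketch, using the payload keys that the `Rebalance` callback above publishes:

```ruby
# Log every partition assignment; the same pattern works for the other rebalance.* events.
Karafka.monitor.subscribe('rebalance.partitions_assigned') do |event|
  Karafka.logger.info(
    "Partitions assigned for consumer group #{event[:consumer_group_id]} " \
    "(subscription group #{event[:subscription_group_id]})"
  )
end
```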
data/lib/karafka/instrumentation/vendors/appsignal/base.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+require_relative 'client'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for Appsignal instrumentation
+      module Appsignal
+        # Base for all the instrumentation listeners
+        class Base
+          include ::Karafka::Core::Configurable
+          extend Forwardable
+
+          # @param block [Proc] configuration block
+          def initialize(&block)
+            configure
+            setup(&block) if block
+          end
+
+          # @param block [Proc] configuration block
+          # @note We define this alias to be consistent with `Karafka#setup`
+          def setup(&block)
+            configure(&block)
+          end
+        end
+      end
+    end
+  end
+end
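A small sketch of the block-based configuration flow this base class enables, using the errors listener defined later in this diff (the `client` setting is declared by the concrete subclasses, not by `Base` itself):

```ruby
# `initialize` runs `configure` and then `setup(&block)`, so a block can override defaults.
listener = Karafka::Instrumentation::Vendors::Appsignal::ErrorsListener.new do |config|
  # Assumption for illustration: injecting a custom client wrapper instance
  config.client = Karafka::Instrumentation::Vendors::Appsignal::Client.new
end
```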
data/lib/karafka/instrumentation/vendors/appsignal/client.rb
ADDED
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      module Appsignal
+        # Appsignal client wrapper
+        # We wrap the native client so we can inject our own stub in specs when needed
+        #
+        # It also abstracts away the notion of transactions and their management
+        #
+        # @note This client is abstract, it has no notion of Karafka whatsoever
+        class Client
+          # Starts an appsignal transaction with a given action name
+          #
+          # @param action_name [String] action name. For processing this should be equal to
+          #   consumer class + method name
+          def start_transaction(action_name)
+            transaction = ::Appsignal::Transaction.create(
+              SecureRandom.uuid,
+              ::Appsignal::Transaction::BACKGROUND_JOB,
+              ::Appsignal::Transaction::GenericRequest.new({})
+            )
+
+            transaction.set_action_if_nil(action_name)
+          end
+
+          # Stops the current transaction (if any)
+          def stop_transaction
+            return unless transaction?
+
+            ::Appsignal::Transaction.complete_current!
+          end
+
+          # Sets metadata on a current transaction (if any)
+          #
+          # @param metadata_hash [Hash] hash with metadata we want to set
+          def metadata=(metadata_hash)
+            return unless transaction?
+
+            transaction = ::Appsignal::Transaction.current
+
+            stringify_hash(metadata_hash).each do |key, value|
+              transaction.set_metadata(key, value)
+            end
+          end
+
+          # Increments counter with the given value and tags
+          #
+          # @param key [String] key we want to use
+          # @param value [Integer] increment value
+          # @param tags [Hash] additional extra tags
+          def count(key, value, tags)
+            ::Appsignal.increment_counter(
+              key,
+              value,
+              stringify_hash(tags)
+            )
+          end
+
+          # Sets gauge with the given value and tags
+          #
+          # @param key [String] key we want to use
+          # @param value [Integer] gauge value
+          # @param tags [Hash] additional extra tags
+          def gauge(key, value, tags)
+            ::Appsignal.set_gauge(
+              key,
+              value,
+              stringify_hash(tags)
+            )
+          end
+
+          # Sends the error that occurred to Appsignal
+          #
+          # @param error [Object] error we want to ship to Appsignal
+          def send_error(error)
+            # If we have an active transaction we should use it instead of creating a generic one
+            # That way proper namespace and other data may be transferred
+            #
+            # In case there is no transaction, a new generic background job one will be used
+            if transaction?
+              transaction.set_error(error)
+            else
+              ::Appsignal.send_error(error) do |transaction|
+                transaction.set_namespace(::Appsignal::Transaction::BACKGROUND_JOB)
+              end
+            end
+          end
+
+          # Registers the probe under a given name
+          # @param name [Symbol] probe name
+          # @param probe [Proc] code to run every minute
+          def register_probe(name, probe)
+            ::Appsignal::Minutely.probes.register(name, probe)
+          end
+
+          private
+
+          # @return [Boolean] do we have a transaction
+          def transaction?
+            ::Appsignal::Transaction.current?
+          end
+
+          # @return [::Appsignal::Transaction, nil] transaction or nil if not started
+          def transaction
+            ::Appsignal::Transaction.current
+          end
+
+          # Converts both keys and values of a hash into strings
+          # @param hash [Hash]
+          # @return [Hash]
+          def stringify_hash(hash)
+            hash
+              .transform_values(&:to_s)
+              .transform_keys!(&:to_s)
+          end
+        end
+      end
+    end
+  end
+end
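A rough usage sketch of this wrapper, assuming the `appsignal` gem is loaded and configured (in Karafka itself the new metrics and errors listeners drive these calls; the metric names and tags below are illustrative):

```ruby
client = Karafka::Instrumentation::Vendors::Appsignal::Client.new

# Wrap a unit of work in a background-job transaction
client.start_transaction('ExampleConsumer#consume')
client.metadata = { topic: 'events', partition: 0 }

# Ship a counter and a gauge; tag keys and values are stringified by the wrapper
client.count('karafka_consumer_messages', 10, { topic: 'events' })
client.gauge('karafka_consumer_aggregated_lag', 5, { consumer_group: 'example_app' })

client.stop_transaction
```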
data/lib/karafka/instrumentation/vendors/appsignal/dashboard.json
ADDED
@@ -0,0 +1,222 @@
+{
+  "title": "Karafka",
+  "description": "This dashboard gives an overview of the overall Karafka status.\nCheckout topics lag, throughput/performance per consumer,\nprocess/worker counts, and many other things.\n",
+  "visuals": [
+    {
+      "title": "Consumed messages",
+      "description": "",
+      "line_label": "%topic%[%consumer_group%]",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_consumer_messages",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": [
+            {
+              "key": "consumer_group",
+              "value": "*"
+            },
+            {
+              "key": "topic",
+              "value": "*"
+            }
+          ]
+        }
+      ],
+      "type": "timeseries"
+    },
+    {
+      "title": "Consumed batches",
+      "line_label": "%topic%[%consumer_group%]",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_consumer_batches",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": [
+            {
+              "key": "consumer_group",
+              "value": "*"
+            },
+            {
+              "key": "topic",
+              "value": "*"
+            }
+          ]
+        }
+      ],
+      "type": "timeseries"
+    },
+    {
+      "title": "Lags",
+      "line_label": "%topic%[%consumer_group%]",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_consumer_aggregated_lag",
+          "fields": [
+            {
+              "field": "GAUGE"
+            }
+          ],
+          "tags": [
+            {
+              "key": "consumer_group",
+              "value": "*"
+            },
+            {
+              "key": "topic",
+              "value": "*"
+            }
+          ]
+        }
+      ],
+      "type": "timeseries"
+    },
+    {
+      "title": "Errors and DLQ",
+      "description": "",
+      "line_label": "%name% %topic%[%consumer_group%]",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_consumer_errors",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": [
+            {
+              "key": "consumer_group",
+              "value": "*"
+            },
+            {
+              "key": "topic",
+              "value": "*"
+            }
+          ]
+        },
+        {
+          "name": "karafka_consumer_dead",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": [
+            {
+              "key": "consumer_group",
+              "value": "*"
+            },
+            {
+              "key": "topic",
+              "value": "*"
+            }
+          ]
+        }
+      ],
+      "type": "timeseries"
+    },
+    {
+      "title": "Connection stability",
+      "description": "",
+      "line_label": "%name%",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_connection_disconnects",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        },
+        {
+          "name": "karafka_connection_connects",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        },
+        {
+          "name": "karafka_requests_retries",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        },
+        {
+          "name": "karafka_transmission_errors",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        },
+        {
+          "name": "karafka_receive_errors",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        }
+      ],
+      "type": "timeseries"
+    },
+    {
+      "title": "Concurrency",
+      "line_label": "%name%",
+      "display": "LINE",
+      "format": "number",
+      "draw_null_as_zero": true,
+      "metrics": [
+        {
+          "name": "karafka_processes_count",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        },
+        {
+          "name": "karafka_threads_count",
+          "fields": [
+            {
+              "field": "COUNTER"
+            }
+          ],
+          "tags": []
+        }
+      ],
+      "type": "timeseries"
+    }
+  ]
+}
data/lib/karafka/instrumentation/vendors/appsignal/errors_listener.rb
ADDED
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+require_relative 'base'
+
+module Karafka
+  module Instrumentation
+    module Vendors
+      # Namespace for Appsignal instrumentation
+      module Appsignal
+        # Listener for reporting errors from both consumers and producers
+        # Since we have the same API for WaterDrop and Karafka, we can use one listener with
+        # independent instances
+        class ErrorsListener < Base
+          def_delegators :config, :client
+
+          setting :client, default: Client.new
+
+          configure
+
+          # Sends error details to Appsignal
+          #
+          # @param event [Karafka::Core::Monitoring::Event]
+          def on_error_occurred(event)
+            client.send_error(event[:error])
+          end
+        end
+      end
+    end
+  end
+end
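A sketch of wiring the listener up; subscribing separate instances to the Karafka monitor and to the WaterDrop producer monitor is what the "independent instances" note above refers to (the require path and subscribe pattern follow the existing vendor-listener convention and are assumptions here, not part of this diff):

```ruby
require 'karafka/instrumentation/vendors/appsignal/errors_listener'

# One instance for consumer-side errors
Karafka.monitor.subscribe(
  Karafka::Instrumentation::Vendors::Appsignal::ErrorsListener.new
)

# A separate instance for producer (WaterDrop) errors
Karafka.producer.monitor.subscribe(
  Karafka::Instrumentation::Vendors::Appsignal::ErrorsListener.new
)
```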