racecar 2.0.0 → 2.10.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +17 -0
- data/.github/workflows/ci.yml +46 -0
- data/.github/workflows/publish.yml +12 -0
- data/.gitignore +1 -2
- data/CHANGELOG.md +83 -1
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +72 -0
- data/README.md +303 -82
- data/Rakefile +5 -0
- data/docker-compose.yml +65 -0
- data/examples/batch_consumer.rb +4 -2
- data/examples/cat_consumer.rb +2 -0
- data/examples/producing_consumer.rb +2 -0
- data/exe/racecar +37 -14
- data/extra/datadog-dashboard.json +1 -0
- data/lib/ensure_hash_compact.rb +2 -0
- data/lib/generators/racecar/consumer_generator.rb +2 -0
- data/lib/generators/racecar/install_generator.rb +2 -0
- data/lib/racecar/cli.rb +26 -21
- data/lib/racecar/config.rb +80 -4
- data/lib/racecar/consumer.rb +51 -6
- data/lib/racecar/consumer_set.rb +113 -44
- data/lib/racecar/ctl.rb +31 -3
- data/lib/racecar/daemon.rb +4 -2
- data/lib/racecar/datadog.rb +83 -3
- data/lib/racecar/delivery_callback.rb +27 -0
- data/lib/racecar/erroneous_state_error.rb +34 -0
- data/lib/racecar/heroku.rb +49 -0
- data/lib/racecar/instrumenter.rb +4 -7
- data/lib/racecar/liveness_probe.rb +78 -0
- data/lib/racecar/message.rb +6 -1
- data/lib/racecar/message_delivery_error.rb +112 -0
- data/lib/racecar/null_instrumenter.rb +2 -0
- data/lib/racecar/parallel_runner.rb +110 -0
- data/lib/racecar/pause.rb +8 -4
- data/lib/racecar/producer.rb +139 -0
- data/lib/racecar/rails_config_file_loader.rb +7 -1
- data/lib/racecar/rebalance_listener.rb +58 -0
- data/lib/racecar/runner.rb +79 -37
- data/lib/racecar/version.rb +3 -1
- data/lib/racecar.rb +36 -8
- data/racecar.gemspec +7 -4
- metadata +47 -25
- data/.github/workflows/rspec.yml +0 -24
data/examples/batch_consumer.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class BatchConsumer < Racecar::Consumer
|
2
4
|
subscribes_to "messages", start_from_beginning: false
|
3
5
|
|
4
|
-
def process_batch(
|
5
|
-
|
6
|
+
def process_batch(messages)
|
7
|
+
messages.each do |message|
|
6
8
|
puts message.value
|
7
9
|
end
|
8
10
|
end
|
data/examples/cat_consumer.rb
CHANGED
data/exe/racecar
CHANGED
@@ -3,19 +3,42 @@
|
|
3
3
|
require "racecar"
|
4
4
|
require "racecar/cli"
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
module Racecar
|
7
|
+
class << self
|
8
|
+
def start(argv)
|
9
|
+
Cli.main(argv)
|
10
|
+
rescue SignalException => e
|
11
|
+
# We might receive SIGTERM before our signal handler is installed.
|
12
|
+
if Signal.signame(e.signo) == "TERM"
|
13
|
+
exit(0)
|
14
|
+
else
|
15
|
+
raise
|
16
|
+
end
|
17
|
+
rescue SystemExit
|
18
|
+
raise
|
19
|
+
rescue Exception => e
|
20
|
+
$stderr.puts "=> Crashed: #{exception_with_causes(e)}\n#{e.backtrace.join("\n")}"
|
21
|
+
|
22
|
+
Racecar.config.error_handler.call(e)
|
23
|
+
|
24
|
+
exit(1)
|
25
|
+
else
|
26
|
+
exit(0)
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def exception_with_causes(e)
|
32
|
+
result = +"#{e.class}: #{e}"
|
33
|
+
if e.cause
|
34
|
+
result << "\n"
|
35
|
+
result << "--- Caused by: ---\n"
|
36
|
+
result << exception_with_causes(e.cause)
|
37
|
+
end
|
38
|
+
result
|
39
|
+
end
|
14
40
|
end
|
15
|
-
rescue
|
16
|
-
# Exceptions are printed to STDERR and sent to the error handler
|
17
|
-
# in `Racecar::Cli#run`, so we don't need to do anything here.
|
18
|
-
exit(1)
|
19
|
-
else
|
20
|
-
exit(0)
|
21
41
|
end
|
42
|
+
|
43
|
+
# Start your engines!
|
44
|
+
Racecar.start(ARGV)
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"Racecar consumer groups","description":"Dashboard for monitoring [Racecar](https://github.com/zendesk/racecar) Kafka consumer groups.","widgets":[{"id":4916208698459109,"definition":{"title":"Single-message processing","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":82605028,"definition":{"title":"95th percentile message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.95percentile{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2857871641649870,"definition":{"title":"Max message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.max{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":88579656,"definition":{"title":"Median message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.median{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}}]}},{"id":4068194420543030,"definition":{"title":"Batch processing","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":341686567,"definition":{"title":"95th percentile batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_batch.latency.95percentile{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":341687897,"definition":{"title":"Median batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_batch.latency.median{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5352911818003929,"definition":{"title":"Max batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"max:racecar.consumer.process_batch.latency.max{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":1654098217056312,"definition":{"title":"Max message batch size","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"max:racecar.consumer.batch_size.max{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":7718619791149134,"definition":{"title":"Average per-message latency in batch processing mode","show_legend":false,"legend_size":"0","legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"max:racecar.consumer.process_batch.latency.avg{$group_id,$client,$topic,$partition,$env}/max:racecar.consumer.batch_size.avg{$group_id,$client,$topic,$partition,$env}","metadata":[{"expression":"max:racecar.consumer.process_batch.latency.avg{$env,$pod,$group_id,$client,$topic,$partition}/max:racecar.consumer.batch_size.avg{$env,$pod,$group_id,$client,$topic,$partition}","alias_name":"ms"}],"style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":4,"y":2,"width":4,"height":2}}]}},{"id":7110612496425151,"definition":{"title":"Throughput & Lag","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":301212748,"definition":{"title":"Message lag changes","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"derivative(max:racecar.consumer.offset{$group_id,$client,$topic,$partition,$env} by {topic,partition,pod})","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":82604183,"definition":{"title":"Processing throughput by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {topic,group_id}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5547724125706857,"definition":{"title":"Processing throughput by group","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {group_id}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":375397853,"definition":{"title":"Processing throughput by host","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {group_id,host}.as_rate()","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":7820607170949322,"definition":{"title":"Messages consumed in timeframe","type":"query_value","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env}.as_count()","aggregator":"sum"}],"autoscale":true,"precision":0},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":1428183857213882,"definition":{"title":"Time lag (end-to-end latency)","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.time_lag{$group_id,$client,$topic,$partition,$env} by {group_id,pod}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":2,"width":4,"height":2}}]}},{"id":1487807434456879,"definition":{"title":"Processing Errors & Group Stability","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":82605029,"definition":{"title":"Processing errors","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.process_batch.errors{$group_id,$client,$topic,$partition,$env} by {topic,pod,group_id,partition}.as_count()+sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic,pod,group_id,partition}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":83104736,"definition":{"title":"Processing error rate by topic (%)","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"(sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()/(sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()+sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()))*100","style":{"palette":"orange","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6572534533091871,"definition":{"title":"Processing errors in timeframe","type":"query_value","requests":[{"q":"sum:racecar.consumer.process_batch.errors{$topic,$client,$group_id,$env}.as_count()+sum:racecar.consumer.process_message.errors{$topic,$client,$group_id,$env}.as_count()","aggregator":"sum","conditional_formats":[{"comparator":">","palette":"white_on_red","value":0},{"comparator":"<=","palette":"white_on_green","value":0}]}],"autoscale":true,"precision":0},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":302705923,"definition":{"title":"Pause duration","show_legend":false,"legend_size":"0","legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.consumer.pause.duration{$client,$group_id,$topic,$env} by {pod,group_id,topic,partition}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":235544854,"definition":{"title":"Group joins","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.join_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":235544862,"definition":{"title":"Group leaves","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.leave_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":235545167,"definition":{"title":"Group syncs","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.sync_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":4,"width":4,"height":2}}]}},{"id":8013176155436939,"definition":{"title":"Producer & message delivery","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":5948628389625057,"definition":{"title":"Message delivery latency (median)","title_size":"16","title_align":"left","show_legend":false,"type":"timeseries","requests":[{"q":"avg:racecar.producer.deliver.latency.median{$client,$env}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3158040379950811,"definition":{"title":"Producer buffer size (max)","title_size":"16","title_align":"left","show_legend":false,"type":"timeseries","requests":[{"q":"max:racecar.producer.buffer.size.max{$client,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6916375790222772,"definition":{"title":"Producer buffer size (avg) kp","title_size":"16","title_align":"left","show_legend":false,"legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.producer.buffer.size.avg{$client,$env} by {topic,host}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":3160735194874896,"definition":{"title":"Message size (95p)","title_size":"16","title_align":"left","show_legend":false,"legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.producer.produce.message_size.95percentile{$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":2,"width":4,"height":2}}]}}],"template_variables":[{"name":"env","default":"production","prefix":"env"},{"name":"group_id","default":"*","prefix":"group_id"},{"name":"client","default":"*","prefix":"client"},{"name":"topic","default":"*","prefix":"topic"},{"name":"partition","default":"*","prefix":"partition"}],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"ywc-z36-g29"}
|
data/lib/ensure_hash_compact.rb
CHANGED
data/lib/racecar/cli.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "optparse"
|
2
4
|
require "logger"
|
3
5
|
require "fileutils"
|
4
6
|
require "racecar/rails_config_file_loader"
|
5
7
|
require "racecar/daemon"
|
8
|
+
require "racecar/liveness_probe"
|
6
9
|
|
7
10
|
module Racecar
|
8
11
|
class Cli
|
9
|
-
|
10
|
-
|
12
|
+
class << self
|
13
|
+
def main(args)
|
14
|
+
new(args).run
|
15
|
+
end
|
11
16
|
end
|
12
17
|
|
13
18
|
def initialize(args)
|
14
19
|
@parser = build_parser
|
15
20
|
@parser.parse!(args)
|
16
21
|
@consumer_name = args.first or raise Racecar::Error, "no consumer specified"
|
17
|
-
|
18
|
-
|
19
|
-
def config
|
20
|
-
Racecar.config
|
22
|
+
@runner = nil
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
@@ -58,21 +60,29 @@ module Racecar
|
|
58
60
|
$stderr.puts "=> Ctrl-C to shutdown consumer"
|
59
61
|
end
|
60
62
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
$stderr.puts "=> Crashed: #{e.class}: #{e}\n#{e.backtrace.join("\n")}"
|
63
|
+
if config.liveness_probe_enabled
|
64
|
+
$stderr.puts "=> Liveness probe enabled"
|
65
|
+
config.install_liveness_probe
|
66
|
+
end
|
66
67
|
|
67
|
-
|
68
|
+
processor = consumer_class.new
|
69
|
+
@runner = Racecar.runner(processor)
|
70
|
+
@runner.run
|
71
|
+
nil
|
72
|
+
end
|
68
73
|
|
69
|
-
|
74
|
+
def stop
|
75
|
+
@runner.stop
|
70
76
|
end
|
71
77
|
|
72
78
|
private
|
73
79
|
|
74
80
|
attr_reader :consumer_name
|
75
81
|
|
82
|
+
def config
|
83
|
+
Racecar.config
|
84
|
+
end
|
85
|
+
|
76
86
|
def daemonize!
|
77
87
|
daemon = Daemon.new(File.expand_path(config.pidfile))
|
78
88
|
|
@@ -102,12 +112,7 @@ module Racecar
|
|
102
112
|
opts.on("-r", "--require STRING", "Require a library before starting the consumer") do |lib|
|
103
113
|
$LOAD_PATH.unshift(Dir.pwd) unless load_path_modified
|
104
114
|
load_path_modified = true
|
105
|
-
|
106
|
-
require lib
|
107
|
-
rescue => e
|
108
|
-
$stderr.puts "=> #{lib} failed to load: #{e.message}"
|
109
|
-
exit
|
110
|
-
end
|
115
|
+
require lib
|
111
116
|
end
|
112
117
|
|
113
118
|
opts.on("-l", "--log STRING", "Log to the specified file") do |logfile|
|
@@ -115,13 +120,13 @@ module Racecar
|
|
115
120
|
end
|
116
121
|
|
117
122
|
Racecar::Config.variables.each do |variable|
|
118
|
-
opt_name = "
|
123
|
+
opt_name = +"--#{variable.name.to_s.gsub('_', '-')}"
|
119
124
|
opt_name << " #{variable.type.upcase}" unless variable.boolean?
|
120
125
|
|
121
126
|
desc = variable.description || "N/A"
|
122
127
|
|
123
128
|
if variable.default
|
124
|
-
desc
|
129
|
+
desc += " (default: #{variable.default.inspect})"
|
125
130
|
end
|
126
131
|
|
127
132
|
opts.on(opt_name, desc) do |value|
|
data/lib/racecar/config.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "tmpdir"
|
4
|
+
|
1
5
|
require "king_konf"
|
2
6
|
|
7
|
+
require "racecar/liveness_probe"
|
8
|
+
require "racecar/instrumenter"
|
9
|
+
require "racecar/rebalance_listener"
|
10
|
+
|
3
11
|
module Racecar
|
4
12
|
class Config < KingKonf::Config
|
5
13
|
env_prefix :racecar
|
6
14
|
|
15
|
+
STATISTICS_DISABLED_VALUE = 0
|
16
|
+
|
7
17
|
desc "A list of Kafka brokers in the cluster that you're consuming from"
|
8
18
|
list :brokers, default: ["localhost:9092"]
|
9
19
|
|
@@ -19,6 +29,9 @@ module Racecar
|
|
19
29
|
desc "The minimum number of messages in the local consumer queue"
|
20
30
|
integer :min_message_queue_size, default: 2000
|
21
31
|
|
32
|
+
desc "Which partition assignment strategy to use, range, roundrobin or cooperative-sticky. -- https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
|
33
|
+
string :partition_assignment_strategy, default: "range,roundrobin"
|
34
|
+
|
22
35
|
desc "Kafka consumer configuration options, separated with '=' -- https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
|
23
36
|
list :consumer, default: []
|
24
37
|
|
@@ -52,9 +65,12 @@ module Racecar
|
|
52
65
|
desc "How long to wait when trying to communicate with a Kafka broker"
|
53
66
|
float :socket_timeout, default: 30
|
54
67
|
|
55
|
-
desc "How long to allow the Kafka brokers to wait before returning messages"
|
68
|
+
desc "How long to allow the Kafka brokers to wait before returning messages (in seconds)"
|
56
69
|
float :max_wait_time, default: 1
|
57
70
|
|
71
|
+
desc "How long to try to deliver a produced message before finally giving up (in seconds)"
|
72
|
+
float :message_timeout, default: 5*60
|
73
|
+
|
58
74
|
desc "Maximum amount of data the broker shall return for a Fetch request"
|
59
75
|
integer :max_bytes, default: 10485760
|
60
76
|
|
@@ -70,6 +86,9 @@ module Racecar
|
|
70
86
|
desc "The log level for the Racecar logs"
|
71
87
|
string :log_level, default: "info"
|
72
88
|
|
89
|
+
desc "The strategy used to determine which topic partition a message is written to when Racecar produces a value to Kafka; defaults to `consistent_random`"
|
90
|
+
symbol :partitioner, allowed_values: %i{consistent consistent_random murmur2 murmur2_random fnv1a fnv1a_random}, default: :consistent_random
|
91
|
+
|
73
92
|
desc "Protocol used to communicate with brokers"
|
74
93
|
symbol :security_protocol, allowed_values: %i{plaintext ssl sasl_plaintext sasl_ssl}
|
75
94
|
|
@@ -151,10 +170,33 @@ module Racecar
|
|
151
170
|
desc "Whether to boot Rails when starting the consumer"
|
152
171
|
boolean :without_rails, default: false
|
153
172
|
|
173
|
+
desc "How frequently librdkafka should report statistics to your application (in seconds). A statistics callback
|
174
|
+
must also be provided. This should be defined with a `statistics_callback` method on your processor. Stats
|
175
|
+
are disabled if this value is set to 0, or there is no callback defined. This is set by default to 1 second
|
176
|
+
for backward compatibility, however this can be quite memory intensive"
|
177
|
+
integer :statistics_interval, default: 1
|
178
|
+
|
179
|
+
desc "Whether to enable liveness probe behavior (touch the file)"
|
180
|
+
boolean :liveness_probe_enabled, default: false
|
181
|
+
|
182
|
+
desc "Path to a file Racecar will touch to show liveness"
|
183
|
+
string :liveness_probe_file_path, default: "#{Dir.tmpdir}/racecar-liveness"
|
184
|
+
|
185
|
+
desc "Used only by the liveness probe: Max time (in seconds) between liveness events before the process is considered not healthy"
|
186
|
+
integer :liveness_probe_max_interval, default: 5
|
187
|
+
|
154
188
|
# The error handler must be set directly on the object.
|
155
189
|
attr_reader :error_handler
|
156
190
|
|
157
|
-
attr_accessor :subscriptions, :logger
|
191
|
+
attr_accessor :subscriptions, :logger, :parallel_workers
|
192
|
+
|
193
|
+
def statistics_interval_ms
|
194
|
+
if Rdkafka::Config.statistics_callback
|
195
|
+
statistics_interval * 1000
|
196
|
+
else
|
197
|
+
STATISTICS_DISABLED_VALUE
|
198
|
+
end
|
199
|
+
end
|
158
200
|
|
159
201
|
def max_wait_time_ms
|
160
202
|
max_wait_time * 1000
|
@@ -189,6 +231,7 @@ module Racecar
|
|
189
231
|
end
|
190
232
|
|
191
233
|
def load_consumer_class(consumer_class)
|
234
|
+
self.consumer_class = consumer_class
|
192
235
|
self.group_id = consumer_class.group_id || self.group_id
|
193
236
|
|
194
237
|
self.group_id ||= [
|
@@ -196,13 +239,16 @@ module Racecar
|
|
196
239
|
group_id_prefix,
|
197
240
|
|
198
241
|
# MyFunnyConsumer => my-funny-consumer
|
199
|
-
consumer_class.name.gsub(/[a-z][A-Z]/) {|str| str[0]
|
200
|
-
].compact.join
|
242
|
+
consumer_class.name.gsub(/[a-z][A-Z]/) { |str| "#{str[0]}-#{str[1]}" }.downcase,
|
243
|
+
].compact.join
|
201
244
|
|
245
|
+
self.parallel_workers = consumer_class.parallel_workers
|
202
246
|
self.subscriptions = consumer_class.subscriptions
|
203
247
|
self.max_wait_time = consumer_class.max_wait_time || self.max_wait_time
|
248
|
+
self.fetch_messages = consumer_class.fetch_messages || self.fetch_messages
|
204
249
|
self.pidfile ||= "#{group_id}.pid"
|
205
250
|
end
|
251
|
+
attr_accessor :consumer_class
|
206
252
|
|
207
253
|
def on_error(&handler)
|
208
254
|
@error_handler = handler
|
@@ -224,11 +270,41 @@ module Racecar
|
|
224
270
|
producer_config
|
225
271
|
end
|
226
272
|
|
273
|
+
def instrumenter
|
274
|
+
@instrumenter ||= begin
|
275
|
+
default_payload = { client_id: client_id, group_id: group_id }
|
276
|
+
|
277
|
+
if defined?(ActiveSupport::Notifications)
|
278
|
+
# ActiveSupport needs `concurrent-ruby` but doesn't `require` it.
|
279
|
+
require 'concurrent/utility/monotonic_time'
|
280
|
+
Instrumenter.new(backend: ActiveSupport::Notifications, default_payload: default_payload)
|
281
|
+
else
|
282
|
+
logger.warn "ActiveSupport::Notifications not available, instrumentation is disabled"
|
283
|
+
NullInstrumenter
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
attr_writer :instrumenter
|
288
|
+
|
289
|
+
def install_liveness_probe
|
290
|
+
liveness_probe.tap(&:install)
|
291
|
+
end
|
292
|
+
|
293
|
+
def liveness_probe
|
294
|
+
require "active_support/notifications"
|
295
|
+
@liveness_probe ||= LivenessProbe.new(
|
296
|
+
ActiveSupport::Notifications,
|
297
|
+
liveness_probe_file_path,
|
298
|
+
liveness_probe_max_interval
|
299
|
+
)
|
300
|
+
end
|
301
|
+
|
227
302
|
private
|
228
303
|
|
229
304
|
def rdkafka_security_config
|
230
305
|
{
|
231
306
|
"security.protocol" => security_protocol,
|
307
|
+
"enable.ssl.certificate.verification" => ssl_verify_hostname,
|
232
308
|
"ssl.ca.location" => ssl_ca_location,
|
233
309
|
"ssl.crl.location" => ssl_crl_location,
|
234
310
|
"ssl.keystore.location" => ssl_keystore_location,
|
data/lib/racecar/consumer.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "racecar/message_delivery_error"
|
4
|
+
|
1
5
|
module Racecar
|
2
6
|
class Consumer
|
3
7
|
Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition, :additional_config)
|
@@ -5,7 +9,7 @@ module Racecar
|
|
5
9
|
class << self
|
6
10
|
attr_accessor :max_wait_time
|
7
11
|
attr_accessor :group_id
|
8
|
-
attr_accessor :producer, :consumer
|
12
|
+
attr_accessor :producer, :consumer, :parallel_workers, :fetch_messages
|
9
13
|
|
10
14
|
def subscriptions
|
11
15
|
@subscriptions ||= []
|
@@ -23,29 +27,68 @@ module Racecar
|
|
23
27
|
# @param additional_config [Hash] Configuration properties for consumer.
|
24
28
|
# See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
25
29
|
# @return [nil]
|
26
|
-
def subscribes_to(
|
30
|
+
def subscribes_to(
|
31
|
+
*topics,
|
32
|
+
start_from_beginning: true,
|
33
|
+
max_bytes_per_partition: 1048576,
|
34
|
+
additional_config: {}
|
35
|
+
)
|
27
36
|
topics.each do |topic|
|
28
37
|
subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition, additional_config)
|
29
38
|
end
|
30
39
|
end
|
40
|
+
|
41
|
+
# Rebalance hooks for subclasses to override
|
42
|
+
def on_partitions_assigned(rebalance_event); end
|
43
|
+
def on_partitions_revoked(rebalance_event); end
|
31
44
|
end
|
32
45
|
|
33
|
-
def configure(producer:, consumer:, instrumenter: NullInstrumenter)
|
46
|
+
def configure(producer:, consumer:, instrumenter: NullInstrumenter, config: Racecar.config)
|
34
47
|
@producer = producer
|
48
|
+
@delivery_handles = []
|
49
|
+
|
35
50
|
@consumer = consumer
|
51
|
+
|
36
52
|
@instrumenter = instrumenter
|
53
|
+
@config = config
|
37
54
|
end
|
38
55
|
|
39
56
|
def teardown; end
|
40
57
|
|
41
|
-
#
|
58
|
+
# Blocks until all messages produced so far have been successfully published. If
|
59
|
+
# message delivery finally fails, a Racecar::MessageDeliveryError is raised. The
|
60
|
+
# delivery failed for the reason in the exception. The error can be broker side
|
61
|
+
# (e.g. downtime, configuration issue) or specific to the message being sent. The
|
62
|
+
# caller must handle the latter cases or run into head of line blocking.
|
42
63
|
def deliver!
|
43
64
|
@delivery_handles ||= []
|
44
65
|
if @delivery_handles.any?
|
45
66
|
instrumentation_payload = { delivered_message_count: @delivery_handles.size }
|
46
67
|
|
47
68
|
@instrumenter.instrument('deliver_messages', instrumentation_payload) do
|
48
|
-
@delivery_handles.each
|
69
|
+
@delivery_handles.each do |handle|
|
70
|
+
begin
|
71
|
+
# rdkafka-ruby checks every wait_timeout seconds if the message was
|
72
|
+
# successfully delivered, up to max_wait_timeout seconds before raising
|
73
|
+
# Rdkafka::AbstractHandle::WaitTimeoutError. librdkafka will (re)try to
|
74
|
+
# deliver all messages in the background, until "config.message_timeout"
|
75
|
+
# (message.timeout.ms) is exceeded. Phrased differently, rdkafka-ruby's
|
76
|
+
# WaitTimeoutError is just informative.
|
77
|
+
# The raising can be avoided if max_wait_timeout below is greater than
|
78
|
+
# config.message_timeout, but config is not available here (without
|
79
|
+
# changing the interface).
|
80
|
+
handle.wait(max_wait_timeout: 60, wait_timeout: 0.1)
|
81
|
+
rescue Rdkafka::AbstractHandle::WaitTimeoutError => e
|
82
|
+
partition = MessageDeliveryError.partition_from_delivery_handle(handle)
|
83
|
+
# ideally we could use the logger passed to the Runner, but it is not
|
84
|
+
# available here. The runner sets it for Rdkafka, though, so we can use
|
85
|
+
# that instead.
|
86
|
+
@config.logger.debug "Still trying to deliver message to (partition #{partition})... (will try up to Racecar.config.message_timeout)"
|
87
|
+
retry
|
88
|
+
rescue Rdkafka::RdkafkaError => e
|
89
|
+
raise MessageDeliveryError.new(e, handle)
|
90
|
+
end
|
91
|
+
end
|
49
92
|
end
|
50
93
|
end
|
51
94
|
@delivery_handles.clear
|
@@ -54,13 +97,14 @@ module Racecar
|
|
54
97
|
protected
|
55
98
|
|
56
99
|
# https://github.com/appsignal/rdkafka-ruby#producing-messages
|
57
|
-
def produce(payload, topic:, key
|
100
|
+
def produce(payload, topic:, key: nil, partition: nil, partition_key: nil, headers: nil, create_time: nil)
|
58
101
|
@delivery_handles ||= []
|
59
102
|
message_size = payload.respond_to?(:bytesize) ? payload.bytesize : 0
|
60
103
|
instrumentation_payload = {
|
61
104
|
value: payload,
|
62
105
|
headers: headers,
|
63
106
|
key: key,
|
107
|
+
partition: partition,
|
64
108
|
partition_key: partition_key,
|
65
109
|
topic: topic,
|
66
110
|
message_size: message_size,
|
@@ -73,6 +117,7 @@ module Racecar
|
|
73
117
|
topic: topic,
|
74
118
|
payload: payload,
|
75
119
|
key: key,
|
120
|
+
partition: partition,
|
76
121
|
partition_key: partition_key,
|
77
122
|
timestamp: create_time,
|
78
123
|
headers: headers,
|