racecar 2.0.0 → 2.10.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +17 -0
- data/.github/workflows/ci.yml +46 -0
- data/.github/workflows/publish.yml +12 -0
- data/.gitignore +1 -2
- data/CHANGELOG.md +83 -1
- data/Dockerfile +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +72 -0
- data/README.md +303 -82
- data/Rakefile +5 -0
- data/docker-compose.yml +65 -0
- data/examples/batch_consumer.rb +4 -2
- data/examples/cat_consumer.rb +2 -0
- data/examples/producing_consumer.rb +2 -0
- data/exe/racecar +37 -14
- data/extra/datadog-dashboard.json +1 -0
- data/lib/ensure_hash_compact.rb +2 -0
- data/lib/generators/racecar/consumer_generator.rb +2 -0
- data/lib/generators/racecar/install_generator.rb +2 -0
- data/lib/racecar/cli.rb +26 -21
- data/lib/racecar/config.rb +80 -4
- data/lib/racecar/consumer.rb +51 -6
- data/lib/racecar/consumer_set.rb +113 -44
- data/lib/racecar/ctl.rb +31 -3
- data/lib/racecar/daemon.rb +4 -2
- data/lib/racecar/datadog.rb +83 -3
- data/lib/racecar/delivery_callback.rb +27 -0
- data/lib/racecar/erroneous_state_error.rb +34 -0
- data/lib/racecar/heroku.rb +49 -0
- data/lib/racecar/instrumenter.rb +4 -7
- data/lib/racecar/liveness_probe.rb +78 -0
- data/lib/racecar/message.rb +6 -1
- data/lib/racecar/message_delivery_error.rb +112 -0
- data/lib/racecar/null_instrumenter.rb +2 -0
- data/lib/racecar/parallel_runner.rb +110 -0
- data/lib/racecar/pause.rb +8 -4
- data/lib/racecar/producer.rb +139 -0
- data/lib/racecar/rails_config_file_loader.rb +7 -1
- data/lib/racecar/rebalance_listener.rb +58 -0
- data/lib/racecar/runner.rb +79 -37
- data/lib/racecar/version.rb +3 -1
- data/lib/racecar.rb +36 -8
- data/racecar.gemspec +7 -4
- metadata +47 -25
- data/.github/workflows/rspec.yml +0 -24
data/examples/batch_consumer.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class BatchConsumer < Racecar::Consumer
|
2
4
|
subscribes_to "messages", start_from_beginning: false
|
3
5
|
|
4
|
-
def process_batch(
|
5
|
-
|
6
|
+
def process_batch(messages)
|
7
|
+
messages.each do |message|
|
6
8
|
puts message.value
|
7
9
|
end
|
8
10
|
end
|
data/examples/cat_consumer.rb
CHANGED
data/exe/racecar
CHANGED
@@ -3,19 +3,42 @@
|
|
3
3
|
require "racecar"
|
4
4
|
require "racecar/cli"
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
module Racecar
|
7
|
+
class << self
|
8
|
+
def start(argv)
|
9
|
+
Cli.main(argv)
|
10
|
+
rescue SignalException => e
|
11
|
+
# We might receive SIGTERM before our signal handler is installed.
|
12
|
+
if Signal.signame(e.signo) == "TERM"
|
13
|
+
exit(0)
|
14
|
+
else
|
15
|
+
raise
|
16
|
+
end
|
17
|
+
rescue SystemExit
|
18
|
+
raise
|
19
|
+
rescue Exception => e
|
20
|
+
$stderr.puts "=> Crashed: #{exception_with_causes(e)}\n#{e.backtrace.join("\n")}"
|
21
|
+
|
22
|
+
Racecar.config.error_handler.call(e)
|
23
|
+
|
24
|
+
exit(1)
|
25
|
+
else
|
26
|
+
exit(0)
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def exception_with_causes(e)
|
32
|
+
result = +"#{e.class}: #{e}"
|
33
|
+
if e.cause
|
34
|
+
result << "\n"
|
35
|
+
result << "--- Caused by: ---\n"
|
36
|
+
result << exception_with_causes(e.cause)
|
37
|
+
end
|
38
|
+
result
|
39
|
+
end
|
14
40
|
end
|
15
|
-
rescue
|
16
|
-
# Exceptions are printed to STDERR and sent to the error handler
|
17
|
-
# in `Racecar::Cli#run`, so we don't need to do anything here.
|
18
|
-
exit(1)
|
19
|
-
else
|
20
|
-
exit(0)
|
21
41
|
end
|
42
|
+
|
43
|
+
# Start your engines!
|
44
|
+
Racecar.start(ARGV)
|
@@ -0,0 +1 @@
|
|
1
|
+
{"title":"Racecar consumer groups","description":"Dashboard for monitoring [Racecar](https://github.com/zendesk/racecar) Kafka consumer groups.","widgets":[{"id":4916208698459109,"definition":{"title":"Single-message processing","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":82605028,"definition":{"title":"95th percentile message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.95percentile{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2857871641649870,"definition":{"title":"Max message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.max{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":88579656,"definition":{"title":"Median message processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_message.latency.median{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}}]}},{"id":4068194420543030,"definition":{"title":"Batch processing","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":341686567,"definition":{"title":"95th percentile batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_batch.latency.95percentile{$group_id,$client,$topic,$partition,$env} by {topic,group_id}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":341687897,"definition":{"title":"Median batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.process_batch.latency.median{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5352911818003929,"definition":{"title":"Max batch processing latency by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"max:racecar.consumer.process_batch.latency.max{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":1654098217056312,"definition":{"title":"Max message batch size","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"max:racecar.consumer.batch_size.max{$group_id,$client,$topic,$partition,$env} by {topic}","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":7718619791149134,"definition":{"title":"Average per-message latency in batch processing mode","show_legend":false,"legend_size":"0","legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"max:racecar.consumer.process_batch.latency.avg{$group_id,$client,$topic,$partition,$env}/max:racecar.consumer.batch_size.avg{$group_id,$client,$topic,$partition,$env}","metadata":[{"expression":"max:racecar.consumer.process_batch.latency.avg{$env,$pod,$group_id,$client,$topic,$partition}/max:racecar.consumer.batch_size.avg{$env,$pod,$group_id,$client,$topic,$partition}","alias_name":"ms"}],"style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":4,"y":2,"width":4,"height":2}}]}},{"id":7110612496425151,"definition":{"title":"Throughput & Lag","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":301212748,"definition":{"title":"Message lag changes","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"derivative(max:racecar.consumer.offset{$group_id,$client,$topic,$partition,$env} by {topic,partition,pod})","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":82604183,"definition":{"title":"Processing throughput by topic","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {topic,group_id}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5547724125706857,"definition":{"title":"Processing throughput by group","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {group_id}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":375397853,"definition":{"title":"Processing throughput by host","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {group_id,host}.as_rate()","style":{"palette":"dog_classic","line_type":"solid","line_width":"thin"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":7820607170949322,"definition":{"title":"Messages consumed in timeframe","type":"query_value","requests":[{"q":"sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env}.as_count()","aggregator":"sum"}],"autoscale":true,"precision":0},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":1428183857213882,"definition":{"title":"Time lag (end-to-end latency)","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"avg:racecar.consumer.time_lag{$group_id,$client,$topic,$partition,$env} by {group_id,pod}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":2,"width":4,"height":2}}]}},{"id":1487807434456879,"definition":{"title":"Processing Errors & Group Stability","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":82605029,"definition":{"title":"Processing errors","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.process_batch.errors{$group_id,$client,$topic,$partition,$env} by {topic,pod,group_id,partition}.as_count()+sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic,pod,group_id,partition}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":83104736,"definition":{"title":"Processing error rate by topic (%)","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"(sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()/(sum:racecar.consumer.process_message.errors{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()+sum:racecar.consumer.messages{$group_id,$client,$topic,$partition,$env} by {topic}.as_count()))*100","style":{"palette":"orange","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6572534533091871,"definition":{"title":"Processing errors in timeframe","type":"query_value","requests":[{"q":"sum:racecar.consumer.process_batch.errors{$topic,$client,$group_id,$env}.as_count()+sum:racecar.consumer.process_message.errors{$topic,$client,$group_id,$env}.as_count()","aggregator":"sum","conditional_formats":[{"comparator":">","palette":"white_on_red","value":0},{"comparator":"<=","palette":"white_on_green","value":0}]}],"autoscale":true,"precision":0},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":302705923,"definition":{"title":"Pause duration","show_legend":false,"legend_size":"0","legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.consumer.pause.duration{$client,$group_id,$topic,$env} by {pod,group_id,topic,partition}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":235544854,"definition":{"title":"Group joins","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.join_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":235544862,"definition":{"title":"Group leaves","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.leave_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":235545167,"definition":{"title":"Group syncs","show_legend":false,"legend_size":"0","type":"timeseries","requests":[{"q":"sum:racecar.consumer.sync_group.count{$group_id,$client,$env} by {group_id,host}.as_count()","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"}},"layout":{"x":0,"y":4,"width":4,"height":2}}]}},{"id":8013176155436939,"definition":{"title":"Producer & message delivery","reflow_type":"fixed","type":"group","layout_type":"ordered","widgets":[{"id":5948628389625057,"definition":{"title":"Message delivery latency (median)","title_size":"16","title_align":"left","show_legend":false,"type":"timeseries","requests":[{"q":"avg:racecar.producer.deliver.latency.median{$client,$env}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3158040379950811,"definition":{"title":"Producer buffer size (max)","title_size":"16","title_align":"left","show_legend":false,"type":"timeseries","requests":[{"q":"max:racecar.producer.buffer.size.max{$client,$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6916375790222772,"definition":{"title":"Producer buffer size (avg) kp","title_size":"16","title_align":"left","show_legend":false,"legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.producer.buffer.size.avg{$client,$env} by {topic,host}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":3160735194874896,"definition":{"title":"Message size (95p)","title_size":"16","title_align":"left","show_legend":false,"legend_layout":"vertical","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"q":"avg:racecar.producer.produce.message_size.95percentile{$env} by {topic}","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"scale":"linear","label":"","include_zero":true,"min":"auto","max":"auto"},"markers":[]},"layout":{"x":0,"y":2,"width":4,"height":2}}]}}],"template_variables":[{"name":"env","default":"production","prefix":"env"},{"name":"group_id","default":"*","prefix":"group_id"},{"name":"client","default":"*","prefix":"client"},{"name":"topic","default":"*","prefix":"topic"},{"name":"partition","default":"*","prefix":"partition"}],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"ywc-z36-g29"}
|
data/lib/ensure_hash_compact.rb
CHANGED
data/lib/racecar/cli.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "optparse"
|
2
4
|
require "logger"
|
3
5
|
require "fileutils"
|
4
6
|
require "racecar/rails_config_file_loader"
|
5
7
|
require "racecar/daemon"
|
8
|
+
require "racecar/liveness_probe"
|
6
9
|
|
7
10
|
module Racecar
|
8
11
|
class Cli
|
9
|
-
|
10
|
-
|
12
|
+
class << self
|
13
|
+
def main(args)
|
14
|
+
new(args).run
|
15
|
+
end
|
11
16
|
end
|
12
17
|
|
13
18
|
def initialize(args)
|
14
19
|
@parser = build_parser
|
15
20
|
@parser.parse!(args)
|
16
21
|
@consumer_name = args.first or raise Racecar::Error, "no consumer specified"
|
17
|
-
|
18
|
-
|
19
|
-
def config
|
20
|
-
Racecar.config
|
22
|
+
@runner = nil
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
@@ -58,21 +60,29 @@ module Racecar
|
|
58
60
|
$stderr.puts "=> Ctrl-C to shutdown consumer"
|
59
61
|
end
|
60
62
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
$stderr.puts "=> Crashed: #{e.class}: #{e}\n#{e.backtrace.join("\n")}"
|
63
|
+
if config.liveness_probe_enabled
|
64
|
+
$stderr.puts "=> Liveness probe enabled"
|
65
|
+
config.install_liveness_probe
|
66
|
+
end
|
66
67
|
|
67
|
-
|
68
|
+
processor = consumer_class.new
|
69
|
+
@runner = Racecar.runner(processor)
|
70
|
+
@runner.run
|
71
|
+
nil
|
72
|
+
end
|
68
73
|
|
69
|
-
|
74
|
+
def stop
|
75
|
+
@runner.stop
|
70
76
|
end
|
71
77
|
|
72
78
|
private
|
73
79
|
|
74
80
|
attr_reader :consumer_name
|
75
81
|
|
82
|
+
def config
|
83
|
+
Racecar.config
|
84
|
+
end
|
85
|
+
|
76
86
|
def daemonize!
|
77
87
|
daemon = Daemon.new(File.expand_path(config.pidfile))
|
78
88
|
|
@@ -102,12 +112,7 @@ module Racecar
|
|
102
112
|
opts.on("-r", "--require STRING", "Require a library before starting the consumer") do |lib|
|
103
113
|
$LOAD_PATH.unshift(Dir.pwd) unless load_path_modified
|
104
114
|
load_path_modified = true
|
105
|
-
|
106
|
-
require lib
|
107
|
-
rescue => e
|
108
|
-
$stderr.puts "=> #{lib} failed to load: #{e.message}"
|
109
|
-
exit
|
110
|
-
end
|
115
|
+
require lib
|
111
116
|
end
|
112
117
|
|
113
118
|
opts.on("-l", "--log STRING", "Log to the specified file") do |logfile|
|
@@ -115,13 +120,13 @@ module Racecar
|
|
115
120
|
end
|
116
121
|
|
117
122
|
Racecar::Config.variables.each do |variable|
|
118
|
-
opt_name = "
|
123
|
+
opt_name = +"--#{variable.name.to_s.gsub('_', '-')}"
|
119
124
|
opt_name << " #{variable.type.upcase}" unless variable.boolean?
|
120
125
|
|
121
126
|
desc = variable.description || "N/A"
|
122
127
|
|
123
128
|
if variable.default
|
124
|
-
desc
|
129
|
+
desc += " (default: #{variable.default.inspect})"
|
125
130
|
end
|
126
131
|
|
127
132
|
opts.on(opt_name, desc) do |value|
|
data/lib/racecar/config.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "tmpdir"
|
4
|
+
|
1
5
|
require "king_konf"
|
2
6
|
|
7
|
+
require "racecar/liveness_probe"
|
8
|
+
require "racecar/instrumenter"
|
9
|
+
require "racecar/rebalance_listener"
|
10
|
+
|
3
11
|
module Racecar
|
4
12
|
class Config < KingKonf::Config
|
5
13
|
env_prefix :racecar
|
6
14
|
|
15
|
+
STATISTICS_DISABLED_VALUE = 0
|
16
|
+
|
7
17
|
desc "A list of Kafka brokers in the cluster that you're consuming from"
|
8
18
|
list :brokers, default: ["localhost:9092"]
|
9
19
|
|
@@ -19,6 +29,9 @@ module Racecar
|
|
19
29
|
desc "The minimum number of messages in the local consumer queue"
|
20
30
|
integer :min_message_queue_size, default: 2000
|
21
31
|
|
32
|
+
desc "Which partition assignment strategy to use, range, roundrobin or cooperative-sticky. -- https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
|
33
|
+
string :partition_assignment_strategy, default: "range,roundrobin"
|
34
|
+
|
22
35
|
desc "Kafka consumer configuration options, separated with '=' -- https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md"
|
23
36
|
list :consumer, default: []
|
24
37
|
|
@@ -52,9 +65,12 @@ module Racecar
|
|
52
65
|
desc "How long to wait when trying to communicate with a Kafka broker"
|
53
66
|
float :socket_timeout, default: 30
|
54
67
|
|
55
|
-
desc "How long to allow the Kafka brokers to wait before returning messages"
|
68
|
+
desc "How long to allow the Kafka brokers to wait before returning messages (in seconds)"
|
56
69
|
float :max_wait_time, default: 1
|
57
70
|
|
71
|
+
desc "How long to try to deliver a produced message before finally giving up (in seconds)"
|
72
|
+
float :message_timeout, default: 5*60
|
73
|
+
|
58
74
|
desc "Maximum amount of data the broker shall return for a Fetch request"
|
59
75
|
integer :max_bytes, default: 10485760
|
60
76
|
|
@@ -70,6 +86,9 @@ module Racecar
|
|
70
86
|
desc "The log level for the Racecar logs"
|
71
87
|
string :log_level, default: "info"
|
72
88
|
|
89
|
+
desc "The strategy used to determine which topic partition a message is written to when Racecar produces a value to Kafka; defaults to `consistent_random`"
|
90
|
+
symbol :partitioner, allowed_values: %i{consistent consistent_random murmur2 murmur2_random fnv1a fnv1a_random}, default: :consistent_random
|
91
|
+
|
73
92
|
desc "Protocol used to communicate with brokers"
|
74
93
|
symbol :security_protocol, allowed_values: %i{plaintext ssl sasl_plaintext sasl_ssl}
|
75
94
|
|
@@ -151,10 +170,33 @@ module Racecar
|
|
151
170
|
desc "Whether to boot Rails when starting the consumer"
|
152
171
|
boolean :without_rails, default: false
|
153
172
|
|
173
|
+
desc "How frequently librdkafka should report statistics to your application (in seconds). A statistics callback
|
174
|
+
must also be provided. This should be defined with a `statistics_callback` method on your processor. Stats
|
175
|
+
are disabled if this value is set to 0, or there is no callback defined. This is set by default to 1 second
|
176
|
+
for backward compatibility, however this can be quite memory intensive"
|
177
|
+
integer :statistics_interval, default: 1
|
178
|
+
|
179
|
+
desc "Whether to enable liveness probe behavior (touch the file)"
|
180
|
+
boolean :liveness_probe_enabled, default: false
|
181
|
+
|
182
|
+
desc "Path to a file Racecar will touch to show liveness"
|
183
|
+
string :liveness_probe_file_path, default: "#{Dir.tmpdir}/racecar-liveness"
|
184
|
+
|
185
|
+
desc "Used only by the liveness probe: Max time (in seconds) between liveness events before the process is considered not healthy"
|
186
|
+
integer :liveness_probe_max_interval, default: 5
|
187
|
+
|
154
188
|
# The error handler must be set directly on the object.
|
155
189
|
attr_reader :error_handler
|
156
190
|
|
157
|
-
attr_accessor :subscriptions, :logger
|
191
|
+
attr_accessor :subscriptions, :logger, :parallel_workers
|
192
|
+
|
193
|
+
def statistics_interval_ms
|
194
|
+
if Rdkafka::Config.statistics_callback
|
195
|
+
statistics_interval * 1000
|
196
|
+
else
|
197
|
+
STATISTICS_DISABLED_VALUE
|
198
|
+
end
|
199
|
+
end
|
158
200
|
|
159
201
|
def max_wait_time_ms
|
160
202
|
max_wait_time * 1000
|
@@ -189,6 +231,7 @@ module Racecar
|
|
189
231
|
end
|
190
232
|
|
191
233
|
def load_consumer_class(consumer_class)
|
234
|
+
self.consumer_class = consumer_class
|
192
235
|
self.group_id = consumer_class.group_id || self.group_id
|
193
236
|
|
194
237
|
self.group_id ||= [
|
@@ -196,13 +239,16 @@ module Racecar
|
|
196
239
|
group_id_prefix,
|
197
240
|
|
198
241
|
# MyFunnyConsumer => my-funny-consumer
|
199
|
-
consumer_class.name.gsub(/[a-z][A-Z]/) {|str| str[0]
|
200
|
-
].compact.join
|
242
|
+
consumer_class.name.gsub(/[a-z][A-Z]/) { |str| "#{str[0]}-#{str[1]}" }.downcase,
|
243
|
+
].compact.join
|
201
244
|
|
245
|
+
self.parallel_workers = consumer_class.parallel_workers
|
202
246
|
self.subscriptions = consumer_class.subscriptions
|
203
247
|
self.max_wait_time = consumer_class.max_wait_time || self.max_wait_time
|
248
|
+
self.fetch_messages = consumer_class.fetch_messages || self.fetch_messages
|
204
249
|
self.pidfile ||= "#{group_id}.pid"
|
205
250
|
end
|
251
|
+
attr_accessor :consumer_class
|
206
252
|
|
207
253
|
def on_error(&handler)
|
208
254
|
@error_handler = handler
|
@@ -224,11 +270,41 @@ module Racecar
|
|
224
270
|
producer_config
|
225
271
|
end
|
226
272
|
|
273
|
+
def instrumenter
|
274
|
+
@instrumenter ||= begin
|
275
|
+
default_payload = { client_id: client_id, group_id: group_id }
|
276
|
+
|
277
|
+
if defined?(ActiveSupport::Notifications)
|
278
|
+
# ActiveSupport needs `concurrent-ruby` but doesn't `require` it.
|
279
|
+
require 'concurrent/utility/monotonic_time'
|
280
|
+
Instrumenter.new(backend: ActiveSupport::Notifications, default_payload: default_payload)
|
281
|
+
else
|
282
|
+
logger.warn "ActiveSupport::Notifications not available, instrumentation is disabled"
|
283
|
+
NullInstrumenter
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
287
|
+
attr_writer :instrumenter
|
288
|
+
|
289
|
+
def install_liveness_probe
|
290
|
+
liveness_probe.tap(&:install)
|
291
|
+
end
|
292
|
+
|
293
|
+
def liveness_probe
|
294
|
+
require "active_support/notifications"
|
295
|
+
@liveness_probe ||= LivenessProbe.new(
|
296
|
+
ActiveSupport::Notifications,
|
297
|
+
liveness_probe_file_path,
|
298
|
+
liveness_probe_max_interval
|
299
|
+
)
|
300
|
+
end
|
301
|
+
|
227
302
|
private
|
228
303
|
|
229
304
|
def rdkafka_security_config
|
230
305
|
{
|
231
306
|
"security.protocol" => security_protocol,
|
307
|
+
"enable.ssl.certificate.verification" => ssl_verify_hostname,
|
232
308
|
"ssl.ca.location" => ssl_ca_location,
|
233
309
|
"ssl.crl.location" => ssl_crl_location,
|
234
310
|
"ssl.keystore.location" => ssl_keystore_location,
|
data/lib/racecar/consumer.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "racecar/message_delivery_error"
|
4
|
+
|
1
5
|
module Racecar
|
2
6
|
class Consumer
|
3
7
|
Subscription = Struct.new(:topic, :start_from_beginning, :max_bytes_per_partition, :additional_config)
|
@@ -5,7 +9,7 @@ module Racecar
|
|
5
9
|
class << self
|
6
10
|
attr_accessor :max_wait_time
|
7
11
|
attr_accessor :group_id
|
8
|
-
attr_accessor :producer, :consumer
|
12
|
+
attr_accessor :producer, :consumer, :parallel_workers, :fetch_messages
|
9
13
|
|
10
14
|
def subscriptions
|
11
15
|
@subscriptions ||= []
|
@@ -23,29 +27,68 @@ module Racecar
|
|
23
27
|
# @param additional_config [Hash] Configuration properties for consumer.
|
24
28
|
# See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
25
29
|
# @return [nil]
|
26
|
-
def subscribes_to(
|
30
|
+
def subscribes_to(
|
31
|
+
*topics,
|
32
|
+
start_from_beginning: true,
|
33
|
+
max_bytes_per_partition: 1048576,
|
34
|
+
additional_config: {}
|
35
|
+
)
|
27
36
|
topics.each do |topic|
|
28
37
|
subscriptions << Subscription.new(topic, start_from_beginning, max_bytes_per_partition, additional_config)
|
29
38
|
end
|
30
39
|
end
|
40
|
+
|
41
|
+
# Rebalance hooks for subclasses to override
|
42
|
+
def on_partitions_assigned(rebalance_event); end
|
43
|
+
def on_partitions_revoked(rebalance_event); end
|
31
44
|
end
|
32
45
|
|
33
|
-
def configure(producer:, consumer:, instrumenter: NullInstrumenter)
|
46
|
+
def configure(producer:, consumer:, instrumenter: NullInstrumenter, config: Racecar.config)
|
34
47
|
@producer = producer
|
48
|
+
@delivery_handles = []
|
49
|
+
|
35
50
|
@consumer = consumer
|
51
|
+
|
36
52
|
@instrumenter = instrumenter
|
53
|
+
@config = config
|
37
54
|
end
|
38
55
|
|
39
56
|
def teardown; end
|
40
57
|
|
41
|
-
#
|
58
|
+
# Blocks until all messages produced so far have been successfully published. If
|
59
|
+
# message delivery finally fails, a Racecar::MessageDeliveryError is raised. The
|
60
|
+
# delivery failed for the reason in the exception. The error can be broker side
|
61
|
+
# (e.g. downtime, configuration issue) or specific to the message being sent. The
|
62
|
+
# caller must handle the latter cases or run into head of line blocking.
|
42
63
|
def deliver!
|
43
64
|
@delivery_handles ||= []
|
44
65
|
if @delivery_handles.any?
|
45
66
|
instrumentation_payload = { delivered_message_count: @delivery_handles.size }
|
46
67
|
|
47
68
|
@instrumenter.instrument('deliver_messages', instrumentation_payload) do
|
48
|
-
@delivery_handles.each
|
69
|
+
@delivery_handles.each do |handle|
|
70
|
+
begin
|
71
|
+
# rdkafka-ruby checks every wait_timeout seconds if the message was
|
72
|
+
# successfully delivered, up to max_wait_timeout seconds before raising
|
73
|
+
# Rdkafka::AbstractHandle::WaitTimeoutError. librdkafka will (re)try to
|
74
|
+
# deliver all messages in the background, until "config.message_timeout"
|
75
|
+
# (message.timeout.ms) is exceeded. Phrased differently, rdkafka-ruby's
|
76
|
+
# WaitTimeoutError is just informative.
|
77
|
+
# The raising can be avoided if max_wait_timeout below is greater than
|
78
|
+
# config.message_timeout, but config is not available here (without
|
79
|
+
# changing the interface).
|
80
|
+
handle.wait(max_wait_timeout: 60, wait_timeout: 0.1)
|
81
|
+
rescue Rdkafka::AbstractHandle::WaitTimeoutError => e
|
82
|
+
partition = MessageDeliveryError.partition_from_delivery_handle(handle)
|
83
|
+
# ideally we could use the logger passed to the Runner, but it is not
|
84
|
+
# available here. The runner sets it for Rdkafka, though, so we can use
|
85
|
+
# that instead.
|
86
|
+
@config.logger.debug "Still trying to deliver message to (partition #{partition})... (will try up to Racecar.config.message_timeout)"
|
87
|
+
retry
|
88
|
+
rescue Rdkafka::RdkafkaError => e
|
89
|
+
raise MessageDeliveryError.new(e, handle)
|
90
|
+
end
|
91
|
+
end
|
49
92
|
end
|
50
93
|
end
|
51
94
|
@delivery_handles.clear
|
@@ -54,13 +97,14 @@ module Racecar
|
|
54
97
|
protected
|
55
98
|
|
56
99
|
# https://github.com/appsignal/rdkafka-ruby#producing-messages
|
57
|
-
def produce(payload, topic:, key
|
100
|
+
def produce(payload, topic:, key: nil, partition: nil, partition_key: nil, headers: nil, create_time: nil)
|
58
101
|
@delivery_handles ||= []
|
59
102
|
message_size = payload.respond_to?(:bytesize) ? payload.bytesize : 0
|
60
103
|
instrumentation_payload = {
|
61
104
|
value: payload,
|
62
105
|
headers: headers,
|
63
106
|
key: key,
|
107
|
+
partition: partition,
|
64
108
|
partition_key: partition_key,
|
65
109
|
topic: topic,
|
66
110
|
message_size: message_size,
|
@@ -73,6 +117,7 @@ module Racecar
|
|
73
117
|
topic: topic,
|
74
118
|
payload: payload,
|
75
119
|
key: key,
|
120
|
+
partition: partition,
|
76
121
|
partition_key: partition_key,
|
77
122
|
timestamp: create_time,
|
78
123
|
headers: headers,
|