karafka 2.0.14 → 2.0.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -1
- data/.rspec +2 -0
- data/CHANGELOG.md +84 -0
- data/Gemfile.lock +14 -14
- data/LICENSE +1 -1
- data/README.md +2 -1
- data/bin/integrations +3 -2
- data/bin/rspecs +4 -0
- data/config/errors.yml +10 -4
- data/lib/active_job/karafka.rb +0 -6
- data/lib/karafka/active_job/consumer.rb +1 -0
- data/lib/karafka/admin.rb +6 -3
- data/lib/karafka/base_consumer.rb +31 -21
- data/lib/karafka/connection/client.rb +2 -4
- data/lib/karafka/connection/listener.rb +6 -4
- data/lib/karafka/contracts/consumer_group.rb +0 -14
- data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
- data/lib/karafka/errors.rb +6 -4
- data/lib/karafka/instrumentation/logger_listener.rb +25 -11
- data/lib/karafka/instrumentation/notifications.rb +2 -0
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +59 -32
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
- data/lib/karafka/pro/active_job/consumer.rb +3 -1
- data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
- data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
- data/lib/karafka/pro/base_consumer.rb +3 -85
- data/lib/karafka/pro/loader.rb +31 -24
- data/lib/karafka/pro/performance_tracker.rb +3 -1
- data/lib/karafka/pro/processing/coordinator.rb +16 -1
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
- data/lib/karafka/pro/processing/partitioner.rb +14 -17
- data/lib/karafka/pro/processing/scheduler.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/base.rb +26 -0
- data/lib/karafka/pro/processing/strategies/default.rb +69 -0
- data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
- data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
- data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
- data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
- data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
- data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
- data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
- data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
- data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
- data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
- data/lib/karafka/processing/coordinator.rb +1 -1
- data/lib/karafka/processing/executor.rb +6 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
- data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
- data/lib/karafka/processing/strategies/base.rb +37 -0
- data/lib/karafka/processing/strategies/default.rb +52 -0
- data/lib/karafka/processing/strategies/dlq.rb +77 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
- data/lib/karafka/processing/strategies/mom.rb +29 -0
- data/lib/karafka/processing/strategy_selector.rb +30 -0
- data/lib/karafka/railtie.rb +9 -8
- data/lib/karafka/routing/builder.rb +6 -0
- data/lib/karafka/routing/features/active_job/builder.rb +33 -0
- data/lib/karafka/routing/features/active_job/config.rb +15 -0
- data/lib/karafka/routing/features/active_job/contract.rb +41 -0
- data/lib/karafka/routing/features/active_job/topic.rb +33 -0
- data/lib/karafka/routing/features/active_job.rb +13 -0
- data/lib/karafka/routing/features/base/expander.rb +53 -0
- data/lib/karafka/routing/features/base.rb +34 -0
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
- data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
- data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
- data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
- data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
- data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
- data/lib/karafka/routing/topic.rb +2 -10
- data/lib/karafka/server.rb +4 -2
- data/lib/karafka/setup/attributes_map.rb +5 -0
- data/lib/karafka/setup/config.rb +4 -4
- data/lib/karafka/time_trackers/pause.rb +21 -12
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +7 -11
- data.tar.gz.sig +0 -0
- metadata +57 -9
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing/extensions.rb +0 -33
- data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
- data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
@@ -1 +1 @@
|
|
1
|
-
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
@@ -46,7 +46,11 @@ module Karafka
|
|
46
46
|
RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
|
47
47
|
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
|
48
48
|
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
|
49
|
-
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
|
49
|
+
RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99]),
|
50
|
+
|
51
|
+
# Topics metrics
|
52
|
+
RdKafkaMetric.new(:gauge, :topics, 'consumer.lags', 'consumer_lag_stored'),
|
53
|
+
RdKafkaMetric.new(:gauge, :topics, 'consumer.lags_delta', 'consumer_lag_stored_d')
|
50
54
|
].freeze
|
51
55
|
|
52
56
|
configure
|
@@ -68,9 +72,12 @@ module Karafka
|
|
68
72
|
# @param event [Karafka::Core::Monitoring::Event]
|
69
73
|
def on_statistics_emitted(event)
|
70
74
|
statistics = event[:statistics]
|
75
|
+
consumer_group_id = event[:consumer_group_id]
|
76
|
+
|
77
|
+
base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
|
71
78
|
|
72
79
|
rd_kafka_metrics.each do |metric|
|
73
|
-
report_metric(metric, statistics)
|
80
|
+
report_metric(metric, statistics, base_tags)
|
74
81
|
end
|
75
82
|
end
|
76
83
|
|
@@ -81,12 +88,7 @@ module Karafka
|
|
81
88
|
extra_tags = ["type:#{event[:type]}"]
|
82
89
|
|
83
90
|
if event.payload[:caller].respond_to?(:messages)
|
84
|
-
|
85
|
-
|
86
|
-
extra_tags += [
|
87
|
-
"topic:#{metadata.topic}",
|
88
|
-
"partition:#{metadata.partition}"
|
89
|
-
]
|
91
|
+
extra_tags += consumer_tags(event.payload[:caller])
|
90
92
|
end
|
91
93
|
|
92
94
|
count('error_occurred', 1, tags: default_tags + extra_tags)
|
@@ -99,21 +101,23 @@ module Karafka
|
|
99
101
|
time_taken = event[:time]
|
100
102
|
messages_count = event[:messages_buffer].size
|
101
103
|
|
102
|
-
|
103
|
-
|
104
|
+
consumer_group_id = event[:subscription_group].consumer_group_id
|
105
|
+
|
106
|
+
extra_tags = ["consumer_group:#{consumer_group_id}"]
|
107
|
+
|
108
|
+
histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
|
109
|
+
histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
|
104
110
|
end
|
105
111
|
|
106
112
|
# Here we report majority of things related to processing as we have access to the
|
107
113
|
# consumer
|
108
114
|
# @param event [Karafka::Core::Monitoring::Event]
|
109
115
|
def on_consumer_consumed(event)
|
110
|
-
|
116
|
+
consumer = event.payload[:caller]
|
117
|
+
messages = consumer.messages
|
111
118
|
metadata = messages.metadata
|
112
119
|
|
113
|
-
tags = default_tags +
|
114
|
-
"topic:#{metadata.topic}",
|
115
|
-
"partition:#{metadata.partition}"
|
116
|
-
]
|
120
|
+
tags = default_tags + consumer_tags(consumer)
|
117
121
|
|
118
122
|
count('consumer.messages', messages.count, tags: tags)
|
119
123
|
count('consumer.batches', 1, tags: tags)
|
@@ -126,26 +130,14 @@ module Karafka
|
|
126
130
|
|
127
131
|
# @param event [Karafka::Core::Monitoring::Event]
|
128
132
|
def on_consumer_revoked(event)
|
129
|
-
|
130
|
-
metadata = messages.metadata
|
131
|
-
|
132
|
-
tags = default_tags + [
|
133
|
-
"topic:#{metadata.topic}",
|
134
|
-
"partition:#{metadata.partition}"
|
135
|
-
]
|
133
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
136
134
|
|
137
135
|
count('consumer.revoked', 1, tags: tags)
|
138
136
|
end
|
139
137
|
|
140
138
|
# @param event [Karafka::Core::Monitoring::Event]
|
141
139
|
def on_consumer_shutdown(event)
|
142
|
-
|
143
|
-
metadata = messages.metadata
|
144
|
-
|
145
|
-
tags = default_tags + [
|
146
|
-
"topic:#{metadata.topic}",
|
147
|
-
"partition:#{metadata.partition}"
|
148
|
-
]
|
140
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
149
141
|
|
150
142
|
count('consumer.shutdown', 1, tags: tags)
|
151
143
|
end
|
@@ -198,14 +190,15 @@ module Karafka
|
|
198
190
|
# Reports a given metric statistics to Datadog
|
199
191
|
# @param metric [RdKafkaMetric] metric value object
|
200
192
|
# @param statistics [Hash] hash with all the statistics emitted
|
201
|
-
|
193
|
+
# @param base_tags [Array<String>] base tags we want to start with
|
194
|
+
def report_metric(metric, statistics, base_tags)
|
202
195
|
case metric.scope
|
203
196
|
when :root
|
204
197
|
public_send(
|
205
198
|
metric.type,
|
206
199
|
metric.name,
|
207
200
|
statistics.fetch(*metric.key_location),
|
208
|
-
tags:
|
201
|
+
tags: base_tags
|
209
202
|
)
|
210
203
|
when :brokers
|
211
204
|
statistics.fetch('brokers').each_value do |broker_statistics|
|
@@ -218,13 +211,47 @@ module Karafka
|
|
218
211
|
metric.type,
|
219
212
|
metric.name,
|
220
213
|
broker_statistics.dig(*metric.key_location),
|
221
|
-
tags:
|
214
|
+
tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
|
222
215
|
)
|
223
216
|
end
|
217
|
+
when :topics
|
218
|
+
statistics.fetch('topics').each do |topic_name, topic_values|
|
219
|
+
topic_values['partitions'].each do |partition_name, partition_statistics|
|
220
|
+
next if partition_name == '-1'
|
221
|
+
# Skip until lag info is available
|
222
|
+
next if partition_statistics['consumer_lag'] == -1
|
223
|
+
|
224
|
+
public_send(
|
225
|
+
metric.type,
|
226
|
+
metric.name,
|
227
|
+
partition_statistics.dig(*metric.key_location),
|
228
|
+
tags: base_tags + [
|
229
|
+
"topic:#{topic_name}",
|
230
|
+
"partition:#{partition_name}"
|
231
|
+
]
|
232
|
+
)
|
233
|
+
end
|
234
|
+
end
|
224
235
|
else
|
225
236
|
raise ArgumentError, metric.scope
|
226
237
|
end
|
227
238
|
end
|
239
|
+
|
240
|
+
# Builds basic per consumer tags for publication
|
241
|
+
#
|
242
|
+
# @param consumer [Karafka::BaseConsumer]
|
243
|
+
# @return [Array<String>]
|
244
|
+
def consumer_tags(consumer)
|
245
|
+
messages = consumer.messages
|
246
|
+
metadata = messages.metadata
|
247
|
+
consumer_group_id = consumer.topic.consumer_group.id
|
248
|
+
|
249
|
+
[
|
250
|
+
"topic:#{metadata.topic}",
|
251
|
+
"partition:#{metadata.partition}",
|
252
|
+
"consumer_group:#{consumer_group_id}"
|
253
|
+
]
|
254
|
+
end
|
228
255
|
end
|
229
256
|
end
|
230
257
|
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Instrumentation
|
5
|
+
# Namespace for vendor specific instrumentation
|
6
|
+
module Vendors
|
7
|
+
# Datadog specific instrumentation
|
8
|
+
module Datadog
|
9
|
+
# A karafka's logger listener for Datadog
|
10
|
+
# It depends on the 'ddtrace' gem
|
11
|
+
class LoggerListener
|
12
|
+
include ::Karafka::Core::Configurable
|
13
|
+
extend Forwardable
|
14
|
+
|
15
|
+
def_delegators :config, :client
|
16
|
+
|
17
|
+
# `Datadog::Tracing` client that we should use to trace stuff
|
18
|
+
setting :client
|
19
|
+
|
20
|
+
configure
|
21
|
+
|
22
|
+
# Log levels that we use in this particular listener
|
23
|
+
USED_LOG_LEVELS = %i[
|
24
|
+
info
|
25
|
+
error
|
26
|
+
fatal
|
27
|
+
].freeze
|
28
|
+
|
29
|
+
private_constant :USED_LOG_LEVELS
|
30
|
+
|
31
|
+
# @param block [Proc] configuration block
|
32
|
+
def initialize(&block)
|
33
|
+
configure
|
34
|
+
setup(&block) if block
|
35
|
+
end
|
36
|
+
|
37
|
+
# @param block [Proc] configuration block
|
38
|
+
# @note We define this alias to be consistent with `WaterDrop#setup`
|
39
|
+
def setup(&block)
|
40
|
+
configure(&block)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Prints info about the fact that a given job has started
|
44
|
+
#
|
45
|
+
# @param event [Dry::Events::Event] event details including payload
|
46
|
+
def on_worker_process(event)
|
47
|
+
current_span = client.trace('karafka.consumer')
|
48
|
+
push_tags
|
49
|
+
|
50
|
+
job = event[:job]
|
51
|
+
job_type = job.class.to_s.split('::').last
|
52
|
+
consumer = job.executor.topic.consumer
|
53
|
+
topic = job.executor.topic.name
|
54
|
+
|
55
|
+
current_span.resource = "#{consumer}#consume"
|
56
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
|
57
|
+
|
58
|
+
pop_tags
|
59
|
+
end
|
60
|
+
|
61
|
+
# Prints info about the fact that a given job has finished
|
62
|
+
#
|
63
|
+
# @param event [Dry::Events::Event] event details including payload
|
64
|
+
def on_worker_processed(event)
|
65
|
+
push_tags
|
66
|
+
|
67
|
+
job = event[:job]
|
68
|
+
time = event[:time]
|
69
|
+
job_type = job.class.to_s.split('::').last
|
70
|
+
consumer = job.executor.topic.consumer
|
71
|
+
topic = job.executor.topic.name
|
72
|
+
|
73
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
|
74
|
+
|
75
|
+
current_span = client.active_span
|
76
|
+
current_span.finish if current_span.present?
|
77
|
+
|
78
|
+
pop_tags
|
79
|
+
end
|
80
|
+
|
81
|
+
# There are many types of errors that can occur in many places, but we provide a single
|
82
|
+
# handler for all of them to simplify error instrumentation.
|
83
|
+
# @param event [Dry::Events::Event] event details including payload
|
84
|
+
def on_error_occurred(event)
|
85
|
+
push_tags
|
86
|
+
|
87
|
+
error = event[:error]
|
88
|
+
client.active_span&.set_error(error)
|
89
|
+
|
90
|
+
case event[:type]
|
91
|
+
when 'consumer.consume.error'
|
92
|
+
error "Consumer consuming error: #{error}"
|
93
|
+
when 'consumer.revoked.error'
|
94
|
+
error "Consumer on revoked failed due to an error: #{error}"
|
95
|
+
when 'consumer.before_enqueue.error'
|
96
|
+
error "Consumer before enqueue failed due to an error: #{error}"
|
97
|
+
when 'consumer.before_consume.error'
|
98
|
+
error "Consumer before consume failed due to an error: #{error}"
|
99
|
+
when 'consumer.after_consume.error'
|
100
|
+
error "Consumer after consume failed due to an error: #{error}"
|
101
|
+
when 'consumer.shutdown.error'
|
102
|
+
error "Consumer on shutdown failed due to an error: #{error}"
|
103
|
+
when 'worker.process.error'
|
104
|
+
fatal "Worker processing failed due to an error: #{error}"
|
105
|
+
when 'connection.listener.fetch_loop.error'
|
106
|
+
error "Listener fetch loop error: #{error}"
|
107
|
+
when 'runner.call.error'
|
108
|
+
fatal "Runner crashed due to an error: #{error}"
|
109
|
+
when 'app.stopping.error'
|
110
|
+
error 'Forceful Karafka server stop'
|
111
|
+
when 'librdkafka.error'
|
112
|
+
error "librdkafka internal error occurred: #{error}"
|
113
|
+
# Those will only occur when retries in the client fail and when they did not stop
|
114
|
+
# after back-offs
|
115
|
+
when 'connection.client.poll.error'
|
116
|
+
error "Data polling error occurred: #{error}"
|
117
|
+
else
|
118
|
+
pop_tags
|
119
|
+
# This should never happen. Please contact the maintainers
|
120
|
+
raise Errors::UnsupportedCaseError, event
|
121
|
+
end
|
122
|
+
|
123
|
+
pop_tags
|
124
|
+
end
|
125
|
+
|
126
|
+
USED_LOG_LEVELS.each do |log_level|
|
127
|
+
define_method log_level do |*args|
|
128
|
+
Karafka.logger.send(log_level, *args)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# Pushes datadog's tags to the logger
|
133
|
+
# This is required when tracing log lines asynchronously to correlate logs of the same
|
134
|
+
# process together
|
135
|
+
def push_tags
|
136
|
+
return unless Karafka.logger.respond_to?(:push_tags)
|
137
|
+
|
138
|
+
Karafka.logger.push_tags(client.log_correlation)
|
139
|
+
end
|
140
|
+
|
141
|
+
# Pops datadog's tags from the logger
|
142
|
+
# This is required when tracing log lines asynchronously to avoid the logs of the
|
143
|
+
# different processes to be correlated
|
144
|
+
def pop_tags
|
145
|
+
return unless Karafka.logger.respond_to?(:pop_tags)
|
146
|
+
|
147
|
+
Karafka.logger.pop_tags
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -18,90 +20,6 @@ module Karafka
|
|
18
20
|
# @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
|
19
21
|
# after each batch is processed.
|
20
22
|
class BaseConsumer < Karafka::BaseConsumer
|
21
|
-
# Pause for tops 31 years
|
22
|
-
MAX_PAUSE_TIME = 1_000_000_000_000
|
23
|
-
|
24
|
-
private_constant :MAX_PAUSE_TIME
|
25
|
-
|
26
|
-
# Pauses processing of a given partition until we're done with the processing.
|
27
|
-
# This ensures, that we can easily poll not reaching the `max.poll.interval`
|
28
|
-
# @note This needs to happen in the listener thread, because we cannot wait on this being
|
29
|
-
# executed in the workers. Workers may be already running some LRJ jobs that are blocking
|
30
|
-
# all the threads until finished, yet unless we pause the incoming partitions information,
|
31
|
-
# we may be kicked out of the consumer group due to not polling often enough
|
32
|
-
def on_before_enqueue
|
33
|
-
return unless topic.long_running_job?
|
34
|
-
|
35
|
-
# This ensures that when running LRJ with VP, things operate as expected run only once
|
36
|
-
# for all the virtual partitions collectively
|
37
|
-
coordinator.on_enqueued do
|
38
|
-
# Pause at the first message in a batch. That way in case of a crash, we will not loose
|
39
|
-
# any messages.
|
40
|
-
#
|
41
|
-
# For VP it applies the same way and since VP cannot be used with MOM we should not have
|
42
|
-
# any edge cases here.
|
43
|
-
pause(coordinator.seek_offset, MAX_PAUSE_TIME)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
# Runs extra logic after consumption that is related to handling long-running jobs
|
48
|
-
# @note This overwrites the '#on_after_consume' from the base consumer
|
49
|
-
def on_after_consume
|
50
|
-
coordinator.on_finished do |last_group_message|
|
51
|
-
on_after_consume_regular(last_group_message)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
# Trigger method for running on partition revocation.
|
56
|
-
#
|
57
|
-
# @private
|
58
|
-
def on_revoked
|
59
|
-
# We do not want to resume on revocation in case of a LRJ.
|
60
|
-
# For LRJ we resume after the successful processing or do a backoff pause in case of a
|
61
|
-
# failure. Double non-blocking resume could cause problems in coordination.
|
62
|
-
resume unless topic.long_running_job?
|
63
|
-
|
64
|
-
coordinator.revoke
|
65
|
-
|
66
|
-
Karafka.monitor.instrument('consumer.revoked', caller: self) do
|
67
|
-
revoked
|
68
|
-
end
|
69
|
-
rescue StandardError => e
|
70
|
-
Karafka.monitor.instrument(
|
71
|
-
'error.occurred',
|
72
|
-
error: e,
|
73
|
-
caller: self,
|
74
|
-
type: 'consumer.revoked.error'
|
75
|
-
)
|
76
|
-
end
|
77
|
-
|
78
|
-
private
|
79
|
-
|
80
|
-
# Handles the post-consumption flow depending on topic settings
|
81
|
-
#
|
82
|
-
# @param last_group_message [Karafka::Messages::Message]
|
83
|
-
def on_after_consume_regular(last_group_message)
|
84
|
-
if coordinator.success?
|
85
|
-
coordinator.pause_tracker.reset
|
86
|
-
|
87
|
-
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
88
|
-
# with manual offset management
|
89
|
-
# Mark as consumed only if manual offset management is not on
|
90
|
-
mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
|
91
|
-
|
92
|
-
# If this is not a long-running job there is nothing for us to do here
|
93
|
-
return unless topic.long_running_job?
|
94
|
-
|
95
|
-
seek(coordinator.seek_offset) unless revoked?
|
96
|
-
|
97
|
-
resume
|
98
|
-
else
|
99
|
-
# If processing failed, we need to pause
|
100
|
-
# For long running job this will overwrite the default never-ending pause and will cause
|
101
|
-
# the processing to keep going after the error backoff
|
102
|
-
pause(coordinator.seek_offset)
|
103
|
-
end
|
104
|
-
end
|
105
23
|
end
|
106
24
|
end
|
107
25
|
end
|