karafka 2.0.15 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +1 -1
- data/.rspec +2 -0
- data/CHANGELOG.md +78 -0
- data/Gemfile.lock +14 -14
- data/LICENSE +1 -1
- data/README.md +2 -1
- data/bin/integrations +3 -2
- data/bin/rspecs +4 -0
- data/config/errors.yml +10 -4
- data/lib/active_job/karafka.rb +0 -6
- data/lib/karafka/active_job/consumer.rb +1 -0
- data/lib/karafka/admin.rb +2 -2
- data/lib/karafka/base_consumer.rb +31 -21
- data/lib/karafka/connection/listener.rb +6 -4
- data/lib/karafka/contracts/consumer_group.rb +0 -14
- data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
- data/lib/karafka/errors.rb +6 -4
- data/lib/karafka/instrumentation/logger_listener.rb +25 -11
- data/lib/karafka/instrumentation/notifications.rb +2 -0
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +37 -32
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
- data/lib/karafka/pro/active_job/consumer.rb +3 -1
- data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
- data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
- data/lib/karafka/pro/base_consumer.rb +3 -85
- data/lib/karafka/pro/loader.rb +31 -24
- data/lib/karafka/pro/performance_tracker.rb +3 -1
- data/lib/karafka/pro/processing/coordinator.rb +16 -1
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
- data/lib/karafka/pro/processing/partitioner.rb +3 -1
- data/lib/karafka/pro/processing/scheduler.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/base.rb +26 -0
- data/lib/karafka/pro/processing/strategies/default.rb +69 -0
- data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
- data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
- data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
- data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
- data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
- data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
- data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
- data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
- data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
- data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
- data/lib/karafka/processing/coordinator.rb +1 -1
- data/lib/karafka/processing/executor.rb +6 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
- data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
- data/lib/karafka/processing/strategies/base.rb +37 -0
- data/lib/karafka/processing/strategies/default.rb +52 -0
- data/lib/karafka/processing/strategies/dlq.rb +77 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
- data/lib/karafka/processing/strategies/mom.rb +29 -0
- data/lib/karafka/processing/strategy_selector.rb +30 -0
- data/lib/karafka/railtie.rb +9 -8
- data/lib/karafka/routing/builder.rb +6 -0
- data/lib/karafka/routing/features/active_job/builder.rb +33 -0
- data/lib/karafka/routing/features/active_job/config.rb +15 -0
- data/lib/karafka/routing/features/active_job/contract.rb +41 -0
- data/lib/karafka/routing/features/active_job/topic.rb +33 -0
- data/lib/karafka/routing/features/active_job.rb +13 -0
- data/lib/karafka/routing/features/base/expander.rb +53 -0
- data/lib/karafka/routing/features/base.rb +34 -0
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
- data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
- data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
- data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
- data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
- data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
- data/lib/karafka/routing/topic.rb +2 -10
- data/lib/karafka/server.rb +4 -2
- data/lib/karafka/setup/attributes_map.rb +5 -0
- data/lib/karafka/setup/config.rb +4 -4
- data/lib/karafka/time_trackers/pause.rb +21 -12
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +7 -11
- data.tar.gz.sig +0 -0
- metadata +57 -9
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing/extensions.rb +0 -33
- data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
- data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
|
@@ -1 +1 @@
|
|
|
1
|
-
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags_delta{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
|
@@ -72,9 +72,12 @@ module Karafka
|
|
|
72
72
|
# @param event [Karafka::Core::Monitoring::Event]
|
|
73
73
|
def on_statistics_emitted(event)
|
|
74
74
|
statistics = event[:statistics]
|
|
75
|
+
consumer_group_id = event[:consumer_group_id]
|
|
76
|
+
|
|
77
|
+
base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
|
|
75
78
|
|
|
76
79
|
rd_kafka_metrics.each do |metric|
|
|
77
|
-
report_metric(metric, statistics)
|
|
80
|
+
report_metric(metric, statistics, base_tags)
|
|
78
81
|
end
|
|
79
82
|
end
|
|
80
83
|
|
|
@@ -85,12 +88,7 @@ module Karafka
|
|
|
85
88
|
extra_tags = ["type:#{event[:type]}"]
|
|
86
89
|
|
|
87
90
|
if event.payload[:caller].respond_to?(:messages)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
extra_tags += [
|
|
91
|
-
"topic:#{metadata.topic}",
|
|
92
|
-
"partition:#{metadata.partition}"
|
|
93
|
-
]
|
|
91
|
+
extra_tags += consumer_tags(event.payload[:caller])
|
|
94
92
|
end
|
|
95
93
|
|
|
96
94
|
count('error_occurred', 1, tags: default_tags + extra_tags)
|
|
@@ -103,21 +101,23 @@ module Karafka
|
|
|
103
101
|
time_taken = event[:time]
|
|
104
102
|
messages_count = event[:messages_buffer].size
|
|
105
103
|
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
consumer_group_id = event[:subscription_group].consumer_group_id
|
|
105
|
+
|
|
106
|
+
extra_tags = ["consumer_group:#{consumer_group_id}"]
|
|
107
|
+
|
|
108
|
+
histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
|
|
109
|
+
histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
|
|
108
110
|
end
|
|
109
111
|
|
|
110
112
|
# Here we report majority of things related to processing as we have access to the
|
|
111
113
|
# consumer
|
|
112
114
|
# @param event [Karafka::Core::Monitoring::Event]
|
|
113
115
|
def on_consumer_consumed(event)
|
|
114
|
-
|
|
116
|
+
consumer = event.payload[:caller]
|
|
117
|
+
messages = consumer.messages
|
|
115
118
|
metadata = messages.metadata
|
|
116
119
|
|
|
117
|
-
tags = default_tags +
|
|
118
|
-
"topic:#{metadata.topic}",
|
|
119
|
-
"partition:#{metadata.partition}"
|
|
120
|
-
]
|
|
120
|
+
tags = default_tags + consumer_tags(consumer)
|
|
121
121
|
|
|
122
122
|
count('consumer.messages', messages.count, tags: tags)
|
|
123
123
|
count('consumer.batches', 1, tags: tags)
|
|
@@ -130,26 +130,14 @@ module Karafka
|
|
|
130
130
|
|
|
131
131
|
# @param event [Karafka::Core::Monitoring::Event]
|
|
132
132
|
def on_consumer_revoked(event)
|
|
133
|
-
|
|
134
|
-
metadata = messages.metadata
|
|
135
|
-
|
|
136
|
-
tags = default_tags + [
|
|
137
|
-
"topic:#{metadata.topic}",
|
|
138
|
-
"partition:#{metadata.partition}"
|
|
139
|
-
]
|
|
133
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
|
140
134
|
|
|
141
135
|
count('consumer.revoked', 1, tags: tags)
|
|
142
136
|
end
|
|
143
137
|
|
|
144
138
|
# @param event [Karafka::Core::Monitoring::Event]
|
|
145
139
|
def on_consumer_shutdown(event)
|
|
146
|
-
|
|
147
|
-
metadata = messages.metadata
|
|
148
|
-
|
|
149
|
-
tags = default_tags + [
|
|
150
|
-
"topic:#{metadata.topic}",
|
|
151
|
-
"partition:#{metadata.partition}"
|
|
152
|
-
]
|
|
140
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
|
153
141
|
|
|
154
142
|
count('consumer.shutdown', 1, tags: tags)
|
|
155
143
|
end
|
|
@@ -202,14 +190,15 @@ module Karafka
|
|
|
202
190
|
# Reports a given metric statistics to Datadog
|
|
203
191
|
# @param metric [RdKafkaMetric] metric value object
|
|
204
192
|
# @param statistics [Hash] hash with all the statistics emitted
|
|
205
|
-
|
|
193
|
+
# @param base_tags [Array<String>] base tags we want to start with
|
|
194
|
+
def report_metric(metric, statistics, base_tags)
|
|
206
195
|
case metric.scope
|
|
207
196
|
when :root
|
|
208
197
|
public_send(
|
|
209
198
|
metric.type,
|
|
210
199
|
metric.name,
|
|
211
200
|
statistics.fetch(*metric.key_location),
|
|
212
|
-
tags:
|
|
201
|
+
tags: base_tags
|
|
213
202
|
)
|
|
214
203
|
when :brokers
|
|
215
204
|
statistics.fetch('brokers').each_value do |broker_statistics|
|
|
@@ -222,7 +211,7 @@ module Karafka
|
|
|
222
211
|
metric.type,
|
|
223
212
|
metric.name,
|
|
224
213
|
broker_statistics.dig(*metric.key_location),
|
|
225
|
-
tags:
|
|
214
|
+
tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
|
|
226
215
|
)
|
|
227
216
|
end
|
|
228
217
|
when :topics
|
|
@@ -236,7 +225,7 @@ module Karafka
|
|
|
236
225
|
metric.type,
|
|
237
226
|
metric.name,
|
|
238
227
|
partition_statistics.dig(*metric.key_location),
|
|
239
|
-
tags:
|
|
228
|
+
tags: base_tags + [
|
|
240
229
|
"topic:#{topic_name}",
|
|
241
230
|
"partition:#{partition_name}"
|
|
242
231
|
]
|
|
@@ -247,6 +236,22 @@ module Karafka
|
|
|
247
236
|
raise ArgumentError, metric.scope
|
|
248
237
|
end
|
|
249
238
|
end
|
|
239
|
+
|
|
240
|
+
# Builds basic per consumer tags for publication
|
|
241
|
+
#
|
|
242
|
+
# @param consumer [Karafka::BaseConsumer]
|
|
243
|
+
# @return [Array<String>]
|
|
244
|
+
def consumer_tags(consumer)
|
|
245
|
+
messages = consumer.messages
|
|
246
|
+
metadata = messages.metadata
|
|
247
|
+
consumer_group_id = consumer.topic.consumer_group.id
|
|
248
|
+
|
|
249
|
+
[
|
|
250
|
+
"topic:#{metadata.topic}",
|
|
251
|
+
"partition:#{metadata.partition}",
|
|
252
|
+
"consumer_group:#{consumer_group_id}"
|
|
253
|
+
]
|
|
254
|
+
end
|
|
250
255
|
end
|
|
251
256
|
end
|
|
252
257
|
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Karafka
|
|
4
|
+
module Instrumentation
|
|
5
|
+
# Namespace for vendor specific instrumentation
|
|
6
|
+
module Vendors
|
|
7
|
+
# Datadog specific instrumentation
|
|
8
|
+
module Datadog
|
|
9
|
+
# A karafka's logger listener for Datadog
|
|
10
|
+
# It depends on the 'ddtrace' gem
|
|
11
|
+
class LoggerListener
|
|
12
|
+
include ::Karafka::Core::Configurable
|
|
13
|
+
extend Forwardable
|
|
14
|
+
|
|
15
|
+
def_delegators :config, :client
|
|
16
|
+
|
|
17
|
+
# `Datadog::Tracing` client that we should use to trace stuff
|
|
18
|
+
setting :client
|
|
19
|
+
|
|
20
|
+
configure
|
|
21
|
+
|
|
22
|
+
# Log levels that we use in this particular listener
|
|
23
|
+
USED_LOG_LEVELS = %i[
|
|
24
|
+
info
|
|
25
|
+
error
|
|
26
|
+
fatal
|
|
27
|
+
].freeze
|
|
28
|
+
|
|
29
|
+
private_constant :USED_LOG_LEVELS
|
|
30
|
+
|
|
31
|
+
# @param block [Proc] configuration block
|
|
32
|
+
def initialize(&block)
|
|
33
|
+
configure
|
|
34
|
+
setup(&block) if block
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# @param block [Proc] configuration block
|
|
38
|
+
# @note We define this alias to be consistent with `WaterDrop#setup`
|
|
39
|
+
def setup(&block)
|
|
40
|
+
configure(&block)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Prints info about the fact that a given job has started
|
|
44
|
+
#
|
|
45
|
+
# @param event [Dry::Events::Event] event details including payload
|
|
46
|
+
def on_worker_process(event)
|
|
47
|
+
current_span = client.trace('karafka.consumer')
|
|
48
|
+
push_tags
|
|
49
|
+
|
|
50
|
+
job = event[:job]
|
|
51
|
+
job_type = job.class.to_s.split('::').last
|
|
52
|
+
consumer = job.executor.topic.consumer
|
|
53
|
+
topic = job.executor.topic.name
|
|
54
|
+
|
|
55
|
+
current_span.resource = "#{consumer}#consume"
|
|
56
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
|
|
57
|
+
|
|
58
|
+
pop_tags
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Prints info about the fact that a given job has finished
|
|
62
|
+
#
|
|
63
|
+
# @param event [Dry::Events::Event] event details including payload
|
|
64
|
+
def on_worker_processed(event)
|
|
65
|
+
push_tags
|
|
66
|
+
|
|
67
|
+
job = event[:job]
|
|
68
|
+
time = event[:time]
|
|
69
|
+
job_type = job.class.to_s.split('::').last
|
|
70
|
+
consumer = job.executor.topic.consumer
|
|
71
|
+
topic = job.executor.topic.name
|
|
72
|
+
|
|
73
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
|
|
74
|
+
|
|
75
|
+
current_span = client.active_span
|
|
76
|
+
current_span.finish if current_span.present?
|
|
77
|
+
|
|
78
|
+
pop_tags
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# There are many types of errors that can occur in many places, but we provide a single
|
|
82
|
+
# handler for all of them to simplify error instrumentation.
|
|
83
|
+
# @param event [Dry::Events::Event] event details including payload
|
|
84
|
+
def on_error_occurred(event)
|
|
85
|
+
push_tags
|
|
86
|
+
|
|
87
|
+
error = event[:error]
|
|
88
|
+
client.active_span&.set_error(error)
|
|
89
|
+
|
|
90
|
+
case event[:type]
|
|
91
|
+
when 'consumer.consume.error'
|
|
92
|
+
error "Consumer consuming error: #{error}"
|
|
93
|
+
when 'consumer.revoked.error'
|
|
94
|
+
error "Consumer on revoked failed due to an error: #{error}"
|
|
95
|
+
when 'consumer.before_enqueue.error'
|
|
96
|
+
error "Consumer before enqueue failed due to an error: #{error}"
|
|
97
|
+
when 'consumer.before_consume.error'
|
|
98
|
+
error "Consumer before consume failed due to an error: #{error}"
|
|
99
|
+
when 'consumer.after_consume.error'
|
|
100
|
+
error "Consumer after consume failed due to an error: #{error}"
|
|
101
|
+
when 'consumer.shutdown.error'
|
|
102
|
+
error "Consumer on shutdown failed due to an error: #{error}"
|
|
103
|
+
when 'worker.process.error'
|
|
104
|
+
fatal "Worker processing failed due to an error: #{error}"
|
|
105
|
+
when 'connection.listener.fetch_loop.error'
|
|
106
|
+
error "Listener fetch loop error: #{error}"
|
|
107
|
+
when 'runner.call.error'
|
|
108
|
+
fatal "Runner crashed due to an error: #{error}"
|
|
109
|
+
when 'app.stopping.error'
|
|
110
|
+
error 'Forceful Karafka server stop'
|
|
111
|
+
when 'librdkafka.error'
|
|
112
|
+
error "librdkafka internal error occurred: #{error}"
|
|
113
|
+
# Those will only occur when retries in the client fail and when they did not stop
|
|
114
|
+
# after back-offs
|
|
115
|
+
when 'connection.client.poll.error'
|
|
116
|
+
error "Data polling error occurred: #{error}"
|
|
117
|
+
else
|
|
118
|
+
pop_tags
|
|
119
|
+
# This should never happen. Please contact the maintainers
|
|
120
|
+
raise Errors::UnsupportedCaseError, event
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
pop_tags
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
USED_LOG_LEVELS.each do |log_level|
|
|
127
|
+
define_method log_level do |*args|
|
|
128
|
+
Karafka.logger.send(log_level, *args)
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Pushes datadog's tags to the logger
|
|
133
|
+
# This is required when tracing log lines asynchronously to correlate logs of the same
|
|
134
|
+
# process together
|
|
135
|
+
def push_tags
|
|
136
|
+
return unless Karafka.logger.respond_to?(:push_tags)
|
|
137
|
+
|
|
138
|
+
Karafka.logger.push_tags(client.log_correlation)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Pops datadog's tags from the logger
|
|
142
|
+
# This is required when tracing log lines asynchronously to avoid the logs of the
|
|
143
|
+
# different processes to be correlated
|
|
144
|
+
def pop_tags
|
|
145
|
+
return unless Karafka.logger.respond_to?(:pop_tags)
|
|
146
|
+
|
|
147
|
+
Karafka.logger.pop_tags
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# This Karafka component is a Pro component.
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
7
|
# repository and their usage requires commercial license agreement.
|
|
6
8
|
#
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# This Karafka component is a Pro component.
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
7
|
# repository and their usage requires commercial license agreement.
|
|
6
8
|
#
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# This Karafka component is a Pro component.
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
7
|
# repository and their usage requires commercial license agreement.
|
|
6
8
|
#
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# This Karafka component is a Pro component.
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
7
|
# repository and their usage requires commercial license agreement.
|
|
6
8
|
#
|
|
@@ -18,90 +20,6 @@ module Karafka
|
|
|
18
20
|
# @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
|
|
19
21
|
# after each batch is processed.
|
|
20
22
|
class BaseConsumer < Karafka::BaseConsumer
|
|
21
|
-
# Pause for tops 31 years
|
|
22
|
-
MAX_PAUSE_TIME = 1_000_000_000_000
|
|
23
|
-
|
|
24
|
-
private_constant :MAX_PAUSE_TIME
|
|
25
|
-
|
|
26
|
-
# Pauses processing of a given partition until we're done with the processing.
|
|
27
|
-
# This ensures, that we can easily poll not reaching the `max.poll.interval`
|
|
28
|
-
# @note This needs to happen in the listener thread, because we cannot wait on this being
|
|
29
|
-
# executed in the workers. Workers may be already running some LRJ jobs that are blocking
|
|
30
|
-
# all the threads until finished, yet unless we pause the incoming partitions information,
|
|
31
|
-
# we may be kicked out of the consumer group due to not polling often enough
|
|
32
|
-
def on_before_enqueue
|
|
33
|
-
return unless topic.long_running_job?
|
|
34
|
-
|
|
35
|
-
# This ensures that when running LRJ with VP, things operate as expected run only once
|
|
36
|
-
# for all the virtual partitions collectively
|
|
37
|
-
coordinator.on_enqueued do
|
|
38
|
-
# Pause at the first message in a batch. That way in case of a crash, we will not loose
|
|
39
|
-
# any messages.
|
|
40
|
-
#
|
|
41
|
-
# For VP it applies the same way and since VP cannot be used with MOM we should not have
|
|
42
|
-
# any edge cases here.
|
|
43
|
-
pause(coordinator.seek_offset, MAX_PAUSE_TIME)
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Runs extra logic after consumption that is related to handling long-running jobs
|
|
48
|
-
# @note This overwrites the '#on_after_consume' from the base consumer
|
|
49
|
-
def on_after_consume
|
|
50
|
-
coordinator.on_finished do |last_group_message|
|
|
51
|
-
on_after_consume_regular(last_group_message)
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Trigger method for running on partition revocation.
|
|
56
|
-
#
|
|
57
|
-
# @private
|
|
58
|
-
def on_revoked
|
|
59
|
-
# We do not want to resume on revocation in case of a LRJ.
|
|
60
|
-
# For LRJ we resume after the successful processing or do a backoff pause in case of a
|
|
61
|
-
# failure. Double non-blocking resume could cause problems in coordination.
|
|
62
|
-
resume unless topic.long_running_job?
|
|
63
|
-
|
|
64
|
-
coordinator.revoke
|
|
65
|
-
|
|
66
|
-
Karafka.monitor.instrument('consumer.revoked', caller: self) do
|
|
67
|
-
revoked
|
|
68
|
-
end
|
|
69
|
-
rescue StandardError => e
|
|
70
|
-
Karafka.monitor.instrument(
|
|
71
|
-
'error.occurred',
|
|
72
|
-
error: e,
|
|
73
|
-
caller: self,
|
|
74
|
-
type: 'consumer.revoked.error'
|
|
75
|
-
)
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
private
|
|
79
|
-
|
|
80
|
-
# Handles the post-consumption flow depending on topic settings
|
|
81
|
-
#
|
|
82
|
-
# @param last_group_message [Karafka::Messages::Message]
|
|
83
|
-
def on_after_consume_regular(last_group_message)
|
|
84
|
-
if coordinator.success?
|
|
85
|
-
coordinator.pause_tracker.reset
|
|
86
|
-
|
|
87
|
-
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
|
88
|
-
# with manual offset management
|
|
89
|
-
# Mark as consumed only if manual offset management is not on
|
|
90
|
-
mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
|
|
91
|
-
|
|
92
|
-
# If this is not a long-running job there is nothing for us to do here
|
|
93
|
-
return unless topic.long_running_job?
|
|
94
|
-
|
|
95
|
-
seek(coordinator.seek_offset) unless revoked?
|
|
96
|
-
|
|
97
|
-
resume
|
|
98
|
-
else
|
|
99
|
-
# If processing failed, we need to pause
|
|
100
|
-
# For long running job this will overwrite the default never-ending pause and will cause
|
|
101
|
-
# the processing to keep going after the error backoff
|
|
102
|
-
pause(coordinator.seek_offset)
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
23
|
end
|
|
106
24
|
end
|
|
107
25
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# This Karafka component is a Pro component.
|
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
|
5
|
+
#
|
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
|
5
7
|
# repository and their usage requires commercial license agreement.
|
|
6
8
|
#
|
|
@@ -13,37 +15,41 @@ module Karafka
|
|
|
13
15
|
module Pro
|
|
14
16
|
# Loader requires and loads all the pro components only when they are needed
|
|
15
17
|
class Loader
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
processing/scheduler
|
|
21
|
-
processing/jobs/consume_non_blocking
|
|
22
|
-
processing/jobs_builder
|
|
23
|
-
processing/coordinator
|
|
24
|
-
processing/partitioner
|
|
25
|
-
contracts/base
|
|
26
|
-
contracts/consumer_group
|
|
27
|
-
contracts/consumer_group_topic
|
|
28
|
-
routing/topic_extensions
|
|
29
|
-
routing/builder_extensions
|
|
30
|
-
active_job/consumer
|
|
18
|
+
# There seems to be a conflict in between using two Zeitwerk instances and it makes lookups
|
|
19
|
+
# for nested namespaces instead of creating them.
|
|
20
|
+
# We require those not to deal with this and then all works as expected
|
|
21
|
+
FORCE_LOADED = %w[
|
|
31
22
|
active_job/dispatcher
|
|
32
|
-
|
|
23
|
+
processing/jobs/consume_non_blocking
|
|
24
|
+
processing/strategies/base
|
|
25
|
+
routing/features/base
|
|
33
26
|
].freeze
|
|
34
27
|
|
|
35
|
-
|
|
28
|
+
# Zeitwerk pro loader
|
|
29
|
+
# We need to have one per process, that's why it's set as a constant
|
|
30
|
+
PRO_LOADER = Zeitwerk::Loader.new
|
|
31
|
+
|
|
32
|
+
private_constant :PRO_LOADER
|
|
36
33
|
|
|
37
34
|
class << self
|
|
35
|
+
# Requires all the components without using them anywhere
|
|
36
|
+
def require_all
|
|
37
|
+
FORCE_LOADED.each { |file| require_relative(file) }
|
|
38
|
+
|
|
39
|
+
PRO_LOADER.push_dir(Karafka.core_root.join('pro'), namespace: Karafka::Pro)
|
|
40
|
+
PRO_LOADER.setup
|
|
41
|
+
PRO_LOADER.eager_load
|
|
42
|
+
end
|
|
43
|
+
|
|
38
44
|
# Loads all the pro components and configures them wherever it is expected
|
|
39
45
|
# @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
|
|
40
46
|
# components
|
|
41
47
|
def setup(config)
|
|
42
|
-
|
|
48
|
+
require_all
|
|
43
49
|
|
|
44
50
|
reconfigure(config)
|
|
45
51
|
|
|
46
|
-
|
|
52
|
+
load_topic_features
|
|
47
53
|
end
|
|
48
54
|
|
|
49
55
|
private
|
|
@@ -57,6 +63,7 @@ module Karafka
|
|
|
57
63
|
icfg.processing.partitioner_class = Processing::Partitioner
|
|
58
64
|
icfg.processing.scheduler = Processing::Scheduler.new
|
|
59
65
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
|
66
|
+
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
|
60
67
|
|
|
61
68
|
icfg.active_job.consumer_class = ActiveJob::Consumer
|
|
62
69
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
|
@@ -65,10 +72,10 @@ module Karafka
|
|
|
65
72
|
config.monitor.subscribe(PerformanceTracker.instance)
|
|
66
73
|
end
|
|
67
74
|
|
|
68
|
-
# Loads
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
::Karafka::Routing::
|
|
75
|
+
# Loads the Pro features of Karafka
|
|
76
|
+
# @note Object space lookup is not the fastest but we do it once during boot, so it's ok
|
|
77
|
+
def load_topic_features
|
|
78
|
+
::Karafka::Pro::Routing::Features::Base.load_all
|
|
72
79
|
end
|
|
73
80
|
end
|
|
74
81
|
end
|