karafka 2.0.15 → 2.0.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/.github/workflows/ci.yml +2 -1
- data/.rspec +2 -0
- data/CHANGELOG.md +98 -0
- data/Gemfile.lock +14 -14
- data/LICENSE +1 -1
- data/README.md +4 -1
- data/bin/integrations +3 -2
- data/bin/rspecs +4 -0
- data/config/errors.yml +10 -4
- data/lib/active_job/karafka.rb +0 -6
- data/lib/karafka/active_job/consumer.rb +1 -0
- data/lib/karafka/admin.rb +2 -2
- data/lib/karafka/base_consumer.rb +31 -21
- data/lib/karafka/connection/listener.rb +6 -4
- data/lib/karafka/contracts/consumer_group.rb +0 -14
- data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
- data/lib/karafka/errors.rb +6 -4
- data/lib/karafka/instrumentation/logger_listener.rb +25 -11
- data/lib/karafka/instrumentation/notifications.rb +2 -0
- data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
- data/lib/karafka/instrumentation/vendors/datadog/listener.rb +37 -32
- data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
- data/lib/karafka/pro/active_job/consumer.rb +3 -1
- data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
- data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
- data/lib/karafka/pro/base_consumer.rb +3 -85
- data/lib/karafka/pro/loader.rb +31 -24
- data/lib/karafka/pro/performance_tracker.rb +3 -1
- data/lib/karafka/pro/processing/coordinator.rb +16 -1
- data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
- data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
- data/lib/karafka/pro/processing/partitioner.rb +3 -1
- data/lib/karafka/pro/processing/scheduler.rb +3 -1
- data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
- data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
- data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
- data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
- data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
- data/lib/karafka/pro/processing/strategies/base.rb +26 -0
- data/lib/karafka/pro/processing/strategies/default.rb +69 -0
- data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
- data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
- data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
- data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
- data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
- data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
- data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
- data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
- data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
- data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
- data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
- data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
- data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
- data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
- data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
- data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
- data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
- data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
- data/lib/karafka/processing/coordinator.rb +1 -1
- data/lib/karafka/processing/executor.rb +6 -0
- data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
- data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
- data/lib/karafka/processing/strategies/base.rb +37 -0
- data/lib/karafka/processing/strategies/default.rb +52 -0
- data/lib/karafka/processing/strategies/dlq.rb +77 -0
- data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
- data/lib/karafka/processing/strategies/mom.rb +29 -0
- data/lib/karafka/processing/strategy_selector.rb +30 -0
- data/lib/karafka/railtie.rb +9 -8
- data/lib/karafka/routing/builder.rb +6 -0
- data/lib/karafka/routing/features/active_job/builder.rb +33 -0
- data/lib/karafka/routing/features/active_job/config.rb +15 -0
- data/lib/karafka/routing/features/active_job/contract.rb +41 -0
- data/lib/karafka/routing/features/active_job/topic.rb +33 -0
- data/lib/karafka/routing/features/active_job.rb +13 -0
- data/lib/karafka/routing/features/base/expander.rb +53 -0
- data/lib/karafka/routing/features/base.rb +34 -0
- data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
- data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
- data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
- data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
- data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
- data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
- data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
- data/lib/karafka/routing/topic.rb +2 -10
- data/lib/karafka/server.rb +4 -2
- data/lib/karafka/setup/attributes_map.rb +5 -0
- data/lib/karafka/setup/config.rb +4 -4
- data/lib/karafka/time_trackers/pause.rb +21 -12
- data/lib/karafka/version.rb +1 -1
- data/lib/karafka.rb +7 -11
- data.tar.gz.sig +0 -0
- metadata +57 -9
- metadata.gz.sig +0 -0
- data/lib/karafka/active_job/routing/extensions.rb +0 -33
- data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
- data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
- data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
@@ -1 +1 @@
|
|
1
|
-
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by 
{type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags_delta{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
1
|
+
{"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by 
{consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by 
{topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption 
lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max 
time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch 
size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
|
@@ -72,9 +72,12 @@ module Karafka
|
|
72
72
|
# @param event [Karafka::Core::Monitoring::Event]
|
73
73
|
def on_statistics_emitted(event)
|
74
74
|
statistics = event[:statistics]
|
75
|
+
consumer_group_id = event[:consumer_group_id]
|
76
|
+
|
77
|
+
base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
|
75
78
|
|
76
79
|
rd_kafka_metrics.each do |metric|
|
77
|
-
report_metric(metric, statistics)
|
80
|
+
report_metric(metric, statistics, base_tags)
|
78
81
|
end
|
79
82
|
end
|
80
83
|
|
@@ -85,12 +88,7 @@ module Karafka
|
|
85
88
|
extra_tags = ["type:#{event[:type]}"]
|
86
89
|
|
87
90
|
if event.payload[:caller].respond_to?(:messages)
|
88
|
-
|
89
|
-
|
90
|
-
extra_tags += [
|
91
|
-
"topic:#{metadata.topic}",
|
92
|
-
"partition:#{metadata.partition}"
|
93
|
-
]
|
91
|
+
extra_tags += consumer_tags(event.payload[:caller])
|
94
92
|
end
|
95
93
|
|
96
94
|
count('error_occurred', 1, tags: default_tags + extra_tags)
|
@@ -103,21 +101,23 @@ module Karafka
|
|
103
101
|
time_taken = event[:time]
|
104
102
|
messages_count = event[:messages_buffer].size
|
105
103
|
|
106
|
-
|
107
|
-
|
104
|
+
consumer_group_id = event[:subscription_group].consumer_group_id
|
105
|
+
|
106
|
+
extra_tags = ["consumer_group:#{consumer_group_id}"]
|
107
|
+
|
108
|
+
histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
|
109
|
+
histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
|
108
110
|
end
|
109
111
|
|
110
112
|
# Here we report majority of things related to processing as we have access to the
|
111
113
|
# consumer
|
112
114
|
# @param event [Karafka::Core::Monitoring::Event]
|
113
115
|
def on_consumer_consumed(event)
|
114
|
-
|
116
|
+
consumer = event.payload[:caller]
|
117
|
+
messages = consumer.messages
|
115
118
|
metadata = messages.metadata
|
116
119
|
|
117
|
-
tags = default_tags +
|
118
|
-
"topic:#{metadata.topic}",
|
119
|
-
"partition:#{metadata.partition}"
|
120
|
-
]
|
120
|
+
tags = default_tags + consumer_tags(consumer)
|
121
121
|
|
122
122
|
count('consumer.messages', messages.count, tags: tags)
|
123
123
|
count('consumer.batches', 1, tags: tags)
|
@@ -130,26 +130,14 @@ module Karafka
|
|
130
130
|
|
131
131
|
# @param event [Karafka::Core::Monitoring::Event]
|
132
132
|
def on_consumer_revoked(event)
|
133
|
-
|
134
|
-
metadata = messages.metadata
|
135
|
-
|
136
|
-
tags = default_tags + [
|
137
|
-
"topic:#{metadata.topic}",
|
138
|
-
"partition:#{metadata.partition}"
|
139
|
-
]
|
133
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
140
134
|
|
141
135
|
count('consumer.revoked', 1, tags: tags)
|
142
136
|
end
|
143
137
|
|
144
138
|
# @param event [Karafka::Core::Monitoring::Event]
|
145
139
|
def on_consumer_shutdown(event)
|
146
|
-
|
147
|
-
metadata = messages.metadata
|
148
|
-
|
149
|
-
tags = default_tags + [
|
150
|
-
"topic:#{metadata.topic}",
|
151
|
-
"partition:#{metadata.partition}"
|
152
|
-
]
|
140
|
+
tags = default_tags + consumer_tags(event.payload[:caller])
|
153
141
|
|
154
142
|
count('consumer.shutdown', 1, tags: tags)
|
155
143
|
end
|
@@ -202,14 +190,15 @@ module Karafka
|
|
202
190
|
# Reports a given metric statistics to Datadog
|
203
191
|
# @param metric [RdKafkaMetric] metric value object
|
204
192
|
# @param statistics [Hash] hash with all the statistics emitted
|
205
|
-
|
193
|
+
# @param base_tags [Array<String>] base tags we want to start with
|
194
|
+
def report_metric(metric, statistics, base_tags)
|
206
195
|
case metric.scope
|
207
196
|
when :root
|
208
197
|
public_send(
|
209
198
|
metric.type,
|
210
199
|
metric.name,
|
211
200
|
statistics.fetch(*metric.key_location),
|
212
|
-
tags:
|
201
|
+
tags: base_tags
|
213
202
|
)
|
214
203
|
when :brokers
|
215
204
|
statistics.fetch('brokers').each_value do |broker_statistics|
|
@@ -222,7 +211,7 @@ module Karafka
|
|
222
211
|
metric.type,
|
223
212
|
metric.name,
|
224
213
|
broker_statistics.dig(*metric.key_location),
|
225
|
-
tags:
|
214
|
+
tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
|
226
215
|
)
|
227
216
|
end
|
228
217
|
when :topics
|
@@ -236,7 +225,7 @@ module Karafka
|
|
236
225
|
metric.type,
|
237
226
|
metric.name,
|
238
227
|
partition_statistics.dig(*metric.key_location),
|
239
|
-
tags:
|
228
|
+
tags: base_tags + [
|
240
229
|
"topic:#{topic_name}",
|
241
230
|
"partition:#{partition_name}"
|
242
231
|
]
|
@@ -247,6 +236,22 @@ module Karafka
|
|
247
236
|
raise ArgumentError, metric.scope
|
248
237
|
end
|
249
238
|
end
|
239
|
+
|
240
|
+
# Builds basic per consumer tags for publication
|
241
|
+
#
|
242
|
+
# @param consumer [Karafka::BaseConsumer]
|
243
|
+
# @return [Array<String>]
|
244
|
+
def consumer_tags(consumer)
|
245
|
+
messages = consumer.messages
|
246
|
+
metadata = messages.metadata
|
247
|
+
consumer_group_id = consumer.topic.consumer_group.id
|
248
|
+
|
249
|
+
[
|
250
|
+
"topic:#{metadata.topic}",
|
251
|
+
"partition:#{metadata.partition}",
|
252
|
+
"consumer_group:#{consumer_group_id}"
|
253
|
+
]
|
254
|
+
end
|
250
255
|
end
|
251
256
|
end
|
252
257
|
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Karafka
|
4
|
+
module Instrumentation
|
5
|
+
# Namespace for vendor specific instrumentation
|
6
|
+
module Vendors
|
7
|
+
# Datadog specific instrumentation
|
8
|
+
module Datadog
|
9
|
+
# A karafka's logger listener for Datadog
|
10
|
+
# It depends on the 'ddtrace' gem
|
11
|
+
class LoggerListener
|
12
|
+
include ::Karafka::Core::Configurable
|
13
|
+
extend Forwardable
|
14
|
+
|
15
|
+
def_delegators :config, :client
|
16
|
+
|
17
|
+
# `Datadog::Tracing` client that we should use to trace stuff
|
18
|
+
setting :client
|
19
|
+
|
20
|
+
configure
|
21
|
+
|
22
|
+
# Log levels that we use in this particular listener
|
23
|
+
USED_LOG_LEVELS = %i[
|
24
|
+
info
|
25
|
+
error
|
26
|
+
fatal
|
27
|
+
].freeze
|
28
|
+
|
29
|
+
private_constant :USED_LOG_LEVELS
|
30
|
+
|
31
|
+
# @param block [Proc] configuration block
|
32
|
+
def initialize(&block)
|
33
|
+
configure
|
34
|
+
setup(&block) if block
|
35
|
+
end
|
36
|
+
|
37
|
+
# @param block [Proc] configuration block
|
38
|
+
# @note We define this alias to be consistent with `WaterDrop#setup`
|
39
|
+
def setup(&block)
|
40
|
+
configure(&block)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Prints info about the fact that a given job has started
|
44
|
+
#
|
45
|
+
# @param event [Dry::Events::Event] event details including payload
|
46
|
+
def on_worker_process(event)
|
47
|
+
current_span = client.trace('karafka.consumer')
|
48
|
+
push_tags
|
49
|
+
|
50
|
+
job = event[:job]
|
51
|
+
job_type = job.class.to_s.split('::').last
|
52
|
+
consumer = job.executor.topic.consumer
|
53
|
+
topic = job.executor.topic.name
|
54
|
+
|
55
|
+
current_span.resource = "#{consumer}#consume"
|
56
|
+
info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
|
57
|
+
|
58
|
+
pop_tags
|
59
|
+
end
|
60
|
+
|
61
|
+
# Prints info about the fact that a given job has finished and finishes the span that is
# currently active on the tracing client (if there is one)
#
# @param event [Karafka::Core::Monitoring::Event] event details including payload
def on_worker_processed(event)
  push_tags

  job = event[:job]
  time = event[:time]
  job_type = job.class.to_s.split('::').last
  consumer = job.executor.topic.consumer
  topic = job.executor.topic.name

  info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"

  # `#present?` is an ActiveSupport extension and is not available in plain Ruby processes.
  # Safe navigation gives us the same "finish only when there is a span" behaviour without
  # depending on Rails being loaded.
  client.active_span&.finish

  pop_tags
end
|
80
|
+
|
81
|
+
# There are many types of errors that can occur in many places, but we provide a single
# handler for all of them to simplify error instrumentation.
#
# The error is attached to the currently active span (if any) and then logged with a message
# and a log level that depend on the error type.
#
# @param event [Karafka::Core::Monitoring::Event] event details including payload
# @raise [Errors::UnsupportedCaseError] when the event type is not one of the handled types
def on_error_occurred(event)
  push_tags

  # Everything after pushing the tags runs under `ensure`, so the tags are always popped
  # exactly once: on success, on an unsupported type and when the span or logger call fails
  begin
    error = event[:error]
    client.active_span&.set_error(error)

    case event[:type]
    when 'consumer.consume.error'
      error "Consumer consuming error: #{error}"
    when 'consumer.revoked.error'
      error "Consumer on revoked failed due to an error: #{error}"
    when 'consumer.before_enqueue.error'
      error "Consumer before enqueue failed due to an error: #{error}"
    when 'consumer.before_consume.error'
      error "Consumer before consume failed due to an error: #{error}"
    when 'consumer.after_consume.error'
      error "Consumer after consume failed due to an error: #{error}"
    when 'consumer.shutdown.error'
      error "Consumer on shutdown failed due to an error: #{error}"
    when 'worker.process.error'
      fatal "Worker processing failed due to an error: #{error}"
    when 'connection.listener.fetch_loop.error'
      error "Listener fetch loop error: #{error}"
    when 'runner.call.error'
      fatal "Runner crashed due to an error: #{error}"
    when 'app.stopping.error'
      error 'Forceful Karafka server stop'
    when 'librdkafka.error'
      error "librdkafka internal error occurred: #{error}"
    # Those will only occur when retries in the client fail and when they did not stop
    # after back-offs
    when 'connection.client.poll.error'
      error "Data polling error occurred: #{error}"
    else
      # This should never happen. Please contact the maintainers
      raise Errors::UnsupportedCaseError, event
    end
  ensure
    pop_tags
  end
end
|
125
|
+
|
126
|
+
USED_LOG_LEVELS.each do |log_level|
|
127
|
+
define_method log_level do |*args|
|
128
|
+
Karafka.logger.send(log_level, *args)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# Pushes datadog's tags to the logger
|
133
|
+
# This is required when tracing log lines asynchronously to correlate logs of the same
|
134
|
+
# process together
|
135
|
+
def push_tags
|
136
|
+
return unless Karafka.logger.respond_to?(:push_tags)
|
137
|
+
|
138
|
+
Karafka.logger.push_tags(client.log_correlation)
|
139
|
+
end
|
140
|
+
|
141
|
+
# Pops datadog's tags from the logger
|
142
|
+
# This is required when tracing log lines asynchronously to avoid the logs of the
|
143
|
+
# different processes to be correlated
|
144
|
+
def pop_tags
|
145
|
+
return unless Karafka.logger.respond_to?(:pop_tags)
|
146
|
+
|
147
|
+
Karafka.logger.pop_tags
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -18,90 +20,6 @@ module Karafka
|
|
18
20
|
# @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
|
19
21
|
# after each batch is processed.
|
20
22
|
class BaseConsumer < Karafka::BaseConsumer
|
21
|
-
# Pause for tops 31 years
|
22
|
-
MAX_PAUSE_TIME = 1_000_000_000_000
|
23
|
-
|
24
|
-
private_constant :MAX_PAUSE_TIME
|
25
|
-
|
26
|
-
# Pauses processing of a given partition until we're done with the processing.
|
27
|
-
# This ensures, that we can easily poll not reaching the `max.poll.interval`
|
28
|
-
# @note This needs to happen in the listener thread, because we cannot wait on this being
|
29
|
-
# executed in the workers. Workers may be already running some LRJ jobs that are blocking
|
30
|
-
# all the threads until finished, yet unless we pause the incoming partitions information,
|
31
|
-
# we may be kicked out of the consumer group due to not polling often enough
|
32
|
-
def on_before_enqueue
|
33
|
-
return unless topic.long_running_job?
|
34
|
-
|
35
|
-
# This ensures that when running LRJ with VP, things operate as expected run only once
|
36
|
-
# for all the virtual partitions collectively
|
37
|
-
coordinator.on_enqueued do
|
38
|
-
# Pause at the first message in a batch. That way in case of a crash, we will not loose
|
39
|
-
# any messages.
|
40
|
-
#
|
41
|
-
# For VP it applies the same way and since VP cannot be used with MOM we should not have
|
42
|
-
# any edge cases here.
|
43
|
-
pause(coordinator.seek_offset, MAX_PAUSE_TIME)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
# Runs extra logic after consumption that is related to handling long-running jobs
|
48
|
-
# @note This overwrites the '#on_after_consume' from the base consumer
|
49
|
-
def on_after_consume
|
50
|
-
coordinator.on_finished do |last_group_message|
|
51
|
-
on_after_consume_regular(last_group_message)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
# Trigger method for running on partition revocation.
|
56
|
-
#
|
57
|
-
# @private
|
58
|
-
def on_revoked
|
59
|
-
# We do not want to resume on revocation in case of a LRJ.
|
60
|
-
# For LRJ we resume after the successful processing or do a backoff pause in case of a
|
61
|
-
# failure. Double non-blocking resume could cause problems in coordination.
|
62
|
-
resume unless topic.long_running_job?
|
63
|
-
|
64
|
-
coordinator.revoke
|
65
|
-
|
66
|
-
Karafka.monitor.instrument('consumer.revoked', caller: self) do
|
67
|
-
revoked
|
68
|
-
end
|
69
|
-
rescue StandardError => e
|
70
|
-
Karafka.monitor.instrument(
|
71
|
-
'error.occurred',
|
72
|
-
error: e,
|
73
|
-
caller: self,
|
74
|
-
type: 'consumer.revoked.error'
|
75
|
-
)
|
76
|
-
end
|
77
|
-
|
78
|
-
private
|
79
|
-
|
80
|
-
# Handles the post-consumption flow depending on topic settings
|
81
|
-
#
|
82
|
-
# @param last_group_message [Karafka::Messages::Message]
|
83
|
-
def on_after_consume_regular(last_group_message)
|
84
|
-
if coordinator.success?
|
85
|
-
coordinator.pause_tracker.reset
|
86
|
-
|
87
|
-
# We use the non-blocking one here. If someone needs the blocking one, can implement it
|
88
|
-
# with manual offset management
|
89
|
-
# Mark as consumed only if manual offset management is not on
|
90
|
-
mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
|
91
|
-
|
92
|
-
# If this is not a long-running job there is nothing for us to do here
|
93
|
-
return unless topic.long_running_job?
|
94
|
-
|
95
|
-
seek(coordinator.seek_offset) unless revoked?
|
96
|
-
|
97
|
-
resume
|
98
|
-
else
|
99
|
-
# If processing failed, we need to pause
|
100
|
-
# For long running job this will overwrite the default never-ending pause and will cause
|
101
|
-
# the processing to keep going after the error backoff
|
102
|
-
pause(coordinator.seek_offset)
|
103
|
-
end
|
104
|
-
end
|
105
23
|
end
|
106
24
|
end
|
107
25
|
end
|
data/lib/karafka/pro/loader.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# This Karafka component is a Pro component.
|
3
|
+
# This Karafka component is a Pro component under a commercial license.
|
4
|
+
# This Karafka component is NOT licensed under LGPL.
|
5
|
+
#
|
4
6
|
# All of the commercial components are present in the lib/karafka/pro directory of this
|
5
7
|
# repository and their usage requires commercial license agreement.
|
6
8
|
#
|
@@ -13,37 +15,41 @@ module Karafka
|
|
13
15
|
module Pro
|
14
16
|
# Loader requires and loads all the pro components only when they are needed
|
15
17
|
class Loader
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
processing/scheduler
|
21
|
-
processing/jobs/consume_non_blocking
|
22
|
-
processing/jobs_builder
|
23
|
-
processing/coordinator
|
24
|
-
processing/partitioner
|
25
|
-
contracts/base
|
26
|
-
contracts/consumer_group
|
27
|
-
contracts/consumer_group_topic
|
28
|
-
routing/topic_extensions
|
29
|
-
routing/builder_extensions
|
30
|
-
active_job/consumer
|
18
|
+
# There seems to be a conflict in between using two Zeitwerk instances and it makes lookups
|
19
|
+
# for nested namespaces instead of creating them.
|
20
|
+
# We require those not to deal with this and then all works as expected
|
21
|
+
FORCE_LOADED = %w[
|
31
22
|
active_job/dispatcher
|
32
|
-
|
23
|
+
processing/jobs/consume_non_blocking
|
24
|
+
processing/strategies/base
|
25
|
+
routing/features/base
|
33
26
|
].freeze
|
34
27
|
|
35
|
-
|
28
|
+
# Zeitwerk pro loader
|
29
|
+
# We need to have one per process, that's why it's set as a constant
|
30
|
+
PRO_LOADER = Zeitwerk::Loader.new
|
31
|
+
|
32
|
+
private_constant :PRO_LOADER
|
36
33
|
|
37
34
|
class << self
|
35
|
+
# Requires all the components without using them anywhere
|
36
|
+
def require_all
|
37
|
+
FORCE_LOADED.each { |file| require_relative(file) }
|
38
|
+
|
39
|
+
PRO_LOADER.push_dir(Karafka.core_root.join('pro'), namespace: Karafka::Pro)
|
40
|
+
PRO_LOADER.setup
|
41
|
+
PRO_LOADER.eager_load
|
42
|
+
end
|
43
|
+
|
38
44
|
# Loads all the pro components and configures them wherever it is expected
|
39
45
|
# @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
|
40
46
|
# components
|
41
47
|
def setup(config)
|
42
|
-
|
48
|
+
require_all
|
43
49
|
|
44
50
|
reconfigure(config)
|
45
51
|
|
46
|
-
|
52
|
+
load_topic_features
|
47
53
|
end
|
48
54
|
|
49
55
|
private
|
@@ -57,6 +63,7 @@ module Karafka
|
|
57
63
|
icfg.processing.partitioner_class = Processing::Partitioner
|
58
64
|
icfg.processing.scheduler = Processing::Scheduler.new
|
59
65
|
icfg.processing.jobs_builder = Processing::JobsBuilder.new
|
66
|
+
icfg.processing.strategy_selector = Processing::StrategySelector.new
|
60
67
|
|
61
68
|
icfg.active_job.consumer_class = ActiveJob::Consumer
|
62
69
|
icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
|
@@ -65,10 +72,10 @@ module Karafka
|
|
65
72
|
config.monitor.subscribe(PerformanceTracker.instance)
|
66
73
|
end
|
67
74
|
|
68
|
-
# Loads
|
69
|
-
|
70
|
-
|
71
|
-
::Karafka::Routing::
|
75
|
+
# Loads the Pro features of Karafka
|
76
|
+
# @note Object space lookup is not the fastest but we do it once during boot, so it's ok
|
77
|
+
def load_topic_features
|
78
|
+
::Karafka::Pro::Routing::Features::Base.load_all
|
72
79
|
end
|
73
80
|
end
|
74
81
|
end
|