karafka 2.0.15 → 2.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.rspec +2 -0
  5. data/CHANGELOG.md +78 -0
  6. data/Gemfile.lock +14 -14
  7. data/LICENSE +1 -1
  8. data/README.md +2 -1
  9. data/bin/integrations +3 -2
  10. data/bin/rspecs +4 -0
  11. data/config/errors.yml +10 -4
  12. data/lib/active_job/karafka.rb +0 -6
  13. data/lib/karafka/active_job/consumer.rb +1 -0
  14. data/lib/karafka/admin.rb +2 -2
  15. data/lib/karafka/base_consumer.rb +31 -21
  16. data/lib/karafka/connection/listener.rb +6 -4
  17. data/lib/karafka/contracts/consumer_group.rb +0 -14
  18. data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
  19. data/lib/karafka/errors.rb +6 -4
  20. data/lib/karafka/instrumentation/logger_listener.rb +25 -11
  21. data/lib/karafka/instrumentation/notifications.rb +2 -0
  22. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
  23. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +37 -32
  24. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
  25. data/lib/karafka/pro/active_job/consumer.rb +3 -1
  26. data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
  27. data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
  28. data/lib/karafka/pro/base_consumer.rb +3 -85
  29. data/lib/karafka/pro/loader.rb +31 -24
  30. data/lib/karafka/pro/performance_tracker.rb +3 -1
  31. data/lib/karafka/pro/processing/coordinator.rb +16 -1
  32. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
  33. data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
  34. data/lib/karafka/pro/processing/partitioner.rb +3 -1
  35. data/lib/karafka/pro/processing/scheduler.rb +3 -1
  36. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
  37. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
  38. data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
  39. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
  40. data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
  41. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
  42. data/lib/karafka/pro/processing/strategies/base.rb +26 -0
  43. data/lib/karafka/pro/processing/strategies/default.rb +69 -0
  44. data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
  45. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
  46. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
  47. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
  48. data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
  49. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
  50. data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
  51. data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
  52. data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
  53. data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
  54. data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
  55. data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
  56. data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
  57. data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
  58. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
  59. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
  60. data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
  61. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
  62. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
  63. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
  64. data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
  65. data/lib/karafka/processing/coordinator.rb +1 -1
  66. data/lib/karafka/processing/executor.rb +6 -0
  67. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
  68. data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
  69. data/lib/karafka/processing/strategies/base.rb +37 -0
  70. data/lib/karafka/processing/strategies/default.rb +52 -0
  71. data/lib/karafka/processing/strategies/dlq.rb +77 -0
  72. data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
  73. data/lib/karafka/processing/strategies/mom.rb +29 -0
  74. data/lib/karafka/processing/strategy_selector.rb +30 -0
  75. data/lib/karafka/railtie.rb +9 -8
  76. data/lib/karafka/routing/builder.rb +6 -0
  77. data/lib/karafka/routing/features/active_job/builder.rb +33 -0
  78. data/lib/karafka/routing/features/active_job/config.rb +15 -0
  79. data/lib/karafka/routing/features/active_job/contract.rb +41 -0
  80. data/lib/karafka/routing/features/active_job/topic.rb +33 -0
  81. data/lib/karafka/routing/features/active_job.rb +13 -0
  82. data/lib/karafka/routing/features/base/expander.rb +53 -0
  83. data/lib/karafka/routing/features/base.rb +34 -0
  84. data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
  85. data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
  86. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
  87. data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
  88. data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
  89. data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
  90. data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
  91. data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
  92. data/lib/karafka/routing/topic.rb +2 -10
  93. data/lib/karafka/server.rb +4 -2
  94. data/lib/karafka/setup/attributes_map.rb +5 -0
  95. data/lib/karafka/setup/config.rb +4 -4
  96. data/lib/karafka/time_trackers/pause.rb +21 -12
  97. data/lib/karafka/version.rb +1 -1
  98. data/lib/karafka.rb +7 -11
  99. data.tar.gz.sig +0 -0
  100. metadata +57 -9
  101. metadata.gz.sig +0 -0
  102. data/lib/karafka/active_job/routing/extensions.rb +0 -33
  103. data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
  104. data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
  105. data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
@@ -1 +1 @@
1
- {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by 
{type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags_delta{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
1
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by 
{consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by 
{topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption 
lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max 
time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch 
size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
@@ -72,9 +72,12 @@ module Karafka
72
72
  # @param event [Karafka::Core::Monitoring::Event]
73
73
  def on_statistics_emitted(event)
74
74
  statistics = event[:statistics]
75
+ consumer_group_id = event[:consumer_group_id]
76
+
77
+ base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
75
78
 
76
79
  rd_kafka_metrics.each do |metric|
77
- report_metric(metric, statistics)
80
+ report_metric(metric, statistics, base_tags)
78
81
  end
79
82
  end
80
83
 
@@ -85,12 +88,7 @@ module Karafka
85
88
  extra_tags = ["type:#{event[:type]}"]
86
89
 
87
90
  if event.payload[:caller].respond_to?(:messages)
88
- metadata = event.payload[:caller].messages.metadata
89
-
90
- extra_tags += [
91
- "topic:#{metadata.topic}",
92
- "partition:#{metadata.partition}"
93
- ]
91
+ extra_tags += consumer_tags(event.payload[:caller])
94
92
  end
95
93
 
96
94
  count('error_occurred', 1, tags: default_tags + extra_tags)
@@ -103,21 +101,23 @@ module Karafka
103
101
  time_taken = event[:time]
104
102
  messages_count = event[:messages_buffer].size
105
103
 
106
- histogram('listener.polling.time_taken', time_taken, tags: default_tags)
107
- histogram('listener.polling.messages', messages_count, tags: default_tags)
104
+ consumer_group_id = event[:subscription_group].consumer_group_id
105
+
106
+ extra_tags = ["consumer_group:#{consumer_group_id}"]
107
+
108
+ histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
109
+ histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
108
110
  end
109
111
 
110
112
  # Here we report the majority of things related to processing as we have access to the
111
113
  # consumer
112
114
  # @param event [Karafka::Core::Monitoring::Event]
113
115
  def on_consumer_consumed(event)
114
- messages = event.payload[:caller].messages
116
+ consumer = event.payload[:caller]
117
+ messages = consumer.messages
115
118
  metadata = messages.metadata
116
119
 
117
- tags = default_tags + [
118
- "topic:#{metadata.topic}",
119
- "partition:#{metadata.partition}"
120
- ]
120
+ tags = default_tags + consumer_tags(consumer)
121
121
 
122
122
  count('consumer.messages', messages.count, tags: tags)
123
123
  count('consumer.batches', 1, tags: tags)
@@ -130,26 +130,14 @@ module Karafka
130
130
 
131
131
  # @param event [Karafka::Core::Monitoring::Event]
132
132
  def on_consumer_revoked(event)
133
- messages = event.payload[:caller].messages
134
- metadata = messages.metadata
135
-
136
- tags = default_tags + [
137
- "topic:#{metadata.topic}",
138
- "partition:#{metadata.partition}"
139
- ]
133
+ tags = default_tags + consumer_tags(event.payload[:caller])
140
134
 
141
135
  count('consumer.revoked', 1, tags: tags)
142
136
  end
143
137
 
144
138
  # @param event [Karafka::Core::Monitoring::Event]
145
139
  def on_consumer_shutdown(event)
146
- messages = event.payload[:caller].messages
147
- metadata = messages.metadata
148
-
149
- tags = default_tags + [
150
- "topic:#{metadata.topic}",
151
- "partition:#{metadata.partition}"
152
- ]
140
+ tags = default_tags + consumer_tags(event.payload[:caller])
153
141
 
154
142
  count('consumer.shutdown', 1, tags: tags)
155
143
  end
@@ -202,14 +190,15 @@ module Karafka
202
190
  # Reports a given metric statistics to Datadog
203
191
  # @param metric [RdKafkaMetric] metric value object
204
192
  # @param statistics [Hash] hash with all the statistics emitted
205
- def report_metric(metric, statistics)
193
+ # @param base_tags [Array<String>] base tags we want to start with
194
+ def report_metric(metric, statistics, base_tags)
206
195
  case metric.scope
207
196
  when :root
208
197
  public_send(
209
198
  metric.type,
210
199
  metric.name,
211
200
  statistics.fetch(*metric.key_location),
212
- tags: default_tags
201
+ tags: base_tags
213
202
  )
214
203
  when :brokers
215
204
  statistics.fetch('brokers').each_value do |broker_statistics|
@@ -222,7 +211,7 @@ module Karafka
222
211
  metric.type,
223
212
  metric.name,
224
213
  broker_statistics.dig(*metric.key_location),
225
- tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
214
+ tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
226
215
  )
227
216
  end
228
217
  when :topics
@@ -236,7 +225,7 @@ module Karafka
236
225
  metric.type,
237
226
  metric.name,
238
227
  partition_statistics.dig(*metric.key_location),
239
- tags: default_tags + [
228
+ tags: base_tags + [
240
229
  "topic:#{topic_name}",
241
230
  "partition:#{partition_name}"
242
231
  ]
@@ -247,6 +236,22 @@ module Karafka
247
236
  raise ArgumentError, metric.scope
248
237
  end
249
238
  end
239
+
240
+ # Builds basic per consumer tags for publication
241
+ #
242
+ # @param consumer [Karafka::BaseConsumer]
243
+ # @return [Array<String>]
244
+ def consumer_tags(consumer)
245
+ messages = consumer.messages
246
+ metadata = messages.metadata
247
+ consumer_group_id = consumer.topic.consumer_group.id
248
+
249
+ [
250
+ "topic:#{metadata.topic}",
251
+ "partition:#{metadata.partition}",
252
+ "consumer_group:#{consumer_group_id}"
253
+ ]
254
+ end
250
255
  end
251
256
  end
252
257
  end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Karafka's logger listener for Datadog
10
+ # It depends on the 'ddtrace' gem
11
+ class LoggerListener
12
+ include ::Karafka::Core::Configurable
13
+ extend Forwardable
14
+
15
+ def_delegators :config, :client
16
+
17
+ # `Datadog::Tracing` client that we should use to trace stuff
18
+ setting :client
19
+
20
+ configure
21
+
22
+ # Log levels that we use in this particular listener
23
+ USED_LOG_LEVELS = %i[
24
+ info
25
+ error
26
+ fatal
27
+ ].freeze
28
+
29
+ private_constant :USED_LOG_LEVELS
30
+
31
+ # @param block [Proc] configuration block
32
+ def initialize(&block)
33
+ configure
34
+ setup(&block) if block
35
+ end
36
+
37
+ # @param block [Proc] configuration block
38
+ # @note We define this alias to be consistent with `WaterDrop#setup`
39
+ def setup(&block)
40
+ configure(&block)
41
+ end
42
+
43
+ # Prints info about the fact that a given job has started
44
+ #
45
+ # @param event [Dry::Events::Event] event details including payload
46
+ def on_worker_process(event)
47
+ current_span = client.trace('karafka.consumer')
48
+ push_tags
49
+
50
+ job = event[:job]
51
+ job_type = job.class.to_s.split('::').last
52
+ consumer = job.executor.topic.consumer
53
+ topic = job.executor.topic.name
54
+
55
+ current_span.resource = "#{consumer}#consume"
56
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
57
+
58
+ pop_tags
59
+ end
60
+
61
+ # Prints info about the fact that a given job has finished
62
+ #
63
+ # @param event [Dry::Events::Event] event details including payload
64
+ def on_worker_processed(event)
65
+ push_tags
66
+
67
+ job = event[:job]
68
+ time = event[:time]
69
+ job_type = job.class.to_s.split('::').last
70
+ consumer = job.executor.topic.consumer
71
+ topic = job.executor.topic.name
72
+
73
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
74
+
75
+ current_span = client.active_span
76
+ current_span.finish if current_span.present?
77
+
78
+ pop_tags
79
+ end
80
+
81
+ # There are many types of errors that can occur in many places, but we provide a single
82
+ # handler for all of them to simplify error instrumentation.
83
+ # @param event [Dry::Events::Event] event details including payload
84
+ def on_error_occurred(event)
85
+ push_tags
86
+
87
+ error = event[:error]
88
+ client.active_span&.set_error(error)
89
+
90
+ case event[:type]
91
+ when 'consumer.consume.error'
92
+ error "Consumer consuming error: #{error}"
93
+ when 'consumer.revoked.error'
94
+ error "Consumer on revoked failed due to an error: #{error}"
95
+ when 'consumer.before_enqueue.error'
96
+ error "Consumer before enqueue failed due to an error: #{error}"
97
+ when 'consumer.before_consume.error'
98
+ error "Consumer before consume failed due to an error: #{error}"
99
+ when 'consumer.after_consume.error'
100
+ error "Consumer after consume failed due to an error: #{error}"
101
+ when 'consumer.shutdown.error'
102
+ error "Consumer on shutdown failed due to an error: #{error}"
103
+ when 'worker.process.error'
104
+ fatal "Worker processing failed due to an error: #{error}"
105
+ when 'connection.listener.fetch_loop.error'
106
+ error "Listener fetch loop error: #{error}"
107
+ when 'runner.call.error'
108
+ fatal "Runner crashed due to an error: #{error}"
109
+ when 'app.stopping.error'
110
+ error 'Forceful Karafka server stop'
111
+ when 'librdkafka.error'
112
+ error "librdkafka internal error occurred: #{error}"
113
+ # Those will only occur when retries in the client fail and when they did not stop
114
+ # after back-offs
115
+ when 'connection.client.poll.error'
116
+ error "Data polling error occurred: #{error}"
117
+ else
118
+ pop_tags
119
+ # This should never happen. Please contact the maintainers
120
+ raise Errors::UnsupportedCaseError, event
121
+ end
122
+
123
+ pop_tags
124
+ end
125
+
126
+ USED_LOG_LEVELS.each do |log_level|
127
+ define_method log_level do |*args|
128
+ Karafka.logger.send(log_level, *args)
129
+ end
130
+ end
131
+
132
+ # Pushes datadog's tags to the logger
133
+ # This is required when tracing log lines asynchronously to correlate logs of the same
134
+ # process together
135
+ def push_tags
136
+ return unless Karafka.logger.respond_to?(:push_tags)
137
+
138
+ Karafka.logger.push_tags(client.log_correlation)
139
+ end
140
+
141
+ # Pops datadog's tags from the logger
142
+ # This is required when tracing log lines asynchronously to prevent the logs of
143
+ # different processes from being correlated
144
+ def pop_tags
145
+ return unless Karafka.logger.respond_to?(:pop_tags)
146
+
147
+ Karafka.logger.pop_tags
148
+ end
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -18,90 +20,6 @@ module Karafka
18
20
  # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
19
21
  # after each batch is processed.
20
22
  class BaseConsumer < Karafka::BaseConsumer
21
- # Pause for tops 31 years
22
- MAX_PAUSE_TIME = 1_000_000_000_000
23
-
24
- private_constant :MAX_PAUSE_TIME
25
-
26
- # Pauses processing of a given partition until we're done with the processing.
27
- # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
- # @note This needs to happen in the listener thread, because we cannot wait on this being
29
- # executed in the workers. Workers may be already running some LRJ jobs that are blocking
30
- # all the threads until finished, yet unless we pause the incoming partitions information,
31
- # we may be kicked out of the consumer group due to not polling often enough
32
- def on_before_enqueue
33
- return unless topic.long_running_job?
34
-
35
- # This ensures that when running LRJ with VP, things operate as expected run only once
36
- # for all the virtual partitions collectively
37
- coordinator.on_enqueued do
38
- # Pause at the first message in a batch. That way in case of a crash, we will not lose
39
- # any messages.
40
- #
41
- # For VP it applies the same way and since VP cannot be used with MOM we should not have
42
- # any edge cases here.
43
- pause(coordinator.seek_offset, MAX_PAUSE_TIME)
44
- end
45
- end
46
-
47
- # Runs extra logic after consumption that is related to handling long-running jobs
48
- # @note This overwrites the '#on_after_consume' from the base consumer
49
- def on_after_consume
50
- coordinator.on_finished do |last_group_message|
51
- on_after_consume_regular(last_group_message)
52
- end
53
- end
54
-
55
- # Trigger method for running on partition revocation.
56
- #
57
- # @private
58
- def on_revoked
59
- # We do not want to resume on revocation in case of a LRJ.
60
- # For LRJ we resume after the successful processing or do a backoff pause in case of a
61
- # failure. Double non-blocking resume could cause problems in coordination.
62
- resume unless topic.long_running_job?
63
-
64
- coordinator.revoke
65
-
66
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
67
- revoked
68
- end
69
- rescue StandardError => e
70
- Karafka.monitor.instrument(
71
- 'error.occurred',
72
- error: e,
73
- caller: self,
74
- type: 'consumer.revoked.error'
75
- )
76
- end
77
-
78
- private
79
-
80
- # Handles the post-consumption flow depending on topic settings
81
- #
82
- # @param last_group_message [Karafka::Messages::Message]
83
- def on_after_consume_regular(last_group_message)
84
- if coordinator.success?
85
- coordinator.pause_tracker.reset
86
-
87
- # We use the non-blocking one here. If someone needs the blocking one, can implement it
88
- # with manual offset management
89
- # Mark as consumed only if manual offset management is not on
90
- mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
91
-
92
- # If this is not a long-running job there is nothing for us to do here
93
- return unless topic.long_running_job?
94
-
95
- seek(coordinator.seek_offset) unless revoked?
96
-
97
- resume
98
- else
99
- # If processing failed, we need to pause
100
- # For long running job this will overwrite the default never-ending pause and will cause
101
- # the processing to keep going after the error backoff
102
- pause(coordinator.seek_offset)
103
- end
104
- end
105
23
  end
106
24
  end
107
25
  end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -13,37 +15,41 @@ module Karafka
13
15
  module Pro
14
16
  # Loader requires and loads all the pro components only when they are needed
15
17
  class Loader
16
- # All the pro components that need to be loaded
17
- COMPONENTS = %w[
18
- base_consumer
19
- performance_tracker
20
- processing/scheduler
21
- processing/jobs/consume_non_blocking
22
- processing/jobs_builder
23
- processing/coordinator
24
- processing/partitioner
25
- contracts/base
26
- contracts/consumer_group
27
- contracts/consumer_group_topic
28
- routing/topic_extensions
29
- routing/builder_extensions
30
- active_job/consumer
18
+ # There seems to be a conflict between using two Zeitwerk instances: it makes lookups
19
+ # for nested namespaces instead of creating them.
20
+ # We require those files explicitly to avoid this, and then all works as expected
21
+ FORCE_LOADED = %w[
31
22
  active_job/dispatcher
32
- active_job/job_options_contract
23
+ processing/jobs/consume_non_blocking
24
+ processing/strategies/base
25
+ routing/features/base
33
26
  ].freeze
34
27
 
35
- private_constant :COMPONENTS
28
+ # Zeitwerk pro loader
29
+ # We need to have one per process, that's why it's set as a constant
30
+ PRO_LOADER = Zeitwerk::Loader.new
31
+
32
+ private_constant :PRO_LOADER
36
33
 
37
34
  class << self
35
+ # Requires all the components without using them anywhere
36
+ def require_all
37
+ FORCE_LOADED.each { |file| require_relative(file) }
38
+
39
+ PRO_LOADER.push_dir(Karafka.core_root.join('pro'), namespace: Karafka::Pro)
40
+ PRO_LOADER.setup
41
+ PRO_LOADER.eager_load
42
+ end
43
+
38
44
  # Loads all the pro components and configures them wherever it is expected
39
45
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
40
46
  # components
41
47
  def setup(config)
42
- COMPONENTS.each { |component| require_relative(component) }
48
+ require_all
43
49
 
44
50
  reconfigure(config)
45
51
 
46
- load_routing_extensions
52
+ load_topic_features
47
53
  end
48
54
 
49
55
  private
@@ -57,6 +63,7 @@ module Karafka
57
63
  icfg.processing.partitioner_class = Processing::Partitioner
58
64
  icfg.processing.scheduler = Processing::Scheduler.new
59
65
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
66
+ icfg.processing.strategy_selector = Processing::StrategySelector.new
60
67
 
61
68
  icfg.active_job.consumer_class = ActiveJob::Consumer
62
69
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
@@ -65,10 +72,10 @@ module Karafka
65
72
  config.monitor.subscribe(PerformanceTracker.instance)
66
73
  end
67
74
 
68
- # Loads routing extensions
69
- def load_routing_extensions
70
- ::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
71
- ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
75
+ # Loads the Pro features of Karafka
76
+ # @note Object space lookup is not the fastest but we do it once during boot, so it's ok
77
+ def load_topic_features
78
+ ::Karafka::Pro::Routing::Features::Base.load_all
72
79
  end
73
80
  end
74
81
  end