karafka 2.0.15 → 2.0.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.rspec +2 -0
  5. data/CHANGELOG.md +78 -0
  6. data/Gemfile.lock +14 -14
  7. data/LICENSE +1 -1
  8. data/README.md +2 -1
  9. data/bin/integrations +3 -2
  10. data/bin/rspecs +4 -0
  11. data/config/errors.yml +10 -4
  12. data/lib/active_job/karafka.rb +0 -6
  13. data/lib/karafka/active_job/consumer.rb +1 -0
  14. data/lib/karafka/admin.rb +2 -2
  15. data/lib/karafka/base_consumer.rb +31 -21
  16. data/lib/karafka/connection/listener.rb +6 -4
  17. data/lib/karafka/contracts/consumer_group.rb +0 -14
  18. data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
  19. data/lib/karafka/errors.rb +6 -4
  20. data/lib/karafka/instrumentation/logger_listener.rb +25 -11
  21. data/lib/karafka/instrumentation/notifications.rb +2 -0
  22. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
  23. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +37 -32
  24. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
  25. data/lib/karafka/pro/active_job/consumer.rb +3 -1
  26. data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
  27. data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
  28. data/lib/karafka/pro/base_consumer.rb +3 -85
  29. data/lib/karafka/pro/loader.rb +31 -24
  30. data/lib/karafka/pro/performance_tracker.rb +3 -1
  31. data/lib/karafka/pro/processing/coordinator.rb +16 -1
  32. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
  33. data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
  34. data/lib/karafka/pro/processing/partitioner.rb +3 -1
  35. data/lib/karafka/pro/processing/scheduler.rb +3 -1
  36. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
  37. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
  38. data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
  39. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
  40. data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
  41. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
  42. data/lib/karafka/pro/processing/strategies/base.rb +26 -0
  43. data/lib/karafka/pro/processing/strategies/default.rb +69 -0
  44. data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
  45. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
  46. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
  47. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
  48. data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
  49. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
  50. data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
  51. data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
  52. data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
  53. data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
  54. data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
  55. data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
  56. data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
  57. data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
  58. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
  59. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
  60. data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
  61. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
  62. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
  63. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
  64. data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
  65. data/lib/karafka/processing/coordinator.rb +1 -1
  66. data/lib/karafka/processing/executor.rb +6 -0
  67. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
  68. data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
  69. data/lib/karafka/processing/strategies/base.rb +37 -0
  70. data/lib/karafka/processing/strategies/default.rb +52 -0
  71. data/lib/karafka/processing/strategies/dlq.rb +77 -0
  72. data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
  73. data/lib/karafka/processing/strategies/mom.rb +29 -0
  74. data/lib/karafka/processing/strategy_selector.rb +30 -0
  75. data/lib/karafka/railtie.rb +9 -8
  76. data/lib/karafka/routing/builder.rb +6 -0
  77. data/lib/karafka/routing/features/active_job/builder.rb +33 -0
  78. data/lib/karafka/routing/features/active_job/config.rb +15 -0
  79. data/lib/karafka/routing/features/active_job/contract.rb +41 -0
  80. data/lib/karafka/routing/features/active_job/topic.rb +33 -0
  81. data/lib/karafka/routing/features/active_job.rb +13 -0
  82. data/lib/karafka/routing/features/base/expander.rb +53 -0
  83. data/lib/karafka/routing/features/base.rb +34 -0
  84. data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
  85. data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
  86. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
  87. data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
  88. data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
  89. data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
  90. data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
  91. data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
  92. data/lib/karafka/routing/topic.rb +2 -10
  93. data/lib/karafka/server.rb +4 -2
  94. data/lib/karafka/setup/attributes_map.rb +5 -0
  95. data/lib/karafka/setup/config.rb +4 -4
  96. data/lib/karafka/time_trackers/pause.rb +21 -12
  97. data/lib/karafka/version.rb +1 -1
  98. data/lib/karafka.rb +7 -11
  99. data.tar.gz.sig +0 -0
  100. metadata +57 -9
  101. metadata.gz.sig +0 -0
  102. data/lib/karafka/active_job/routing/extensions.rb +0 -33
  103. data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
  104. data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
  105. data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
@@ -1 +1 @@
1
- {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by 
{type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.consumer.lags_delta{*} by {partition,topic}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
1
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by 
{consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by 
{topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption 
lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max 
time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch 
size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
@@ -72,9 +72,12 @@ module Karafka
72
72
  # @param event [Karafka::Core::Monitoring::Event]
73
73
  def on_statistics_emitted(event)
74
74
  statistics = event[:statistics]
75
+ consumer_group_id = event[:consumer_group_id]
76
+
77
+ base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
75
78
 
76
79
  rd_kafka_metrics.each do |metric|
77
- report_metric(metric, statistics)
80
+ report_metric(metric, statistics, base_tags)
78
81
  end
79
82
  end
80
83
 
@@ -85,12 +88,7 @@ module Karafka
85
88
  extra_tags = ["type:#{event[:type]}"]
86
89
 
87
90
  if event.payload[:caller].respond_to?(:messages)
88
- metadata = event.payload[:caller].messages.metadata
89
-
90
- extra_tags += [
91
- "topic:#{metadata.topic}",
92
- "partition:#{metadata.partition}"
93
- ]
91
+ extra_tags += consumer_tags(event.payload[:caller])
94
92
  end
95
93
 
96
94
  count('error_occurred', 1, tags: default_tags + extra_tags)
@@ -103,21 +101,23 @@ module Karafka
103
101
  time_taken = event[:time]
104
102
  messages_count = event[:messages_buffer].size
105
103
 
106
- histogram('listener.polling.time_taken', time_taken, tags: default_tags)
107
- histogram('listener.polling.messages', messages_count, tags: default_tags)
104
+ consumer_group_id = event[:subscription_group].consumer_group_id
105
+
106
+ extra_tags = ["consumer_group:#{consumer_group_id}"]
107
+
108
+ histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
109
+ histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
108
110
  end
109
111
 
110
112
  # Here we report majority of things related to processing as we have access to the
111
113
  # consumer
112
114
  # @param event [Karafka::Core::Monitoring::Event]
113
115
  def on_consumer_consumed(event)
114
- messages = event.payload[:caller].messages
116
+ consumer = event.payload[:caller]
117
+ messages = consumer.messages
115
118
  metadata = messages.metadata
116
119
 
117
- tags = default_tags + [
118
- "topic:#{metadata.topic}",
119
- "partition:#{metadata.partition}"
120
- ]
120
+ tags = default_tags + consumer_tags(consumer)
121
121
 
122
122
  count('consumer.messages', messages.count, tags: tags)
123
123
  count('consumer.batches', 1, tags: tags)
@@ -130,26 +130,14 @@ module Karafka
130
130
 
131
131
  # @param event [Karafka::Core::Monitoring::Event]
132
132
  def on_consumer_revoked(event)
133
- messages = event.payload[:caller].messages
134
- metadata = messages.metadata
135
-
136
- tags = default_tags + [
137
- "topic:#{metadata.topic}",
138
- "partition:#{metadata.partition}"
139
- ]
133
+ tags = default_tags + consumer_tags(event.payload[:caller])
140
134
 
141
135
  count('consumer.revoked', 1, tags: tags)
142
136
  end
143
137
 
144
138
  # @param event [Karafka::Core::Monitoring::Event]
145
139
  def on_consumer_shutdown(event)
146
- messages = event.payload[:caller].messages
147
- metadata = messages.metadata
148
-
149
- tags = default_tags + [
150
- "topic:#{metadata.topic}",
151
- "partition:#{metadata.partition}"
152
- ]
140
+ tags = default_tags + consumer_tags(event.payload[:caller])
153
141
 
154
142
  count('consumer.shutdown', 1, tags: tags)
155
143
  end
@@ -202,14 +190,15 @@ module Karafka
202
190
  # Reports a given metric statistics to Datadog
203
191
  # @param metric [RdKafkaMetric] metric value object
204
192
  # @param statistics [Hash] hash with all the statistics emitted
205
- def report_metric(metric, statistics)
193
+ # @param base_tags [Array<String>] base tags we want to start with
194
+ def report_metric(metric, statistics, base_tags)
206
195
  case metric.scope
207
196
  when :root
208
197
  public_send(
209
198
  metric.type,
210
199
  metric.name,
211
200
  statistics.fetch(*metric.key_location),
212
- tags: default_tags
201
+ tags: base_tags
213
202
  )
214
203
  when :brokers
215
204
  statistics.fetch('brokers').each_value do |broker_statistics|
@@ -222,7 +211,7 @@ module Karafka
222
211
  metric.type,
223
212
  metric.name,
224
213
  broker_statistics.dig(*metric.key_location),
225
- tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
214
+ tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
226
215
  )
227
216
  end
228
217
  when :topics
@@ -236,7 +225,7 @@ module Karafka
236
225
  metric.type,
237
226
  metric.name,
238
227
  partition_statistics.dig(*metric.key_location),
239
- tags: default_tags + [
228
+ tags: base_tags + [
240
229
  "topic:#{topic_name}",
241
230
  "partition:#{partition_name}"
242
231
  ]
@@ -247,6 +236,22 @@ module Karafka
247
236
  raise ArgumentError, metric.scope
248
237
  end
249
238
  end
239
+
240
+ # Builds basic per consumer tags for publication
241
+ #
242
+ # @param consumer [Karafka::BaseConsumer]
243
+ # @return [Array<String>]
244
def consumer_tags(consumer)
  # Metadata of the messages batch currently assigned to the consumer
  batch_metadata = consumer.messages.metadata
  group_id = consumer.topic.consumer_group.id

  # Each pair is rendered as a "name:value" tag string, in this exact order
  {
    topic: batch_metadata.topic,
    partition: batch_metadata.partition,
    consumer_group: group_id
  }.map { |name, value| "#{name}:#{value}" }
end
250
255
  end
251
256
  end
252
257
  end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Karafka logger listener for Datadog
10
+ # It depends on the 'ddtrace' gem
11
+ class LoggerListener
12
+ include ::Karafka::Core::Configurable
13
+ extend Forwardable
14
+
15
+ def_delegators :config, :client
16
+
17
+ # `Datadog::Tracing` client that we should use to trace stuff
18
+ setting :client
19
+
20
+ configure
21
+
22
+ # Log levels that we use in this particular listener
23
+ USED_LOG_LEVELS = %i[
24
+ info
25
+ error
26
+ fatal
27
+ ].freeze
28
+
29
+ private_constant :USED_LOG_LEVELS
30
+
31
+ # @param block [Proc] configuration block
32
def initialize(&block)
  # Run the configuration once without a block before any custom setup happens
  configure
  return unless block

  # Hand the caller's block over to `#setup` so it can adjust the settings
  setup(&block)
end
36
+
37
+ # @param block [Proc] configuration block
38
+ # @note We define this alias to be consistent with `WaterDrop#setup`
39
def setup(&block)
  # Thin alias: the block is forwarded untouched to `configure`
  configure(&block)
end
42
+
43
+ # Prints info about the fact that a given job has started
44
+ #
45
+ # @param event [Dry::Events::Event] event details including payload
46
def on_worker_process(event)
  # The span is opened before the tags are pushed
  # NOTE(review): presumably so the log correlation refers to this span - confirm with ddtrace
  span = client.trace('karafka.consumer')
  push_tags

  job = event[:job]
  # Only the last segment of the job class name is used in the log line
  kind = job.class.to_s.split('::').last
  consumer_class = job.executor.topic.consumer
  topic_name = job.executor.topic.name

  span.resource = "#{consumer_class}#consume"
  info "[#{job.id}] #{kind} job for #{consumer_class} on #{topic_name} started"

  pop_tags
end
60
+
61
+ # Prints info about the fact that a given job has finished
62
+ #
63
+ # @param event [Dry::Events::Event] event details including payload
64
def on_worker_processed(event)
  push_tags

  job = event[:job]
  time = event[:time]
  # Only the last segment of the job class name is used in the log line
  job_type = job.class.to_s.split('::').last
  consumer = job.executor.topic.consumer
  topic = job.executor.topic.name

  info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"

  # Finish the active span, if there is one.
  # A plain truthiness check is used on purpose: `present?` is an ActiveSupport extension
  # and raises NoMethodError in applications that do not load ActiveSupport.
  current_span = client.active_span
  current_span.finish if current_span

  pop_tags
end
80
+
81
+ # There are many types of errors that can occur in many places, but we provide a single
82
+ # handler for all of them to simplify error instrumentation.
83
+ # @param event [Dry::Events::Event] event details including payload
84
def on_error_occurred(event)
  push_tags

  begin
    # Named `exception` so it does not shadow the `error` log helper used below
    exception = event[:error]
    # Attach the error to the active span when there is one
    client.active_span&.set_error(exception)

    case event[:type]
    when 'consumer.consume.error'
      error "Consumer consuming error: #{exception}"
    when 'consumer.revoked.error'
      error "Consumer on revoked failed due to an error: #{exception}"
    when 'consumer.before_enqueue.error'
      error "Consumer before enqueue failed due to an error: #{exception}"
    when 'consumer.before_consume.error'
      error "Consumer before consume failed due to an error: #{exception}"
    when 'consumer.after_consume.error'
      error "Consumer after consume failed due to an error: #{exception}"
    when 'consumer.shutdown.error'
      error "Consumer on shutdown failed due to an error: #{exception}"
    when 'worker.process.error'
      fatal "Worker processing failed due to an error: #{exception}"
    when 'connection.listener.fetch_loop.error'
      error "Listener fetch loop error: #{exception}"
    when 'runner.call.error'
      fatal "Runner crashed due to an error: #{exception}"
    when 'app.stopping.error'
      error 'Forceful Karafka server stop'
    when 'librdkafka.error'
      error "librdkafka internal error occurred: #{exception}"
    # Those will only occur when retries in the client fail and when they did not stop
    # after back-offs
    when 'connection.client.poll.error'
      error "Data polling error occurred: #{exception}"
    else
      # This should never happen. Please contact the maintainers
      raise Errors::UnsupportedCaseError, event
    end
  ensure
    # Always pop what was pushed above, also when the span update, the logging or the
    # unsupported-type raise interrupts the flow, so tags never leak into later log lines
    pop_tags
  end
end
125
+
126
# Generates one helper per used log level; each one forwards its arguments to the
# Karafka logger method of the same name
USED_LOG_LEVELS.each do |level|
  define_method(level) { |*payload| Karafka.logger.send(level, *payload) }
end
131
+
132
+ # Pushes datadog's tags to the logger
133
+ # This is required when tracing log lines asynchronously to correlate logs of the same
134
+ # process together
135
def push_tags
  # Not every logger supports tagging; those that do not are left untouched
  if Karafka.logger.respond_to?(:push_tags)
    Karafka.logger.push_tags(client.log_correlation)
  end
end
140
+
141
+ # Pops Datadog's tags from the logger
142
+ # This is required when tracing log lines asynchronously to prevent logs of
143
+ # different processes from being correlated with each other
144
def pop_tags
  # Not every logger supports tagging; those that do not are left untouched
  if Karafka.logger.respond_to?(:pop_tags)
    Karafka.logger.pop_tags
  end
end
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -18,90 +20,6 @@ module Karafka
18
20
  # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
19
21
  # after each batch is processed.
20
22
  class BaseConsumer < Karafka::BaseConsumer
21
- # Pause for tops 31 years
22
- MAX_PAUSE_TIME = 1_000_000_000_000
23
-
24
- private_constant :MAX_PAUSE_TIME
25
-
26
- # Pauses processing of a given partition until we're done with the processing.
27
- # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
- # @note This needs to happen in the listener thread, because we cannot wait on this being
29
- # executed in the workers. Workers may be already running some LRJ jobs that are blocking
30
- # all the threads until finished, yet unless we pause the incoming partitions information,
31
- # we may be kicked out of the consumer group due to not polling often enough
32
- def on_before_enqueue
33
- return unless topic.long_running_job?
34
-
35
- # This ensures that when running LRJ with VP, things operate as expected run only once
36
- # for all the virtual partitions collectively
37
- coordinator.on_enqueued do
38
- # Pause at the first message in a batch. That way in case of a crash, we will not loose
39
- # any messages.
40
- #
41
- # For VP it applies the same way and since VP cannot be used with MOM we should not have
42
- # any edge cases here.
43
- pause(coordinator.seek_offset, MAX_PAUSE_TIME)
44
- end
45
- end
46
-
47
- # Runs extra logic after consumption that is related to handling long-running jobs
48
- # @note This overwrites the '#on_after_consume' from the base consumer
49
- def on_after_consume
50
- coordinator.on_finished do |last_group_message|
51
- on_after_consume_regular(last_group_message)
52
- end
53
- end
54
-
55
- # Trigger method for running on partition revocation.
56
- #
57
- # @private
58
- def on_revoked
59
- # We do not want to resume on revocation in case of a LRJ.
60
- # For LRJ we resume after the successful processing or do a backoff pause in case of a
61
- # failure. Double non-blocking resume could cause problems in coordination.
62
- resume unless topic.long_running_job?
63
-
64
- coordinator.revoke
65
-
66
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
67
- revoked
68
- end
69
- rescue StandardError => e
70
- Karafka.monitor.instrument(
71
- 'error.occurred',
72
- error: e,
73
- caller: self,
74
- type: 'consumer.revoked.error'
75
- )
76
- end
77
-
78
- private
79
-
80
- # Handles the post-consumption flow depending on topic settings
81
- #
82
- # @param last_group_message [Karafka::Messages::Message]
83
- def on_after_consume_regular(last_group_message)
84
- if coordinator.success?
85
- coordinator.pause_tracker.reset
86
-
87
- # We use the non-blocking one here. If someone needs the blocking one, can implement it
88
- # with manual offset management
89
- # Mark as consumed only if manual offset management is not on
90
- mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
91
-
92
- # If this is not a long-running job there is nothing for us to do here
93
- return unless topic.long_running_job?
94
-
95
- seek(coordinator.seek_offset) unless revoked?
96
-
97
- resume
98
- else
99
- # If processing failed, we need to pause
100
- # For long running job this will overwrite the default never-ending pause and will cause
101
- # the processing to keep going after the error backoff
102
- pause(coordinator.seek_offset)
103
- end
104
- end
105
23
  end
106
24
  end
107
25
  end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -13,37 +15,41 @@ module Karafka
13
15
  module Pro
14
16
  # Loader requires and loads all the pro components only when they are needed
15
17
  class Loader
16
- # All the pro components that need to be loaded
17
- COMPONENTS = %w[
18
- base_consumer
19
- performance_tracker
20
- processing/scheduler
21
- processing/jobs/consume_non_blocking
22
- processing/jobs_builder
23
- processing/coordinator
24
- processing/partitioner
25
- contracts/base
26
- contracts/consumer_group
27
- contracts/consumer_group_topic
28
- routing/topic_extensions
29
- routing/builder_extensions
30
- active_job/consumer
18
+ # There seems to be a conflict when two Zeitwerk instances are used: nested namespaces
19
+ # get looked up instead of being created.
20
+ # We require these files explicitly to avoid the problem; after that everything works as expected
21
+ FORCE_LOADED = %w[
31
22
  active_job/dispatcher
32
- active_job/job_options_contract
23
+ processing/jobs/consume_non_blocking
24
+ processing/strategies/base
25
+ routing/features/base
33
26
  ].freeze
34
27
 
35
- private_constant :COMPONENTS
28
+ # Zeitwerk pro loader
29
+ # We need to have one per process, that's why it's set as a constant
30
+ PRO_LOADER = Zeitwerk::Loader.new
31
+
32
+ private_constant :PRO_LOADER
36
33
 
37
34
  class << self
35
+ # Requires all the components without using them anywhere
36
def require_all
  # Files from the force-loaded list are required explicitly, before Zeitwerk is set up
  FORCE_LOADED.each do |component|
    require_relative(component)
  end

  # Register the `pro` directory under the Karafka::Pro namespace and eager load it
  pro_root = Karafka.core_root.join('pro')
  PRO_LOADER.push_dir(pro_root, namespace: Karafka::Pro)
  PRO_LOADER.setup
  PRO_LOADER.eager_load
end
43
+
38
44
  # Loads all the pro components and configures them wherever it is expected
39
45
  # @param config [Karafka::Core::Configurable::Node] app config that we can alter with pro
40
46
  # components
41
47
  def setup(config)
42
- COMPONENTS.each { |component| require_relative(component) }
48
+ require_all
43
49
 
44
50
  reconfigure(config)
45
51
 
46
- load_routing_extensions
52
+ load_topic_features
47
53
  end
48
54
 
49
55
  private
@@ -57,6 +63,7 @@ module Karafka
57
63
  icfg.processing.partitioner_class = Processing::Partitioner
58
64
  icfg.processing.scheduler = Processing::Scheduler.new
59
65
  icfg.processing.jobs_builder = Processing::JobsBuilder.new
66
+ icfg.processing.strategy_selector = Processing::StrategySelector.new
60
67
 
61
68
  icfg.active_job.consumer_class = ActiveJob::Consumer
62
69
  icfg.active_job.dispatcher = ActiveJob::Dispatcher.new
@@ -65,10 +72,10 @@ module Karafka
65
72
  config.monitor.subscribe(PerformanceTracker.instance)
66
73
  end
67
74
 
68
- # Loads routing extensions
69
- def load_routing_extensions
70
- ::Karafka::Routing::Topic.prepend(Routing::TopicExtensions)
71
- ::Karafka::Routing::Builder.prepend(Routing::BuilderExtensions)
75
+ # Loads the Pro features of Karafka
76
+ # @note Object space lookup is not the fastest but we do it once during boot, so it's ok
77
+ def load_topic_features
78
+ ::Karafka::Pro::Routing::Features::Base.load_all
72
79
  end
73
80
  end
74
81
  end