karafka 2.0.14 → 2.0.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/.github/workflows/ci.yml +1 -1
  4. data/.rspec +2 -0
  5. data/CHANGELOG.md +84 -0
  6. data/Gemfile.lock +14 -14
  7. data/LICENSE +1 -1
  8. data/README.md +2 -1
  9. data/bin/integrations +3 -2
  10. data/bin/rspecs +4 -0
  11. data/config/errors.yml +10 -4
  12. data/lib/active_job/karafka.rb +0 -6
  13. data/lib/karafka/active_job/consumer.rb +1 -0
  14. data/lib/karafka/admin.rb +6 -3
  15. data/lib/karafka/base_consumer.rb +31 -21
  16. data/lib/karafka/connection/client.rb +2 -4
  17. data/lib/karafka/connection/listener.rb +6 -4
  18. data/lib/karafka/contracts/consumer_group.rb +0 -14
  19. data/lib/karafka/contracts/{consumer_group_topic.rb → topic.rb} +2 -3
  20. data/lib/karafka/errors.rb +6 -4
  21. data/lib/karafka/instrumentation/logger_listener.rb +25 -11
  22. data/lib/karafka/instrumentation/notifications.rb +2 -0
  23. data/lib/karafka/instrumentation/vendors/datadog/dashboard.json +1 -1
  24. data/lib/karafka/instrumentation/vendors/datadog/listener.rb +59 -32
  25. data/lib/karafka/instrumentation/vendors/datadog/logger_listener.rb +153 -0
  26. data/lib/karafka/pro/active_job/consumer.rb +3 -1
  27. data/lib/karafka/pro/active_job/dispatcher.rb +3 -1
  28. data/lib/karafka/pro/active_job/job_options_contract.rb +3 -1
  29. data/lib/karafka/pro/base_consumer.rb +3 -85
  30. data/lib/karafka/pro/loader.rb +31 -24
  31. data/lib/karafka/pro/performance_tracker.rb +3 -1
  32. data/lib/karafka/pro/processing/coordinator.rb +16 -1
  33. data/lib/karafka/pro/processing/jobs/consume_non_blocking.rb +3 -1
  34. data/lib/karafka/pro/processing/jobs_builder.rb +3 -1
  35. data/lib/karafka/pro/processing/partitioner.rb +14 -17
  36. data/lib/karafka/pro/processing/scheduler.rb +3 -1
  37. data/lib/karafka/pro/processing/strategies/aj_dlq_lrj_mom.rb +40 -0
  38. data/lib/karafka/pro/processing/strategies/aj_dlq_mom.rb +62 -0
  39. data/lib/karafka/pro/processing/strategies/aj_lrj_mom.rb +35 -0
  40. data/lib/karafka/pro/processing/strategies/aj_lrj_mom_vp.rb +69 -0
  41. data/lib/karafka/pro/processing/strategies/aj_mom.rb +33 -0
  42. data/lib/karafka/pro/processing/strategies/aj_mom_vp.rb +58 -0
  43. data/lib/karafka/pro/processing/strategies/base.rb +26 -0
  44. data/lib/karafka/pro/processing/strategies/default.rb +69 -0
  45. data/lib/karafka/pro/processing/strategies/dlq.rb +88 -0
  46. data/lib/karafka/pro/processing/strategies/dlq_lrj.rb +64 -0
  47. data/lib/karafka/pro/processing/strategies/dlq_lrj_mom.rb +60 -0
  48. data/lib/karafka/pro/processing/strategies/dlq_mom.rb +58 -0
  49. data/lib/karafka/pro/processing/strategies/lrj.rb +76 -0
  50. data/lib/karafka/pro/processing/strategies/lrj_mom.rb +68 -0
  51. data/lib/karafka/pro/processing/strategies/lrj_vp.rb +33 -0
  52. data/lib/karafka/pro/processing/strategies/mom.rb +43 -0
  53. data/lib/karafka/pro/processing/strategies/vp.rb +32 -0
  54. data/lib/karafka/pro/processing/strategy_selector.rb +58 -0
  55. data/lib/karafka/pro/{contracts → routing/features}/base.rb +8 -5
  56. data/lib/karafka/pro/routing/features/dead_letter_queue/contract.rb +49 -0
  57. data/lib/karafka/pro/routing/{builder_extensions.rb → features/dead_letter_queue.rb} +9 -12
  58. data/lib/karafka/pro/routing/features/long_running_job/config.rb +28 -0
  59. data/lib/karafka/pro/routing/features/long_running_job/contract.rb +37 -0
  60. data/lib/karafka/pro/routing/features/long_running_job/topic.rb +42 -0
  61. data/lib/karafka/pro/routing/features/long_running_job.rb +28 -0
  62. data/lib/karafka/pro/routing/features/virtual_partitions/config.rb +30 -0
  63. data/lib/karafka/pro/routing/features/virtual_partitions/contract.rb +69 -0
  64. data/lib/karafka/pro/routing/features/virtual_partitions/topic.rb +56 -0
  65. data/lib/karafka/pro/routing/features/virtual_partitions.rb +27 -0
  66. data/lib/karafka/processing/coordinator.rb +1 -1
  67. data/lib/karafka/processing/executor.rb +6 -0
  68. data/lib/karafka/processing/strategies/aj_dlq_mom.rb +44 -0
  69. data/lib/karafka/processing/strategies/aj_mom.rb +21 -0
  70. data/lib/karafka/processing/strategies/base.rb +37 -0
  71. data/lib/karafka/processing/strategies/default.rb +52 -0
  72. data/lib/karafka/processing/strategies/dlq.rb +77 -0
  73. data/lib/karafka/processing/strategies/dlq_mom.rb +42 -0
  74. data/lib/karafka/processing/strategies/mom.rb +29 -0
  75. data/lib/karafka/processing/strategy_selector.rb +30 -0
  76. data/lib/karafka/railtie.rb +9 -8
  77. data/lib/karafka/routing/builder.rb +6 -0
  78. data/lib/karafka/routing/features/active_job/builder.rb +33 -0
  79. data/lib/karafka/routing/features/active_job/config.rb +15 -0
  80. data/lib/karafka/routing/features/active_job/contract.rb +41 -0
  81. data/lib/karafka/routing/features/active_job/topic.rb +33 -0
  82. data/lib/karafka/routing/features/active_job.rb +13 -0
  83. data/lib/karafka/routing/features/base/expander.rb +53 -0
  84. data/lib/karafka/routing/features/base.rb +34 -0
  85. data/lib/karafka/routing/features/dead_letter_queue/config.rb +19 -0
  86. data/lib/karafka/routing/features/dead_letter_queue/contract.rb +40 -0
  87. data/lib/karafka/routing/features/dead_letter_queue/topic.rb +40 -0
  88. data/lib/karafka/routing/features/dead_letter_queue.rb +16 -0
  89. data/lib/karafka/routing/features/manual_offset_management/config.rb +15 -0
  90. data/lib/karafka/routing/features/manual_offset_management/contract.rb +24 -0
  91. data/lib/karafka/routing/features/manual_offset_management/topic.rb +35 -0
  92. data/lib/karafka/routing/features/manual_offset_management.rb +18 -0
  93. data/lib/karafka/routing/topic.rb +2 -10
  94. data/lib/karafka/server.rb +4 -2
  95. data/lib/karafka/setup/attributes_map.rb +5 -0
  96. data/lib/karafka/setup/config.rb +4 -4
  97. data/lib/karafka/time_trackers/pause.rb +21 -12
  98. data/lib/karafka/version.rb +1 -1
  99. data/lib/karafka.rb +7 -11
  100. data.tar.gz.sig +0 -0
  101. metadata +57 -9
  102. metadata.gz.sig +0 -0
  103. data/lib/karafka/active_job/routing/extensions.rb +0 -33
  104. data/lib/karafka/pro/contracts/consumer_group.rb +0 -34
  105. data/lib/karafka/pro/contracts/consumer_group_topic.rb +0 -69
  106. data/lib/karafka/pro/routing/topic_extensions.rb +0 -74
@@ -1 +1 @@
1
- {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.connection.connects{*} by {host}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{*} by {type}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*}.as_count()","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing 
errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by 
{partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.revoked{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":5}},{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers 
(p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":5,"width":12,"height":3}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.offset{*} by {topic,partition}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per 
topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 
1000"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumption_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Processing lag in ms (p95)","formula":"query1"},{"alias":"Processing lag in ms (max)","formula":"query2"},{"alias":"Processing lag in ms (avg)","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"},{"formula":"query2"},{"formula":"query3"}],"response_format":"timeseries","queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by 
{topic,partition}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic}","data_source":"metrics","name":"query3"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / 
query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.consumer.messages{*}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*}.as_count()","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms polling time","formula":"query1"},{"alias":"max ms polling time","formula":"query2"},{"alias":"average ms polling time","formula":"query3"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.95percentile{*}"},{"name":"query2","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.max{*}"},{"name":"query3","data_source":"metrics","query":"avg:karafka.listener.polling.time_taken.avg{*}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":7,"is_column_break":true}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
1
+ {"title":"Karafka monitoring dashboard","description":"","widgets":[{"id":5988438511387100,"definition":{"title":"Workers poll","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8769294644934352,"definition":{"title":"Enqueued jobs","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Enqueued jobs","formula":"query1"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.enqueued_jobs.avg{*}","data_source":"metrics","name":"query1"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":2714502141463873,"definition":{"title":"Workers usage","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"Busy workers (p95)","formula":"query1"},{"alias":"Total workers","formula":"query2"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5370086629441984,"definition":{"title":"Workers % utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"% workers utilization","formula":"(query1 / query2) * 
100"}],"response_format":"timeseries","queries":[{"query":"sum:karafka.worker.processing.95percentile{*}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.worker.total_threads{*}","data_source":"metrics","name":"query2"}],"style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":7444969424381053,"definition":{"title":"Stability & errors","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":8304008422587936,"definition":{"title":"Client connects and disconnects","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Connects","formula":"query1"},{"alias":"Disconnects","formula":"query2"}],"queries":[{"query":"sum:karafka.connection.connects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.connection.disconnects{*} by {host,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":3722865443336921,"definition":{"title":"Errors encountered (any)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"in-karafka errors","formula":"query1"},{"alias":"librdkafka consume errors","formula":"query2"},{"alias":"librdkafka receive errors","formula":"query3"}],"queries":[{"query":"sum:karafka.error_occurred{*} by {type,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consume.errors{*} by 
{consumer_group}.as_count()","data_source":"metrics","name":"query2"},{"query":"sum:karafka.receive.errors{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":5477381252952760,"definition":{"title":"Processing errors","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2357301680769076,"definition":{"title":"Processing errors rate per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"% error rate per topic","formula":"(query1 / (query1 + query2)) * 100"}],"queries":[{"query":"sum:karafka.error_occurred{type:consumer.consume.error} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {topic,partition,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":3902930069982135,"definition":{"title":"Batches successful vs 
failures","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Successfully processed batch","formula":"query1"},{"alias":"Batch processing with error","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"avg:karafka.error_occurred{type:consumer.consume.error} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":718749162159145,"definition":{"title":"Consumer instances revocations and shutdowns","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumer instances revokations","formula":"query1"},{"alias":"Consumer instances shutdowns","formula":"query2"}],"queries":[{"query":"sum:karafka.consumer.revoked{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.shutdown{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":5}},{"id":7288186528768428,"definition":{"title":"Topics overview","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":1533435157804573,"definition":{"title":"Topics 
lags","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages lag per topic partition","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":6,"height":2}},{"id":1411506453982604,"definition":{"title":"Topics lag trends","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic partition consumption trend","formula":"query1"}],"queries":[{"query":"avg:karafka.consumer.lags_delta{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":6,"y":0,"width":6,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3,"is_column_break":true}},{"id":8544040083223278,"definition":{"title":"Throughput ","type":"group","show_title":true,"layout_type":"ordered","widgets":[{"id":3740207481939733,"definition":{"title":"Offset lag changes","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"formula":"derivative(query1)"}],"queries":[{"query":"max:karafka.consumer.offset{*} by 
{topic,partition,consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":6319110548544878,"definition":{"title":"Batches processed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Topic batches processed","formula":"query1"}],"queries":[{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":6232784865331443,"definition":{"title":"Messages consumed per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {partition,topic,consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":0,"width":4,"height":2}},{"id":2321394598982770,"definition":{"title":"Consumption lag (in seconds)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Consumption 
lag in s (max)","formula":"query2 / 1000"},{"alias":"Consumption lag in s (avg)","formula":"query3 / 1000"},{"alias":"Consumption lag in s (p95)","formula":"query1 / 1000"}],"queries":[{"query":"max:karafka.consumer.consumption_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumption_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"},{"query":"max:karafka.consumer.consumption_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":2,"width":4,"height":2}},{"id":1062074781483741,"definition":{"title":"Processing lag (in ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Lag in ms (p95)","formula":"query1"},{"alias":"Lag in ms (max)","formula":"query2"},{"alias":"Lag in ms (avg)","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.processing_lag.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.processing_lag.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.processing_lag.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":2,"width":4,"height":2}},{"id":7497794728674267,"definition":{"title":"Batch processing time","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 time","formula":"query1"},{"alias":"max 
time","formula":"query2"},{"alias":"avg time","formula":"query3"}],"queries":[{"query":"max:karafka.consumer.consumed.time_taken.95percentile{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query1"},{"query":"max:karafka.consumer.consumed.time_taken.max{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query2"},{"query":"max:karafka.consumer.consumed.time_taken.avg{*} by {topic,partition,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":2,"width":4,"height":2}},{"id":4192833027984161,"definition":{"title":"Batch size per topic","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Batch size p95","formula":"query1"},{"alias":"Batch size avg","formula":"query2"},{"alias":"Batch size max","formula":"query3"}],"queries":[{"query":"sum:karafka.consumer.batch_size.95percentile{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batch_size.avg{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query2"},{"query":"sum:karafka.consumer.batch_size.max{*} by {partition,topic,consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":4,"width":4,"height":2}},{"id":4741598444771147,"definition":{"title":"Messages consumed overall","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"Messages consumed","formula":"query1"},{"alias":"Average batch 
size","formula":"query1 / query2"}],"queries":[{"query":"sum:karafka.consumer.messages{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query1"},{"query":"sum:karafka.consumer.batches{*} by {consumer_group}.as_count()","data_source":"metrics","name":"query2"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":4,"y":4,"width":4,"height":2}},{"id":4502534794102513,"definition":{"title":"Polling times (ms)","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"time":{},"type":"timeseries","requests":[{"formulas":[{"alias":"p95 ms","formula":"query1"},{"alias":"max ms","formula":"query2"},{"alias":"average ms","formula":"query3"}],"queries":[{"query":"avg:karafka.listener.polling.time_taken.95percentile{*} by {consumer_group}","data_source":"metrics","name":"query1"},{"query":"avg:karafka.listener.polling.time_taken.max{*} by {consumer_group}","data_source":"metrics","name":"query2"},{"query":"avg:karafka.listener.polling.time_taken.avg{*} by {consumer_group}","data_source":"metrics","name":"query3"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":8,"y":4,"width":4,"height":2}}]},"layout":{"x":0,"y":3,"width":12,"height":7}}],"template_variables":[],"layout_type":"ordered","is_read_only":false,"notify_list":[],"reflow_type":"fixed","id":"s3u-z47-i6u"}
@@ -46,7 +46,11 @@ module Karafka
46
46
  RdKafkaMetric.new(:count, :brokers, 'connection.disconnects', 'disconnects_d'),
47
47
  RdKafkaMetric.new(:gauge, :brokers, 'network.latency.avg', %w[rtt avg]),
48
48
  RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p95', %w[rtt p95]),
49
- RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99])
49
+ RdKafkaMetric.new(:gauge, :brokers, 'network.latency.p99', %w[rtt p99]),
50
+
51
+ # Topics metrics
52
+ RdKafkaMetric.new(:gauge, :topics, 'consumer.lags', 'consumer_lag_stored'),
53
+ RdKafkaMetric.new(:gauge, :topics, 'consumer.lags_delta', 'consumer_lag_stored_d')
50
54
  ].freeze
51
55
 
52
56
  configure
@@ -68,9 +72,12 @@ module Karafka
68
72
  # @param event [Karafka::Core::Monitoring::Event]
69
73
  def on_statistics_emitted(event)
70
74
  statistics = event[:statistics]
75
+ consumer_group_id = event[:consumer_group_id]
76
+
77
+ base_tags = default_tags + ["consumer_group:#{consumer_group_id}"]
71
78
 
72
79
  rd_kafka_metrics.each do |metric|
73
- report_metric(metric, statistics)
80
+ report_metric(metric, statistics, base_tags)
74
81
  end
75
82
  end
76
83
 
@@ -81,12 +88,7 @@ module Karafka
81
88
  extra_tags = ["type:#{event[:type]}"]
82
89
 
83
90
  if event.payload[:caller].respond_to?(:messages)
84
- metadata = event.payload[:caller].messages.metadata
85
-
86
- extra_tags += [
87
- "topic:#{metadata.topic}",
88
- "partition:#{metadata.partition}"
89
- ]
91
+ extra_tags += consumer_tags(event.payload[:caller])
90
92
  end
91
93
 
92
94
  count('error_occurred', 1, tags: default_tags + extra_tags)
@@ -99,21 +101,23 @@ module Karafka
99
101
  time_taken = event[:time]
100
102
  messages_count = event[:messages_buffer].size
101
103
 
102
- histogram('listener.polling.time_taken', time_taken, tags: default_tags)
103
- histogram('listener.polling.messages', messages_count, tags: default_tags)
104
+ consumer_group_id = event[:subscription_group].consumer_group_id
105
+
106
+ extra_tags = ["consumer_group:#{consumer_group_id}"]
107
+
108
+ histogram('listener.polling.time_taken', time_taken, tags: default_tags + extra_tags)
109
+ histogram('listener.polling.messages', messages_count, tags: default_tags + extra_tags)
104
110
  end
105
111
 
106
112
  # Here we report the majority of things related to processing as we have access to the
107
113
  # consumer
108
114
  # @param event [Karafka::Core::Monitoring::Event]
109
115
  def on_consumer_consumed(event)
110
- messages = event.payload[:caller].messages
116
+ consumer = event.payload[:caller]
117
+ messages = consumer.messages
111
118
  metadata = messages.metadata
112
119
 
113
- tags = default_tags + [
114
- "topic:#{metadata.topic}",
115
- "partition:#{metadata.partition}"
116
- ]
120
+ tags = default_tags + consumer_tags(consumer)
117
121
 
118
122
  count('consumer.messages', messages.count, tags: tags)
119
123
  count('consumer.batches', 1, tags: tags)
@@ -126,26 +130,14 @@ module Karafka
126
130
 
127
131
  # @param event [Karafka::Core::Monitoring::Event]
128
132
  def on_consumer_revoked(event)
129
- messages = event.payload[:caller].messages
130
- metadata = messages.metadata
131
-
132
- tags = default_tags + [
133
- "topic:#{metadata.topic}",
134
- "partition:#{metadata.partition}"
135
- ]
133
+ tags = default_tags + consumer_tags(event.payload[:caller])
136
134
 
137
135
  count('consumer.revoked', 1, tags: tags)
138
136
  end
139
137
 
140
138
  # @param event [Karafka::Core::Monitoring::Event]
141
139
  def on_consumer_shutdown(event)
142
- messages = event.payload[:caller].messages
143
- metadata = messages.metadata
144
-
145
- tags = default_tags + [
146
- "topic:#{metadata.topic}",
147
- "partition:#{metadata.partition}"
148
- ]
140
+ tags = default_tags + consumer_tags(event.payload[:caller])
149
141
 
150
142
  count('consumer.shutdown', 1, tags: tags)
151
143
  end
@@ -198,14 +190,15 @@ module Karafka
198
190
  # Reports a given metric statistics to Datadog
199
191
  # @param metric [RdKafkaMetric] metric value object
200
192
  # @param statistics [Hash] hash with all the statistics emitted
201
- def report_metric(metric, statistics)
193
+ # @param base_tags [Array<String>] base tags we want to start with
194
+ def report_metric(metric, statistics, base_tags)
202
195
  case metric.scope
203
196
  when :root
204
197
  public_send(
205
198
  metric.type,
206
199
  metric.name,
207
200
  statistics.fetch(*metric.key_location),
208
- tags: default_tags
201
+ tags: base_tags
209
202
  )
210
203
  when :brokers
211
204
  statistics.fetch('brokers').each_value do |broker_statistics|
@@ -218,13 +211,47 @@ module Karafka
218
211
  metric.type,
219
212
  metric.name,
220
213
  broker_statistics.dig(*metric.key_location),
221
- tags: default_tags + ["broker:#{broker_statistics['nodename']}"]
214
+ tags: base_tags + ["broker:#{broker_statistics['nodename']}"]
222
215
  )
223
216
  end
217
+ when :topics
218
+ statistics.fetch('topics').each do |topic_name, topic_values|
219
+ topic_values['partitions'].each do |partition_name, partition_statistics|
220
+ next if partition_name == '-1'
221
+ # Skip until lag info is available
222
+ next if partition_statistics['consumer_lag'] == -1
223
+
224
+ public_send(
225
+ metric.type,
226
+ metric.name,
227
+ partition_statistics.dig(*metric.key_location),
228
+ tags: base_tags + [
229
+ "topic:#{topic_name}",
230
+ "partition:#{partition_name}"
231
+ ]
232
+ )
233
+ end
234
+ end
224
235
  else
225
236
  raise ArgumentError, metric.scope
226
237
  end
227
238
  end
239
+
240
+ # Builds basic per consumer tags for publication
241
+ #
242
+ # @param consumer [Karafka::BaseConsumer]
243
+ # @return [Array<String>]
244
+ def consumer_tags(consumer)
245
+ messages = consumer.messages
246
+ metadata = messages.metadata
247
+ consumer_group_id = consumer.topic.consumer_group.id
248
+
249
+ [
250
+ "topic:#{metadata.topic}",
251
+ "partition:#{metadata.partition}",
252
+ "consumer_group:#{consumer_group_id}"
253
+ ]
254
+ end
228
255
  end
229
256
  end
230
257
  end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Karafka
4
+ module Instrumentation
5
+ # Namespace for vendor specific instrumentation
6
+ module Vendors
7
+ # Datadog specific instrumentation
8
+ module Datadog
9
+ # Karafka's logger listener for Datadog
10
+ # It depends on the 'ddtrace' gem
11
+ class LoggerListener
12
+ include ::Karafka::Core::Configurable
13
+ extend Forwardable
14
+
15
+ def_delegators :config, :client
16
+
17
+ # `Datadog::Tracing` client that we should use to trace stuff
18
+ setting :client
19
+
20
+ configure
21
+
22
+ # Log levels that we use in this particular listener
23
+ USED_LOG_LEVELS = %i[
24
+ info
25
+ error
26
+ fatal
27
+ ].freeze
28
+
29
+ private_constant :USED_LOG_LEVELS
30
+
31
+ # @param block [Proc] configuration block
32
+ def initialize(&block)
33
+ configure
34
+ setup(&block) if block
35
+ end
36
+
37
+ # @param block [Proc] configuration block
38
+ # @note We define this alias to be consistent with `WaterDrop#setup`
39
+ def setup(&block)
40
+ configure(&block)
41
+ end
42
+
43
+ # Prints info about the fact that a given job has started
44
+ #
45
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
46
+ def on_worker_process(event)
47
+ current_span = client.trace('karafka.consumer')
48
+ push_tags
49
+
50
+ job = event[:job]
51
+ job_type = job.class.to_s.split('::').last
52
+ consumer = job.executor.topic.consumer
53
+ topic = job.executor.topic.name
54
+
55
+ current_span.resource = "#{consumer}#consume"
56
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} started"
57
+
58
+ pop_tags
59
+ end
60
+
61
+ # Prints info about the fact that a given job has finished
62
+ #
63
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
64
+ def on_worker_processed(event)
65
+ push_tags
66
+
67
+ job = event[:job]
68
+ time = event[:time]
69
+ job_type = job.class.to_s.split('::').last
70
+ consumer = job.executor.topic.consumer
71
+ topic = job.executor.topic.name
72
+
73
+ info "[#{job.id}] #{job_type} job for #{consumer} on #{topic} finished in #{time}ms"
74
+
75
+ current_span = client.active_span
76
+ current_span.finish if current_span.present?
77
+
78
+ pop_tags
79
+ end
80
+
81
+ # There are many types of errors that can occur in many places, but we provide a single
82
+ # handler for all of them to simplify error instrumentation.
83
+ # @param event [Karafka::Core::Monitoring::Event] event details including payload
84
+ def on_error_occurred(event)
85
+ push_tags
86
+
87
+ error = event[:error]
88
+ client.active_span&.set_error(error)
89
+
90
+ case event[:type]
91
+ when 'consumer.consume.error'
92
+ error "Consumer consuming error: #{error}"
93
+ when 'consumer.revoked.error'
94
+ error "Consumer on revoked failed due to an error: #{error}"
95
+ when 'consumer.before_enqueue.error'
96
+ error "Consumer before enqueue failed due to an error: #{error}"
97
+ when 'consumer.before_consume.error'
98
+ error "Consumer before consume failed due to an error: #{error}"
99
+ when 'consumer.after_consume.error'
100
+ error "Consumer after consume failed due to an error: #{error}"
101
+ when 'consumer.shutdown.error'
102
+ error "Consumer on shutdown failed due to an error: #{error}"
103
+ when 'worker.process.error'
104
+ fatal "Worker processing failed due to an error: #{error}"
105
+ when 'connection.listener.fetch_loop.error'
106
+ error "Listener fetch loop error: #{error}"
107
+ when 'runner.call.error'
108
+ fatal "Runner crashed due to an error: #{error}"
109
+ when 'app.stopping.error'
110
+ error 'Forceful Karafka server stop'
111
+ when 'librdkafka.error'
112
+ error "librdkafka internal error occurred: #{error}"
113
+ # Those will only occur when retries in the client fail and when they did not stop
114
+ # after back-offs
115
+ when 'connection.client.poll.error'
116
+ error "Data polling error occurred: #{error}"
117
+ else
118
+ pop_tags
119
+ # This should never happen. Please contact the maintainers
120
+ raise Errors::UnsupportedCaseError, event
121
+ end
122
+
123
+ pop_tags
124
+ end
125
+
126
+ USED_LOG_LEVELS.each do |log_level|
127
+ define_method log_level do |*args|
128
+ Karafka.logger.send(log_level, *args)
129
+ end
130
+ end
131
+
132
+ # Pushes datadog's tags to the logger
133
+ # This is required when tracing log lines asynchronously to correlate logs of the same
134
+ # process together
135
+ def push_tags
136
+ return unless Karafka.logger.respond_to?(:push_tags)
137
+
138
+ Karafka.logger.push_tags(client.log_correlation)
139
+ end
140
+
141
+ # Pops datadog's tags from the logger
142
+ # This is required when tracing log lines asynchronously to prevent the logs of
143
+ # different processes from being correlated
144
+ def pop_tags
145
+ return unless Karafka.logger.respond_to?(:pop_tags)
146
+
147
+ Karafka.logger.pop_tags
148
+ end
149
+ end
150
+ end
151
+ end
152
+ end
153
+ end
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # This Karafka component is a Pro component.
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
4
6
  # All of the commercial components are present in the lib/karafka/pro directory of this
5
7
  # repository and their usage requires commercial license agreement.
6
8
  #
@@ -18,90 +20,6 @@ module Karafka
18
20
  # @note In case of using lrj, manual pausing may not be the best idea as resume needs to happen
19
21
  # after each batch is processed.
20
22
  class BaseConsumer < Karafka::BaseConsumer
21
- # Pause for tops 31 years
22
- MAX_PAUSE_TIME = 1_000_000_000_000
23
-
24
- private_constant :MAX_PAUSE_TIME
25
-
26
- # Pauses processing of a given partition until we're done with the processing.
27
- # This ensures, that we can easily poll not reaching the `max.poll.interval`
28
- # @note This needs to happen in the listener thread, because we cannot wait on this being
29
- # executed in the workers. Workers may be already running some LRJ jobs that are blocking
30
- # all the threads until finished, yet unless we pause the incoming partitions information,
31
- # we may be kicked out of the consumer group due to not polling often enough
32
- def on_before_enqueue
33
- return unless topic.long_running_job?
34
-
35
- # This ensures that when running LRJ with VP, things operate as expected run only once
36
- # for all the virtual partitions collectively
37
- coordinator.on_enqueued do
38
- # Pause at the first message in a batch. That way in case of a crash, we will not lose
39
- # any messages.
40
- #
41
- # For VP it applies the same way and since VP cannot be used with MOM we should not have
42
- # any edge cases here.
43
- pause(coordinator.seek_offset, MAX_PAUSE_TIME)
44
- end
45
- end
46
-
47
- # Runs extra logic after consumption that is related to handling long-running jobs
48
- # @note This overwrites the '#on_after_consume' from the base consumer
49
- def on_after_consume
50
- coordinator.on_finished do |last_group_message|
51
- on_after_consume_regular(last_group_message)
52
- end
53
- end
54
-
55
- # Trigger method for running on partition revocation.
56
- #
57
- # @private
58
- def on_revoked
59
- # We do not want to resume on revocation in case of a LRJ.
60
- # For LRJ we resume after the successful processing or do a backoff pause in case of a
61
- # failure. Double non-blocking resume could cause problems in coordination.
62
- resume unless topic.long_running_job?
63
-
64
- coordinator.revoke
65
-
66
- Karafka.monitor.instrument('consumer.revoked', caller: self) do
67
- revoked
68
- end
69
- rescue StandardError => e
70
- Karafka.monitor.instrument(
71
- 'error.occurred',
72
- error: e,
73
- caller: self,
74
- type: 'consumer.revoked.error'
75
- )
76
- end
77
-
78
- private
79
-
80
- # Handles the post-consumption flow depending on topic settings
81
- #
82
- # @param last_group_message [Karafka::Messages::Message]
83
- def on_after_consume_regular(last_group_message)
84
- if coordinator.success?
85
- coordinator.pause_tracker.reset
86
-
87
- # We use the non-blocking one here. If someone needs the blocking one, can implement it
88
- # with manual offset management
89
- # Mark as consumed only if manual offset management is not on
90
- mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?
91
-
92
- # If this is not a long-running job there is nothing for us to do here
93
- return unless topic.long_running_job?
94
-
95
- seek(coordinator.seek_offset) unless revoked?
96
-
97
- resume
98
- else
99
- # If processing failed, we need to pause
100
- # For long running job this will overwrite the default never-ending pause and will cause
101
- # the processing to keep going after the error backoff
102
- pause(coordinator.seek_offset)
103
- end
104
- end
105
23
  end
106
24
  end
107
25
  end