ruby-kafka-aws-iam 1.4.1

Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/Rakefile ADDED
@@ -0,0 +1,8 @@
+ # frozen_string_literal: true
+
+ require "bundler/gem_tasks"
+ require "rspec/core/rake_task"
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ task default: :spec
data/benchmarks/message_encoding.rb ADDED
@@ -0,0 +1,23 @@
+ # frozen_string_literal: true
+
+ require "kafka"
+
+ ready "message serialization" do
+   before do
+     message = Kafka::Protocol::Message.new(
+       value: "hello",
+       key: "world",
+     )
+
+     @io = StringIO.new
+     encoder = Kafka::Protocol::Encoder.new(@io)
+     message.encode(encoder)
+
+     @decoder = Kafka::Protocol::Decoder.new(@io)
+   end
+
+   go "decoding" do
+     @io.rewind
+     Kafka::Protocol::Message.decode(@decoder)
+   end
+ end
data/bin/console ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require "bundler/setup"
+ require "kafka"
+
+ require "pry"
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,5 @@
+ #!/bin/bash
+ set -euo pipefail
+ IFS=$'\n\t'
+
+ bundle install
data/docker-compose.yml ADDED
@@ -0,0 +1,39 @@
+ version: '2'
+ services:
+   zookeeper:
+     image: wurstmeister/zookeeper
+     ports:
+       - "2181:2181"
+   kafka1:
+     image: wurstmeister/kafka:0.11.0.1
+     ports:
+       - "9092:9092"
+     environment:
+       KAFKA_BROKER_ID: 1
+       KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+       KAFKA_ADVERTISED_PORT: 9092
+       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+     volumes:
+       - /var/run/docker.sock:/var/run/docker.sock
+   kafka2:
+     image: wurstmeister/kafka:0.11.0.1
+     ports:
+       - "9093:9092"
+     environment:
+       KAFKA_BROKER_ID: 2
+       KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+       KAFKA_ADVERTISED_PORT: 9093
+       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+     volumes:
+       - /var/run/docker.sock:/var/run/docker.sock
+   kafka3:
+     image: wurstmeister/kafka:0.11.0.1
+     ports:
+       - "9094:9092"
+     environment:
+       KAFKA_BROKER_ID: 3
+       KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+       KAFKA_ADVERTISED_PORT: 9094
+       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+     volumes:
+       - /var/run/docker.sock:/var/run/docker.sock
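For local testing against this three-broker cluster, a minimal connection sketch; 192.168.99.100 is an assumption (the old docker-machine default host IP), so substitute your own Docker host address:

  require "kafka"

  # Seed brokers taken from the advertised host/ports in the compose file above.
  kafka = Kafka.new(
    seed_brokers: ["192.168.99.100:9092", "192.168.99.100:9093", "192.168.99.100:9094"],
    client_id: "compose-smoke-test",
  )

  # List the topics known to the cluster as a quick connectivity check.
  puts kafka.topics.inspect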
data/examples/consumer-group.rb ADDED
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+
+ logger = Logger.new(STDOUT)
+ brokers = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(",")
+
+ # Make sure to create this topic in your Kafka cluster or configure the
+ # cluster to auto-create topics.
+ topic = "text"
+
+ kafka = Kafka.new(
+   seed_brokers: brokers,
+   client_id: "test",
+   socket_timeout: 20,
+   logger: logger,
+ )
+
+ consumer = kafka.consumer(group_id: "test")
+ consumer.subscribe(topic)
+
+ trap("TERM") { consumer.stop }
+ trap("INT") { consumer.stop }
+
+ begin
+   consumer.each_message do |message|
+   end
+ rescue Kafka::ProcessingError => e
+   warn "Got #{e.cause}"
+   consumer.pause(e.topic, e.partition, timeout: 20)
+
+   retry
+ end
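The each_message block in this example is deliberately empty; in a real consumer the processing happens inside it. A minimal sketch (the puts line is illustrative, not part of the gem):

  consumer.each_message do |message|
    # Any exception raised here is wrapped in Kafka::ProcessingError, which
    # carries the topic and partition used by the pause/retry logic above.
    puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
  end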
data/examples/firehose-consumer.rb ADDED
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+ require "dotenv"
+
+ Dotenv.load
+
+ KAFKA_CLIENT_CERT = ENV.fetch("KAFKA_CLIENT_CERT")
+ KAFKA_CLIENT_CERT_KEY = ENV.fetch("KAFKA_CLIENT_CERT_KEY")
+ KAFKA_SERVER_CERT = ENV.fetch("KAFKA_SERVER_CERT")
+ KAFKA_URL = ENV.fetch("KAFKA_URL")
+ KAFKA_BROKERS = KAFKA_URL
+ KAFKA_TOPIC = "test-messages"
+
+ NUM_THREADS = 4
+
+ queue = Queue.new
+
+ threads = NUM_THREADS.times.map do |worker_id|
+   Thread.new do
+     logger = Logger.new($stderr)
+     logger.level = Logger::INFO
+
+     logger.formatter = proc {|severity, datetime, progname, msg|
+       "[#{worker_id}] #{severity.ljust(5)} -- #{msg}\n"
+     }
+
+     kafka = Kafka.new(
+       seed_brokers: KAFKA_BROKERS,
+       logger: logger,
+       connect_timeout: 30,
+       socket_timeout: 30,
+       ssl_client_cert: KAFKA_CLIENT_CERT,
+       ssl_client_cert_key: KAFKA_CLIENT_CERT_KEY,
+       ssl_ca_cert: KAFKA_SERVER_CERT,
+     )
+
+     consumer = kafka.consumer(group_id: "firehose")
+     consumer.subscribe(KAFKA_TOPIC)
+
+     i = 0
+     consumer.each_message do |message|
+       i += 1
+
+       if i % 1000 == 0
+         queue << i
+         i = 0
+       end
+
+       sleep 0.01
+     end
+   end
+ end
+
+ threads.each {|t| t.abort_on_exception = true }
+
+ received_messages = 0
+
+ loop do
+   received_messages += queue.pop
+   puts "===> Received #{received_messages} messages"
+ end
data/examples/firehose-producer.rb ADDED
@@ -0,0 +1,54 @@
+ # frozen_string_literal: true
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+ require "dotenv"
+
+ Dotenv.load
+
+ KAFKA_CLIENT_CERT = ENV.fetch("KAFKA_CLIENT_CERT")
+ KAFKA_CLIENT_CERT_KEY = ENV.fetch("KAFKA_CLIENT_CERT_KEY")
+ KAFKA_SERVER_CERT = ENV.fetch("KAFKA_SERVER_CERT")
+ KAFKA_URL = ENV.fetch("KAFKA_URL")
+ KAFKA_BROKERS = KAFKA_URL
+ KAFKA_TOPIC = "test-messages"
+
+ NUM_THREADS = 20
+
+ threads = NUM_THREADS.times.map do
+   Thread.new do
+     logger = Logger.new($stderr)
+     logger.level = Logger::INFO
+
+     kafka = Kafka.new(
+       seed_brokers: KAFKA_BROKERS,
+       logger: logger,
+       ssl_client_cert: KAFKA_CLIENT_CERT,
+       ssl_client_cert_key: KAFKA_CLIENT_CERT_KEY,
+       ssl_ca_cert: KAFKA_SERVER_CERT,
+     )
+
+     producer = kafka.async_producer(
+       delivery_interval: 1,
+       max_queue_size: 5_000,
+       max_buffer_size: 10_000,
+     )
+
+     begin
+       loop do
+         producer.produce(rand.to_s, key: rand.to_s, topic: KAFKA_TOPIC)
+       end
+     rescue Kafka::BufferOverflow
+       logger.error "Buffer overflow, backing off for 1s"
+       sleep 1
+       retry
+     ensure
+       producer.shutdown
+     end
+   end
+ end
+
+ threads.each {|t| t.abort_on_exception = true }
+
+ threads.map(&:join)
data/examples/simple-consumer.rb ADDED
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ # Consumes lines from a Kafka partition and writes them to STDOUT.
+ #
+ # You need to define the environment variable KAFKA_BROKERS for this
+ # to work, e.g.
+ #
+ #     export KAFKA_BROKERS=localhost:9092
+ #
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+
+ # We don't want log output to clutter the console. Replace `StringIO.new`
+ # with e.g. `$stderr` if you want to see what's happening under the hood.
+ logger = Logger.new(StringIO.new)
+
+ brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+
+ # Make sure to create this topic in your Kafka cluster or configure the
+ # cluster to auto-create topics.
+ topic = "text"
+
+ kafka = Kafka.new(
+   seed_brokers: brokers,
+   client_id: "simple-consumer",
+   socket_timeout: 20,
+   logger: logger,
+ )
+
+ kafka.each_message(topic: topic) do |message|
+   puts message.value
+ end
data/examples/simple-producer.rb ADDED
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ # Reads lines from STDIN, writing them to Kafka.
+ #
+ # You need to define the environment variable KAFKA_BROKERS for this
+ # to work, e.g.
+ #
+ #     export KAFKA_BROKERS=localhost:9092
+ #
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+
+ logger = Logger.new($stderr)
+ brokers = ENV.fetch("KAFKA_BROKERS")
+
+ # Make sure to create this topic in your Kafka cluster or configure the
+ # cluster to auto-create topics.
+ topic = "text"
+
+ kafka = Kafka.new(
+   seed_brokers: brokers,
+   client_id: "simple-producer",
+   logger: logger,
+ )
+
+ producer = kafka.producer
+
+ begin
+   $stdin.each_with_index do |line, index|
+     producer.produce(line, topic: topic)
+
+     # Send messages for every 10 lines.
+     producer.deliver_messages if index % 10 == 0
+   end
+ ensure
+   # Make sure to send any remaining messages.
+   producer.deliver_messages
+
+   producer.shutdown
+ end
data/examples/ssl-producer.rb ADDED
@@ -0,0 +1,44 @@
+ # frozen_string_literal: true
+
+ # Reads lines from STDIN, writing them to Kafka.
+
+ $LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+ require "kafka"
+
+ logger = Logger.new($stderr)
+ brokers = ENV.fetch("KAFKA_BROKERS")
+
+ # Make sure to create this topic in your Kafka cluster or configure the
+ # cluster to auto-create topics.
+ topic = "page-visits"
+
+ ssl_context = OpenSSL::SSL::SSLContext.new
+ ssl_context.set_params(
+   cert: OpenSSL::X509::Certificate.new(ENV.fetch("KAFKA_CLIENT_CERT")),
+   key: OpenSSL::PKey::RSA.new(ENV.fetch("KAFKA_CLIENT_CERT_KEY")),
+ )
+
+ kafka = Kafka.new(
+   seed_brokers: brokers,
+   client_id: "ssl-producer",
+   logger: logger,
+   ssl: true,
+   ssl_context: ssl_context,
+ )
+
+ producer = kafka.producer
+
+ begin
+   $stdin.each_with_index do |line, index|
+     producer.produce(line, topic: topic)
+
+     # Send messages for every 10 lines.
+     producer.deliver_messages if index % 10 == 0
+   end
+ ensure
+   # Make sure to send any remaining messages.
+   producer.deliver_messages
+
+   producer.shutdown
+ end
data/lib/kafka/async_producer.rb ADDED
@@ -0,0 +1,297 @@
+ # frozen_string_literal: true
+
+ require "thread"
+
+ module Kafka
+
+   # A Kafka producer that does all its work in the background so as to not block
+   # the calling thread. Calls to {#deliver_messages} are asynchronous and return
+   # immediately.
+   #
+   # In addition to this property it's possible to define automatic delivery
+   # policies. These allow placing an upper bound on the number of buffered
+   # messages and the time between message deliveries.
+   #
+   # * If `delivery_threshold` is set to a value _n_ higher than zero, the producer
+   #   will automatically deliver its messages once its buffer size reaches _n_.
+   # * If `delivery_interval` is set to a value _n_ higher than zero, the producer
+   #   will automatically deliver its messages every _n_ seconds.
+   #
+   # By default, automatic delivery is disabled and you'll have to call
+   # {#deliver_messages} manually.
+   #
+   # ## Buffer Overflow and Backpressure
+   #
+   # The calling thread communicates with the background thread doing the actual
+   # work using a thread safe queue. While the background thread is busy delivering
+   # messages, new messages will be buffered in the queue. In order to avoid
+   # the queue growing uncontrollably in cases where the background thread gets
+   # stuck or can't follow the pace of the calling thread, there's a maximum
+   # number of messages that is allowed to be buffered. You can configure this
+   # value by setting `max_queue_size`.
+   #
+   # If you produce messages faster than the background producer thread can
+   # deliver them to Kafka you will eventually fill the producer's buffer. Once
+   # this happens, the background thread will stop popping messages off the
+   # queue until it can successfully deliver the buffered messages. The queue
+   # will therefore grow in size, potentially hitting the `max_queue_size` limit.
+   # Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
+   #
+   # Depending on your use case you may want to slow down the rate of messages
+   # being produced or perhaps halt your application completely until the
+   # producer can deliver the buffered messages and clear the message queue.
+   #
+   # ## Example
+   #
+   #     producer = kafka.async_producer(
+   #       # Keep at most 1,000 messages in the buffer before delivering:
+   #       delivery_threshold: 1000,
+   #
+   #       # Deliver messages every 30 seconds:
+   #       delivery_interval: 30,
+   #     )
+   #
+   #     # There's no need to manually call #deliver_messages, it will happen
+   #     # automatically in the background.
+   #     producer.produce("hello", topic: "greetings")
+   #
+   #     # Remember to shut down the producer when you're done with it.
+   #     producer.shutdown
+   #
+   class AsyncProducer
+     # Initializes a new AsyncProducer.
+     #
+     # @param sync_producer [Kafka::Producer] the synchronous producer that should
+     #   be used in the background.
+     # @param max_queue_size [Integer] the maximum number of messages allowed in
+     #   the queue.
+     # @param delivery_threshold [Integer] if greater than zero, the number of
+     #   buffered messages that will automatically trigger a delivery.
+     # @param delivery_interval [Integer] if greater than zero, the number of
+     #   seconds between automatic message deliveries.
+     #
+     def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+       raise ArgumentError unless max_queue_size > 0
+       raise ArgumentError unless delivery_threshold >= 0
+       raise ArgumentError unless delivery_interval >= 0
+
+       @queue = Queue.new
+       @max_queue_size = max_queue_size
+       @instrumenter = instrumenter
+       @logger = TaggedLogger.new(logger)
+
+       @worker = Worker.new(
+         queue: @queue,
+         producer: sync_producer,
+         delivery_threshold: delivery_threshold,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         instrumenter: instrumenter,
+         logger: logger
+       )
+
+       # The timer will no-op if the delivery interval is zero.
+       @timer = Timer.new(queue: @queue, interval: delivery_interval)
+
+       @thread_mutex = Mutex.new
+     end
+
+     # Produces a message to the specified topic.
+     #
+     # @see Kafka::Producer#produce
+     # @param (see Kafka::Producer#produce)
+     # @raise [BufferOverflow] if the message queue is full.
+     # @return [nil]
+     def produce(value, topic:, **options)
+       # We want to fail fast if `topic` isn't a String
+       topic = topic.to_str
+
+       ensure_threads_running!
+
+       if @queue.size >= @max_queue_size
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max queue size (#{@max_queue_size} messages) reached"
+       end
+
+       args = [value, **options.merge(topic: topic)]
+       @queue << [:produce, args]
+
+       @instrumenter.instrument("enqueue_message.async_producer", {
+         topic: topic,
+         queue_size: @queue.size,
+         max_queue_size: @max_queue_size,
+       })
+
+       nil
+     end
+
+     # Asynchronously delivers the buffered messages. This method will return
+     # immediately and the actual work will be done in the background.
+     #
+     # @see Kafka::Producer#deliver_messages
+     # @return [nil]
+     def deliver_messages
+       ensure_threads_running!
+
+       @queue << [:deliver_messages, nil]
+
+       nil
+     end
+
+     # Shuts down the producer, releasing the network resources used. This
+     # method will block until the buffered messages have been delivered.
+     #
+     # @see Kafka::Producer#shutdown
+     # @return [nil]
+     def shutdown
+       ensure_threads_running!
+
+       @timer_thread && @timer_thread.exit
+       @queue << [:shutdown, nil]
+       @worker_thread && @worker_thread.join
+
+       nil
+     end
+
+     private
+
+     def ensure_threads_running!
+       return if worker_thread_alive? && timer_thread_alive?
+
+       @thread_mutex.synchronize do
+         @worker_thread = Thread.new { @worker.run } unless worker_thread_alive?
+         @timer_thread = Thread.new { @timer.run } unless timer_thread_alive?
+       end
+     end
+
+     def worker_thread_alive?
+       !!@worker_thread && @worker_thread.alive?
+     end
+
+     def timer_thread_alive?
+       !!@timer_thread && @timer_thread.alive?
+     end
+
+     def buffer_overflow(topic, message)
+       @instrumenter.instrument("buffer_overflow.async_producer", {
+         topic: topic,
+       })
+
+       raise BufferOverflow, message
+     end
+
+     class Timer
+       def initialize(interval:, queue:)
+         @queue = queue
+         @interval = interval
+       end
+
+       def run
+         # Permanently sleep if the timer interval is zero.
+         Thread.stop if @interval.zero?
+
+         loop do
+           sleep(@interval)
+           @queue << [:deliver_messages, nil]
+         end
+       end
+     end
+
+     class Worker
+       def initialize(queue:, producer:, delivery_threshold:, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+         @queue = queue
+         @producer = producer
+         @delivery_threshold = delivery_threshold
+         @max_retries = max_retries
+         @retry_backoff = retry_backoff
+         @instrumenter = instrumenter
+         @logger = TaggedLogger.new(logger)
+       end
+
+       def run
+         @logger.push_tags(@producer.to_s)
+         @logger.info "Starting async producer in the background..."
+
+         do_loop
+       rescue Exception => e
+         @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+         @logger.error "Async producer crashed!"
+       ensure
+         @producer.shutdown
+         @logger.pop_tags
+       end
+
+       private
+
+       def do_loop
+         loop do
+           begin
+             operation, payload = @queue.pop
+
+             case operation
+             when :produce
+               produce(payload[0], **payload[1])
+               deliver_messages if threshold_reached?
+             when :deliver_messages
+               deliver_messages
+             when :shutdown
+               begin
+                 # Deliver any pending messages first.
+                 @producer.deliver_messages
+               rescue Error => e
+                 @logger.error("Failed to deliver messages during shutdown: #{e.message}")
+
+                 @instrumenter.instrument("drop_messages.async_producer", {
+                   message_count: @producer.buffer_size + @queue.size,
+                 })
+               end
+
+               # Stop the run loop.
+               break
+             else
+               raise "Unknown operation #{operation.inspect}"
+             end
+           end
+         end
+       rescue Kafka::Error => e
+         @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+         @logger.info "Restarting in 10 seconds..."
+
+         sleep 10
+         retry
+       end
+
+       def produce(value, **kwargs)
+         retries = 0
+         begin
+           @producer.produce(value, **kwargs)
+         rescue BufferOverflow => e
+           deliver_messages
+           if @max_retries == -1
+             retry
+           elsif retries < @max_retries
+             retries += 1
+             sleep @retry_backoff**retries
+             retry
+           else
+             @logger.error("Failed to asynchronously produce messages due to BufferOverflow")
+             @instrumenter.instrument("error.async_producer", { error: e })
+           end
+         end
+       end
+
+       def deliver_messages
+         @producer.deliver_messages
+       rescue DeliveryFailed, ConnectionError => e
+         # Failed to deliver messages -- nothing to do but log and try again later.
+         @logger.error("Failed to asynchronously deliver messages: #{e.message}")
+         @instrumenter.instrument("error.async_producer", { error: e })
+       end
+
+       def threshold_reached?
+         @delivery_threshold > 0 &&
+           @producer.buffer_size >= @delivery_threshold
+       end
+     end
+   end
+ end
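As the class comment above notes, once the queue reaches max_queue_size, #produce raises Kafka::BufferOverflow in the calling thread. A minimal backpressure sketch under stated assumptions: the events collection and the one-second backoff are illustrative, not part of the gem:

  producer = kafka.async_producer(
    delivery_threshold: 100,
    max_queue_size: 1_000,
  )

  events.each do |event|
    begin
      producer.produce(event, topic: "events")
    rescue Kafka::BufferOverflow
      # The queue is full; give the background worker time to drain it,
      # then retry the same message.
      sleep 1
      retry
    end
  end

  # Blocks until the buffered messages have been delivered.
  producer.shutdown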