ruby-kafka-aws-iam 1.4.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +314 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1356 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka/async_producer.rb +297 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +838 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +228 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/awsmskiam.rb +128 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +73 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/kafka.rb +373 -0
- data/lib/ruby-kafka.rb +5 -0
- data/ruby-kafka.gemspec +54 -0
- metadata +520 -0
data/Rakefile
ADDED
data/benchmarks/message_encoding.rb
ADDED
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require "kafka"
+
+ready "message serialization" do
+  before do
+    message = Kafka::Protocol::Message.new(
+      value: "hello",
+      key: "world",
+    )
+
+    @io = StringIO.new
+    encoder = Kafka::Protocol::Encoder.new(@io)
+    message.encode(encoder)
+
+    @decoder = Kafka::Protocol::Decoder.new(@io)
+  end
+
+  go "decoding" do
+    @io.rewind
+    Kafka::Protocol::Message.decode(@decoder)
+  end
+end
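The benchmark above uses the readygo harness (the `ready`/`go` DSL configured by the `.readygo` file in the package root). For a rough point of comparison, a minimal sketch that times the same encode/decode round-trip with Ruby's stdlib `Benchmark` module — the 10,000-iteration count is an arbitrary choice, not something the package specifies:

    require "benchmark"
    require "stringio"
    require "kafka"

    io = StringIO.new
    message = Kafka::Protocol::Message.new(value: "hello", key: "world")

    elapsed = Benchmark.realtime do
      10_000.times do
        # Reset the buffer, encode the message, then decode it back.
        io.truncate(0)
        io.rewind
        message.encode(Kafka::Protocol::Encoder.new(io))
        io.rewind
        Kafka::Protocol::Message.decode(Kafka::Protocol::Decoder.new(io))
      end
    end

    puts format("%.3fs for 10,000 encode/decode round-trips", elapsed)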
data/bin/console
ADDED
data/bin/setup
ADDED
data/docker-compose.yml
ADDED
@@ -0,0 +1,39 @@
+version: '2'
+services:
+  zookeeper:
+    image: wurstmeister/zookeeper
+    ports:
+      - "2181:2181"
+  kafka1:
+    image: wurstmeister/kafka:0.11.0.1
+    ports:
+      - "9092:9092"
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+      KAFKA_ADVERTISED_PORT: 9092
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+  kafka2:
+    image: wurstmeister/kafka:0.11.0.1
+    ports:
+      - "9093:9092"
+    environment:
+      KAFKA_BROKER_ID: 2
+      KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+      KAFKA_ADVERTISED_PORT: 9093
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+  kafka3:
+    image: wurstmeister/kafka:0.11.0.1
+    ports:
+      - "9094:9092"
+    environment:
+      KAFKA_BROKER_ID: 3
+      KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
+      KAFKA_ADVERTISED_PORT: 9094
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
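The compose file advertises all three brokers on 192.168.99.100 (a typical docker-machine address). A minimal smoke-test sketch against that cluster — the client id and topic name here are illustrative, and the topic must exist or auto-creation must be enabled:

    require "kafka"

    # Seed with the three advertised broker addresses from the compose file.
    kafka = Kafka.new(
      seed_brokers: ["192.168.99.100:9092", "192.168.99.100:9093", "192.168.99.100:9094"],
      client_id: "compose-smoke-test",
    )

    # One-off synchronous delivery; raises if no broker is reachable.
    kafka.deliver_message("ping", topic: "test")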
data/examples/consumer-group.rb
ADDED
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+
+logger = Logger.new(STDOUT)
+brokers = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(",")
+
+# Make sure to create this topic in your Kafka cluster or configure the
+# cluster to auto-create topics.
+topic = "text"
+
+kafka = Kafka.new(
+  seed_brokers: brokers,
+  client_id: "test",
+  socket_timeout: 20,
+  logger: logger,
+)
+
+consumer = kafka.consumer(group_id: "test")
+consumer.subscribe(topic)
+
+trap("TERM") { consumer.stop }
+trap("INT") { consumer.stop }
+
+begin
+  consumer.each_message do |message|
+  end
+rescue Kafka::ProcessingError => e
+  warn "Got #{e.cause}"
+  consumer.pause(e.topic, e.partition, timeout: 20)
+
+  retry
+end
data/examples/firehose-consumer.rb
ADDED
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+require "dotenv"
+
+Dotenv.load
+
+KAFKA_CLIENT_CERT = ENV.fetch("KAFKA_CLIENT_CERT")
+KAFKA_CLIENT_CERT_KEY = ENV.fetch("KAFKA_CLIENT_CERT_KEY")
+KAFKA_SERVER_CERT = ENV.fetch("KAFKA_SERVER_CERT")
+KAFKA_URL = ENV.fetch("KAFKA_URL")
+KAFKA_BROKERS = KAFKA_URL
+KAFKA_TOPIC = "test-messages"
+
+NUM_THREADS = 4
+
+queue = Queue.new
+
+threads = NUM_THREADS.times.map do |worker_id|
+  Thread.new do
+    logger = Logger.new($stderr)
+    logger.level = Logger::INFO
+
+    logger.formatter = proc {|severity, datetime, progname, msg|
+      "[#{worker_id}] #{severity.ljust(5)} -- #{msg}\n"
+    }
+
+    kafka = Kafka.new(
+      seed_brokers: KAFKA_BROKERS,
+      logger: logger,
+      connect_timeout: 30,
+      socket_timeout: 30,
+      ssl_client_cert: KAFKA_CLIENT_CERT,
+      ssl_client_cert_key: KAFKA_CLIENT_CERT_KEY,
+      ssl_ca_cert: KAFKA_SERVER_CERT,
+    )
+
+    consumer = kafka.consumer(group_id: "firehose")
+    consumer.subscribe(KAFKA_TOPIC)
+
+    i = 0
+    consumer.each_message do |message|
+      i += 1
+
+      if i % 1000 == 0
+        queue << i
+        i = 0
+      end
+
+      sleep 0.01
+    end
+  end
+end
+
+threads.each {|t| t.abort_on_exception = true }
+
+received_messages = 0
+
+loop do
+  received_messages += queue.pop
+  puts "===> Received #{received_messages} messages"
+end
data/examples/firehose-producer.rb
ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+require "dotenv"
+
+Dotenv.load
+
+KAFKA_CLIENT_CERT = ENV.fetch("KAFKA_CLIENT_CERT")
+KAFKA_CLIENT_CERT_KEY = ENV.fetch("KAFKA_CLIENT_CERT_KEY")
+KAFKA_SERVER_CERT = ENV.fetch("KAFKA_SERVER_CERT")
+KAFKA_URL = ENV.fetch("KAFKA_URL")
+KAFKA_BROKERS = KAFKA_URL
+KAFKA_TOPIC = "test-messages"
+
+NUM_THREADS = 20
+
+threads = NUM_THREADS.times.map do
+  Thread.new do
+    logger = Logger.new($stderr)
+    logger.level = Logger::INFO
+
+    kafka = Kafka.new(
+      seed_brokers: KAFKA_BROKERS,
+      logger: logger,
+      ssl_client_cert: KAFKA_CLIENT_CERT,
+      ssl_client_cert_key: KAFKA_CLIENT_CERT_KEY,
+      ssl_ca_cert: KAFKA_SERVER_CERT,
+    )
+
+    producer = kafka.async_producer(
+      delivery_interval: 1,
+      max_queue_size: 5_000,
+      max_buffer_size: 10_000,
+    )
+
+    begin
+      loop do
+        producer.produce(rand.to_s, key: rand.to_s, topic: KAFKA_TOPIC)
+      end
+    rescue Kafka::BufferOverflow
+      logger.error "Buffer overflow, backing off for 1s"
+      sleep 1
+      retry
+    ensure
+      producer.shutdown
+    end
+  end
+end
+
+threads.each {|t| t.abort_on_exception = true }
+
+threads.map(&:join)
data/examples/simple-consumer.rb
ADDED
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+# Consumes lines from a Kafka partition and writes them to STDOUT.
+#
+# You need to define the environment variable KAFKA_BROKERS for this
+# to work, e.g.
+#
+#     export KAFKA_BROKERS=localhost:9092
+#
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+
+# We don't want log output to clutter the console. Replace `StringIO.new`
+# with e.g. `$stderr` if you want to see what's happening under the hood.
+logger = Logger.new(StringIO.new)
+
+brokers = ENV.fetch("KAFKA_BROKERS").split(",")
+
+# Make sure to create this topic in your Kafka cluster or configure the
+# cluster to auto-create topics.
+topic = "text"
+
+kafka = Kafka.new(
+  seed_brokers: brokers,
+  client_id: "simple-consumer",
+  socket_timeout: 20,
+  logger: logger,
+)
+
+kafka.each_message(topic: topic) do |message|
+  puts message.value
+end
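This example uses `Kafka::Client#each_message`, which consumes every partition of the topic from a single process and, unlike the consumer-group example, commits no offsets. A hedged variation that tails only new messages, assuming the client's `start_from_beginning:` option:

    # Skip historical messages and only print ones produced after startup.
    kafka.each_message(topic: topic, start_from_beginning: false) do |message|
      puts message.value
    end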
data/examples/simple-producer.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+# Reads lines from STDIN, writing them to Kafka.
+#
+# You need to define the environment variable KAFKA_BROKERS for this
+# to work, e.g.
+#
+#     export KAFKA_BROKERS=localhost:9092
+#
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+
+logger = Logger.new($stderr)
+brokers = ENV.fetch("KAFKA_BROKERS")
+
+# Make sure to create this topic in your Kafka cluster or configure the
+# cluster to auto-create topics.
+topic = "text"
+
+kafka = Kafka.new(
+  seed_brokers: brokers,
+  client_id: "simple-producer",
+  logger: logger,
+)
+
+producer = kafka.producer
+
+begin
+  $stdin.each_with_index do |line, index|
+    producer.produce(line, topic: topic)
+
+    # Send messages for every 10 lines.
+    producer.deliver_messages if index % 10 == 0
+  end
+ensure
+  # Make sure to send any remaining messages.
+  producer.deliver_messages
+
+  producer.shutdown
+end
data/examples/ssl-producer.rb
ADDED
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+# Reads lines from STDIN, writing them to Kafka.
+
+$LOAD_PATH.unshift(File.expand_path("../../lib", __FILE__))
+
+require "kafka"
+
+logger = Logger.new($stderr)
+brokers = ENV.fetch("KAFKA_BROKERS")
+
+# Make sure to create this topic in your Kafka cluster or configure the
+# cluster to auto-create topics.
+topic = "page-visits"
+
+ssl_context = OpenSSL::SSL::SSLContext.new
+ssl_context.set_params(
+  cert: OpenSSL::X509::Certificate.new(ENV.fetch("KAFKA_CLIENT_CERT")),
+  key: OpenSSL::PKey::RSA.new(ENV.fetch("KAFKA_CLIENT_CERT_KEY")),
+)
+
+kafka = Kafka.new(
+  seed_brokers: brokers,
+  client_id: "ssl-producer",
+  logger: logger,
+  ssl: true,
+  ssl_context: ssl_context,
+)
+
+producer = kafka.producer
+
+begin
+  $stdin.each_with_index do |line, index|
+    producer.produce(line, topic: topic)
+
+    # Send messages for every 10 lines.
+    producer.deliver_messages if index % 10 == 0
+  end
+ensure
+  # Make sure to send any remaining messages.
+  producer.deliver_messages
+
+  producer.shutdown
+end
data/lib/kafka/async_producer.rb
ADDED
@@ -0,0 +1,297 @@
+# frozen_string_literal: true
+
+require "thread"
+
+module Kafka
+
+  # A Kafka producer that does all its work in the background so as to not block
+  # the calling thread. Calls to {#deliver_messages} are asynchronous and return
+  # immediately.
+  #
+  # In addition to this property it's possible to define automatic delivery
+  # policies. These allow placing an upper bound on the number of buffered
+  # messages and the time between message deliveries.
+  #
+  # * If `delivery_threshold` is set to a value _n_ higher than zero, the producer
+  #   will automatically deliver its messages once its buffer size reaches _n_.
+  # * If `delivery_interval` is set to a value _n_ higher than zero, the producer
+  #   will automatically deliver its messages every _n_ seconds.
+  #
+  # By default, automatic delivery is disabled and you'll have to call
+  # {#deliver_messages} manually.
+  #
+  # ## Buffer Overflow and Backpressure
+  #
+  # The calling thread communicates with the background thread doing the actual
+  # work using a thread safe queue. While the background thread is busy delivering
+  # messages, new messages will be buffered in the queue. In order to avoid
+  # the queue growing uncontrollably in cases where the background thread gets
+  # stuck or can't follow the pace of the calling thread, there's a maximum
+  # number of messages that is allowed to be buffered. You can configure this
+  # value by setting `max_queue_size`.
+  #
+  # If you produce messages faster than the background producer thread can
+  # deliver them to Kafka you will eventually fill the producer's buffer. Once
+  # this happens, the background thread will stop popping messages off the
+  # queue until it can successfully deliver the buffered messages. The queue
+  # will therefore grow in size, potentially hitting the `max_queue_size` limit.
+  # Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
+  #
+  # Depending on your use case you may want to slow down the rate of messages
+  # being produced or perhaps halt your application completely until the
+  # producer can deliver the buffered messages and clear the message queue.
+  #
+  # ## Example
+  #
+  #     producer = kafka.async_producer(
+  #       # Keep at most 1.000 messages in the buffer before delivering:
+  #       delivery_threshold: 1000,
+  #
+  #       # Deliver messages every 30 seconds:
+  #       delivery_interval: 30,
+  #     )
+  #
+  #     # There's no need to manually call #deliver_messages, it will happen
+  #     # automatically in the background.
+  #     producer.produce("hello", topic: "greetings")
+  #
+  #     # Remember to shut down the producer when you're done with it.
+  #     producer.shutdown
+  #
+  class AsyncProducer
+    # Initializes a new AsyncProducer.
+    #
+    # @param sync_producer [Kafka::Producer] the synchronous producer that should
+    #   be used in the background.
+    # @param max_queue_size [Integer] the maximum number of messages allowed in
+    #   the queue.
+    # @param delivery_threshold [Integer] if greater than zero, the number of
+    #   buffered messages that will automatically trigger a delivery.
+    # @param delivery_interval [Integer] if greater than zero, the number of
+    #   seconds between automatic message deliveries.
+    #
+    def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+      raise ArgumentError unless max_queue_size > 0
+      raise ArgumentError unless delivery_threshold >= 0
+      raise ArgumentError unless delivery_interval >= 0
+
+      @queue = Queue.new
+      @max_queue_size = max_queue_size
+      @instrumenter = instrumenter
+      @logger = TaggedLogger.new(logger)
+
+      @worker = Worker.new(
+        queue: @queue,
+        producer: sync_producer,
+        delivery_threshold: delivery_threshold,
+        max_retries: max_retries,
+        retry_backoff: retry_backoff,
+        instrumenter: instrumenter,
+        logger: logger
+      )
+
+      # The timer will no-op if the delivery interval is zero.
+      @timer = Timer.new(queue: @queue, interval: delivery_interval)
+
+      @thread_mutex = Mutex.new
+    end
+
+    # Produces a message to the specified topic.
+    #
+    # @see Kafka::Producer#produce
+    # @param (see Kafka::Producer#produce)
+    # @raise [BufferOverflow] if the message queue is full.
+    # @return [nil]
+    def produce(value, topic:, **options)
+      # We want to fail fast if `topic` isn't a String
+      topic = topic.to_str
+
+      ensure_threads_running!
+
+      if @queue.size >= @max_queue_size
+        buffer_overflow topic,
+          "Cannot produce to #{topic}, max queue size (#{@max_queue_size} messages) reached"
+      end
+
+      args = [value, **options.merge(topic: topic)]
+      @queue << [:produce, args]
+
+      @instrumenter.instrument("enqueue_message.async_producer", {
+        topic: topic,
+        queue_size: @queue.size,
+        max_queue_size: @max_queue_size,
+      })
+
+      nil
+    end
+
+    # Asynchronously delivers the buffered messages. This method will return
+    # immediately and the actual work will be done in the background.
+    #
+    # @see Kafka::Producer#deliver_messages
+    # @return [nil]
+    def deliver_messages
+      ensure_threads_running!
+
+      @queue << [:deliver_messages, nil]
+
+      nil
+    end
+
+    # Shuts down the producer, releasing the network resources used. This
+    # method will block until the buffered messages have been delivered.
+    #
+    # @see Kafka::Producer#shutdown
+    # @return [nil]
+    def shutdown
+      ensure_threads_running!
+
+      @timer_thread && @timer_thread.exit
+      @queue << [:shutdown, nil]
+      @worker_thread && @worker_thread.join
+
+      nil
+    end
+
+    private
+
+    def ensure_threads_running!
+      return if worker_thread_alive? && timer_thread_alive?
+
+      @thread_mutex.synchronize do
+        @worker_thread = Thread.new { @worker.run } unless worker_thread_alive?
+        @timer_thread = Thread.new { @timer.run } unless timer_thread_alive?
+      end
+    end
+
+    def worker_thread_alive?
+      !!@worker_thread && @worker_thread.alive?
+    end
+
+    def timer_thread_alive?
+      !!@timer_thread && @timer_thread.alive?
+    end
+
+    def buffer_overflow(topic, message)
+      @instrumenter.instrument("buffer_overflow.async_producer", {
+        topic: topic,
+      })
+
+      raise BufferOverflow, message
+    end
+
+    class Timer
+      def initialize(interval:, queue:)
+        @queue = queue
+        @interval = interval
+      end
+
+      def run
+        # Permanently sleep if the timer interval is zero.
+        Thread.stop if @interval.zero?
+
+        loop do
+          sleep(@interval)
+          @queue << [:deliver_messages, nil]
+        end
+      end
+    end
+
+    class Worker
+      def initialize(queue:, producer:, delivery_threshold:, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+        @queue = queue
+        @producer = producer
+        @delivery_threshold = delivery_threshold
+        @max_retries = max_retries
+        @retry_backoff = retry_backoff
+        @instrumenter = instrumenter
+        @logger = TaggedLogger.new(logger)
+      end
+
+      def run
+        @logger.push_tags(@producer.to_s)
+        @logger.info "Starting async producer in the background..."
+
+        do_loop
+      rescue Exception => e
+        @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+        @logger.error "Async producer crashed!"
+      ensure
+        @producer.shutdown
+        @logger.pop_tags
+      end
+
+      private
+
+      def do_loop
+        loop do
+          begin
+            operation, payload = @queue.pop
+
+            case operation
+            when :produce
+              produce(payload[0], **payload[1])
+              deliver_messages if threshold_reached?
+            when :deliver_messages
+              deliver_messages
+            when :shutdown
+              begin
+                # Deliver any pending messages first.
+                @producer.deliver_messages
+              rescue Error => e
+                @logger.error("Failed to deliver messages during shutdown: #{e.message}")
+
+                @instrumenter.instrument("drop_messages.async_producer", {
+                  message_count: @producer.buffer_size + @queue.size,
+                })
+              end
+
+              # Stop the run loop.
+              break
+            else
+              raise "Unknown operation #{operation.inspect}"
+            end
+          end
+        end
+      rescue Kafka::Error => e
+        @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+        @logger.info "Restarting in 10 seconds..."
+
+        sleep 10
+        retry
+      end
+
+      def produce(value, **kwargs)
+        retries = 0
+        begin
+          @producer.produce(value, **kwargs)
+        rescue BufferOverflow => e
+          deliver_messages
+          if @max_retries == -1
+            retry
+          elsif retries < @max_retries
+            retries += 1
+            sleep @retry_backoff**retries
+            retry
+          else
+            @logger.error("Failed to asynchronously produce messages due to BufferOverflow")
+            @instrumenter.instrument("error.async_producer", { error: e })
+          end
+        end
+      end

+      def deliver_messages
+        @producer.deliver_messages
+      rescue DeliveryFailed, ConnectionError => e
+        # Failed to deliver messages -- nothing to do but log and try again later.
+        @logger.error("Failed to asynchronously deliver messages: #{e.message}")
+        @instrumenter.instrument("error.async_producer", { error: e })
+      end
+
+      def threshold_reached?
+        @delivery_threshold > 0 &&
+          @producer.buffer_size >= @delivery_threshold
+      end
+    end
+  end
+end
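Tying together the backpressure behavior documented in the class comment: once the handoff queue reaches `max_queue_size`, `#produce` raises `Kafka::BufferOverflow` and the caller chooses whether to back off or drop. A minimal usage sketch mirroring the firehose-producer example above — the topic name and one-second backoff are illustrative, and `kafka` is assumed to be an existing `Kafka` client:

    producer = kafka.async_producer(
      delivery_interval: 10, # flush automatically every 10 seconds
      max_queue_size: 1_000, # bound the calling-thread handoff queue
    )

    begin
      producer.produce("hello", topic: "events")
    rescue Kafka::BufferOverflow
      # The background worker can't keep up; back off rather than drop.
      sleep 1
      retry
    end

    # Blocks until buffered messages are delivered, then stops the worker.
    producer.shutdown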