poseidon 0.0.4 → 0.0.5.pre1

Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +2 -0
  4. data/CHANGES.md +4 -0
  5. data/README.md +4 -9
  6. data/Rakefile +3 -0
  7. data/lib/poseidon.rb +41 -24
  8. data/lib/poseidon/broker_pool.rb +7 -3
  9. data/lib/poseidon/cluster_metadata.rb +17 -1
  10. data/lib/poseidon/connection.rb +33 -11
  11. data/lib/poseidon/message_conductor.rb +2 -2
  12. data/lib/poseidon/messages_for_broker.rb +17 -0
  13. data/lib/poseidon/messages_to_send.rb +4 -4
  14. data/lib/poseidon/partition_consumer.rb +67 -24
  15. data/lib/poseidon/producer.rb +4 -1
  16. data/lib/poseidon/protocol/request_buffer.rb +12 -4
  17. data/lib/poseidon/sync_producer.rb +55 -22
  18. data/lib/poseidon/topic_metadata.rb +23 -8
  19. data/lib/poseidon/version.rb +1 -1
  20. data/log/.gitkeep +0 -0
  21. data/poseidon.gemspec +2 -2
  22. data/spec/integration/multiple_brokers/consumer_spec.rb +1 -1
  23. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +35 -0
  24. data/spec/integration/multiple_brokers/rebalance_spec.rb +67 -0
  25. data/spec/integration/multiple_brokers/round_robin_spec.rb +4 -4
  26. data/spec/integration/multiple_brokers/spec_helper.rb +29 -7
  27. data/spec/integration/simple/compression_spec.rb +1 -0
  28. data/spec/integration/simple/connection_spec.rb +1 -1
  29. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +25 -2
  30. data/spec/integration/simple/spec_helper.rb +2 -2
  31. data/spec/integration/simple/truncated_messages_spec.rb +1 -1
  32. data/spec/integration/simple/unavailable_broker_spec.rb +9 -16
  33. data/spec/spec_helper.rb +3 -0
  34. data/spec/test_cluster.rb +51 -48
  35. data/spec/unit/broker_pool_spec.rb +28 -7
  36. data/spec/unit/cluster_metadata_spec.rb +3 -3
  37. data/spec/unit/message_conductor_spec.rb +27 -14
  38. data/spec/unit/messages_to_send_spec.rb +3 -3
  39. data/spec/unit/partition_consumer_spec.rb +28 -10
  40. data/spec/unit/sync_producer_spec.rb +16 -12
  41. metadata +24 -35
  42. data/spec/bin/kafka-run-class.sh +0 -65
data/lib/poseidon/producer.rb
@@ -125,9 +125,12 @@ module Poseidon
     # @option options [Integer] :required_acks (0)
     # The number of acks required per request.
     #
-    # @option options [Integer] :request_timeout_ms (1500)
+    # @option options [Integer] :ack_timeout_ms (1500)
     # How long the producer waits for acks.
     #
+    # @option options [Integer] :socket_timeout_ms (10000)
+    # How long the producer socket waits for any reply from the server.
+    #
     # @api public
     def initialize(brokers, client_id, options = {})
       options = options.dup
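Callers pick these options up through Producer#initialize. A minimal usage sketch (broker address, client id, and topic are placeholders; :ack_timeout_ms replaces the misdocumented :request_timeout_ms, and :socket_timeout_ms is new in this release):

    require 'poseidon'

    producer = Poseidon::Producer.new(["localhost:9092"], "my_client_id",
                                      :required_acks     => 1,
                                      :ack_timeout_ms    => 1500,
                                      :socket_timeout_ms => 10_000)
    producer.send_messages([Poseidon::MessageToSend.new("my_topic", "hello")])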
data/lib/poseidon/protocol/request_buffer.rb
@@ -7,12 +7,10 @@ module Poseidon
   # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
   class RequestBuffer
     def initialize
-      @s = ''.encode("ASCII-8BIT")
+      @s = ''
     end
 
     def append(string)
-      string = string.dup
-      string.force_encoding("ASCII-8BIT")
       @s << string
       nil
     end
@@ -55,23 +53,33 @@ module Poseidon
     end
 
     def prepend_crc32
+      ensure_ascii
      checksum_pos = @s.bytesize
       @s += " "
       yield
+      ensure_ascii
       @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N")
       nil
     end
 
     def prepend_size
+      ensure_ascii
       size_pos = @s.bytesize
       @s += " "
       yield
+      ensure_ascii
       @s[size_pos] = [(@s.bytesize-1) - size_pos].pack("N")
       nil
     end
 
     def to_s
-      @s
+      ensure_ascii
+    end
+
+    private
+
+    def ensure_ascii
+      @s.force_encoding("ASCII-8BIT")
    end
   end
 end
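The rewrite defers encoding work: instead of dup'ing and forcing every appended string to ASCII-8BIT, the buffer is coerced once at the patch points and in to_s via ensure_ascii (force_encoding returns the receiver, so to_s still returns the buffer). For reference, here is the placeholder-then-patch trick used by prepend_size in isolation, as a standalone sketch rather than library code:

    buf = "".force_encoding("ASCII-8BIT")
    size_pos = buf.bytesize                 # where the length prefix will go
    buf += " "                              # reserve a one-byte placeholder
    buf << "some payload".force_encoding("ASCII-8BIT")
    # String#[]= with an integer index replaces that single character with
    # the whole 4-byte packed string, growing the buffer in place.
    buf[size_pos] = [buf.bytesize - 1 - size_pos].pack("N")
    buf[0, 4].unpack("N").first             # => 12, the payload's byte length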
data/lib/poseidon/sync_producer.rb
@@ -21,10 +21,11 @@ module Poseidon
       :retry_backoff_ms => 100,
       :required_acks => 0,
       :ack_timeout_ms => 1500,
+      :socket_timeout_ms => 10_000
     }
 
     attr_reader :client_id, :retry_backoff_ms, :max_send_retries,
-      :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms
+      :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms, :socket_timeout_ms
 
     def initialize(client_id, seed_brokers, options = {})
       @client_id = client_id
@@ -32,7 +33,7 @@ module Poseidon
 
       @cluster_metadata = ClusterMetadata.new
       @message_conductor = MessageConductor.new(@cluster_metadata, @partitioner)
-      @broker_pool = BrokerPool.new(client_id, seed_brokers)
+      @broker_pool = BrokerPool.new(client_id, seed_brokers, socket_timeout_ms)
     end
 
     def send_messages(messages)
@@ -40,22 +41,20 @@ module Poseidon
 
       messages_to_send = MessagesToSend.new(messages, @cluster_metadata)
 
-      (@max_send_retries+1).times do
-        if messages_to_send.needs_metadata? || refresh_interval_elapsed?
-          refreshed_metadata = refresh_metadata(messages_to_send.topic_set)
-          if !refreshed_metadata
-            # If we can't refresh metadata we have to give up.
-            break
-          end
-        end
+      if refresh_interval_elapsed?
+        refresh_metadata(messages_to_send.topic_set)
+      end
+
+      ensure_metadata_available_for_topics(messages_to_send)
 
+      (@max_send_retries+1).times do
         messages_to_send.messages_for_brokers(@message_conductor).each do |messages_for_broker|
-          if send_to_broker(messages_for_broker)
-            messages_to_send.successfully_sent(messages_for_broker)
+          if sent = send_to_broker(messages_for_broker)
+            messages_to_send.successfully_sent(sent)
           end
         end
 
-        if messages_to_send.all_sent? || @max_send_retries == 0
+        if !messages_to_send.pending_messages? || @max_send_retries == 0
           break
         else
           Kernel.sleep retry_backoff_ms / 1000.0
@@ -63,7 +62,11 @@ module Poseidon
         end
       end
 
-      messages_to_send.all_sent?
+      if messages_to_send.pending_messages?
+        raise "Failed to send all messages: #{messages_to_send.messages} remaining"
+      else
+        true
+      end
     end
 
     def shutdown
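Behavioral change worth flagging: send_messages used to return false when some messages could not be delivered, and it now raises. Callers that branched on the return value need a rescue instead, roughly as below (a sketch; the RuntimeError comes from the bare raise above, UnableToFetchMetadata from the metadata retry helper in the next hunk):

    begin
      producer.send_messages(messages)
    rescue Poseidon::Errors::UnableToFetchMetadata
      # metadata could not be fetched after several attempts
    rescue RuntimeError => e
      # "Failed to send all messages: ... remaining" after retries ran out
    end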
@@ -71,8 +74,27 @@ module Poseidon
     end
 
     private
+
+    def ensure_metadata_available_for_topics(messages_to_send)
+      return if !messages_to_send.needs_metadata?
+
+      Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt 1)" }
+      refresh_metadata(messages_to_send.topic_set)
+      return if !messages_to_send.needs_metadata?
+
+      2.times do |n|
+        sleep 5
+
+        Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt #{n+2})" }
+        refresh_metadata(messages_to_send.topic_set)
+        return if !messages_to_send.needs_metadata?
+      end
+      raise Errors::UnableToFetchMetadata
+    end
+
     def handle_options(options)
       @ack_timeout_ms = handle_option(options, :ack_timeout_ms)
+      @socket_timeout_ms = handle_option(options, :socket_timeout_ms)
       @retry_backoff_ms = handle_option(options, :retry_backoff_ms)
 
       @metadata_refresh_interval_ms =
@@ -95,23 +117,34 @@ module Poseidon
     end
 
     def refresh_interval_elapsed?
-      (Time.now - @cluster_metadata.last_refreshed_at) > metadata_refresh_interval_ms
+      @cluster_metadata.last_refreshed_at.nil? ||
+        (Time.now - @cluster_metadata.last_refreshed_at) > metadata_refresh_interval_ms
     end
 
     def refresh_metadata(topics)
-      @cluster_metadata.update(@broker_pool.fetch_metadata(topics))
+      topics_to_refresh = topics.dup
+
+      @cluster_metadata.topics.each do |topic|
+        topics_to_refresh.add(topic)
+      end
+
+      @cluster_metadata.update(@broker_pool.fetch_metadata(topics_to_refresh))
       @broker_pool.update_known_brokers(@cluster_metadata.brokers)
-      true
-    rescue Errors::UnableToFetchMetadata
-      false
     end
 
     def send_to_broker(messages_for_broker)
       return false if messages_for_broker.broker_id == -1
       to_send = messages_for_broker.build_protocol_objects(@compression_config)
-      @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
-                                    required_acks, ack_timeout_ms,
-                                    to_send)
+
+      Poseidon.logger.debug { "Sending messages to broker #{messages_for_broker.broker_id}" }
+      response = @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
+                                               required_acks, ack_timeout_ms,
+                                               to_send)
+      if required_acks == 0
+        messages_for_broker.messages
+      else
+        messages_for_broker.successfully_sent(response)
+      end
     rescue Connection::ConnectionFailedError
       false
     end
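With required_acks == 0 the broker sends no produce response, so everything handed to the broker counts as sent; otherwise successfully_sent inspects the per-partition error codes in the response, and only acked messages leave the pending set that feeds the retry loop. A toy model of that bookkeeping (names and data are illustrative, not library API; 6 is Kafka's NotLeaderForPartition code):

    PartitionAck = Struct.new(:partition, :error)
    pending = { 0 => ["m1"], 1 => ["m2"] }                    # partition => messages
    acks    = [PartitionAck.new(0, 0), PartitionAck.new(1, 6)]

    sent = acks.select { |a| a.error == 0 }.flat_map { |a| pending[a.partition] }
    sent  # => ["m1"]; "m2" stays pending and is retried on the next pass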
data/lib/poseidon/topic_metadata.rb
@@ -26,10 +26,6 @@ module Poseidon
       nil
     end
 
-    def partitions
-      struct.partitions
-    end
-
     def name
       struct.name
     end
@@ -38,6 +34,10 @@ module Poseidon
       eql?(o)
     end
 
+    def exists?
+      struct.error == 0
+    end
+
     def eql?(o)
       struct.eql?(o.struct)
     end
@@ -54,21 +54,36 @@ module Poseidon
       @partition_count ||= struct.partitions.count
     end
 
-    def available_partition_leader_ids
-      @available_partition_leader_ids ||= struct.partitions.select(&:leader)
+    def available_partitions
+      @available_partitions ||= struct.partitions.select do |partition|
+        partition.error == 0 && partition.leader != -1
+      end
     end
 
     def available_partition_count
-      @available_partition_count ||= available_partition_leader_ids.count
+      available_partitions.count
     end
 
     def partition_leader(partition_id)
-      partition = struct.partitions.find { |p| p.id == partition_id }
+      partition = partitions_by_id[partition_id]
       if partition
         partition.leader
       else
         nil
       end
     end
+
+    def to_s
+      struct.partitions.map { |p| p.inspect }.join("\n")
+    end
+
+    private
+    def partitions_by_id
+      @partitions_by_id ||= Hash[partitions.map { |p| [p.id, p] }]
+    end
+
+    def partitions
+      struct.partitions
+    end
   end
 end
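The old available_partition_leader_ids was doubly wrong: select(&:leader) filters nothing, because leader is an Integer broker id and even -1 (no leader) is truthy in Ruby, and it returned partition structs rather than ids. The replacement checks both the error code and leader election. Illustrated with stand-in structs (5 is Kafka's LeaderNotAvailable code):

    Partition = Struct.new(:id, :leader, :error)
    parts = [Partition.new(0, 1, 0),     # healthy, led by broker 1
             Partition.new(1, -1, 5),    # no leader elected
             Partition.new(2, 2, 0)]

    parts.select(&:leader).size                               # => 3, old logic, wrong
    parts.select { |p| p.error == 0 && p.leader != -1 }.size  # => 2, new logic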
data/lib/poseidon/version.rb
@@ -1,4 +1,4 @@
 module Poseidon
   # Unstable! API May Change!
-  VERSION = "0.0.4"
+  VERSION = "0.0.5.pre1"
 end
data/log/.gitkeep (file without changes)
data/poseidon.gemspec
@@ -12,14 +12,14 @@ Gem::Specification.new do |gem|
   gem.summary = %q{Poseidon is a producer and consumer implementation for Kafka >= 0.8}
   gem.homepage = "https://github.com/bpot/poseidon"
   gem.licenses = ["MIT"]
+  gem.required_ruby_version = '>= 1.9.3'
 
   gem.files = `git ls-files`.split($/)
   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.require_paths = ["lib"]
 
-  gem.add_development_dependency(%q<rspec>)
+  gem.add_development_dependency(%q<rspec>, '~> 2.13.0')
   gem.add_development_dependency(%q<yard>)
   gem.add_development_dependency(%q<simplecov>)
-  gem.add_development_dependency(%q<daemon_controller>)
 end
data/spec/integration/multiple_brokers/consumer_spec.rb
@@ -3,7 +3,7 @@ require 'integration/multiple_brokers/spec_helper'
 describe "consuming with multiple brokers" do
   before(:each) do
     # autocreate the topic by asking for information about it
-    c = Connection.new("localhost", 9092, "metadata_fetcher")
+    c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
     md = c.topic_metadata(["test"])
     sleep 1
   end
data/spec/integration/multiple_brokers/metadata_failures_spec.rb
@@ -0,0 +1,35 @@
+require 'integration/multiple_brokers/spec_helper'
+
+describe "handling failures" do
+  describe "metadata failures" do
+    before(:each) do
+      @messages_to_send = [
+        MessageToSend.new("topic1", "hello"),
+        MessageToSend.new("topic2", "hello")
+      ]
+    end
+
+    describe "unable to connect to brokers" do
+      before(:each) do
+        @p = Producer.new(["localhost:1092","localhost:1093","localhost:1094"], "producer")
+      end
+
+      it "triggers callback failures for both topics" do
+        expect {
+          @p.send_messages(@messages_to_send)
+        }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
+      end
+    end
+  end
+
+  describe "unknown topic" do
+    it "receives error callback" do
+      pending "need a way to turn off auto-topic creation just for this test"
+      @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "producer")
+
+      expect {
+        @p.send_messages([MessageToSend.new("imnothere", "hello")])
+      }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
+    end
+  end
+end
data/spec/integration/multiple_brokers/rebalance_spec.rb
@@ -0,0 +1,67 @@
+require 'integration/multiple_brokers/spec_helper'
+
+describe "producer handles rebalancing" do
+  before(:each) do
+    # autocreate the topic by asking for information about it
+    @c = Connection.new("localhost", 9093, "metadata_fetcher", 10_000)
+    @c.topic_metadata(["failure_spec"])
+    sleep 1
+  end
+
+  def current_leadership_mapping(c)
+    metadata = c.topic_metadata(["failure_spec"])
+    topic_metadata = metadata.topics.find { |t| t.name == "failure_spec" }
+    (0..2).map { |p| topic_metadata.partition_leader(p) }
+  end
+
+  it "produces a bunch of messages and consumes all without error" do
+    @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
+                      :required_acks => -1)
+
+    1.upto(25) do |n|
+      @p.send_messages([MessageToSend.new("failure_spec", n.to_s)])
+    end
+
+    # The goal here is to have the producer attempt to send messages
+    # to a broker which is no longer the leader for the partition.
+    #
+    # We accomplish this by turning off a broker which causes leadership
+    # to failover. Then we turn that broker back on and begin sending
+    # messages. While sending messages, the kafka cluster should rebalance
+    # the partitions causing leadership to switch back to the original
+    # broker in the midst of messages being sent.
+    #
+    # We compare leadership before and after the message sending period
+    # to make sure we were successful.
+    $tc.stop_first_broker
+    sleep 30
+    SPEC_LOGGER.info "Pre start #{current_leadership_mapping(@c).inspect}"
+    $tc.start_first_broker
+
+    pre_send_leadership = current_leadership_mapping(@c)
+    SPEC_LOGGER.info "Pre send #{pre_send_leadership.inspect}"
+    26.upto(50) do |n|
+      sleep 0.5
+      @p.send_messages([MessageToSend.new("failure_spec", n.to_s)])
+    end
+    post_send_leadership = current_leadership_mapping(@c)
+    SPEC_LOGGER.info "Post send #{post_send_leadership.inspect}"
+
+    expect(pre_send_leadership).to_not eq(post_send_leadership)
+
+    messages = []
+    0.upto(2) do |partition|
+      consumer = PartitionConsumer.consumer_for_partition("consumer_failure_spect",
+                                                          ["localhost:9092","localhost:9093","localhost:9094"],
+                                                          "failure_spec",
+                                                          partition,
+                                                          :earliest_offset)
+      while (fetched = consumer.fetch).any?
+        messages.push(*fetched)
+      end
+    end
+
+    expect(messages.size).to eq(50)
+    expect(messages.map { |m| m.value.to_i }.sort).to eq((1..50).to_a)
+  end
+end
data/spec/integration/multiple_brokers/round_robin_spec.rb
@@ -3,14 +3,14 @@ require 'integration/multiple_brokers/spec_helper'
 describe "round robin sending" do
   describe "with small message batches" do
     it "evenly distributes messages across brokers" do
-      c = Connection.new("localhost", 9092, "metadata_fetcher")
+      c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
       md = c.topic_metadata(["test"])
       sleep 1
       md = c.topic_metadata(["test"])
 
       test_topic = md.topics.first
 
-      consumers = test_topic.partitions.map do |partition|
+      consumers = test_topic.send(:partitions).map do |partition|
         leader_id = partition.leader
         broker = md.brokers.find { |b| b.id == leader_id }
         PartitionConsumer.new("test_consumer_#{partition.id}", broker.host,
@@ -22,14 +22,14 @@ describe "round robin sending" do
       c.fetch
     end
 
-
     @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
                       :required_acks => 1)
-
     24.times do
       @p.send_messages([MessageToSend.new("test", "hello")])
     end
 
+    sleep 5
+
     consumers.each do |c|
       messages = c.fetch
       expect(messages.size).to eq(8)
data/spec/integration/multiple_brokers/spec_helper.rb
@@ -3,32 +3,54 @@ require 'spec_helper'
 require 'test_cluster'
 
 class ThreeBrokerCluster
-  def initialize
+  def initialize(properties = {})
     @zookeeper = ZookeeperRunner.new
-    @brokers = (9092..9094).map { |port| BrokerRunner.new(port - 9092, port, 3) }
+    @brokers = (9092..9094).map { |port| BrokerRunner.new(port - 9092, port,
+                                                          3,
+                                                          2,
+                                                          properties) }
   end
 
   def start
     @zookeeper.start
     @brokers.each(&:start)
+    sleep 5
   end
 
   def stop
-    @zookeeper.stop
+    SPEC_LOGGER.info "Stopping three broker cluster"
+    SPEC_LOGGER.info "Stopping brokers"
     @brokers.each(&:stop)
+    sleep 5
+
+    SPEC_LOGGER.info "Stopping ZK"
+    @zookeeper.stop
+    sleep 5
+  end
+
+  def stop_first_broker
+    SPEC_LOGGER.info "Stopping first broker"
+    @brokers.first.stop
+    sleep 5
+  end
+
+  def start_first_broker
+    SPEC_LOGGER.info "Starting first broker"
+    @brokers.first.start
   end
 end
 
 RSpec.configure do |config|
-  config.before(:suite) do
+  config.before(:each) do
     JavaRunner.remove_tmp
     JavaRunner.set_kafka_path!
     $tc = ThreeBrokerCluster.new
     $tc.start
-    sleep 5 # wait for cluster to come up
+    SPEC_LOGGER.info "Waiting on cluster"
+    sleep 10 # wait for cluster to come up
   end
 
-  config.after(:suite) do
-    $tc.stop
+  config.after(:each) do
+    $tc.stop if $tc
   end
 end