poseidon 0.0.4 → 0.0.5.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +2 -0
  4. data/CHANGES.md +4 -0
  5. data/README.md +4 -9
  6. data/Rakefile +3 -0
  7. data/lib/poseidon.rb +41 -24
  8. data/lib/poseidon/broker_pool.rb +7 -3
  9. data/lib/poseidon/cluster_metadata.rb +17 -1
  10. data/lib/poseidon/connection.rb +33 -11
  11. data/lib/poseidon/message_conductor.rb +2 -2
  12. data/lib/poseidon/messages_for_broker.rb +17 -0
  13. data/lib/poseidon/messages_to_send.rb +4 -4
  14. data/lib/poseidon/partition_consumer.rb +67 -24
  15. data/lib/poseidon/producer.rb +4 -1
  16. data/lib/poseidon/protocol/request_buffer.rb +12 -4
  17. data/lib/poseidon/sync_producer.rb +55 -22
  18. data/lib/poseidon/topic_metadata.rb +23 -8
  19. data/lib/poseidon/version.rb +1 -1
  20. data/log/.gitkeep +0 -0
  21. data/poseidon.gemspec +2 -2
  22. data/spec/integration/multiple_brokers/consumer_spec.rb +1 -1
  23. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +35 -0
  24. data/spec/integration/multiple_brokers/rebalance_spec.rb +67 -0
  25. data/spec/integration/multiple_brokers/round_robin_spec.rb +4 -4
  26. data/spec/integration/multiple_brokers/spec_helper.rb +29 -7
  27. data/spec/integration/simple/compression_spec.rb +1 -0
  28. data/spec/integration/simple/connection_spec.rb +1 -1
  29. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +25 -2
  30. data/spec/integration/simple/spec_helper.rb +2 -2
  31. data/spec/integration/simple/truncated_messages_spec.rb +1 -1
  32. data/spec/integration/simple/unavailable_broker_spec.rb +9 -16
  33. data/spec/spec_helper.rb +3 -0
  34. data/spec/test_cluster.rb +51 -48
  35. data/spec/unit/broker_pool_spec.rb +28 -7
  36. data/spec/unit/cluster_metadata_spec.rb +3 -3
  37. data/spec/unit/message_conductor_spec.rb +27 -14
  38. data/spec/unit/messages_to_send_spec.rb +3 -3
  39. data/spec/unit/partition_consumer_spec.rb +28 -10
  40. data/spec/unit/sync_producer_spec.rb +16 -12
  41. metadata +24 -35
  42. data/spec/bin/kafka-run-class.sh +0 -65
@@ -125,9 +125,12 @@ module Poseidon
125
125
  # @option options [Integer] :required_acks (0)
126
126
  # The number of acks required per request.
127
127
  #
128
- # @option options [Integer] :request_timeout_ms (1500)
128
+ # @option options [Integer] :ack_timeout_ms (1500)
129
129
  # How long the producer waits for acks.
130
130
  #
131
+ # @option options [Integer] :socket_timeout_ms (10000)
132
+ # How long the producer socket waits for any reply from server.
133
+ #
131
134
  # @api public
132
135
  def initialize(brokers, client_id, options = {})
133
136
  options = options.dup
@@ -7,12 +7,10 @@ module Poseidon
7
7
  # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
8
8
  class RequestBuffer
9
9
  def initialize
10
- @s = ''.encode("ASCII-8BIT")
10
+ @s = ''
11
11
  end
12
12
 
13
13
  def append(string)
14
- string = string.dup
15
- string.force_encoding("ASCII-8BIT")
16
14
  @s << string
17
15
  nil
18
16
  end
@@ -55,23 +53,33 @@ module Poseidon
55
53
  end
56
54
 
57
55
  def prepend_crc32
56
+ ensure_ascii
58
57
  checksum_pos = @s.bytesize
59
58
  @s += " "
60
59
  yield
60
+ ensure_ascii
61
61
  @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N")
62
62
  nil
63
63
  end
64
64
 
65
65
  def prepend_size
66
+ ensure_ascii
66
67
  size_pos = @s.bytesize
67
68
  @s += " "
68
69
  yield
70
+ ensure_ascii
69
71
  @s[size_pos] = [(@s.bytesize-1) - size_pos].pack("N")
70
72
  nil
71
73
  end
72
74
 
73
75
  def to_s
74
- @s
76
+ ensure_ascii
77
+ end
78
+
79
+ private
80
+
81
+ def ensure_ascii
82
+ @s.force_encoding("ASCII-8BIT")
75
83
  end
76
84
  end
77
85
  end
@@ -21,10 +21,11 @@ module Poseidon
21
21
  :retry_backoff_ms => 100,
22
22
  :required_acks => 0,
23
23
  :ack_timeout_ms => 1500,
24
+ :socket_timeout_ms => 10_000
24
25
  }
25
26
 
26
27
  attr_reader :client_id, :retry_backoff_ms, :max_send_retries,
27
- :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms
28
+ :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms, :socket_timeout_ms
28
29
  def initialize(client_id, seed_brokers, options = {})
29
30
  @client_id = client_id
30
31
 
@@ -32,7 +33,7 @@ module Poseidon
32
33
 
33
34
  @cluster_metadata = ClusterMetadata.new
34
35
  @message_conductor = MessageConductor.new(@cluster_metadata, @partitioner)
35
- @broker_pool = BrokerPool.new(client_id, seed_brokers)
36
+ @broker_pool = BrokerPool.new(client_id, seed_brokers, socket_timeout_ms)
36
37
  end
37
38
 
38
39
  def send_messages(messages)
@@ -40,22 +41,20 @@ module Poseidon
40
41
 
41
42
  messages_to_send = MessagesToSend.new(messages, @cluster_metadata)
42
43
 
43
- (@max_send_retries+1).times do
44
- if messages_to_send.needs_metadata? || refresh_interval_elapsed?
45
- refreshed_metadata = refresh_metadata(messages_to_send.topic_set)
46
- if !refreshed_metadata
47
- # If we can't refresh metadata we have to give up.
48
- break
49
- end
50
- end
44
+ if refresh_interval_elapsed?
45
+ refresh_metadata(messages_to_send.topic_set)
46
+ end
47
+
48
+ ensure_metadata_available_for_topics(messages_to_send)
51
49
 
50
+ (@max_send_retries+1).times do
52
51
  messages_to_send.messages_for_brokers(@message_conductor).each do |messages_for_broker|
53
- if send_to_broker(messages_for_broker)
54
- messages_to_send.successfully_sent(messages_for_broker)
52
+ if sent = send_to_broker(messages_for_broker)
53
+ messages_to_send.successfully_sent(sent)
55
54
  end
56
55
  end
57
56
 
58
- if messages_to_send.all_sent? || @max_send_retries == 0
57
+ if !messages_to_send.pending_messages? || @max_send_retries == 0
59
58
  break
60
59
  else
61
60
  Kernel.sleep retry_backoff_ms / 1000.0
@@ -63,7 +62,11 @@ module Poseidon
63
62
  end
64
63
  end
65
64
 
66
- messages_to_send.all_sent?
65
+ if messages_to_send.pending_messages?
66
+ raise "Failed to send all messages: #{messages_to_send.messages} remaining"
67
+ else
68
+ true
69
+ end
67
70
  end
68
71
 
69
72
  def shutdown
@@ -71,8 +74,27 @@ module Poseidon
71
74
  end
72
75
 
73
76
  private
77
+
78
+ def ensure_metadata_available_for_topics(messages_to_send)
79
+ return if !messages_to_send.needs_metadata?
80
+
81
+ Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt 1)" }
82
+ refresh_metadata(messages_to_send.topic_set)
83
+ return if !messages_to_send.needs_metadata?
84
+
85
+ 2.times do |n|
86
+ sleep 5
87
+
88
+ Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set}. (Attempt #{n+2})" }
89
+ refresh_metadata(messages_to_send.topic_set)
90
+ return if !messages_to_send.needs_metadata?
91
+ end
92
+ raise Errors::UnableToFetchMetadata
93
+ end
94
+
74
95
  def handle_options(options)
75
96
  @ack_timeout_ms = handle_option(options, :ack_timeout_ms)
97
+ @socket_timeout_ms = handle_option(options, :socket_timeout_ms)
76
98
  @retry_backoff_ms = handle_option(options, :retry_backoff_ms)
77
99
 
78
100
  @metadata_refresh_interval_ms =
@@ -95,23 +117,34 @@ module Poseidon
95
117
  end
96
118
 
97
119
  def refresh_interval_elapsed?
98
- (Time.now - @cluster_metadata.last_refreshed_at) > metadata_refresh_interval_ms
120
+ @cluster_metadata.last_refreshed_at.nil? ||
121
+ (Time.now - @cluster_metadata.last_refreshed_at) > metadata_refresh_interval_ms
99
122
  end
100
123
 
101
124
  def refresh_metadata(topics)
102
- @cluster_metadata.update(@broker_pool.fetch_metadata(topics))
125
+ topics_to_refresh = topics.dup
126
+
127
+ @cluster_metadata.topics.each do |topic|
128
+ topics_to_refresh.add(topic)
129
+ end
130
+
131
+ @cluster_metadata.update(@broker_pool.fetch_metadata(topics_to_refresh))
103
132
  @broker_pool.update_known_brokers(@cluster_metadata.brokers)
104
- true
105
- rescue Errors::UnableToFetchMetadata
106
- false
107
133
  end
108
134
 
109
135
  def send_to_broker(messages_for_broker)
110
136
  return false if messages_for_broker.broker_id == -1
111
137
  to_send = messages_for_broker.build_protocol_objects(@compression_config)
112
- @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
113
- required_acks, ack_timeout_ms,
114
- to_send)
138
+
139
+ Poseidon.logger.debug { "Sending messages to broker #{messages_for_broker.broker_id}" }
140
+ response = @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
141
+ required_acks, ack_timeout_ms,
142
+ to_send)
143
+ if required_acks == 0
144
+ messages_for_broker.messages
145
+ else
146
+ messages_for_broker.successfully_sent(response)
147
+ end
115
148
  rescue Connection::ConnectionFailedError
116
149
  false
117
150
  end
@@ -26,10 +26,6 @@ module Poseidon
26
26
  nil
27
27
  end
28
28
 
29
- def partitions
30
- struct.partitions
31
- end
32
-
33
29
  def name
34
30
  struct.name
35
31
  end
@@ -38,6 +34,10 @@ module Poseidon
38
34
  eql?(o)
39
35
  end
40
36
 
37
+ def exists?
38
+ struct.error == 0
39
+ end
40
+
41
41
  def eql?(o)
42
42
  struct.eql?(o.struct)
43
43
  end
@@ -54,21 +54,36 @@ module Poseidon
54
54
  @partition_count ||= struct.partitions.count
55
55
  end
56
56
 
57
- def available_partition_leader_ids
58
- @available_partition_leader_ids ||= struct.partitions.select(&:leader)
57
+ def available_partitions
58
+ @available_partitions ||= struct.partitions.select do |partition|
59
+ partition.error == 0 && partition.leader != -1
60
+ end
59
61
  end
60
62
 
61
63
  def available_partition_count
62
- @available_partition_count ||= available_partition_leader_ids.count
64
+ available_partitions.count
63
65
  end
64
66
 
65
67
  def partition_leader(partition_id)
66
- partition = struct.partitions.find { |p| p.id == partition_id }
68
+ partition = partitions_by_id[partition_id]
67
69
  if partition
68
70
  partition.leader
69
71
  else
70
72
  nil
71
73
  end
72
74
  end
75
+
76
+ def to_s
77
+ struct.partitions.map { |p| p.inspect }.join("\n")
78
+ end
79
+
80
+ private
81
+ def partitions_by_id
82
+ @partitions_by_id ||= Hash[partitions.map { |p| [p.id, p] }]
83
+ end
84
+
85
+ def partitions
86
+ struct.partitions
87
+ end
73
88
  end
74
89
  end
@@ -1,4 +1,4 @@
1
1
  module Poseidon
2
2
  # Unstable! API May Change!
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5.pre1"
4
4
  end
File without changes
@@ -12,14 +12,14 @@ Gem::Specification.new do |gem|
12
12
  gem.summary = %q{Poseidon is a producer and consumer implementation for Kafka >= 0.8}
13
13
  gem.homepage = "https://github.com/bpot/poseidon"
14
14
  gem.licenses = ["MIT"]
15
+ gem.required_ruby_version = '>= 1.9.3'
15
16
 
16
17
  gem.files = `git ls-files`.split($/)
17
18
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
18
19
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
20
  gem.require_paths = ["lib"]
20
21
 
21
- gem.add_development_dependency(%q<rspec>)
22
+ gem.add_development_dependency(%q<rspec>, '~> 2.13.0')
22
23
  gem.add_development_dependency(%q<yard>)
23
24
  gem.add_development_dependency(%q<simplecov>)
24
- gem.add_development_dependency(%q<daemon_controller>)
25
25
  end
@@ -3,7 +3,7 @@ require 'integration/multiple_brokers/spec_helper'
3
3
  describe "consuming with multiple brokers" do
4
4
  before(:each) do
5
5
  # autocreate the topic by asking for information about it
6
- c = Connection.new("localhost", 9092, "metadata_fetcher")
6
+ c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
7
7
  md = c.topic_metadata(["test"])
8
8
  sleep 1
9
9
  end
@@ -0,0 +1,35 @@
1
+ require 'integration/multiple_brokers/spec_helper'
2
+
3
+ describe "handling failures" do
4
+ describe "metadata failures" do
5
+ before(:each) do
6
+ @messages_to_send = [
7
+ MessageToSend.new("topic1", "hello"),
8
+ MessageToSend.new("topic2", "hello")
9
+ ]
10
+ end
11
+
12
+ describe "unable to connect to brokers" do
13
+ before(:each) do
14
+ @p = Producer.new(["localhost:1092","localhost:1093","localhost:1094"], "producer")
15
+ end
16
+
17
+ it "triggers callback failures for both topics" do
18
+ expect {
19
+ @p.send_messages(@messages_to_send)
20
+ }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
21
+ end
22
+ end
23
+ end
24
+
25
+ describe "unknown topic" do
26
+ it "receives error callback" do
27
+ pending "need a way to turn off auto-topic creation just for this test"
28
+ @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "producer")
29
+
30
+ expect {
31
+ @p.send_messages([MessageToSend.new("imnothere", "hello")])
32
+ }.to raise_error(Poseidon::Errors::UnableToFetchMetadata)
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,67 @@
1
+ require 'integration/multiple_brokers/spec_helper'
2
+
3
+ describe "producer handles rebalancing" do
4
+ before(:each) do
5
+ # autocreate the topic by asking for information about it
6
+ @c = Connection.new("localhost", 9093, "metadata_fetcher", 10_000)
7
+ @c.topic_metadata(["failure_spec"])
8
+ sleep 1
9
+ end
10
+
11
+ def current_leadership_mapping(c)
12
+ metadata = c.topic_metadata(["failure_spec"])
13
+ topic_metadata = metadata.topics.find { |t| t.name == "failure_spec" }
14
+ (0..2).map { |p| topic_metadata.partition_leader(p) }
15
+ end
16
+
17
+ it "produces a bunch of messages and consumes all without error" do
18
+ @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
19
+ :required_acks => -1)
20
+
21
+ 1.upto(25) do |n|
22
+ @p.send_messages([MessageToSend.new("failure_spec", n.to_s)])
23
+ end
24
+
25
+ # The goal here is to have the producer attempt to send messages
26
+ # to a broker which is no longer the leader for the partition.
27
+ #
28
+ # We accomplish this by turning off a broker which causes leadership
29
+ # to failover. Then we turn that broker back on and begin sending
30
+ # messages. While sending messages, the kafka cluster should rebalance
31
+ # the partitions causing leadership to switch back to the original
32
+ # broker in the midst of messages being sent.
33
+ #
34
+ # We compare leadership before and after the message sending period
35
+ # to make sure we were successful.
36
+ $tc.stop_first_broker
37
+ sleep 30
38
+ SPEC_LOGGER.info "Pre start #{current_leadership_mapping(@c).inspect}"
39
+ $tc.start_first_broker
40
+
41
+ pre_send_leadership = current_leadership_mapping(@c)
42
+ SPEC_LOGGER.info "Pre send #{pre_send_leadership.inspect}"
43
+ 26.upto(50) do |n|
44
+ sleep 0.5
45
+ @p.send_messages([MessageToSend.new("failure_spec", n.to_s)])
46
+ end
47
+ post_send_leadership = current_leadership_mapping(@c)
48
+ SPEC_LOGGER.info "Post send #{post_send_leadership.inspect}"
49
+
50
+ expect(pre_send_leadership).to_not eq(post_send_leadership)
51
+
52
+ messages = []
53
+ 0.upto(2) do |partition|
54
+ consumer = PartitionConsumer.consumer_for_partition("consumer_failure_spect",
55
+ ["localhost:9092","localhost:9093","localhost:9094"],
56
+ "failure_spec",
57
+ partition,
58
+ :earliest_offset)
59
+ while (fetched = consumer.fetch).any?
60
+ messages.push(*fetched)
61
+ end
62
+ end
63
+
64
+ expect(messages.size).to eq(50)
65
+ expect(messages.map { |m| m.value.to_i }.sort).to eq((1..50).to_a)
66
+ end
67
+ end
@@ -3,14 +3,14 @@ require 'integration/multiple_brokers/spec_helper'
3
3
  describe "round robin sending" do
4
4
  describe "with small message batches" do
5
5
  it "evenly distributes messages across brokers" do
6
- c = Connection.new("localhost", 9092, "metadata_fetcher")
6
+ c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
7
7
  md = c.topic_metadata(["test"])
8
8
  sleep 1
9
9
  md = c.topic_metadata(["test"])
10
10
 
11
11
  test_topic = md.topics.first
12
12
 
13
- consumers = test_topic.partitions.map do |partition|
13
+ consumers = test_topic.send(:partitions).map do |partition|
14
14
  leader_id = partition.leader
15
15
  broker = md.brokers.find { |b| b.id == leader_id }
16
16
  PartitionConsumer.new("test_consumer_#{partition.id}", broker.host,
@@ -22,14 +22,14 @@ describe "round robin sending" do
22
22
  c.fetch
23
23
  end
24
24
 
25
-
26
25
  @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
27
26
  :required_acks => 1)
28
-
29
27
  24.times do
30
28
  @p.send_messages([MessageToSend.new("test", "hello")])
31
29
  end
32
30
 
31
+ sleep 5
32
+
33
33
  consumers.each do |c|
34
34
  messages = c.fetch
35
35
  expect(messages.size).to eq(8)
@@ -3,32 +3,54 @@ require 'spec_helper'
3
3
  require 'test_cluster'
4
4
 
5
5
  class ThreeBrokerCluster
6
- def initialize
6
+ def initialize(properties = {})
7
7
  @zookeeper = ZookeeperRunner.new
8
- @brokers = (9092..9094).map { |port| BrokerRunner.new(port - 9092, port, 3) }
8
+ @brokers = (9092..9094).map { |port| BrokerRunner.new(port - 9092, port,
9
+ 3,
10
+ 2,
11
+ properties) }
9
12
  end
10
13
 
11
14
  def start
12
15
  @zookeeper.start
13
16
  @brokers.each(&:start)
17
+ sleep 5
14
18
  end
15
19
 
16
20
  def stop
17
- @zookeeper.stop
21
+ SPEC_LOGGER.info "Stopping three broker cluster"
22
+ SPEC_LOGGER.info "Stopping brokers"
18
23
  @brokers.each(&:stop)
24
+ sleep 5
25
+
26
+ SPEC_LOGGER.info "Stopping ZK"
27
+ @zookeeper.stop
28
+ sleep 5
29
+ end
30
+
31
+ def stop_first_broker
32
+ SPEC_LOGGER.info "Stopping first broker"
33
+ @brokers.first.stop
34
+ sleep 5
35
+ end
36
+
37
+ def start_first_broker
38
+ SPEC_LOGGER.info "Starting first broker"
39
+ @brokers.first.start
19
40
  end
20
41
  end
21
42
 
22
43
  RSpec.configure do |config|
23
- config.before(:suite) do
44
+ config.before(:each) do
24
45
  JavaRunner.remove_tmp
25
46
  JavaRunner.set_kafka_path!
26
47
  $tc = ThreeBrokerCluster.new
27
48
  $tc.start
28
- sleep 5 # wait for cluster to come up
49
+ SPEC_LOGGER.info "Waiting on cluster"
50
+ sleep 10 # wait for cluster to come up
29
51
  end
30
52
 
31
- config.after(:suite) do
32
- $tc.stop
53
+ config.after(:each) do
54
+ $tc.stop if $tc
33
55
  end
34
56
  end