codeclimate-poseidon 0.0.8

Files changed (77)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0
data/lib/poseidon/protocol/request_buffer.rb ADDED
@@ -0,0 +1,77 @@
+ module Poseidon
+   module Protocol
+     # RequestBuffer allows you to build a Binary string for API requests
+     #
+     # API parallels the primitive types described on the wiki, with some
+     # sugar for prepending message sizes and checksums.
+     # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
+     class RequestBuffer
+       def initialize
+         @s = ''.encode(Encoding::BINARY)
+       end
+
+       def append(string)
+         string = string.dup.force_encoding(Encoding::BINARY)
+         @s << string
+         nil
+       end
+
+       def int8(int8)
+         append([int8].pack("C"))
+       end
+
+       def int16(int16)
+         append([int16].pack("s>"))
+       end
+
+       def int32(int32)
+         append([int32].pack("l>"))
+       end
+
+       def int64(int64)
+         append([int64].pack("q>"))
+       end
+
+       # Add a string
+       #
+       # @param [String] string
+       def string(string)
+         if string.nil?
+           int16(-1)
+         else
+           int16(string.bytesize)
+           append(string)
+         end
+       end
+
+       def bytes(string)
+         if string.nil?
+           int32(-1)
+         else
+           int32(string.bytesize)
+           append(string)
+         end
+       end
+
+       def prepend_crc32
+         checksum_pos = @s.bytesize
+         @s += " "
+         yield
+         @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N")
+         nil
+       end
+
+       def prepend_size
+         size_pos = @s.bytesize
+         @s += " "
+         yield
+         @s[size_pos] = [(@s.bytesize-1) - size_pos].pack("N")
+         nil
+       end
+
+       def to_s
+         @s
+       end
+     end
+   end
+ end
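The two prepend_* helpers rely on a small trick: they reserve a one-character placeholder at the current position, run the block, then assign the packed 4-byte big-endian size or CRC32 to that index, which String#[]= expands in place because the buffer is BINARY-encoded. Below is a minimal usage sketch that is not part of the gem; it assumes `require 'poseidon'` loads the Protocol classes.

    require 'poseidon'

    buffer = Poseidon::Protocol::RequestBuffer.new
    buffer.prepend_size do            # reserves a placeholder, later filled with the byte count of the block
      buffer.int16(0)                 # e.g. an api_key field
      buffer.prepend_crc32 do         # reserves a placeholder, later filled with the CRC32 of the block
        buffer.string("hello")        # int16 length prefix followed by the raw bytes
      end
    end
    buffer.to_s  # => BINARY string: 4-byte size, int16 field, 4-byte CRC32, length-prefixed string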
data/lib/poseidon/protocol/response_buffer.rb ADDED
@@ -0,0 +1,72 @@
+ module Poseidon
+   module Protocol
+     class ResponseBuffer
+       def initialize(response)
+         @s = response
+         @pos = 0
+       end
+
+       def int8
+         byte = @s.byteslice(@pos, 1).unpack("C").first
+         @pos += 1
+         byte
+       end
+
+       def int16
+         short = @s.byteslice(@pos, 2).unpack("s>").first
+         @pos += 2
+         short
+       end
+
+       def int32
+         int = @s.byteslice(@pos, 4).unpack("l>").first
+         @pos += 4
+         int
+       end
+
+       def int64
+         long = @s.byteslice(@pos, 8).unpack("q>").first
+         @pos += 8
+         long
+       end
+
+       def string
+         len = int16
+         string = @s.byteslice(@pos, len)
+         @pos += len
+         string
+       end
+
+       def read(bytes)
+         data = @s.byteslice(@pos, bytes)
+         @pos += bytes
+         data
+       end
+
+       def peek(bytes)
+         @s.byteslice(@pos, bytes)
+       end
+
+       def bytes
+         n = int32
+         if n == -1
+           return nil
+         else
+           read(n)
+         end
+       end
+
+       def bytes_remaining
+         @s.bytesize - @pos
+       end
+
+       def eof?
+         @pos == @s.bytesize
+       end
+
+       def to_s
+         @s
+       end
+     end
+   end
+ end
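ResponseBuffer is the reading counterpart: it walks a byte string with an internal cursor, decoding the same primitive types in the same order they were written. A minimal round-trip sketch (not part of the gem) using the two classes shown above:

    require 'poseidon'

    request = Poseidon::Protocol::RequestBuffer.new
    request.int32(42)
    request.string("topic")
    request.bytes(nil)                # nil is encoded as an int32 of -1

    response = Poseidon::Protocol::ResponseBuffer.new(request.to_s)
    response.int32    # => 42
    response.string   # => "topic"
    response.bytes    # => nil
    response.eof?     # => true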
data/lib/poseidon/sync_producer.rb ADDED
@@ -0,0 +1,161 @@
+ module Poseidon
+   # Used by +Producer+ for sending messages to the Kafka cluster.
+   #
+   # You should not use this interface directly.
+   #
+   # Fetches metadata at appropriate times.
+   # Builds MessagesToSend.
+   # Handles MessageBatchToSend lifecycle.
+   #
+   # Who is responsible for fetching metadata from broker seed list?
+   # Do we want to be fetching from real live brokers eventually?
+   #
+   # @api private
+   class SyncProducer
+     OPTION_DEFAULTS = {
+       :compression_codec => nil,
+       :compressed_topics => nil,
+       :metadata_refresh_interval_ms => 600_000,
+       :partitioner => nil,
+       :max_send_retries => 3,
+       :retry_backoff_ms => 100,
+       :required_acks => 0,
+       :ack_timeout_ms => 1500,
+       :socket_timeout_ms => 10_000
+     }
+
+     attr_reader :client_id, :retry_backoff_ms, :max_send_retries,
+       :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms, :socket_timeout_ms
+     def initialize(client_id, seed_brokers, options = {})
+       @client_id = client_id
+
+       handle_options(options.dup)
+
+       @cluster_metadata = ClusterMetadata.new
+       @message_conductor = MessageConductor.new(@cluster_metadata, @partitioner)
+       @broker_pool = BrokerPool.new(client_id, seed_brokers, socket_timeout_ms)
+     end
+
+     def send_messages(messages)
+       return if messages.empty?
+
+       messages_to_send = MessagesToSend.new(messages, @cluster_metadata)
+
+       if refresh_interval_elapsed?
+         refresh_metadata(messages_to_send.topic_set)
+       end
+
+       ensure_metadata_available_for_topics(messages_to_send)
+
+       (@max_send_retries+1).times do
+         messages_to_send.messages_for_brokers(@message_conductor).each do |messages_for_broker|
+           if sent = send_to_broker(messages_for_broker)
+             messages_to_send.successfully_sent(sent)
+           end
+         end
+
+         if !messages_to_send.pending_messages? || @max_send_retries == 0
+           break
+         else
+           Kernel.sleep retry_backoff_ms / 1000.0
+           reset_metadata
+           ensure_metadata_available_for_topics(messages_to_send)
+         end
+       end
+
+       if messages_to_send.pending_messages?
+         raise "Failed to send all messages: #{messages_to_send.messages} remaining"
+       else
+         true
+       end
+     end
+
+     def close
+       @broker_pool.close
+     end
+
+     alias_method :shutdown, :close
+
+     private
+
+     def ensure_metadata_available_for_topics(messages_to_send)
+       return if !messages_to_send.needs_metadata?
+
+       Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set.inspect}. (Attempt 1)" }
+       refresh_metadata(messages_to_send.topic_set)
+       return if !messages_to_send.needs_metadata?
+
+       2.times do |n|
+         sleep 5
+
+         Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set.inspect}. (Attempt #{n+2})" }
+         refresh_metadata(messages_to_send.topic_set)
+         return if !messages_to_send.needs_metadata?
+       end
+       raise Errors::UnableToFetchMetadata
+     end
+
+     def handle_options(options)
+       @ack_timeout_ms = handle_option(options, :ack_timeout_ms)
+       @socket_timeout_ms = handle_option(options, :socket_timeout_ms)
+       @retry_backoff_ms = handle_option(options, :retry_backoff_ms)
+
+       @metadata_refresh_interval_ms =
+         handle_option(options, :metadata_refresh_interval_ms)
+
+       @required_acks = handle_option(options, :required_acks)
+       @max_send_retries = handle_option(options, :max_send_retries)
+
+       @compression_config = ProducerCompressionConfig.new(
+         handle_option(options, :compression_codec),
+         handle_option(options, :compressed_topics))
+
+       @partitioner = handle_option(options, :partitioner)
+
+       raise ArgumentError, "Unknown options: #{options.keys.inspect}" if options.keys.any?
+     end
+
+     def handle_option(options, sym)
+       options.delete(sym) || OPTION_DEFAULTS[sym]
+     end
+
+     def refresh_interval_elapsed?
+       @cluster_metadata.last_refreshed_at.nil? ||
+         (Time.now - @cluster_metadata.last_refreshed_at) * 1000 > metadata_refresh_interval_ms
+     end
+
+     def refresh_metadata(topics)
+       topics_to_refresh = topics.dup
+
+       @cluster_metadata.topics.each do |topic|
+         topics_to_refresh.add(topic)
+       end
+
+       @cluster_metadata.update(@broker_pool.fetch_metadata(topics_to_refresh))
+       @broker_pool.update_known_brokers(@cluster_metadata.brokers)
+     end
+
+     def reset_metadata
+       Poseidon.logger.debug { "Resetting metadata" }
+       @cluster_metadata.reset
+       @broker_pool.close
+     end
+
+     def send_to_broker(messages_for_broker)
+       return false if messages_for_broker.broker_id == -1
+       to_send = messages_for_broker.build_protocol_objects(@compression_config)
+
+       Poseidon.logger.debug { "Sending messages to broker #{messages_for_broker.broker_id}" }
+       response = @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
+                                                required_acks, ack_timeout_ms,
+                                                to_send)
+       if required_acks == 0
+         messages_for_broker.messages
+       else
+         messages_for_broker.successfully_sent(response)
+       end
+     rescue Connection::ConnectionFailedError
+       false
+     end
+   end
+ end
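SyncProducer is marked @api private and is normally driven by the public Producer class, which handles message construction and compression configuration. For reference, a hedged sketch of driving it directly; it assumes Poseidon::Message accepts :topic and :value options (as Producer builds them) and that a broker is listening on localhost:9092.

    require 'poseidon'

    producer = Poseidon::SyncProducer.new("my_client", ["localhost:9092"],
                                          :required_acks    => 1,    # wait for the leader to ack
                                          :max_send_retries => 3,    # per send_messages call
                                          :retry_backoff_ms => 100)

    # Hypothetical message construction; see data/lib/poseidon/message.rb for the real interface.
    messages = [Poseidon::Message.new(:topic => "events", :value => "hello")]
    producer.send_messages(messages)   # returns true on success; raises if messages remain unsent
    producer.close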
data/lib/poseidon/topic_metadata.rb ADDED
@@ -0,0 +1,89 @@
+ module Poseidon
+   # @api private
+   class TopicMetadata
+     # Build a new TopicMetadata object from its binary representation
+     #
+     # @param [ResponseBuffer] buffer
+     # @return [TopicMetadata]
+     #
+     def self.read(buffer)
+       tm = TopicMetadata.new
+       tm.struct = Protocol::TopicMetadataStruct.read(buffer)
+       tm
+     end
+
+     attr_accessor :struct
+     def initialize(struct=nil)
+       self.struct = struct
+     end
+
+     # Write a binary representation of the TopicMetadata to buffer
+     #
+     # @param [RequestBuffer] buffer
+     # @return [nil]
+     def write(buffer)
+       struct.write(buffer)
+       nil
+     end
+
+     def name
+       struct.name
+     end
+
+     def ==(o)
+       eql?(o)
+     end
+
+     def exists?
+       struct.error == Errors::NO_ERROR_CODE
+     end
+
+     def eql?(o)
+       struct.eql?(o.struct)
+     end
+
+     def objects_with_errors
+       struct.objects_with_errors
+     end
+
+     def leader_available?
+       struct.error_class != Errors::LeaderNotAvailable
+     end
+
+     def partition_count
+       @partition_count ||= struct.partitions.count
+     end
+
+     def available_partitions
+       @available_partitions ||= struct.partitions.select do |partition|
+         (partition.error == Errors::NO_ERROR_CODE || partition.error_class == Errors::ReplicaNotAvailable) && partition.leader != -1
+       end
+     end
+
+     def available_partition_count
+       available_partitions.count
+     end
+
+     def partition_leader(partition_id)
+       partition = partitions_by_id[partition_id]
+       if partition
+         partition.leader
+       else
+         nil
+       end
+     end
+
+     def to_s
+       struct.partitions.map { |p| p.inspect }.join("\n")
+     end
+
+     private
+     def partitions_by_id
+       @partitions_by_id ||= Hash[partitions.map { |p| [p.id, p] }]
+     end
+
+     def partitions
+       struct.partitions
+     end
+   end
+ end
data/lib/poseidon/version.rb ADDED
@@ -0,0 +1,4 @@
+ module Poseidon
+   # Unstable! API May Change!
+   VERSION = "0.0.8"
+ end
data/log/.gitkeep ADDED
File without changes
data/poseidon.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'poseidon/version'
+
+ Gem::Specification.new do |gem|
+   gem.name          = "codeclimate-poseidon"
+   gem.version       = Poseidon::VERSION
+   gem.authors       = ["Bob Potter"]
+   gem.email         = ["bobby.potter@gmail.com"]
+   gem.description   = %q{A Kafka (http://kafka.apache.org/) producer and consumer}
+   gem.summary       = %q{Poseidon is a producer and consumer implementation for Kafka >= 0.8}
+   gem.homepage      = "https://github.com/bpot/poseidon"
+   gem.licenses      = ["MIT"]
+   gem.required_ruby_version = '>= 1.9.3'
+
+   gem.files         = `git ls-files`.split($/)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.add_development_dependency(%q<rspec>, '>= 3')
+   gem.add_development_dependency(%q<yard>)
+   gem.add_development_dependency(%q<simplecov>)
+   gem.add_development_dependency(%q<snappy>)
+   gem.add_development_dependency(%q<timecop>)
+ end
data/spec/integration/multiple_brokers/consumer_spec.rb ADDED
@@ -0,0 +1,45 @@
+ require 'integration/multiple_brokers/spec_helper'
+
+ RSpec.describe "consuming with multiple brokers", :type => :request do
+   include_context "a multiple broker cluster"
+
+   before(:each) do
+     # autocreate the topic by asking for information about it
+     c = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
+     md = c.topic_metadata(["test"])
+     sleep 1
+   end
+
+   it "finds the lead broker for each partition" do
+     brokers = Set.new
+     0.upto(2) do |partition|
+       pc = PartitionConsumer.consumer_for_partition("test_client",
+                                                     ["localhost:9092"],
+                                                     "test", partition,
+                                                     :earliest_offset)
+
+       brokers.add("#{pc.host}:#{pc.port}")
+     end
+     expect(brokers.size).to eq(3)
+   end
+
+   it "consumes from all partitions" do
+     @p = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
+                       :required_acks => 1)
+
+     msgs = 24.times.map { |n| "hello_#{n}" }
+     msgs.each do |msg|
+       @p.send_messages([MessageToSend.new("test", msg)])
+     end
+
+     fetched_messages = []
+     0.upto(2) do |partition|
+       pc = PartitionConsumer.consumer_for_partition("test_client",
+                                                     ["localhost:9092"],
+                                                     "test", partition,
+                                                     :earliest_offset)
+       fetched_messages.push(*pc.fetch)
+     end
+     expect(fetched_messages.map(&:value).sort).to eq(msgs.sort)
+   end
+ end