codeclimate-poseidon 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0
@@ -0,0 +1,86 @@
1
module Poseidon
  # Routes messages to a partition and the partition's lead broker.
  #
  # @api private
  class MessageConductor
    NO_PARTITION = -1
    NO_BROKER = -1

    # Create a new message conductor
    #
    # @param [ClusterMetadata] cluster_metadata
    #   Metadata for all topics this conductor may receive.
    # @param [#call,nil] partitioner
    #   Custom partitioner invoked as partitioner.call(key, partition_count).
    #   When nil, keyed messages are partitioned by CRC32 of the key.
    def initialize(cluster_metadata, partitioner)
      @cluster_metadata = cluster_metadata
      @partitioner = partitioner

      # Don't always start from partition 0
      @partition_counter = rand(65536)
    end

    # Determines which partition a message should be sent to.
    #
    # @param [String] topic
    #   Topic we are sending this message to
    #
    # @param [Object] key
    #   Key for this message, may be nil
    #
    # @return [Integer,Integer]
    #   partition_id and broker_id to which this message should be sent;
    #   NO_PARTITION/NO_BROKER when the topic is unknown or leaderless.
    def destination(topic, key = nil)
      topic_metadata = topic_metadatas[topic]
      if topic_metadata && topic_metadata.leader_available?
        partition_id = determine_partition(topic_metadata, key)
        broker_id = topic_metadata.partition_leader(partition_id) || NO_BROKER
      else
        partition_id = NO_PARTITION
        broker_id = NO_BROKER
      end

      return partition_id, broker_id
    end

    private

    def topic_metadatas
      @cluster_metadata.topic_metadata
    end

    # Keyed messages must hash consistently; keyless ones round-robin.
    def determine_partition(topic_metadata, key)
      if key
        partition_for_keyed_message(topic_metadata, key)
      else
        partition_for_keyless_message(topic_metadata)
      end
    end

    def partition_for_keyed_message(topic_metadata, key)
      partition_count = topic_metadata.partition_count
      if @partitioner
        partition_id = @partitioner.call(key, partition_count)

        # Reject out-of-range ids in either direction: the original check
        # only caught ids >= partition_count, so a buggy partitioner
        # returning a negative id would silently corrupt routing.
        if partition_id >= partition_count || partition_id < 0
          raise Errors::InvalidPartitionError, "partitioner (#{@partitioner.inspect}) requested #{partition_id} while only #{partition_count} partitions exist"
        end
      else
        partition_id = Zlib::crc32(key) % partition_count
      end

      partition_id
    end

    def partition_for_keyless_message(topic_metadata)
      partition_count = topic_metadata.available_partition_count

      if partition_count > 0
        topic_metadata.available_partitions[next_partition_counter % partition_count].id
      else
        NO_PARTITION
      end
    end

    def next_partition_counter
      @partition_counter += 1
    end
  end
end
@@ -0,0 +1,80 @@
1
module Poseidon
  # Wraps a Protocol message-set struct: building, (de)compression and
  # binary round-tripping of a batch of messages.
  #
  # @api private
  class MessageSet
    attr_accessor :struct

    # Build a message set object from a binary encoded message set
    # (size-prefixed form).
    #
    # @param [ResponseBuffer] buffer binary encoded message set
    # @return [MessageSet]
    def self.read(buffer)
      from_struct(Protocol::MessageSetStructWithSize.read(buffer))
    end

    # Same as .read, but for the form without the leading size field
    # (used for the payload inside a compressed message).
    def self.read_without_size(buffer)
      from_struct(Protocol::MessageSetStruct.read(buffer))
    end

    # @api private
    def self.from_struct(struct)
      set = new
      set.struct = struct
      set
    end

    def initialize(messages = [])
      self.struct = Protocol::MessageSetStructWithSize.new(messages)
    end

    def ==(other)
      eql?(other)
    end

    def eql?(other)
      struct.eql?(other.struct)
    end

    def objects_with_errors
      struct.objects_with_errors
    end

    def write(buffer)
      struct.write(buffer)
    end

    def <<(message)
      messages << message
    end

    def messages
      struct.messages
    end

    # @return [MessageSet] a new set holding this set compressed as one message
    def compress(codec)
      MessageSet.new([to_compressed_message(codec)])
    end

    # Builds an array of Message objects from the MessageStruct objects,
    # decompressing wrapper messages as needed.
    #
    # @return [Array<Message>]
    def flatten
      struct.messages.flat_map do |message|
        if message.compressed?
          decompressed = message.decompressed_value
          MessageSet.read_without_size(Protocol::ResponseBuffer.new(decompressed)).flatten
        else
          message
        end
      end
    end

    private

    # Serializes the whole set, strips the 4-byte size prefix, and wraps
    # the compressed payload in a single Message tagged with the codec id.
    def to_compressed_message(codec)
      request_buffer = Protocol::RequestBuffer.new
      struct.write(request_buffer)

      payload = codec.compress(request_buffer.to_s[4..-1])
      Message.new(:value => payload, :attributes => codec.codec_id)
    end
  end
end
@@ -0,0 +1,33 @@
1
module Poseidon
  # A message destined for Kafka: the target topic, the message body,
  # and an optional routing key.
  #
  #   mts = Poseidon::MessageToSend.new("topic", "value", "opt_key")
  #
  # @api public
  class MessageToSend
    attr_reader :value, :key, :topic

    # Create a new message for sending to a Kafka broker.
    #
    # @param [String] topic
    #   Topic this message should be sent to.
    #
    # @param [String] value
    #   Value of the message we want to send.
    #
    # @param [String] key
    #   Optional. When given, routes the message to a specific broker;
    #   without it, messages are distributed round-robin.
    #
    # @raise [ArgumentError] if topic is nil
    #
    # @api public
    def initialize(topic, value, key = nil)
      # Deliberately checks nil? (not truthiness) so a false topic is
      # treated the same as it always was.
      raise ArgumentError, "Must provide a non-nil topic" if topic.nil?

      @topic = topic
      @value = value
      @key   = key
    end
  end
end
@@ -0,0 +1,56 @@
1
module Poseidon
  # Messages that should be sent to a particular broker, grouped by
  # topic and partition for protocol-object construction.
  # @api private
  class MessagesForBroker
    attr_reader :broker_id, :messages

    def initialize(broker_id)
      @broker_id = broker_id
      @topics = {}
      @messages = []
    end

    # Add a message destined for this broker.
    #
    # @param [MessageToSend] message
    # @param [Integer] partition_id partition the message was routed to
    def add(message, partition_id)
      @messages << message

      @topics[message.topic] ||= {}
      @topics[message.topic][partition_id] ||= []
      @topics[message.topic][partition_id] << message
    end

    # Build protocol objects for this broker, compressing each topic's
    # message sets when the compression config specifies a codec.
    #
    # @return [Array<Protocol::MessagesForTopic>]
    def build_protocol_objects(compression_config)
      @topics.map do |topic, messages_by_partition|
        codec = compression_config.compression_codec_for_topic(topic)

        messages_for_partitions = messages_by_partition.map do |partition, messages|
          message_set = MessageSet.new(messages)
          if codec
            Protocol::MessagesForPartition.new(partition, message_set.compress(codec))
          else
            Protocol::MessagesForPartition.new(partition, message_set)
          end
        end
        Protocol::MessagesForTopic.new(topic, messages_for_partitions)
      end
    end

    # We can always retry these errors because they mean none of the kafka brokers persisted the message.
    # Frozen so the shared constant cannot be mutated at runtime.
    ALWAYS_RETRYABLE = [Poseidon::Errors::LeaderNotAvailable, Poseidon::Errors::NotLeaderForPartition].freeze

    # @param [Protocol::ProducerResponse] producer_response
    # @return [Array<MessageToSend>] the subset of messages the brokers
    #   accepted (everything except partitions that hit a retryable error)
    def successfully_sent(producer_response)
      failed = []
      producer_response.topic_response.each do |topic_response|
        topic_response.partitions.each do |partition|
          if ALWAYS_RETRYABLE.include?(partition.error_class)
            Poseidon.logger.debug { "Received #{partition.error_class} when attempting to send messages to #{topic_response.topic} on #{partition.partition}" }
            failed.push(*@topics[topic_response.topic][partition.partition])
          end
        end
      end

      return @messages - failed
    end
  end
end
@@ -0,0 +1,47 @@
1
module Poseidon
  # A set of messages that we need to send to the cluster. May be used
  # across multiple send attempts.
  #
  # If a custom partitioner is not used, then messages are distributed
  # in round-robin fashion to each partition with an available leader.
  #
  # @api private
  class MessagesToSend
    class InvalidPartitionError < StandardError; end
    attr_reader :topic_set, :messages

    # Create a new messages to send object.
    #
    # @param [Array<MessageToSend>] messages List of messages we want to send.
    # @param [ClusterMetadata] cluster_metadata
    def initialize(messages, cluster_metadata)
      @messages = messages
      @cluster_metadata = cluster_metadata

      build_topic_set
    end

    # @return [Boolean] true when we lack metadata for any topic we must send to
    def needs_metadata?
      !@cluster_metadata.have_metadata_for_topics?(topic_set)
    end

    # Group the pending messages by destination broker.
    #
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers(message_conductor)
      # NOTE: a previous version fetched metadata_for_topics here into an
      # unused local; the lookup result was never consumed, so it is gone.
      MessagesToSendBatch.new(@messages, message_conductor).messages_for_brokers
    end

    # Remove messages that were acknowledged so retries only resend the rest.
    def successfully_sent(messages_sent)
      @messages -= messages_sent
    end

    def pending_messages?
      @messages.any?
    end

    private

    # Collect the distinct topics referenced by the pending messages.
    def build_topic_set
      @topic_set = Set.new
      @messages.each { |m| @topic_set.add(m.topic) }
    end
  end
end
@@ -0,0 +1,27 @@
1
module Poseidon
  # A batch of messages for an individual send attempt to the cluster.
  # @api private
  class MessagesToSendBatch
    def initialize(messages, message_conductor)
      @messages = messages
      @message_conductor = message_conductor
    end

    # Groups messages by destination broker and preps them for transmission.
    #
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers
      # Lazily create one MessagesForBroker per broker id on first sight.
      grouped = Hash.new do |hash, broker_id|
        hash[broker_id] = MessagesForBroker.new(broker_id)
      end

      @messages.each do |message|
        partition_id, broker_id = @message_conductor.destination(message.topic, message.key)
        grouped[broker_id].add(message, partition_id)
      end

      grouped.values
    end
  end
end
@@ -0,0 +1,225 @@
1
module Poseidon
  # A primitive Kafka Consumer which operates on a specific broker, topic and partition.
  #
  # Example in the README.
  #
  # @api public
  class PartitionConsumer
    # The offset of the latest message the broker received for this partition.
    # Useful for knowing how far behind the consumer is. This value is only
    # as recent as the last fetch call.
    attr_reader :highwater_mark

    attr_reader :host, :port

    attr_reader :offset

    attr_reader :topic

    # Returns a consumer pointing at the lead broker for the partition.
    #
    # Eventually this will be replaced by higher level consumer functionality,
    # this is a stop-gap.
    #
    def self.consumer_for_partition(client_id, seed_brokers, topic, partition, offset, options = {})
      broker = BrokerPool.open(client_id, seed_brokers, options[:socket_timeout_ms] || 10_000) do |broker_pool|
        cluster_metadata = ClusterMetadata.new
        cluster_metadata.update(broker_pool.fetch_metadata([topic]))

        cluster_metadata.lead_broker_for_partition(topic, partition)
      end

      new(client_id, broker.host, broker.port, topic, partition, offset, options)
    end

    # Create a new consumer which reads the specified topic and partition from
    # the host.
    #
    # @param [String] client_id Used to identify this client should be unique.
    # @param [String] host
    # @param [Integer] port
    # @param [String] topic Topic to read from
    # @param [Integer] partition Partitions are zero indexed.
    # @param [Integer,Symbol] offset
    #   Offset to start reading from. A negative offset can also be passed.
    #   There are a couple special offsets which can be passed as symbols:
    #     :earliest_offset Start reading from the first offset the server has.
    #     :latest_offset   Start reading from the latest offset the server has.
    #
    # @param [Hash] options
    #   These options can all be overridden in each individual fetch command.
    #
    # @option options [Integer] :max_bytes
    #   Maximum number of bytes to fetch
    #   Default: 1048576 (1MB)
    #
    # @option options [Integer] :max_wait_ms
    #   How long to block until the server sends us data.
    #   NOTE: This is only enforced if min_bytes is > 0.
    #   Default: 100 (100ms)
    #
    # @option options [Integer] :min_bytes
    #   Smallest amount of data the server should send us.
    #   Default: 1 (Send us data as soon as it is ready)
    #
    # @option options [Integer] :socket_timeout_ms
    #   How long to wait for reply from server. Should be higher than max_wait_ms.
    #   Default: 10000 (10s)
    #
    # @raise [ArgumentError] on unknown symbolic offsets or unknown options
    #
    # @api public
    def initialize(client_id, host, port, topic, partition, offset, options = {})
      @host = host
      @port = port

      handle_options(options)

      @connection = Connection.new(host, port, client_id, @socket_timeout_ms)
      @topic = topic
      @partition = partition
      if Symbol === offset
        raise ArgumentError, "Unknown special offset type: #{offset}" unless [:earliest_offset, :latest_offset].include?(offset)
      end
      @offset = offset
    end

    # Fetch messages from the broker.
    #
    # @param [Hash] options
    #
    # @option options [Integer] :max_bytes
    #   Maximum number of bytes to fetch
    #
    # @option options [Integer] :max_wait_ms
    #   How long to block until the server sends us data.
    #
    # @option options [Integer] :min_bytes
    #   Smallest amount of data the server should send us.
    #
    # @return [Array<FetchedMessage>]
    #
    # @api public
    def fetch(options = {})
      fetch_max_wait = options.delete(:max_wait_ms) || max_wait_ms
      fetch_max_bytes = options.delete(:max_bytes) || max_bytes
      fetch_min_bytes = options.delete(:min_bytes) || min_bytes

      if options.keys.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end

      topic_fetches = build_topic_fetch_request(fetch_max_bytes)
      fetch_response = @connection.fetch(fetch_max_wait, fetch_min_bytes, topic_fetches)
      topic_response = fetch_response.topic_fetch_responses.first
      partition_response = topic_response.partition_fetch_responses.first

      unless partition_response.error == Errors::NO_ERROR_CODE
        # A negative (relative) offset that fell off the log's retention
        # window is retried once from the earliest available offset.
        if @offset < 0 &&
           Errors::ERROR_CODES[partition_response.error] == Errors::OffsetOutOfRange
          @offset = :earliest_offset
          return fetch(options)
        end

        raise Errors::ERROR_CODES[partition_response.error]
      else
        @highwater_mark = partition_response.highwater_mark_offset
        messages = partition_response.message_set.flatten.map do |m|
          FetchedMessage.new(topic_response.topic, m.value, m.key, m.offset)
        end
        if messages.any?
          # Advance past the last message so the next fetch continues from there.
          @offset = messages.last.offset + 1
        end
        messages
      end
    end

    # @return [Integer] next offset we will fetch
    #
    # @api public
    def next_offset
      resolve_offset_if_necessary
      @offset
    end

    # Close the connection to the kafka broker
    #
    # @return [Nil]
    #
    # @api public
    def close
      @connection.close
      nil
    end

    private

    def handle_options(options)
      @max_bytes = options.delete(:max_bytes) || 1024*1024
      @min_bytes = options.delete(:min_bytes) || 1
      @max_wait_ms = options.delete(:max_wait_ms) || 10_000
      @socket_timeout_ms = options.delete(:socket_timeout_ms) || @max_wait_ms + 10_000

      if @socket_timeout_ms < @max_wait_ms
        raise ArgumentError, "Setting socket_timeout_ms should be higher than max_wait_ms"
      end

      if options.keys.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end
    end

    def max_wait_ms
      @max_wait_ms
    end

    def max_bytes
      @max_bytes
    end

    def min_bytes
      @min_bytes
    end

    # Translate symbolic or negative offsets into a concrete broker offset.
    def resolve_offset_if_necessary
      return unless Symbol === @offset || @offset < 0

      # -2 / -1 are the Kafka protocol's sentinel values for earliest/latest.
      protocol_offset = case @offset
                        when :earliest_offset
                          -2
                        when :latest_offset
                          -1
                        else
                          -1
                        end

      topic_offset_responses = @connection.offset(build_topic_offset_request(protocol_offset))
      partition_offsets = topic_offset_responses.first.partition_offsets
      if partition_offsets.first.error != Errors::NO_ERROR_CODE
        raise Errors::ERROR_CODES[partition_offsets.first.error]
      end

      offset_struct = partition_offsets.first.offsets.first

      # NOTE: Fixnum was removed in Ruby 3.2 (unified into Integer in 2.4);
      # the original kind_of?(Fixnum) check raised NameError on modern Ruby.
      @offset = if offset_struct.nil?
                  0
                elsif @offset.kind_of?(Integer) && @offset < 0
                  # Negative offsets are relative to the latest offset.
                  offset_struct.offset + @offset
                else
                  offset_struct.offset
                end
    end

    def build_topic_offset_request(protocol_offset)
      partition_offset_request = Protocol::PartitionOffsetRequest.new(
        @partition,
        protocol_offset,
        1) # max_number_of_offsets

      [Protocol::TopicOffsetRequest.new(topic, [partition_offset_request])]
    end

    def build_topic_fetch_request(max_bytes)
      partition_fetches = [Protocol::PartitionFetch.new(@partition,
                                                        next_offset,
                                                        max_bytes)]
      [Protocol::TopicFetch.new(topic, partition_fetches)]
    end
  end
end