codeclimate-poseidon 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0
@@ -0,0 +1,86 @@
1
module Poseidon
  # Decides which partition — and therefore which broker — each outgoing
  # message should be routed to.
  #
  # @api private
  class MessageConductor
    # Sentinel values returned when no destination can be determined.
    NO_PARTITION = -1
    NO_BROKER = -1

    # Create a new message conductor
    #
    # @param [ClusterMetadata] cluster_metadata
    #   Cluster metadata; provides per-topic TopicMetadata via #topic_metadata.
    # @param [#call,nil] partitioner
    #   Custom partitioner, invoked as partitioner.call(key, partition_count).
    #   When nil, keyed messages are partitioned by CRC32 of the key.
    def initialize(cluster_metadata, partitioner)
      @cluster_metadata = cluster_metadata
      @partitioner = partitioner

      # Don't always start from partition 0 for keyless (round-robin) messages
      @partition_counter = rand(65536)
    end

    # Determines which partition a message should be sent to.
    #
    # @param [String] topic
    #   Topic we are sending this message to
    #
    # @param [Object] key
    #   Key for this message, may be nil
    #
    # @return [Array(Integer, Integer)]
    #   partition_id and broker_id to which this message should be sent;
    #   [NO_PARTITION, NO_BROKER] when the topic is unknown or has no
    #   available leader.
    def destination(topic, key = nil)
      topic_metadata = topic_metadatas[topic]
      if topic_metadata && topic_metadata.leader_available?
        partition_id = determine_partition(topic_metadata, key)
        broker_id = topic_metadata.partition_leader(partition_id) || NO_BROKER
      else
        partition_id = NO_PARTITION
        broker_id = NO_BROKER
      end

      return partition_id, broker_id
    end

    private

    def topic_metadatas
      @cluster_metadata.topic_metadata
    end

    # Keyed messages hash to a stable partition; keyless ones round-robin.
    def determine_partition(topic_metadata, key)
      if key
        partition_for_keyed_message(topic_metadata, key)
      else
        partition_for_keyless_message(topic_metadata)
      end
    end

    # @raise [Errors::InvalidPartitionError] if a custom partitioner returns
    #   a partition id outside of [0, partition_count)
    def partition_for_keyed_message(topic_metadata, key)
      partition_count = topic_metadata.partition_count
      if @partitioner
        partition_id = @partitioner.call(key, partition_count)

        # Guard against both overflowing and negative partitioner results;
        # either would route the message to a nonexistent partition.
        if partition_id >= partition_count || partition_id < 0
          raise Errors::InvalidPartitionError, "partitioner (#{@partitioner.inspect}) requested #{partition_id} while only #{partition_count} partitions exist"
        end
      else
        partition_id = Zlib::crc32(key) % partition_count
      end

      partition_id
    end

    # Round-robins keyless messages over the partitions that currently
    # have an available leader.
    def partition_for_keyless_message(topic_metadata)
      partition_count = topic_metadata.available_partition_count

      if partition_count > 0
        topic_metadata.available_partitions[next_partition_counter % partition_count].id
      else
        NO_PARTITION
      end
    end

    def next_partition_counter
      @partition_counter += 1
    end
  end
end
@@ -0,0 +1,80 @@
1
module Poseidon
  # A set of messages — the unit Kafka uses to store and transmit messages.
  # Wraps a Protocol::MessageSetStructWithSize.
  #
  # @api private
  class MessageSet
    # Build a message set object from a binary encoded message set,
    # including its leading size field.
    #
    # @param [Protocol::ResponseBuffer] buffer binary encoded message set
    # @return [MessageSet]
    def self.read(buffer)
      ms = MessageSet.new
      ms.struct = Protocol::MessageSetStructWithSize.read(buffer)
      ms
    end

    # Build a message set object from a binary encoded message set that
    # carries no leading size field (the form found inside the value of a
    # compressed message).
    #
    # @param [Protocol::ResponseBuffer] buffer binary encoded message set
    # @return [MessageSet]
    def self.read_without_size(buffer)
      ms = MessageSet.new
      ms.struct = Protocol::MessageSetStruct.read(buffer)
      ms
    end

    attr_accessor :struct

    # @param [Array<Message>] messages initial messages for the set
    def initialize(messages = [])
      self.struct = Protocol::MessageSetStructWithSize.new(messages)
    end

    def ==(other)
      eql?(other)
    end

    def eql?(other)
      struct.eql?(other.struct)
    end

    def objects_with_errors
      struct.objects_with_errors
    end

    def write(buffer)
      struct.write(buffer)
    end

    def <<(message)
      struct.messages << message
    end

    def messages
      struct.messages
    end

    # @return [MessageSet] a new set containing this entire set compressed
    #   into a single message with the given codec
    def compress(codec)
      MessageSet.new([to_compressed_message(codec)])
    end

    # Builds an array of Message objects from the MessageStruct objects.
    # Decompressing messages if necessary.
    #
    # @return [Array<Message>]
    def flatten
      struct.messages.map do |message|
        if message.compressed?
          s = message.decompressed_value
          MessageSet.read_without_size(Protocol::ResponseBuffer.new(s)).flatten
        else
          message
        end
      end.flatten
    end

    private

    def to_compressed_message(codec)
      buffer = Protocol::RequestBuffer.new
      struct.write(buffer)

      # Strip the 4-byte size prefix before compressing: the compressed
      # message's value holds the inner (size-less) message set.
      value = codec.compress(buffer.to_s[4..-1])
      Message.new(:value => value, :attributes => codec.codec_id)
    end
  end
end
@@ -0,0 +1,33 @@
1
module Poseidon
  # A message we want to send to Kafka. Comprised of the
  # topic we want to send it to, the body of the message
  # and an optional key.
  #
  #   mts = Poseidon::MessageToSend.new("topic", "value", "opt_key")
  #
  # @api public
  class MessageToSend
    attr_reader :value, :key, :topic

    # Create a new message for sending to a Kafka broker.
    #
    # @param [String] topic
    #   Topic this message should be sent to.
    #
    # @param [String] value
    #   Value of the message we want to send.
    #
    # @param [String] key
    #   Optional. Message's key, used to route a message
    #   to a specific broker. Otherwise, messages will be
    #   sent to brokers in a round-robin manner.
    #
    # @api public
    def initialize(topic, value, key = nil)
      raise ArgumentError, "Must provide a non-nil topic" if topic.nil?

      @topic, @value, @key = topic, value, key
    end
  end
end
@@ -0,0 +1,56 @@
1
module Poseidon
  # Messages that should be sent to a particular broker.
  # @api private
  class MessagesForBroker
    # We can always retry these errors because they mean none of the kafka brokers persisted the message
    ALWAYS_RETRYABLE = [Poseidon::Errors::LeaderNotAvailable, Poseidon::Errors::NotLeaderForPartition]

    attr_reader :broker_id, :messages

    def initialize(broker_id)
      @broker_id = broker_id
      @topics    = {}
      @messages  = []
    end

    # Add a message destined for a specific partition on this broker.
    def add(message, partition_id)
      @messages << message

      partitions_for_topic = (@topics[message.topic] ||= {})
      (partitions_for_topic[partition_id] ||= []) << message
    end

    # Build protocol objects for this broker!
    def build_protocol_objects(compression_config)
      @topics.map do |topic, messages_by_partition|
        codec = compression_config.compression_codec_for_topic(topic)

        messages_for_partitions = messages_by_partition.map do |partition, messages|
          message_set = MessageSet.new(messages)
          message_set = message_set.compress(codec) if codec
          Protocol::MessagesForPartition.new(partition, message_set)
        end
        Protocol::MessagesForTopic.new(topic, messages_for_partitions)
      end
    end

    # Returns the subset of messages that were persisted: everything except
    # messages whose partition came back with an always-retryable error.
    def successfully_sent(producer_response)
      failed = []
      producer_response.topic_response.each do |topic_response|
        topic_response.partitions.each do |partition|
          next unless ALWAYS_RETRYABLE.include?(partition.error_class)

          Poseidon.logger.debug { "Received #{partition.error_class} when attempting to send messages to #{topic_response.topic} on #{partition.partition}" }
          failed.push(*@topics[topic_response.topic][partition.partition])
        end
      end

      @messages - failed
    end
  end
end
@@ -0,0 +1,47 @@
1
module Poseidon
  # A set of messages that we need to send to the cluster. May be used
  # across multiple send attempts.
  #
  # If a custom partitioner is not used then messages are distributed
  # in round robin fashion to each partition with an available leader.
  #
  # @api private
  class MessagesToSend
    class InvalidPartitionError < StandardError; end
    attr_reader :topic_set, :messages

    # Create a new messages to send object.
    #
    # @param [Array<Message>] messages List of messages we want to send.
    # @param [ClusterMetadata] cluster_metadata
    def initialize(messages, cluster_metadata)
      @messages = messages
      @cluster_metadata = cluster_metadata

      build_topic_set
    end

    # @return [Boolean] true when the cluster metadata does not yet cover
    #   every topic we are sending to
    def needs_metadata?
      !@cluster_metadata.have_metadata_for_topics?(topic_set)
    end

    # Group the pending messages by destination broker.
    #
    # @param [MessageConductor] message_conductor
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers(message_conductor)
      # NOTE(review): the return value of metadata_for_topics was previously
      # bound to an unused local. The call is kept in case it memoizes state
      # on cluster_metadata — confirm and drop it if it is a pure reader.
      @cluster_metadata.metadata_for_topics(topic_set)
      MessagesToSendBatch.new(@messages, message_conductor).messages_for_brokers
    end

    # Remove messages that were persisted so a retry only resends the rest.
    def successfully_sent(messages_sent)
      @messages -= messages_sent
    end

    def pending_messages?
      @messages.any?
    end

    private

    # Collect the distinct set of topics the pending messages belong to.
    def build_topic_set
      @topic_set = Set.new
      @messages.each { |m| @topic_set.add(m.topic) }
    end
  end
end
@@ -0,0 +1,27 @@
1
module Poseidon
  # A batch of messages for an individual send attempt to the cluster.
  # @api private
  class MessagesToSendBatch
    def initialize(messages, message_conductor)
      @messages = messages
      @message_conductor = message_conductor
    end

    # Groups messages by broker and preps them for transmission.
    #
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers
      grouped = @messages.each_with_object({}) do |message, by_broker|
        partition_id, broker_id = @message_conductor.destination(message.topic,
                                                                 message.key)

        # Lazily create one collector per destination broker.
        collector = (by_broker[broker_id] ||= MessagesForBroker.new(broker_id))
        collector.add(message, partition_id)
      end

      grouped.values
    end
  end
end
@@ -0,0 +1,225 @@
1
module Poseidon
  # A primitive Kafka Consumer which operates on a specific broker, topic and partition.
  #
  # Example in the README.
  #
  # @api public
  class PartitionConsumer
    # The offset of the latest message the broker received for this partition.
    # Useful for knowing how far behind the consumer is. This value is only
    # as recent as the last fetch call.
    attr_reader :highwater_mark

    attr_reader :host, :port

    attr_reader :offset

    attr_reader :topic

    # Returns a consumer pointing at the lead broker for the partition.
    #
    # Eventually this will be replaced by higher level consumer functionality,
    # this is a stop-gap.
    #
    def self.consumer_for_partition(client_id, seed_brokers, topic, partition, offset, options = {})
      broker = BrokerPool.open(client_id, seed_brokers, options[:socket_timeout_ms] || 10_000) do |broker_pool|
        cluster_metadata = ClusterMetadata.new
        cluster_metadata.update(broker_pool.fetch_metadata([topic]))

        cluster_metadata.lead_broker_for_partition(topic, partition)
      end

      # NOTE(review): broker may be nil if no leader is known for the
      # partition; that currently surfaces as NoMethodError below — confirm
      # whether a dedicated error class should be raised instead.
      new(client_id, broker.host, broker.port, topic, partition, offset, options)
    end

    # Create a new consumer which reads the specified topic and partition from
    # the host.
    #
    # @param [String] client_id Used to identify this client; should be unique.
    # @param [String] host
    # @param [Integer] port
    # @param [String] topic Topic to read from
    # @param [Integer] partition Partitions are zero indexed.
    # @param [Integer,Symbol] offset
    #   Offset to start reading from. A negative offset can also be passed.
    #   There are a couple special offsets which can be passed as symbols:
    #     :earliest_offset Start reading from the first offset the server has.
    #     :latest_offset   Start reading from the latest offset the server has.
    #
    # @param [Hash] options
    #   These options can all be overridden in each individual fetch command.
    #
    # @option options [Integer] :max_bytes
    #   Maximum number of bytes to fetch
    #   Default: 1048576 (1MB)
    #
    # @option options [Integer] :max_wait_ms
    #   How long to block until the server sends us data.
    #   NOTE: This is only enforced if min_bytes is > 0.
    #   Default: 100 (100ms)
    #
    # @option options [Integer] :min_bytes
    #   Smallest amount of data the server should send us.
    #   Default: 1 (Send us data as soon as it is ready)
    #
    # @option options [Integer] :socket_timeout_ms
    #   How long to wait for reply from server. Should be higher than max_wait_ms.
    #   Default: 10000 (10s)
    #
    # @raise [ArgumentError] on unknown special offsets, unknown options, or
    #   socket_timeout_ms < max_wait_ms
    #
    # @api public
    def initialize(client_id, host, port, topic, partition, offset, options = {})
      @host = host
      @port = port

      handle_options(options)

      @connection = Connection.new(host, port, client_id, @socket_timeout_ms)
      @topic = topic
      @partition = partition
      if Symbol === offset
        raise ArgumentError, "Unknown special offset type: #{offset}" unless [:earliest_offset, :latest_offset].include?(offset)
      end
      @offset = offset
    end

    # Fetch messages from the broker.
    #
    # @param [Hash] options
    #
    # @option options [Integer] :max_bytes
    #   Maximum number of bytes to fetch
    #
    # @option options [Integer] :max_wait_ms
    #   How long to block until the server sends us data.
    #
    # @option options [Integer] :min_bytes
    #   Smallest amount of data the server should send us.
    #
    # @return [Array<FetchedMessage>]
    # @raise [ArgumentError] on unknown options
    #
    # @api public
    def fetch(options = {})
      fetch_max_wait = options.delete(:max_wait_ms) || max_wait_ms
      fetch_max_bytes = options.delete(:max_bytes) || max_bytes
      fetch_min_bytes = options.delete(:min_bytes) || min_bytes

      if options.keys.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end

      topic_fetches = build_topic_fetch_request(fetch_max_bytes)
      fetch_response = @connection.fetch(fetch_max_wait, fetch_min_bytes, topic_fetches)
      topic_response = fetch_response.topic_fetch_responses.first
      partition_response = topic_response.partition_fetch_responses.first

      unless partition_response.error == Errors::NO_ERROR_CODE
        # A negative (relative) offset that fell outside the log's retention
        # window is transparently reset to the earliest offset and retried.
        if @offset < 0 &&
           Errors::ERROR_CODES[partition_response.error] == Errors::OffsetOutOfRange
          @offset = :earliest_offset
          return fetch(options)
        end

        raise Errors::ERROR_CODES[partition_response.error]
      else
        @highwater_mark = partition_response.highwater_mark_offset
        messages = partition_response.message_set.flatten.map do |m|
          FetchedMessage.new(topic_response.topic, m.value, m.key, m.offset)
        end
        # Advance past the last message so the next fetch continues from there.
        if messages.any?
          @offset = messages.last.offset + 1
        end
        messages
      end
    end

    # @return [Integer] next offset we will fetch
    #
    # @api public
    def next_offset
      resolve_offset_if_necessary
      @offset
    end

    # Close the connection to the kafka broker
    #
    # @return [Nil]
    #
    # @api public
    def close
      @connection.close
      nil
    end

    private

    # Default private readers used by fetch when no per-call override is given.
    attr_reader :max_wait_ms, :max_bytes, :min_bytes

    # Validate and apply constructor options; see #initialize for defaults.
    def handle_options(options)
      @max_bytes         = options.delete(:max_bytes) || 1024 * 1024
      @min_bytes         = options.delete(:min_bytes) || 1
      @max_wait_ms       = options.delete(:max_wait_ms) || 10_000
      @socket_timeout_ms = options.delete(:socket_timeout_ms) || @max_wait_ms + 10_000

      if @socket_timeout_ms < @max_wait_ms
        raise ArgumentError, "Setting socket_timeout_ms should be higher than max_wait_ms"
      end

      if options.keys.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end
    end

    # Translate symbolic/relative offsets into a concrete broker offset.
    # No-op when @offset is already a non-negative integer.
    def resolve_offset_if_necessary
      return unless Symbol === @offset || @offset < 0

      # Kafka's offset API uses -2 for "earliest" and -1 for "latest".
      protocol_offset = case @offset
                        when :earliest_offset
                          -2
                        when :latest_offset
                          -1
                        else
                          -1
                        end

      topic_offset_responses = @connection.offset(build_topic_offset_request(protocol_offset))
      partition_offsets = topic_offset_responses.first.partition_offsets
      if partition_offsets.first.error != Errors::NO_ERROR_CODE
        raise Errors::ERROR_CODES[partition_offsets.first.error]
      end

      offset_struct = partition_offsets.first.offsets.first

      # Integer replaces Fixnum here: Fixnum was deprecated in Ruby 2.4 and
      # removed in 3.2, and every Fixnum was already an Integer.
      @offset = if offset_struct.nil?
                  0
                elsif @offset.is_a?(Integer) && @offset < 0
                  # Relative offset: count back from the latest offset.
                  offset_struct.offset + @offset
                else
                  offset_struct.offset
                end
    end

    def build_topic_offset_request(protocol_offset)
      partition_offset_request = Protocol::PartitionOffsetRequest.new(
        @partition,
        protocol_offset,
        1) # max_number_of_offsets

      [Protocol::TopicOffsetRequest.new(topic, [partition_offset_request])]
    end

    def build_topic_fetch_request(max_bytes)
      partition_fetches = [Protocol::PartitionFetch.new(@partition,
                                                        next_offset,
                                                        max_bytes)]
      [Protocol::TopicFetch.new(topic, partition_fetches)]
    end
  end
end