poseidon 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +12 -0
  4. data/.yardopts +8 -0
  5. data/Gemfile +13 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +71 -0
  8. data/Rakefile +17 -0
  9. data/TODO.md +27 -0
  10. data/examples/consumer.rb +18 -0
  11. data/examples/producer.rb +9 -0
  12. data/lib/poseidon/broker_pool.rb +72 -0
  13. data/lib/poseidon/cluster_metadata.rb +63 -0
  14. data/lib/poseidon/compressed_value.rb +23 -0
  15. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  16. data/lib/poseidon/compression/snappy_codec.rb +17 -0
  17. data/lib/poseidon/compression.rb +30 -0
  18. data/lib/poseidon/connection.rb +138 -0
  19. data/lib/poseidon/fetched_message.rb +37 -0
  20. data/lib/poseidon/message.rb +151 -0
  21. data/lib/poseidon/message_conductor.rb +84 -0
  22. data/lib/poseidon/message_set.rb +80 -0
  23. data/lib/poseidon/message_to_send.rb +33 -0
  24. data/lib/poseidon/messages_for_broker.rb +39 -0
  25. data/lib/poseidon/messages_to_send.rb +47 -0
  26. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  27. data/lib/poseidon/partition_consumer.rb +154 -0
  28. data/lib/poseidon/producer.rb +193 -0
  29. data/lib/poseidon/producer_compression_config.rb +36 -0
  30. data/lib/poseidon/protocol/protocol_struct.rb +238 -0
  31. data/lib/poseidon/protocol/request_buffer.rb +78 -0
  32. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/sync_producer.rb +117 -0
  35. data/lib/poseidon/topic_metadata.rb +65 -0
  36. data/lib/poseidon/version.rb +4 -0
  37. data/lib/poseidon.rb +102 -0
  38. data/poseidon.gemspec +24 -0
  39. data/spec/bin/kafka-run-class.sh +65 -0
  40. data/spec/integration/multiple_brokers/round_robin_spec.rb +39 -0
  41. data/spec/integration/multiple_brokers/spec_helper.rb +34 -0
  42. data/spec/integration/simple/compression_spec.rb +20 -0
  43. data/spec/integration/simple/connection_spec.rb +33 -0
  44. data/spec/integration/simple/multiple_brokers_spec.rb +8 -0
  45. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +97 -0
  46. data/spec/integration/simple/spec_helper.rb +17 -0
  47. data/spec/integration/simple/unavailable_broker_spec.rb +77 -0
  48. data/spec/spec_helper.rb +32 -0
  49. data/spec/test_cluster.rb +205 -0
  50. data/spec/unit/broker_pool_spec.rb +77 -0
  51. data/spec/unit/cluster_metadata_spec.rb +41 -0
  52. data/spec/unit/compression_spec.rb +17 -0
  53. data/spec/unit/connection_spec.rb +4 -0
  54. data/spec/unit/fetched_message_spec.rb +11 -0
  55. data/spec/unit/message_conductor_spec.rb +147 -0
  56. data/spec/unit/message_set_spec.rb +42 -0
  57. data/spec/unit/message_spec.rb +112 -0
  58. data/spec/unit/message_to_send_spec.rb +10 -0
  59. data/spec/unit/messages_for_broker_spec.rb +54 -0
  60. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  61. data/spec/unit/messages_to_send_spec.rb +63 -0
  62. data/spec/unit/partition_consumer_spec.rb +124 -0
  63. data/spec/unit/producer_compression_config_spec.rb +35 -0
  64. data/spec/unit/producer_spec.rb +45 -0
  65. data/spec/unit/protocol_spec.rb +54 -0
  66. data/spec/unit/sync_producer_spec.rb +141 -0
  67. data/spec/unit/topic_metadata_spec.rb +17 -0
  68. metadata +206 -0
@@ -0,0 +1,193 @@
1
module Poseidon
  # Provides a high level interface for sending messages to a cluster
  # of Kafka brokers.
  #
  # ## Producer Creation
  #
  # Producer requires a broker list and a client_id:
  #
  #     producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id",
  #                             :type => :sync)
  #
  # The broker list is only used to bootstrap our knowledge of the cluster --
  # it does not need to contain every broker. The client id should be unique
  # across all clients in the cluster.
  #
  # ## Sending Messages
  #
  # Messages must have a topic before being sent:
  #
  #     messages = []
  #     messages << Poseidon::MessageToSend.new("topic1", "Hello World")
  #     messages << Poseidon::MessageToSend.new("user_updates_topic", user.update, user.id)
  #     producer.send_messages(messages)
  #
  # ## Producer Types
  #
  # There are two types of producers: sync and async. They can be specified
  # via the :type option when creating a producer.
  #
  # ## Sync Producer
  #
  # The :sync producer blocks while it sends messages to the cluster. The more
  # messages you can send per #send_messages call the more efficient it will
  # be.
  #
  # ## Compression
  #
  # When creating the producer you can specify a compression method:
  #
  #     producer = Producer.new(["broker1:port1"], "my_client_id",
  #                             :type => :sync, :compression_codec => :gzip)
  #
  # If you don't specify which topics to compress it will compress all topics.
  # You can specify a set of topics to compress when creating the producer:
  #
  #     producer = Producer.new(["broker1:port1"], "my_client_id",
  #                             :type => :sync, :compression_codec => :gzip,
  #                             :compressed_topics => ["compressed_topic_1"])
  #
  # ## Partitioning
  #
  # For keyless messages the producer will round-robin messages to all
  # _available_ partitions for a topic. This means that if we are unable to
  # send messages to a specific broker we'll retry sending those to a different
  # broker.
  #
  # However, if you specify a key when creating the message, the producer
  # will choose a partition based on the key and only send to that partition.
  #
  # ## Custom Partitioning
  #
  # You may also specify a custom partitioning scheme for messages by passing
  # a Proc (or any object that responds to #call) to the Producer. The proc
  # must return an Integer >= 0 and less-than partition_count.
  #
  #     my_partitioner = Proc.new { |key, partition_count| Zlib::crc32(key) % partition_count }
  #
  #     producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id",
  #                             :type => :sync, :partitioner => my_partitioner)
  #
  # @api public
  class Producer
    # Option keys accepted by {#initialize}; anything else raises ArgumentError.
    # @api private
    VALID_OPTIONS = [
      :type,
      :compression_codec,
      :compressed_topics,
      :metadata_refresh_interval_ms,
      :partitioner,
      :max_send_retries,
      :retry_backoff_ms,
      :required_acks,
      :ack_timeout_ms
    ]

    # @api private
    OPTION_DEFAULTS = {
      :type => :sync
    }

    # Returns a new Producer.
    #
    # @param [Array<String>] brokers An array of brokers in the form "host1:port1"
    #
    # @param [String] client_id A client_id used to identify the producer.
    #
    # @param [Hash] options
    #
    # @option options [:sync / :async] :type (:sync)
    #   Whether we should send messages right away or queue them and send
    #   them in the background.
    #
    # @option options [:gzip / :snappy / :none] :compression_codec (:none)
    #   Type of compression to use.
    #
    # @option options [Enumerable<String>] :compressed_topics (nil)
    #   Topics to compress. If this is not specified we will compress all
    #   topics provided that +:compression_codec+ is set.
    #
    # @option options [Integer] :metadata_refresh_interval_ms (600_000)
    #   How frequently we should update the topic metadata in milliseconds.
    #
    # @option options [#call, nil] :partitioner
    #   Object which partitions messages based on key.
    #   Responds to #call(key, partition_count).
    #
    # @option options [Integer] :max_send_retries (3)
    #   Number of times to retry sending of messages to a leader.
    #
    # @option options [Integer] :retry_backoff_ms (100)
    #   The amount of time (in milliseconds) to wait before refreshing the
    #   metadata after we are unable to send messages.
    #
    # @option options [Integer] :required_acks (0)
    #   The number of acks required per request.
    #
    # @option options [Integer] :ack_timeout_ms (1500)
    #   How long the producer waits for acks.
    #
    # @raise [ArgumentError] on unknown options or a broker list that does
    #   not respond to #each
    #
    # @api public
    def initialize(brokers, client_id, options = {})
      # dup so validate_options can delete consumed keys without mutating
      # the caller's hash
      options = options.dup
      validate_options(options)

      if !brokers.respond_to?(:each)
        raise ArgumentError, "brokers must respond to #each"
      end
      @brokers = brokers
      @client_id = client_id
      @producer = build_producer(options)
      @shutdown = false
    end

    # Send messages to the cluster.
    #
    # @param [Enumerable<MessageToSend>] messages
    #   Messages must have a +topic+ set and may have a +key+ set.
    #
    # @return [Boolean]
    #
    # @raise [Errors::ProducerShutdownError] if #shutdown has been called
    # @raise [ArgumentError] if messages does not respond to #each
    #
    # @api public
    def send_messages(messages)
      raise Errors::ProducerShutdownError if @shutdown
      if !messages.respond_to?(:each)
        raise ArgumentError, "messages must respond to #each"
      end

      @producer.send_messages(convert_to_messages_objects(messages))
    end

    # Closes all open connections to brokers. Subsequent calls to
    # #send_messages will raise Errors::ProducerShutdownError.
    def shutdown
      @shutdown = true
      @producer.shutdown
    end

    private
    # Rejects unknown option keys and extracts :type (defaulting to :sync).
    def validate_options(options)
      unknown_keys = options.keys - VALID_OPTIONS
      if unknown_keys.any?
        raise ArgumentError, "Unknown options: #{unknown_keys.inspect}"
      end

      @type = options.delete(:type) || :sync
    end

    # Wraps user-supplied MessageToSend objects in internal Message objects.
    def convert_to_messages_objects(messages)
      messages.map do |m|
        Message.new(:value => m.value, :topic => m.topic, :key => m.key)
      end
    end

    # Instantiates the backing producer for the requested :type.
    def build_producer(options)
      case @type
      when :sync
        SyncProducer.new(@client_id, @brokers, options)
      when :async
        raise "Not implemented yet"
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Poseidon
  # Decides which compression codec (if any) applies to a given topic,
  # based on the producer's :compression_codec and :compressed_topics
  # options.
  #
  # @api private
  class ProducerCompressionConfig
    # Maps the symbolic codec names accepted by the producer to their
    # implementing codec classes.
    COMPRESSION_CODEC_MAP = {
      :gzip => Compression::GzipCodec,
      :snappy => Compression::SnappyCodec
    }

    # @param [Symbol, nil] compression_codec
    #   Codec name (:gzip or :snappy), or nil for no compression.
    # @param [Enumerable<String>, nil] compressed_topics
    #   Topics to compress; nil means compress every topic (when a codec
    #   is set).
    #
    # @raise [ArgumentError] if the codec name is not recognized
    def initialize(compression_codec, compressed_topics)
      if compression_codec
        @compression_codec = COMPRESSION_CODEC_MAP[compression_codec]
        if @compression_codec.nil?
          raise ArgumentError, "Unknown compression codec: '#{compression_codec}' (accepted: #{COMPRESSION_CODEC_MAP.keys.inspect})"
        end
      else
        @compression_codec = nil
      end

      if compressed_topics
        # Set gives O(1) membership checks in compression_codec_for_topic.
        @compressed_topics = Set.new(compressed_topics)
      else
        @compressed_topics = nil
      end
    end

    # Returns the codec class to use for +topic+, or false when the
    # topic's messages should be sent uncompressed.
    def compression_codec_for_topic(topic)
      return false if @compression_codec.nil?

      if @compressed_topics.nil? || @compressed_topics.include?(topic)
        @compression_codec
      else
        false
      end
    end
  end
end
@@ -0,0 +1,238 @@
1
module Poseidon
  module Protocol
    # ProtocolStruct is a Struct subclass that knows how to serialize its
    # members to a request buffer and populate them from a response buffer,
    # driven by a member => type map supplied at class-creation time.
    class ProtocolStruct < Struct
      class EncodingError < StandardError; end
      class DecodingError < StandardError; end

      # Builds a new struct class from a hash of member => type.
      def self.new(hash)
        klass = super(*hash.keys)
        klass.type_map = hash
        klass
      end

      def self.type_map=(type_map)
        @type_map = type_map
      end

      def self.type_map
        @type_map
      end

      # Marks the struct as being serialized with a leading int32 size.
      def self.prepend_size
        @prepend_size = true
        self
      end

      # Marks the struct as being serialized with a leading crc32 checksum.
      def self.prepend_crc32
        @prepend_crc32 = true
        self
      end

      # Marks the struct as tolerating truncation during decoding.
      def self.truncatable
        @truncatable = true
        self
      end

      def self.prepend_size?
        @prepend_size
      end

      def self.prepend_crc32?
        @prepend_crc32
      end

      def self.truncatable?
        @truncatable
      end

      # Declares +member+ as an array bounded by the struct's size rather
      # than by a leading element count.
      def self.size_bound_array(member)
        @size_bound_members ||= []
        @size_bound_members << member
        self
      end

      def self.size_bound_array?(member)
        @size_bound_members ||= []
        @size_bound_members.include?(member)
      end

      # Recursively find all objects with errors
      def objects_with_errors
        children = []
        each_pair do |member, value|
          case value
          when Array
            value.each do |v|
              if v.respond_to?(:objects_with_errors)
                children << v
              end
            end
          else
            if value.respond_to?(:objects_with_errors)
              children << value
            end
          end
        end

        children_with_errors = children.map(&:objects_with_errors).flatten
        if members.include?(:error) && self[:error] != Errors::NO_ERROR_CODE
          children_with_errors + [self]
        else
          children_with_errors
        end
      end

      def raise_error
        raise error_class if error_class
      end

      def error_class
        Errors::ERROR_CODES[self[:error]]
      end

      def raise_error_if_one_exists
        objects_with_errors.each do |object|
          object.raise_error
        end
      end

      # Serializes each member to +buffer+, honoring prepend_size /
      # prepend_crc32 declarations.
      #
      # @raise [EncodingError] if any member fails to serialize
      def write(buffer)
        maybe_prepend_size(buffer) do
          maybe_prepend_crc32(buffer) do
            each_pair do |member, value|
              begin
                write_member(buffer, member, value)
              rescue
                raise EncodingError, "Error writing #{member} in #{self.class} (#{$!.class}: #{$!.message})"
              end
            end
          end
        end
      end

      def maybe_prepend_size(buffer)
        if self.class.prepend_size?
          buffer.prepend_size do
            yield
          end
        else
          yield
        end
      end

      def maybe_prepend_crc32(buffer)
        if self.class.prepend_crc32?
          buffer.prepend_crc32 do
            yield
          end
        else
          yield
        end
      end

      def write_member(buffer, member, value)
        case type = type_map[member]
        when Array
          # Size-bound arrays are delimited by the enclosing size field,
          # not by a leading element count.
          buffer.int32(value.size) unless self.class.size_bound_array?(member)
          value.each { |v| write_type(buffer, type.first, v) }
        else
          write_type(buffer, type, value)
        end
      end

      def write_type(buffer, type, value)
        case type
        when Symbol
          # Primitive types map directly to buffer writer methods.
          buffer.send(type, value)
        else
          value.write(buffer)
        end
      end

      # Populate struct from buffer based on members and their type definition.
      def self.read(buffer)
        s = new
        s.read(buffer)
        s
      end

      def read(buffer)
        if self.class.prepend_size?
          @size = buffer.int32

          if self.class.prepend_crc32?
            @crc32 = buffer.int32
            # Re-pack to coerce the unsigned crc32 into a signed int32 so
            # it compares equal to the value read off the wire.
            @computed_crc32 = [Zlib::crc32(buffer.peek(@size-4))].pack("l>").unpack("l>").first
            if @crc32 != @computed_crc32
              @checksum_failed = true
            end
            expected_bytes_remaining = @size - 4
          else
            expected_bytes_remaining = @size
          end

          if self.class.truncatable? && expected_bytes_remaining > buffer.bytes_remaining
            @truncated = true
            return
          end
        end

        members.each do |member|
          begin
            self[member] = read_member(buffer, member)
          rescue DecodingError
            # Just reraise instead of producing a crazy nested exception
            raise
          rescue
            raise DecodingError, "Error while reading #{member} in #{self.class} (#{$!.class}: #{$!.message})"
          end
        end
      end

      def read_member(buffer, member)
        case type = type_map[member]
        when Array
          if self.class.size_bound_array?(member)
            # Bounded by total size: consume elements until the slice is
            # exhausted rather than reading a leading count.
            if @size
              array_buffer = ResponseBuffer.new(buffer.read(@size))
            else
              array_buffer = buffer
            end

            array = []
            while !array_buffer.eof? && (v = read_type(array_buffer, type.first))
              array << v
            end
            array
          else
            buffer.int32.times.map { read_type(buffer, type.first) }
          end
        else
          read_type(buffer, type)
        end
      end

      def read_type(buffer, type)
        case type
        when Symbol
          buffer.send(type)
        else
          type.read(buffer)
        end
      end

      def type_map
        self.class.type_map
      end

      def checksum_failed?
        @checksum_failed
      end

      def truncated?
        @truncated
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module Poseidon
  module Protocol
    # RequestBuffer allows you to build a binary string for API requests
    #
    # API parallels the primitive types described on the wiki, with some
    # sugar for prepending message sizes and checksums.
    # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
    class RequestBuffer
      def initialize
        # Accumulate everything as a binary (ASCII-8BIT) string.
        @s = ''.encode("ASCII-8BIT")
      end

      # Appends +string+ to the buffer as raw bytes.
      def append(string)
        string = string.dup
        string.force_encoding("ASCII-8BIT")
        @s << string
        nil
      end

      def int8(int8)
        append([int8].pack("C"))
      end

      # Big-endian signed 16-bit integer.
      def int16(int16)
        append([int16].pack("s>"))
      end

      # Big-endian signed 32-bit integer.
      def int32(int32)
        append([int32].pack("l>"))
      end

      # Big-endian signed 64-bit integer.
      def int64(int64)
        append([int64].pack("q>"))
      end

      # Add a length-prefixed string (int16 byte length; -1 marks null).
      #
      # @param [String] string
      def string(string)
        if string.nil?
          int16(-1)
        else
          # The wire length prefix counts bytes, not characters; using
          # String#size would under-count multi-byte UTF-8 input.
          int16(string.bytesize)
          append(string)
        end
      end

      # Add length-prefixed bytes (int32 byte length; -1 marks null).
      def bytes(string)
        if string.nil?
          int32(-1)
        else
          int32(string.bytesize)
          append(string)
        end
      end

      # Reserves space, runs the block, then back-fills a CRC32 of
      # everything the block appended.
      def prepend_crc32
        checksum_pos = @s.size
        @s += " "
        yield
        # String#[]= with an integer index replaces the 1-byte placeholder
        # with the full 4-byte big-endian checksum.
        @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N")
        nil
      end

      # Reserves space, runs the block, then back-fills the byte size of
      # everything the block appended.
      def prepend_size
        size_pos = @s.size
        @s += " "
        yield
        # (@s.size-1) - size_pos == bytes appended after the placeholder.
        @s[size_pos] = [(@s.size-1) - size_pos].pack("N")
        nil
      end

      def to_s
        @s
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module Poseidon
  module Protocol
    # ResponseBuffer wraps a binary response string and provides sequential
    # readers for the primitive types used by the wire protocol. Each read
    # advances an internal cursor.
    class ResponseBuffer
      def initialize(response)
        @s = response
        @pos = 0
      end

      def int8
        byte = @s.slice(@pos, 1).unpack("C").first
        @pos += 1
        byte
      end

      # Big-endian signed 16-bit integer.
      def int16
        short = @s.slice(@pos, 2).unpack("s>").first
        @pos += 2
        short
      end

      # Big-endian signed 32-bit integer.
      def int32
        int = @s.slice(@pos, 4).unpack("l>").first
        @pos += 4
        int
      end

      # Big-endian signed 64-bit integer.
      def int64
        long = @s.slice(@pos, 8).unpack("q>").first
        @pos += 8
        long
      end

      # Reads an int16 length-prefixed string. Returns nil for a null
      # string (length of -1).
      def string
        len = int16
        # A -1 length marks a null string; without this guard the negative
        # length slid @pos backwards and returned garbage.
        return nil if len == -1

        string = @s.slice(@pos, len)
        @pos += len
        string
      end

      # Reads +bytes+ raw bytes and advances the cursor.
      def read(bytes)
        data = @s.slice(@pos, bytes)
        @pos += bytes
        data
      end

      # Returns the next +bytes+ bytes without advancing the cursor.
      def peek(bytes)
        @s.slice(@pos, bytes)
      end

      # Reads an int32 length-prefixed byte string. Returns nil for a null
      # value (length of -1).
      def bytes
        n = int32
        if n == -1
          return nil
        else
          read(n)
        end
      end

      def bytes_remaining
        @s.size - @pos
      end

      def eof?
        @pos == @s.size
      end

      def to_s
        @s
      end
    end
  end
end