poseidon 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +12 -0
  4. data/.yardopts +8 -0
  5. data/Gemfile +13 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +71 -0
  8. data/Rakefile +17 -0
  9. data/TODO.md +27 -0
  10. data/examples/consumer.rb +18 -0
  11. data/examples/producer.rb +9 -0
  12. data/lib/poseidon/broker_pool.rb +72 -0
  13. data/lib/poseidon/cluster_metadata.rb +63 -0
  14. data/lib/poseidon/compressed_value.rb +23 -0
  15. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  16. data/lib/poseidon/compression/snappy_codec.rb +17 -0
  17. data/lib/poseidon/compression.rb +30 -0
  18. data/lib/poseidon/connection.rb +138 -0
  19. data/lib/poseidon/fetched_message.rb +37 -0
  20. data/lib/poseidon/message.rb +151 -0
  21. data/lib/poseidon/message_conductor.rb +84 -0
  22. data/lib/poseidon/message_set.rb +80 -0
  23. data/lib/poseidon/message_to_send.rb +33 -0
  24. data/lib/poseidon/messages_for_broker.rb +39 -0
  25. data/lib/poseidon/messages_to_send.rb +47 -0
  26. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  27. data/lib/poseidon/partition_consumer.rb +154 -0
  28. data/lib/poseidon/producer.rb +193 -0
  29. data/lib/poseidon/producer_compression_config.rb +36 -0
  30. data/lib/poseidon/protocol/protocol_struct.rb +238 -0
  31. data/lib/poseidon/protocol/request_buffer.rb +78 -0
  32. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/sync_producer.rb +117 -0
  35. data/lib/poseidon/topic_metadata.rb +65 -0
  36. data/lib/poseidon/version.rb +4 -0
  37. data/lib/poseidon.rb +102 -0
  38. data/poseidon.gemspec +24 -0
  39. data/spec/bin/kafka-run-class.sh +65 -0
  40. data/spec/integration/multiple_brokers/round_robin_spec.rb +39 -0
  41. data/spec/integration/multiple_brokers/spec_helper.rb +34 -0
  42. data/spec/integration/simple/compression_spec.rb +20 -0
  43. data/spec/integration/simple/connection_spec.rb +33 -0
  44. data/spec/integration/simple/multiple_brokers_spec.rb +8 -0
  45. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +97 -0
  46. data/spec/integration/simple/spec_helper.rb +17 -0
  47. data/spec/integration/simple/unavailable_broker_spec.rb +77 -0
  48. data/spec/spec_helper.rb +32 -0
  49. data/spec/test_cluster.rb +205 -0
  50. data/spec/unit/broker_pool_spec.rb +77 -0
  51. data/spec/unit/cluster_metadata_spec.rb +41 -0
  52. data/spec/unit/compression_spec.rb +17 -0
  53. data/spec/unit/connection_spec.rb +4 -0
  54. data/spec/unit/fetched_message_spec.rb +11 -0
  55. data/spec/unit/message_conductor_spec.rb +147 -0
  56. data/spec/unit/message_set_spec.rb +42 -0
  57. data/spec/unit/message_spec.rb +112 -0
  58. data/spec/unit/message_to_send_spec.rb +10 -0
  59. data/spec/unit/messages_for_broker_spec.rb +54 -0
  60. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  61. data/spec/unit/messages_to_send_spec.rb +63 -0
  62. data/spec/unit/partition_consumer_spec.rb +124 -0
  63. data/spec/unit/producer_compression_config_spec.rb +35 -0
  64. data/spec/unit/producer_spec.rb +45 -0
  65. data/spec/unit/protocol_spec.rb +54 -0
  66. data/spec/unit/sync_producer_spec.rb +141 -0
  67. data/spec/unit/topic_metadata_spec.rb +17 -0
  68. metadata +206 -0
@@ -0,0 +1,193 @@
1
+ module Poseidon
2
+ # Provides a high level interface for sending messages to a cluster
3
+ # of Kafka brokers.
4
+ #
5
+ # ## Producer Creation
6
+ #
7
+ # Producer requires a broker list and a client_id:
8
+ #
9
+ # producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id",
10
+ # :type => :sync)
11
+ #
12
+ # The broker list is only used to bootstrap our knowledge of the cluster --
13
+ # it does not need to contain every broker. The client id should be unique
14
+ # across all clients in the cluster.
15
+ #
16
+ # ## Sending Messages
17
+ #
18
+ # Messages must have a topic before being sent:
19
+ #
20
+ # messages = []
21
+ # messages << Poseidon::MessageToSend.new("topic1", "Hello Word")
22
+ # messages << Poseidon::MessageToSend.new("user_updates_topic", user.update, user.id)
23
+ # producer.send_messages(messages)
24
+ #
25
+ # ## Producer Types
26
+ #
27
+ # There are two types of producers: sync and async. They can be specified
28
+ # via the :type option when creating a producer.
29
+ #
30
+ # ## Sync Producer
31
+ #
32
+ # The :sync producer blocks while sends messages to the cluster. The more
33
+ # messages you can send per #send_messages call the more efficient it will
34
+ # be.
35
+ #
36
+ # ## Compression
37
+ #
38
+ # When creating the producer you can specify a compression method:
39
+ #
40
+ # producer = Producer.new(["broker1:port1"], "my_client_id",
41
+ # :type => :sync, :compression_codec => :gzip)
42
+ #
43
+ # If you don't specify which topics to compress it will compress all topics.
44
+ # You can specify a set of topics to compress when creating the producer:
45
+ #
46
+ # producer = Producer.new(["broker1:port1"], "my_client_id",
47
+ # :type => :sync, :compression_codec => :gzip,
48
+ # :compressed_topics => ["compressed_topic_1"])
49
+ #
50
+ # ## Partitioning
51
+ #
52
+ # For keyless messages the producer will round-robin messages to all
53
+ # _available_ partitions for at topic. This means that if we are unable to
54
+ # send messages to a specific broker we'll retry sending those to a different
55
+ # broker.
56
+ #
57
+ # However, if you specify a key when creating the message, the producer
58
+ # will choose a partition based on the key and only send to that partition.
59
+ #
60
+ # ## Custom Partitioning
61
+ #
62
+ # You may also specify a custom partitioning scheme for messages by passing
63
+ # a Proc (or any object that responds to #call) to the Producer. The proc
64
+ # must return a Fixnum >= 0 and less-than partition_count.
65
+ #
66
+ # my_partitioner = Proc.new { |key, partition_count| Zlib::crc32(key) % partition_count }
67
+ #
68
+ # producer = Producer.new(["broker1:port1", "broker2:port1"], "my_client_id",
69
+ # :type => :sync, :partitioner => my_partitioner)
70
+ #
71
+ # @api public
72
+ class Producer
73
+ # @api private
74
+ VALID_OPTIONS = [
75
+ :type,
76
+ :compression_codec,
77
+ :compressed_topics,
78
+ :metadata_refresh_interval_ms,
79
+ :partitioner,
80
+ :max_send_retries,
81
+ :retry_backoff_ms,
82
+ :required_acks,
83
+ :ack_timeout_ms
84
+ ]
85
+
86
+ # @api private
87
+ OPTION_DEFAULTS = {
88
+ :type => :sync
89
+ }
90
+
91
+ # Returns a new Producer.
92
+ #
93
+ # @param [Array<String>] brokers An array of brokers in the form "host1:port1"
94
+ #
95
+ # @param [String] client_id A client_id used to indentify the producer.
96
+ #
97
+ # @param [Hash] options
98
+ #
99
+ # @option options [:sync / :async] :type (:sync)
100
+ # Whether we should send messages right away or queue them and send
101
+ # them in the background.
102
+ #
103
+ # @option options [:gzip / :snappy / :none] :compression_codec (:none)
104
+ # Type of compression to use.
105
+ #
106
+ # @option options [Enumberable<String>] :compressed_topics (nil)
107
+ # Topics to compress. If this is not specified we will compress all
108
+ # topics provided that +:compression_codec+ is set.
109
+ #
110
+ # @option options [Integer: Seconds] :metadata_refresh_interval_ms (600_000)
111
+ # How frequently we should update the topic metadata in milliseconds.
112
+ #
113
+ # @option options [#call, nil] :partitioner
114
+ # Object which partitions messages based on key.
115
+ # Responds to #call(key, partition_count).
116
+ #
117
+ # @option options [Integer] :max_send_retries (3)
118
+ # Number of times to retry sending of messages to a leader.
119
+ #
120
+ # @option options [Integer] :retry_backoff_ms (100)
121
+ # The amount of time (in milliseconds) to wait before refreshing the metadata
122
+ # after we are unable to send messages.
123
+ # Number of times to retry sending of messages to a leader.
124
+ #
125
+ # @option options [Integer] :required_acks (0)
126
+ # The number of acks required per request.
127
+ #
128
+ # @option options [Integer] :request_timeout_ms (1500)
129
+ # How long the producer waits for acks.
130
+ #
131
+ # @api public
132
+ def initialize(brokers, client_id, options = {})
133
+ options = options.dup
134
+ validate_options(options)
135
+
136
+ if !brokers.respond_to?(:each)
137
+ raise ArgumentError, "brokers must respond to #each"
138
+ end
139
+ @brokers = brokers
140
+ @client_id = client_id
141
+ @producer = build_producer(options)
142
+ @shutdown = false
143
+ end
144
+
145
+ # Send messages to the cluster.
146
+ #
147
+ # @param [Enumerable<MessageToSend>] messages
148
+ # Messages must have a +topic+ set and may have a +key+ set.
149
+ #
150
+ # @return [Boolean]
151
+ #
152
+ # @api public
153
+ def send_messages(messages)
154
+ raise Errors::ProducerShutdownError if @shutdown
155
+ if !messages.respond_to?(:each)
156
+ raise ArgumentError, "messages must respond to #each"
157
+ end
158
+
159
+ @producer.send_messages(convert_to_messages_objects(messages))
160
+ end
161
+
162
+ # Closes all open connections to brokers
163
+ def shutdown
164
+ @shutdown = true
165
+ @producer.shutdown
166
+ end
167
+
168
+ private
169
+ def validate_options(options)
170
+ unknown_keys = options.keys - VALID_OPTIONS
171
+ if unknown_keys.any?
172
+ raise ArgumentError, "Unknown options: #{unknown_keys.inspect}"
173
+ end
174
+
175
+ @type = options.delete(:type) || :sync
176
+ end
177
+
178
+ def convert_to_messages_objects(messages)
179
+ messages.map do |m|
180
+ Message.new(:value => m.value, :topic => m.topic, :key => m.key)
181
+ end
182
+ end
183
+
184
+ def build_producer(options)
185
+ case @type
186
+ when :sync
187
+ SyncProducer.new(@client_id, @brokers, options)
188
+ when :async
189
+ raise "Not implemented yet"
190
+ end
191
+ end
192
+ end
193
+ end
module Poseidon
  # Resolves which compression codec, if any, should be applied to
  # messages for a given topic.
  # @api private
  class ProducerCompressionConfig
    # Maps the user-facing codec names to their implementations.
    COMPRESSION_CODEC_MAP = {
      :gzip => Compression::GzipCodec,
      :snappy => Compression::SnappyCodec
    }

    # @param compression_codec [Symbol, nil] :gzip, :snappy, or nil for no compression
    # @param compressed_topics [Enumerable<String>, nil] topics to compress;
    #   nil means compress every topic (when a codec is set)
    # @raise [ArgumentError] if the codec name is not recognized
    def initialize(compression_codec, compressed_topics)
      if compression_codec
        @compression_codec = COMPRESSION_CODEC_MAP[compression_codec]
        if @compression_codec.nil?
          raise ArgumentError, "Unknown compression codec: '#{compression_codec}' (accepted: #{COMPRESSION_CODEC_MAP.keys.inspect})"
        end
      else
        @compression_codec = nil
      end

      if compressed_topics
        @compressed_topics = Set.new(compressed_topics)
      else
        @compressed_topics = nil
      end
    end

    # Returns the codec class to use for +topic+, or false when the
    # topic should not be compressed.
    def compression_codec_for_topic(topic)
      return false if @compression_codec.nil?

      if @compressed_topics.nil? || @compressed_topics.include?(topic)
        @compression_codec
      else
        false
      end
    end
  end
end
module Poseidon
  module Protocol
    # A Struct subclass that knows how to serialize itself to, and
    # deserialize itself from, the Kafka wire format.
    #
    # A struct class is created from a { member => type } hash. A type is
    # either a Symbol naming a primitive (dispatched to the buffer via
    # #send), another ProtocolStruct class, or a one-element Array of
    # either for repeated values.
    class ProtocolStruct < Struct
      class EncodingError < StandardError; end
      class DecodingError < StandardError; end

      # Builds a new struct class whose members are the hash keys and
      # whose wire types are the hash values.
      def self.new(hash)
        klass = super(*hash.keys)
        klass.type_map = hash
        klass
      end

      def self.type_map=(type_map)
        @type_map = type_map
      end

      def self.type_map
        @type_map
      end

      # When enabled, the encoded form is prefixed with its int32 byte size.
      def self.prepend_size
        @prepend_size = true
        self
      end

      # When enabled, the encoded payload is prefixed with its crc32.
      def self.prepend_crc32
        @prepend_crc32 = true
        self
      end

      # When enabled, a partially transmitted struct is tolerated on read
      # (see #truncated?).
      def self.truncatable
        @truncatable = true
        self
      end

      def self.prepend_size?
        @prepend_size
      end

      def self.prepend_crc32?
        @prepend_crc32
      end

      def self.truncatable?
        @truncatable
      end

      # Marks an array member whose extent is bounded by the struct's size
      # prefix instead of an explicit element count on the wire.
      def self.size_bound_array(member)
        @size_bound_members ||= []
        @size_bound_members << member
        self
      end

      def self.size_bound_array?(member)
        @size_bound_members ||= []
        @size_bound_members.include?(member)
      end

      # Recursively find all objects with errors
      def objects_with_errors
        children = []
        each_pair do |member, value|
          case value
          when Array
            value.each do |v|
              if v.respond_to?(:objects_with_errors)
                children << v
              end
            end
          else
            if value.respond_to?(:objects_with_errors)
              children << value
            end
          end
        end

        children_with_errors = children.map(&:objects_with_errors).flatten
        if members.include?(:error) && self[:error] != Errors::NO_ERROR_CODE
          children_with_errors + [self]
        else
          children_with_errors
        end
      end

      def raise_error
        raise error_class if error_class
      end

      def error_class
        Errors::ERROR_CODES[self[:error]]
      end

      def raise_error_if_one_exists
        objects_with_errors.each do |object|
          object.raise_error
        end
      end

      # Serializes each member into +buffer+, honoring the class-level
      # prepend_size / prepend_crc32 settings.
      # @raise [EncodingError] wrapping any failure while writing a member
      def write(buffer)
        maybe_prepend_size(buffer) do
          maybe_prepend_crc32(buffer) do
            each_pair do |member, value|
              begin
                write_member(buffer, member, value)
              rescue
                raise EncodingError, "Error writing #{member} in #{self.class} (#{$!.class}: #{$!.message})"
              end
            end
          end
        end
      end

      def maybe_prepend_size(buffer)
        if self.class.prepend_size?
          buffer.prepend_size do
            yield
          end
        else
          yield
        end
      end

      def maybe_prepend_crc32(buffer)
        if self.class.prepend_crc32?
          buffer.prepend_crc32 do
            yield
          end
        else
          yield
        end
      end

      # Writes a single member; arrays get an int32 count unless the
      # member is size-bound.
      def write_member(buffer, member, value)
        case type = type_map[member]
        when Array
          buffer.int32(value.size) unless self.class.size_bound_array?(member)
          value.each { |v| write_type(buffer, type.first, v) }
        else
          write_type(buffer, type, value)
        end
      end

      # Primitive symbols dispatch to the buffer; nested structs write
      # themselves.
      def write_type(buffer, type, value)
        case type
        when Symbol
          buffer.send(type, value)
        else
          value.write(buffer)
        end
      end

      # Populate struct from buffer based on members and their type definition.
      def self.read(buffer)
        s = new
        s.read(buffer)
        s
      end

      # Fills in this struct's members from +buffer+. Validates the crc32
      # when the class prepends one, and bails out early (marking the
      # struct truncated) when a truncatable struct is cut short.
      # @raise [DecodingError] wrapping any failure while reading a member
      def read(buffer)
        if self.class.prepend_size?
          @size = buffer.int32

          if self.class.prepend_crc32?
            @crc32 = buffer.int32
            # Re-pack to coerce the unsigned crc32 into the signed int32
            # representation used on the wire.
            @computed_crc32 = [Zlib::crc32(buffer.peek(@size-4))].pack("l>").unpack("l>").first
            if @crc32 != @computed_crc32
              @checksum_failed = true
            end
            expected_bytes_remaining = @size - 4
          else
            expected_bytes_remaining = @size
          end


          if self.class.truncatable? && expected_bytes_remaining > buffer.bytes_remaining
            @truncated = true
            return
          end
        end

        members.each do |member|
          begin
            self[member] = read_member(buffer, member)
          rescue DecodingError
            # Just reraise instead of producing a crazy nested exception
            raise
          rescue
            raise DecodingError, "Error while reading #{member} in #{self.class} (#{$!.class}: #{$!.message})"
          end
        end
      end

      # Reads a single member. Size-bound arrays consume the rest of the
      # size-delimited region; counted arrays read an int32 length first.
      def read_member(buffer, member)
        case type = type_map[member]
        when Array
          if self.class.size_bound_array?(member)
            if @size
              array_buffer = ResponseBuffer.new(buffer.read(@size))
            else
              array_buffer = buffer
            end

            array = []
            while !array_buffer.eof? && (v = read_type(array_buffer, type.first))
              array << v
            end
            array
          else
            buffer.int32.times.map { read_type(buffer, type.first) }
          end
        else
          read_type(buffer, type)
        end
      end

      def read_type(buffer, type)
        case type
        when Symbol
          buffer.send(type)
        else
          type.read(buffer)
        end
      end

      def type_map
        self.class.type_map
      end

      # True when the crc32 read from the wire did not match the payload.
      def checksum_failed?
        @checksum_failed
      end

      # True when a truncatable struct was cut short during #read.
      def truncated?
        @truncated
      end
    end
  end
end
module Poseidon
  module Protocol
    # RequestBuffer allows you to build a Binary string for API requests
    #
    # API parallels the primitive types described on the wiki, with some
    # sugar for prepending message sizes and checksums.
    # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
    class RequestBuffer
      def initialize
        @s = ''.encode("ASCII-8BIT")
      end

      # Appends raw bytes, coercing to the buffer's binary encoding.
      def append(string)
        string = string.dup
        string.force_encoding("ASCII-8BIT")
        @s << string
        nil
      end

      def int8(int8)
        append([int8].pack("C"))
      end

      def int16(int16)
        append([int16].pack("s>"))
      end

      def int32(int32)
        append([int32].pack("l>"))
      end

      def int64(int64)
        append([int64].pack("q>"))
      end

      # Add a length-prefixed string (int16 byte length, -1 for nil).
      #
      # @param [String] string
      def string(string)
        if string.nil?
          int16(-1)
        else
          # The protocol length prefix counts bytes, not characters;
          # #size would corrupt frames containing multibyte text.
          int16(string.bytesize)
          append(string)
        end
      end

      # Add a length-prefixed byte sequence (int32 byte length, -1 for nil).
      def bytes(string)
        if string.nil?
          int32(-1)
        else
          int32(string.bytesize)
          append(string)
        end
      end

      # Reserves a placeholder, runs the block, then backfills the
      # crc32 of everything the block appended.
      def prepend_crc32
        checksum_pos = @s.size
        @s += " "
        yield
        # String#[]= with an index replaces the single placeholder
        # character with the full 4-byte checksum.
        @s[checksum_pos] = [Zlib::crc32(@s[(checksum_pos+1)..-1])].pack("N")
        nil
      end

      # Reserves a placeholder, runs the block, then backfills the
      # int32 byte size of everything the block appended.
      def prepend_size
        size_pos = @s.size
        @s += " "
        yield
        @s[size_pos] = [(@s.size-1) - size_pos].pack("N")
        nil
      end

      # Returns the accumulated binary string.
      def to_s
        @s
      end
    end
  end
end
module Poseidon
  module Protocol
    # Wraps a binary response string and decodes the Kafka wire-protocol
    # primitives from it sequentially, tracking a read cursor.
    class ResponseBuffer
      # @param response [String] raw bytes received from a broker
      def initialize(response)
        @s = response
        @pos = 0
      end

      # Reads an unsigned 8-bit integer.
      def int8
        read(1).unpack("C").first
      end

      # Reads a signed big-endian 16-bit integer.
      def int16
        read(2).unpack("s>").first
      end

      # Reads a signed big-endian 32-bit integer.
      def int32
        read(4).unpack("l>").first
      end

      # Reads a signed big-endian 64-bit integer.
      def int64
        read(8).unpack("q>").first
      end

      # Reads an int16 byte-length prefix followed by that many bytes.
      def string
        read(int16)
      end

      # Consumes and returns the next +count+ bytes, advancing the cursor.
      def read(count)
        chunk = @s.slice(@pos, count)
        @pos += count
        chunk
      end

      # Returns the next +count+ bytes without moving the cursor.
      def peek(count)
        @s.slice(@pos, count)
      end

      # Reads an int32 byte-length prefix followed by that many bytes;
      # a -1 length denotes nil.
      def bytes
        length = int32
        length == -1 ? nil : read(length)
      end

      # Number of bytes not yet consumed.
      def bytes_remaining
        @s.size - @pos
      end

      # True once the cursor has consumed the entire response.
      def eof?
        bytes_remaining.zero?
      end

      # The full underlying response string.
      def to_s
        @s
      end
    end
  end
end