poseidon 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +12 -0
  4. data/.yardopts +8 -0
  5. data/Gemfile +13 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +71 -0
  8. data/Rakefile +17 -0
  9. data/TODO.md +27 -0
  10. data/examples/consumer.rb +18 -0
  11. data/examples/producer.rb +9 -0
  12. data/lib/poseidon/broker_pool.rb +72 -0
  13. data/lib/poseidon/cluster_metadata.rb +63 -0
  14. data/lib/poseidon/compressed_value.rb +23 -0
  15. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  16. data/lib/poseidon/compression/snappy_codec.rb +17 -0
  17. data/lib/poseidon/compression.rb +30 -0
  18. data/lib/poseidon/connection.rb +138 -0
  19. data/lib/poseidon/fetched_message.rb +37 -0
  20. data/lib/poseidon/message.rb +151 -0
  21. data/lib/poseidon/message_conductor.rb +84 -0
  22. data/lib/poseidon/message_set.rb +80 -0
  23. data/lib/poseidon/message_to_send.rb +33 -0
  24. data/lib/poseidon/messages_for_broker.rb +39 -0
  25. data/lib/poseidon/messages_to_send.rb +47 -0
  26. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  27. data/lib/poseidon/partition_consumer.rb +154 -0
  28. data/lib/poseidon/producer.rb +193 -0
  29. data/lib/poseidon/producer_compression_config.rb +36 -0
  30. data/lib/poseidon/protocol/protocol_struct.rb +238 -0
  31. data/lib/poseidon/protocol/request_buffer.rb +78 -0
  32. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/sync_producer.rb +117 -0
  35. data/lib/poseidon/topic_metadata.rb +65 -0
  36. data/lib/poseidon/version.rb +4 -0
  37. data/lib/poseidon.rb +102 -0
  38. data/poseidon.gemspec +24 -0
  39. data/spec/bin/kafka-run-class.sh +65 -0
  40. data/spec/integration/multiple_brokers/round_robin_spec.rb +39 -0
  41. data/spec/integration/multiple_brokers/spec_helper.rb +34 -0
  42. data/spec/integration/simple/compression_spec.rb +20 -0
  43. data/spec/integration/simple/connection_spec.rb +33 -0
  44. data/spec/integration/simple/multiple_brokers_spec.rb +8 -0
  45. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +97 -0
  46. data/spec/integration/simple/spec_helper.rb +17 -0
  47. data/spec/integration/simple/unavailable_broker_spec.rb +77 -0
  48. data/spec/spec_helper.rb +32 -0
  49. data/spec/test_cluster.rb +205 -0
  50. data/spec/unit/broker_pool_spec.rb +77 -0
  51. data/spec/unit/cluster_metadata_spec.rb +41 -0
  52. data/spec/unit/compression_spec.rb +17 -0
  53. data/spec/unit/connection_spec.rb +4 -0
  54. data/spec/unit/fetched_message_spec.rb +11 -0
  55. data/spec/unit/message_conductor_spec.rb +147 -0
  56. data/spec/unit/message_set_spec.rb +42 -0
  57. data/spec/unit/message_spec.rb +112 -0
  58. data/spec/unit/message_to_send_spec.rb +10 -0
  59. data/spec/unit/messages_for_broker_spec.rb +54 -0
  60. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  61. data/spec/unit/messages_to_send_spec.rb +63 -0
  62. data/spec/unit/partition_consumer_spec.rb +124 -0
  63. data/spec/unit/producer_compression_config_spec.rb +35 -0
  64. data/spec/unit/producer_spec.rb +45 -0
  65. data/spec/unit/protocol_spec.rb +54 -0
  66. data/spec/unit/sync_producer_spec.rb +141 -0
  67. data/spec/unit/topic_metadata_spec.rb +17 -0
  68. metadata +206 -0
@@ -0,0 +1,151 @@
1
module Poseidon
  # The Message class is used by both Producer and Consumer classes.
  #
  # = Basic usage
  #
  #   message = Poseidon::Message.new(:value => "hello",
  #                                   :key => "user:123",
  #                                   :topic => "salutations")
  #
  # = Sending a message
  #
  # When sending a message you must set the topic for the message, this
  # can be done during creation or afterwards.
  #
  # = Compression
  #
  # In normal usage you should never have to worry about compressed
  # Message objects. When producing the producer takes care of
  # compressing the messages and when fetching the fetcher will
  # return them decompressed.
  #
  # @api private
  class Message
    # Last 3 bits are used to indicate compression
    COMPRESSION_MASK = 0x7
    MAGIC_TYPE = 0

    # Build a new Message object from its binary representation
    #
    # @param [ResponseBuffer] buffer
    #   a response buffer containing binary data representing a message.
    #
    # @return [Message,nil] nil when the message is truncated
    # @raise [Errors::ChecksumError] when the stored checksum does not match
    def self.read(buffer)
      m = Message.new
      m.struct = Protocol::MessageWithOffsetStruct.read(buffer)

      # Return nil if the message is truncated.
      if m.struct.message.truncated?
        return nil
      end

      if m.struct.message.checksum_failed?
        raise Errors::ChecksumError
      end
      m
    end

    attr_accessor :struct, :topic

    # Create a new message object
    #
    # @param [Hash] options
    #
    # @option options [String] :value (nil)
    #   The message's value. Optional.
    #
    # @option options [String] :key (nil)
    #   The message's key. Optional.
    #
    # @option options [String] :topic (nil)
    #   The topic we should send this message to. Optional.
    #
    # @option options [String] :attributes (nil)
    #   Attributes field for the message; currently only indicates
    #   whether or not the message is compressed.
    #
    # @raise [ArgumentError] when unknown options are passed
    def initialize(options = {})
      build_struct(options)

      @topic = options.delete(:topic)

      if options.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end
    end

    def ==(other)
      eql?(other)
    end

    # Messages are equal when their underlying structs are equal.
    # The type guard means comparing against a non-Message (e.g. nil)
    # returns false instead of raising NoMethodError.
    def eql?(other)
      other.is_a?(Message) && struct.eql?(other.struct)
    end

    def objects_with_errors
      struct.objects_with_errors
    end

    # Write a binary representation of the message to buffer
    #
    # @param [RequestBuffer] buffer
    # @return [nil]
    def write(buffer)
      @struct.write(buffer)
      nil
    end

    # @return [String] the Message's key
    def key
      @struct.message.key
    end

    # @return [String] the Message's value
    def value
      @struct.message.value
    end

    # @return [Integer] the Message's offset
    def offset
      @struct.offset
    end

    # Is the value compressed?
    #
    # @return [Boolean]
    def compressed?
      compression_codec_id > 0
    end

    # Decompressed value
    #
    # @return [String] decompressed value
    def decompressed_value
      compression_codec.decompress(value)
    end

    private

    # Raw attributes byte from the underlying struct.
    def attributes
      @struct.message.attributes
    end

    # Codec object matching the compression bits of the attributes byte.
    def compression_codec
      Compression.find_codec(compression_codec_id)
    end

    # Lower 3 bits of attributes select the compression codec (0 = none).
    def compression_codec_id
      attributes & COMPRESSION_MASK
    end

    # Build the wire structs from the option hash.
    # NOTE: destructively deletes the keys it consumes from options.
    def build_struct(options)
      message_struct = Protocol::MessageStruct.new(
        MAGIC_TYPE,
        options.delete(:attributes) || 0,
        options.delete(:key),
        options.delete(:value)
      )
      struct = Protocol::MessageWithOffsetStruct.new(options.delete(:offset) || 0, message_struct)
      self.struct = struct
    end
  end
end
@@ -0,0 +1,84 @@
1
module Poseidon
  # Decides which partition — and therefore which broker — each
  # message should be routed to.
  #
  # @api private
  class MessageConductor
    NO_PARTITION = -1
    NO_BROKER = -1

    # Create a new message conductor
    #
    # @param [ClusterMetadata] cluster_metadata
    #   Metadata for the cluster whose topics we route messages to.
    # @param [#call,nil] partitioner
    #   Custom partitioner, invoked as partitioner.call(key, partition_count).
    #   When nil, keyed messages hash via CRC32(key) % partition_count.
    def initialize(cluster_metadata, partitioner)
      @cluster_metadata = cluster_metadata
      @partitioner = partitioner
      @partition_counter = -1
    end

    # Determines which partition a message should be sent to.
    #
    # @param [String] topic
    #   Topic we are sending this message to
    #
    # @param [Object] key
    #   Key for this message, may be nil
    #
    # @return [Integer,Integer]
    #   partition_id and broker_id to which this message should be sent.
    #   NO_PARTITION/NO_BROKER when metadata is missing or no leader
    #   is available.
    def destination(topic, key = nil)
      topic_metadata = topic_metadatas[topic]
      if topic_metadata && topic_metadata.leader_available?
        partition_id = determine_partition(topic_metadata, key)
        broker_id = topic_metadata.partitions[partition_id].leader || NO_BROKER
      else
        partition_id = NO_PARTITION
        broker_id = NO_BROKER
      end

      return partition_id, broker_id
    end

    private

    def topic_metadatas
      @cluster_metadata.topic_metadata
    end

    # Keyed messages hash to a stable partition; keyless ones round-robin.
    def determine_partition(topic_metadata, key)
      if key
        partition_for_keyed_message(topic_metadata, key)
      else
        partition_for_keyless_message(topic_metadata)
      end
    end

    def partition_for_keyed_message(topic_metadata, key)
      partition_count = topic_metadata.partition_count
      if @partitioner
        partition_id = @partitioner.call(key, partition_count)

        # Reject out-of-range results — including negatives, which would
        # otherwise silently index from the end of the partitions array.
        if partition_id >= partition_count || partition_id < 0
          raise Errors::InvalidPartitionError, "partitioner (#{@partitioner.inspect}) requested #{partition_id} while only #{partition_count} partitions exist"
        end
      else
        partition_id = Zlib::crc32(key) % partition_count
      end

      partition_id
    end

    # Round-robin across the partitions that currently have a leader.
    def partition_for_keyless_message(topic_metadata)
      partition_count = topic_metadata.available_partition_count

      if partition_count > 0
        next_partition_counter % partition_count
      else
        NO_PARTITION
      end
    end

    def next_partition_counter
      @partition_counter += 1
    end
  end
end
@@ -0,0 +1,80 @@
1
module Poseidon
  # An ordered collection of messages — the unit Kafka stores and
  # transfers messages in.
  #
  # @api private
  class MessageSet
    # Build a message set object from a binary encoded message set
    #
    # @param [ResponseBuffer] buffer
    #   response buffer positioned at a size-prefixed message set
    # @return [MessageSet]
    def self.read(buffer)
      ms = MessageSet.new
      ms.struct = Protocol::MessageSetStructWithSize.read(buffer)
      ms
    end

    # Build a message set from a buffer with no leading size field
    # (the layout used inside a compressed message's value).
    #
    # @param [ResponseBuffer] buffer
    # @return [MessageSet]
    def self.read_without_size(buffer)
      ms = MessageSet.new
      ms.struct = Protocol::MessageSetStruct.read(buffer)
      ms
    end

    attr_accessor :struct

    # @param [Array<Message>] messages initial messages for the set
    def initialize(messages = [])
      self.struct = Protocol::MessageSetStructWithSize.new(messages)
    end

    def ==(other)
      eql?(other)
    end

    def eql?(other)
      struct.eql?(other.struct)
    end

    def objects_with_errors
      struct.objects_with_errors
    end

    # Write a binary representation of the set to buffer
    #
    # @param [RequestBuffer] buffer
    def write(buffer)
      struct.write(buffer)
    end

    # Append a message to the set
    def <<(message)
      struct.messages << message
    end

    # @return [Array<Message>] messages in the set
    def messages
      struct.messages
    end

    # Wrap the whole set into a single compressed message.
    #
    # @param [Object] codec compression codec (e.g. gzip or snappy)
    # @return [MessageSet] new set containing one compressed message
    def compress(codec)
      MessageSet.new([to_compressed_message(codec)])
    end

    # Builds an array of Message objects from the MessageStruct objects.
    # Decompressing messages if necessary.
    #
    # @return [Array<Message>]
    def flatten
      struct.messages.map do |message|
        if message.compressed?
          decompressed = message.decompressed_value
          MessageSet.read_without_size(Protocol::ResponseBuffer.new(decompressed)).flatten
        else
          message
        end
      end.flatten
    end

    private

    # Serialize the set and wrap the bytes — minus the 4-byte size
    # prefix — in a single Message flagged with the codec's id.
    def to_compressed_message(codec)
      buffer = Protocol::RequestBuffer.new
      struct.write(buffer)

      value = codec.compress(buffer.to_s[4..-1])
      Message.new(:value => value, :attributes => codec.codec_id)
    end
  end
end
@@ -0,0 +1,33 @@
1
module Poseidon
  # A message destined for Kafka: the target topic, the message body,
  # and an optional routing key.
  #
  #   mts = Poseidon::MessageToSend.new("topic", "value", "opt_key")
  #
  # @api public
  class MessageToSend
    attr_reader :value, :key, :topic

    # Build a message for delivery to a Kafka broker.
    #
    # @param [String] topic
    #   Destination topic for this message. Required — may not be nil.
    #
    # @param [String] value
    #   Body of the message to send.
    #
    # @param [String] key
    #   Optional routing key. When present it pins the message to a
    #   specific broker; when absent, messages are spread across
    #   brokers round-robin.
    #
    # @raise [ArgumentError] when topic is nil
    #
    # @api public
    def initialize(topic, value, key = nil)
      if topic.nil?
        raise ArgumentError, "Must provide a non-nil topic"
      end

      @topic = topic
      @value = value
      @key   = key
    end
  end
end
@@ -0,0 +1,39 @@
1
module Poseidon
  # Collects the messages destined for one particular broker,
  # grouped internally by topic and partition.
  # @api private
  class MessagesForBroker
    attr_reader :broker_id, :messages

    def initialize(broker_id)
      @broker_id = broker_id
      @topics = {}
      @messages = []
    end

    # Record a message bound for the given partition on this broker.
    def add(message, partition_id)
      @messages << message

      partitions = (@topics[message.topic] ||= {})
      (partitions[partition_id] ||= []) << message
    end

    # Build the per-topic protocol objects for this broker,
    # compressing each partition's message set when the topic's
    # compression config provides a codec.
    def build_protocol_objects(compression_config)
      @topics.map do |topic, partitions|
        codec = compression_config.compression_codec_for_topic(topic)

        per_partition = partitions.map do |partition_id, partition_messages|
          set = MessageSet.new(partition_messages)
          set = set.compress(codec) if codec
          Protocol::MessagesForPartition.new(partition_id, set)
        end

        Protocol::MessagesForTopic.new(topic, per_partition)
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Poseidon
  # A set of messages that we need to send to the cluster. May be used
  # across multiple send attempts.
  #
  # If a custom partitioner is not used then messages are distributed
  # in round-robin fashion to each partition with an available leader.
  #
  # @api private
  class MessagesToSend
    class InvalidPartitionError < StandardError; end
    attr_reader :topic_set, :messages

    # Create a new messages-to-send object.
    #
    # @param [Array<Message>] messages List of messages we want to send.
    # @param [ClusterMetadata] cluster_metadata
    def initialize(messages, cluster_metadata)
      @messages = messages
      @cluster_metadata = cluster_metadata

      build_topic_set
    end

    # Are we missing metadata for any topic we are sending to?
    #
    # @return [Boolean]
    def needs_metadata?
      !@cluster_metadata.have_metadata_for_topics?(topic_set)
    end

    # Group the pending messages by destination broker.
    #
    # @param [MessageConductor] message_conductor
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers(message_conductor)
      MessagesToSendBatch.new(@messages, message_conductor).messages_for_brokers
    end

    # Mark one broker's batch as delivered, removing its messages
    # from the pending set.
    def successfully_sent(messages_for_broker)
      @messages -= messages_for_broker.messages
    end

    # @return [Boolean] true when no messages remain to be sent
    def all_sent?
      !@messages.any?
    end

    private

    # Collect the set of distinct topics the pending messages span.
    def build_topic_set
      @topic_set = Set.new
      @messages.each { |m| @topic_set.add(m.topic) }
    end
  end
end
@@ -0,0 +1,27 @@
1
module Poseidon
  # One send attempt's worth of messages, grouped by destination broker.
  # @api private
  class MessagesToSendBatch
    def initialize(messages, message_conductor)
      @messages = messages
      @message_conductor = message_conductor
    end

    # Route each message through the conductor and bucket it under the
    # broker it should be delivered to.
    #
    # @return [Array<MessagesForBroker>]
    def messages_for_brokers
      by_broker = {}

      @messages.each do |message|
        partition_id, broker_id =
          @message_conductor.destination(message.topic, message.key)

        collector = (by_broker[broker_id] ||= MessagesForBroker.new(broker_id))
        collector.add(message, partition_id)
      end

      by_broker.values
    end
  end
end
@@ -0,0 +1,154 @@
1
module Poseidon
  # A primitive Kafka Consumer which operates on a specific broker, topic and partition.
  #
  # Example in the README.
  #
  # @api public
  class PartitionConsumer
    # The offset of the latest message the broker received for this partition.
    # Useful for knowing how far behind the consumer is. This value is only
    # as recent as the last fetch call.
    attr_reader :highwater_mark

    # Create a new consumer which reads the specified topic and partition from
    # the host.
    #
    # @param [String] client_id Used to identify this client; should be unique.
    # @param [String] host
    # @param [Integer] port
    # @param [String] topic Topic to read from
    # @param [Integer] partition Partitions are zero indexed.
    # @param [Integer,Symbol] offset
    #   Offset to start reading from.
    #   There are a couple special offsets which can be passed as symbols:
    #     :earliest_offset Start reading from the first offset the server has.
    #     :latest_offset   Start reading from the latest offset the server has.
    #
    # @param [Hash] options
    #   These options can all be overridden in each individual fetch command.
    #
    # @option options [:max_bytes] Maximum number of bytes to fetch
    #   Default: 1048576 (1MB)
    # @option options [:max_wait_ms]
    #   How long to block until the server sends us data.
    #   Default: 10_000 (10s)
    # @option options [:min_bytes] Smallest amount of data the server should send us.
    #   Default: 0 (Send us data as soon as it is ready)
    #
    # @raise [ArgumentError] on an unknown special offset or unknown option keys
    #
    # @api public
    def initialize(client_id, host, port, topic, partition, offset, options = {})
      @connection = Connection.new(host, port, client_id)
      @topic = topic
      @partition = partition
      if Symbol === offset
        raise ArgumentError, "Unknown special offset type: #{offset}" unless [:earliest_offset, :latest_offset].include?(offset)
      end
      @offset = offset
      handle_options(options)
    end

    # Fetch messages from the broker.
    #
    # @param [Hash] options
    #
    # @option options [:max_bytes]
    #   Maximum number of bytes to fetch
    # @option options [:max_wait_ms]
    #   How long to block until the server sends us data.
    # @option options [:min_bytes]
    #   Smallest amount of data the server should send us.
    #
    # @return [Array<FetchedMessage>]
    #
    # @api public
    def fetch(options = {})
      # Accept the documented :max_wait_ms key; fall back to the legacy
      # :max_wait key so existing callers keep working.
      fetch_max_wait = options[:max_wait_ms] || options[:max_wait] || max_wait_ms
      fetch_max_bytes = options[:max_bytes] || max_bytes
      fetch_min_bytes = options[:min_bytes] || min_bytes

      topic_fetches = build_topic_fetch_request(fetch_max_bytes)
      fetch_response = @connection.fetch(fetch_max_wait, fetch_min_bytes, topic_fetches)
      topic_response = fetch_response.topic_fetch_responses.first
      partition_response = topic_response.partition_fetch_responses.first
      if partition_response.error != Errors::NO_ERROR_CODE
        raise Errors::ERROR_CODES[partition_response.error]
      else
        @highwater_mark = partition_response.highwater_mark_offset
        messages = partition_response.message_set.flatten.map do |m|
          FetchedMessage.new(topic_response.topic, m.value, m.key, m.offset)
        end
        # Advance past the last message so the next fetch resumes after it.
        if messages.any?
          @offset = messages.last.offset + 1
        end
        messages
      end
    end

    # @return [Integer] next offset we will fetch
    #
    # @api public
    def next_offset
      resolve_offset_if_necessary
      @offset
    end

    private

    attr_reader :max_wait_ms, :max_bytes, :min_bytes

    # Validate the option hash and capture defaults.
    # NOTE: destructively deletes the keys it consumes from options.
    def handle_options(options)
      @max_bytes = options.delete(:max_bytes) || 1024*1024
      @min_bytes = options.delete(:min_bytes) || 0
      @max_wait_ms = options.delete(:max_wait_ms) || 10_000
      if options.keys.any?
        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
      end
    end

    # Translate symbolic/negative offsets into a concrete offset by
    # asking the broker.
    def resolve_offset_if_necessary
      return unless Symbol === @offset || @offset < 0

      # -2 and -1 are the protocol's sentinels for earliest/latest.
      if @offset == :earliest_offset
        @offset = -2
      elsif @offset == :latest_offset
        @offset = -1
      end

      topic_offset_responses = @connection.offset(build_topic_offset_request)
      partition_offsets = topic_offset_responses.first.partition_offsets
      if partition_offsets.first.error != Errors::NO_ERROR_CODE
        raise Errors::ERROR_CODES[partition_offsets.first.error]
      end

      offset_struct = partition_offsets.first.offsets.first
      if offset_struct.nil?
        # An empty partition returns no offsets; start from 0.
        @offset = 0
      else
        @offset = offset_struct.offset
      end
    end

    def build_topic_offset_request
      partition_offset_request = Protocol::PartitionOffsetRequest.new(
        @partition,
        @offset,
        max_number_of_offsets = 1)

      [Protocol::TopicOffsetRequest.new(@topic, [partition_offset_request])]
    end

    # Build the fetch request for our topic/partition starting at next_offset.
    def build_topic_fetch_request(max_bytes)
      partition_fetches = [Protocol::PartitionFetch.new(@partition,
                                                        next_offset,
                                                        max_bytes)]
      [Protocol::TopicFetch.new(@topic, partition_fetches)]
    end
  end
end