codeclimate-poseidon 0.0.8

Files changed (77)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0

data/lib/poseidon/cluster_metadata.rb
@@ -0,0 +1,94 @@
+module Poseidon
+  # Encapsulates what we know about brokers, topics and partitions
+  # from Metadata API calls.
+  #
+  # @api private
+  class ClusterMetadata
+    attr_reader :brokers, :last_refreshed_at, :topic_metadata
+    def initialize
+      @brokers = {}
+      @topic_metadata = {}
+      @last_refreshed_at = nil
+    end
+
+    # Update what we know about the cluster based on a MetadataResponse
+    #
+    # @param [MetadataResponse] topic_metadata_response
+    # @return nil
+    def update(topic_metadata_response)
+      update_brokers(topic_metadata_response.brokers)
+      update_topics(topic_metadata_response.topics)
+
+      @last_refreshed_at = Time.now
+      nil
+    end
+
+    # Do we have metadata for these topics already?
+    #
+    # @param [Enumerable<String>] topic_names A set of topics.
+    # @return [Boolean] true if we have metadata for all +topic_names+, otherwise false.
+    def have_metadata_for_topics?(topic_names)
+      topic_names.all? { |topic| @topic_metadata[topic] }
+    end
+
+    # Provides metadata for each topic
+    #
+    # @param [Enumerable<String>] topic_names Topics we should return metadata for
+    # @return [Hash<String,TopicMetadata>]
+    def metadata_for_topics(topic_names)
+      Hash[topic_names.map { |name| [name, @topic_metadata[name]] }]
+    end
+
+    # Provides a Broker object for +broker_id+. This corresponds to the
+    # broker ids in the TopicMetadata objects.
+    #
+    # @param [Integer] broker_id Broker id
+    def broker(broker_id)
+      @brokers[broker_id]
+    end
+
+    # Return the lead broker for a topic and partition
+    def lead_broker_for_partition(topic_name, partition)
+      broker_id = @topic_metadata[topic_name].partition_leader(partition)
+      if broker_id
+        @brokers[broker_id]
+      else
+        nil
+      end
+    end
+
+    def topics
+      @topic_metadata.keys
+    end
+
+    def to_s
+      out = ""
+      @topic_metadata.each do |topic, metadata|
+        out << "Topic: #{topic}"
+        out << "-------------------------"
+        out << metadata.to_s
+      end
+      out
+    end
+
+    def reset
+      @brokers = {}
+      @topic_metadata = {}
+    end
+
+    private
+    def update_topics(topics)
+      topics.each do |topic|
+        if topic.exists?
+          @topic_metadata[topic.name] = topic
+        end
+      end
+    end
+
+    def update_brokers(brokers)
+      brokers.each do |broker|
+        @brokers[broker.id] = broker
+      end
+    end
+  end
+end
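
A hedged usage sketch of how a ClusterMetadata instance gets fed from a metadata call. It assumes a broker reachable at localhost:9092 and an existing topic named "my_topic"; both ClusterMetadata and Connection are `@api private`, so this is illustration rather than supported public API:

```ruby
require "poseidon"

cluster = Poseidon::ClusterMetadata.new

# Connection#topic_metadata returns a MetadataResponse, which is exactly
# what ClusterMetadata#update expects.
Poseidon::Connection.open("localhost", 9092, "example-client", 10_000) do |connection|
  cluster.update(connection.topic_metadata(["my_topic"]))
end

cluster.have_metadata_for_topics?(["my_topic"])    # => true once the broker knows the topic
p cluster.lead_broker_for_partition("my_topic", 0) # => a Broker struct, or nil if no leader
```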

data/lib/poseidon/compressed_value.rb
@@ -0,0 +1,23 @@
+module Poseidon
+  # @api private
+  class CompressedValue
+    def initialize(value, codec_id)
+      @value = value
+      @codec_id = codec_id
+    end
+
+    # Decompressed value
+    #
+    # Raises Compression::UnrecognizedCompressionCodec if the compression codec is unknown
+    #
+    # @return [String] decompressed value
+    def decompressed
+      @decompressed ||= compression_codec.decompress(@value)
+    end
+
+    def compression_codec
+      Compression.find_codec(@codec_id)
+    end
+    private
+  end
+end

data/lib/poseidon/compression.rb
@@ -0,0 +1,30 @@
+module Poseidon
+  # @api private
+  module Compression
+    class UnrecognizedCompressionCodec < StandardError; end
+
+    require "poseidon/compression/gzip_codec"
+    require "poseidon/compression/snappy_codec"
+
+    CODECS = {
+      # 0 => no codec
+      1 => GzipCodec,
+      2 => SnappyCodec
+    }
+
+    # Fetches the codec module for +codec_id+
+    # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-Compression
+    #
+    # @param [Integer] codec_id codec id as defined by the Kafka Protocol
+    # @return [Module] codec module for codec_id
+    #
+    # @private
+    def self.find_codec(codec_id)
+      codec = CODECS[codec_id]
+      if codec.nil?
+        raise UnrecognizedCompressionCodec, codec_id
+      end
+      codec
+    end
+  end
+end
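
A quick sketch of looking up a codec by the id carried in a message's attributes field (ids 1 and 2 are the only ones registered above; any other id raises):

```ruby
require "poseidon"

Poseidon::Compression.find_codec(1) # => Poseidon::Compression::GzipCodec
Poseidon::Compression.find_codec(2) # => Poseidon::Compression::SnappyCodec

begin
  Poseidon::Compression.find_codec(3)
rescue Poseidon::Compression::UnrecognizedCompressionCodec => e
  puts "unrecognized codec id: #{e.message}"
end
```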

data/lib/poseidon/compression/gzip_codec.rb
@@ -0,0 +1,23 @@
+module Poseidon
+  module Compression
+    module GzipCodec
+      def self.codec_id
+        1
+      end
+
+      def self.compress(s)
+        io = StringIO.new
+        io.set_encoding(Encoding::BINARY)
+        gz = Zlib::GzipWriter.new io, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY
+        gz.write s
+        gz.close
+        io.string
+      end
+
+      def self.decompress(s)
+        io = StringIO.new(s)
+        Zlib::GzipReader.new(io).read
+      end
+    end
+  end
+end
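
A gzip round trip through the codec module (StringIO and Zlib come from Ruby's standard library and are required explicitly here in case the surrounding gem has not already loaded them):

```ruby
require "poseidon"
require "stringio" # used by GzipCodec.compress
require "zlib"

original   = "hello kafka " * 20
compressed = Poseidon::Compression::GzipCodec.compress(original)

puts compressed.bytesize < original.bytesize                             # => true for repetitive input
puts Poseidon::Compression::GzipCodec.decompress(compressed) == original # => true
```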

data/lib/poseidon/compression/snappy_codec.rb
@@ -0,0 +1,29 @@
+module Poseidon
+  module Compression
+    module SnappyCodec
+      def self.codec_id
+        2
+      end
+
+      def self.compress(s)
+        check!
+        Snappy.deflate(s)
+      end
+
+      def self.decompress(s)
+        check!
+        Snappy::Reader.new(StringIO.new(s)).read
+      end
+
+      def self.check!
+        @checked ||= begin
+          require 'snappy'
+          true
+        rescue LoadError
+          raise "Snappy compression is not available, please install the 'snappy' gem"
+        end
+      end
+
+    end
+  end
+end

data/lib/poseidon/connection.rb
@@ -0,0 +1,169 @@
+module Poseidon
+  # High level internal interface to a remote broker. Provides access to
+  # the broker API.
+  # @api private
+  class Connection
+    include Protocol
+
+    class ConnectionFailedError < StandardError; end
+    class TimeoutException < Exception; end
+
+    API_VERSION = 0
+    REPLICA_ID = -1 # Replica id is always -1 for non-brokers
+
+    # @yieldparam [Connection]
+    def self.open(host, port, client_id, socket_timeout_ms, &block)
+      connection = new(host, port, client_id, socket_timeout_ms)
+
+      yield connection
+    ensure
+      connection.close
+    end
+
+    attr_reader :host, :port
+
+    # Create a new connection
+    #
+    # @param [String] host Host to connect to
+    # @param [Integer] port Port the broker listens on
+    # @param [String] client_id Unique across processes?
+    def initialize(host, port, client_id, socket_timeout_ms)
+      @host = host
+      @port = port
+
+      @client_id = client_id
+      @socket_timeout_ms = socket_timeout_ms
+    end
+
+    # Close the broker connection
+    def close
+      @socket && @socket.close
+    end
+
+    # Execute a produce call
+    #
+    # @param [Integer] required_acks
+    # @param [Integer] timeout
+    # @param [Array<Protocol::MessagesForTopics>] messages_for_topics Messages to send
+    # @return [ProduceResponse]
+    def produce(required_acks, timeout, messages_for_topics)
+      ensure_connected
+      req = ProduceRequest.new(request_common(:produce),
+                               required_acks,
+                               timeout,
+                               messages_for_topics)
+      send_request(req)
+      if required_acks != 0
+        read_response(ProduceResponse)
+      else
+        true
+      end
+    end
+
+    # Execute a fetch call
+    #
+    # @param [Integer] max_wait_time
+    # @param [Integer] min_bytes
+    # @param [Array] topic_fetches
+    def fetch(max_wait_time, min_bytes, topic_fetches)
+      ensure_connected
+      req = FetchRequest.new(request_common(:fetch),
+                             REPLICA_ID,
+                             max_wait_time,
+                             min_bytes,
+                             topic_fetches)
+      send_request(req)
+      read_response(FetchResponse)
+    end
+
+    def offset(offset_topic_requests)
+      ensure_connected
+      req = OffsetRequest.new(request_common(:offset),
+                              REPLICA_ID,
+                              offset_topic_requests)
+      send_request(req)
+      read_response(OffsetResponse).topic_offset_responses
+    end
+
+    # Fetch metadata for +topic_names+
+    #
+    # @param [Enumerable<String>] topic_names
+    #   A list of topics to retrieve metadata for
+    # @return [TopicMetadataResponse] metadata for the topics
+    def topic_metadata(topic_names)
+      ensure_connected
+      req = MetadataRequest.new(request_common(:metadata),
+                                topic_names)
+      send_request(req)
+      read_response(MetadataResponse)
+    end
+
+    private
+    def ensure_connected
+      if @socket.nil? || @socket.closed?
+        begin
+          @socket = TCPSocket.new(@host, @port)
+        rescue SystemCallError
+          raise_connection_failed_error
+        end
+      end
+    end
+
+    def read_response(response_class)
+      r = ensure_read_or_timeout(4)
+      if r.nil?
+        raise_connection_failed_error
+      end
+      n = r.unpack("N").first
+      s = ensure_read_or_timeout(n)
+      buffer = Protocol::ResponseBuffer.new(s)
+      response_class.read(buffer)
+    rescue Errno::ECONNRESET, SocketError, TimeoutException
+      @socket = nil
+      raise_connection_failed_error
+    end
+
+    def ensure_read_or_timeout(maxlen)
+      if IO.select([@socket], nil, nil, @socket_timeout_ms / 1000.0)
+        @socket.read(maxlen)
+      else
+        raise TimeoutException.new
+      end
+    end
+
+    def send_request(request)
+      buffer = Protocol::RequestBuffer.new
+      request.write(buffer)
+      ensure_write_or_timeout([buffer.to_s.bytesize].pack("N") + buffer.to_s)
+    rescue Errno::EPIPE, Errno::ECONNRESET, TimeoutException
+      @socket = nil
+      raise_connection_failed_error
+    end
+
+    def ensure_write_or_timeout(data)
+      if IO.select(nil, [@socket], nil, @socket_timeout_ms / 1000.0)
+        @socket.write(data)
+      else
+        raise TimeoutException.new
+      end
+    end
+
+    def request_common(request_type)
+      RequestCommon.new(
+        API_KEYS[request_type],
+        API_VERSION,
+        next_correlation_id,
+        @client_id
+      )
+    end
+
+    def next_correlation_id
+      @correlation_id ||= 0
+      @correlation_id += 1
+    end
+
+    def raise_connection_failed_error
+      raise ConnectionFailedError, "Failed to connect to #{@host}:#{@port}"
+    end
+  end
+end
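
send_request and read_response above frame every request and response as a 4-byte big-endian length prefix followed by the payload. A standalone illustration of that framing, independent of the gem's protocol classes:

```ruby
require "stringio"

payload = "example request bytes"
framed  = [payload.bytesize].pack("N") + payload # same framing send_request uses

# Reading it back, as read_response does: 4 bytes of length, then the body.
io   = StringIO.new(framed)
size = io.read(4).unpack("N").first
body = io.read(size)
puts body == payload # => true
```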

data/lib/poseidon/fetched_message.rb
@@ -0,0 +1,37 @@
+module Poseidon
+
+  # A message fetched from a Kafka broker.
+  #
+  # ```
+  # fetched_messages = consumer.fetch
+  # fetched_messages.each do |fm|
+  #   puts "Topic: #{fm.topic}"
+  #   puts "Value: #{fm.value}"
+  #   puts "Key: #{fm.key}"
+  #   puts "Offset: #{fm.offset}"
+  # end
+  # ```
+  #
+  # @param [String] topic
+  #   Topic this message was fetched from.
+  #
+  # @param [String] value
+  #   Value of the fetched message.
+  #
+  # @param [String] key
+  #   Optional. The message's key, used to route the message
+  #   to a specific partition. May be nil if no key was set
+  #   when the message was produced.
+  #
+  # @api public
+  class FetchedMessage
+    attr_reader :value, :key, :topic, :offset
+
+    def initialize(topic, value, key, offset)
+      @topic = topic
+      @value = value
+      @key = key
+      @offset = offset
+    end
+  end
+end

data/lib/poseidon/message.rb
@@ -0,0 +1,151 @@
+module Poseidon
+  # The Message class is used by both the Producer and Consumer classes.
+  #
+  # = Basic usage
+  #
+  #   message = Poseidon::Message.new(:value => "hello",
+  #                                   :key => "user:123",
+  #                                   :topic => "salutations")
+  #
+  # = Sending a message
+  #
+  # When sending a message you must set the topic for the message; this
+  # can be done during creation or afterwards.
+  #
+  # = Compression
+  #
+  # In normal usage you should never have to worry about compressed
+  # Message objects. When producing, the producer takes care of
+  # compressing the messages, and when fetching the fetcher will
+  # return them decompressed.
+  #
+  # @api private
+  class Message
+    # The lowest 3 bits are used to indicate compression
+    COMPRESSION_MASK = 0x7
+    MAGIC_TYPE = 0
+
+    # Build a new Message object from its binary representation
+    #
+    # @param [ResponseBuffer] buffer
+    #   a response buffer containing binary data representing a message.
+    #
+    # @return [Message]
+    def self.read(buffer)
+      m = Message.new
+      m.struct = Protocol::MessageWithOffsetStruct.read(buffer)
+
+      # Return nil if the message is truncated.
+      if m.struct.message.truncated?
+        return nil
+      end
+
+      if m.struct.message.checksum_failed?
+        raise Errors::ChecksumError
+      end
+      m
+    end
+
+    attr_accessor :struct, :topic
+
+    # Create a new message object
+    #
+    # @param [Hash] options
+    #
+    # @option options [String] :value (nil)
+    #   The message's value. Optional.
+    #
+    # @option options [String] :key (nil)
+    #   The message's key. Optional.
+    #
+    # @option options [String] :topic (nil)
+    #   The topic we should send this message to. Optional.
+    #
+    # @option options [String] :attributes (nil)
+    #   Attributes field for the message; currently only indicates
+    #   whether or not the message is compressed.
+    def initialize(options = {})
+      build_struct(options)
+
+      @topic = options.delete(:topic)
+
+      if options.any?
+        raise ArgumentError, "Unknown options: #{options.keys.inspect}"
+      end
+    end
+
+    def ==(other)
+      eql?(other)
+    end
+
+    def eql?(other)
+      struct.eql?(other.struct)
+    end
+
+    def objects_with_errors
+      struct.objects_with_errors
+    end
+
+    # Write a binary representation of the message to buffer
+    #
+    # @param [RequestBuffer] buffer
+    # @return [nil]
+    def write(buffer)
+      @struct.write(buffer)
+      nil
+    end
+
+    # @return [String] the Message's key
+    def key
+      @struct.message.key
+    end
+
+    # @return [String] the Message's value
+    def value
+      @struct.message.value
+    end
+
+    # @return [Integer] the Message's offset
+    def offset
+      @struct.offset
+    end
+
+    # Is the value compressed?
+    #
+    # @return [Boolean]
+    def compressed?
+      compression_codec_id > 0
+    end
+
+    # Decompressed value
+    #
+    # @return [String] decompressed value
+    def decompressed_value
+      compression_codec.decompress(value)
+    end
+
+    private
+    def attributes
+      @struct.message.attributes
+    end
+
+    def compression_codec
+      Compression.find_codec(compression_codec_id)
+    end
+
+    def compression_codec_id
+      attributes & COMPRESSION_MASK
+    end
+
+    def build_struct(options)
+      message_struct = Protocol::MessageStruct.new(
+        MAGIC_TYPE,
+        options.delete(:attributes) || 0,
+        options.delete(:key),
+        options.delete(:value)
+      )
+      struct = Protocol::MessageWithOffsetStruct.new(options.delete(:offset) || 0, message_struct)
+      self.struct = struct
+    end
+  end
+end
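
A small sketch of the basic usage described in the class docs (all options shown are ones the constructor accepts; anything else raises ArgumentError):

```ruby
require "poseidon"

message = Poseidon::Message.new(:value => "hello",
                                :key   => "user:123",
                                :topic => "salutations")

message.value       # => "hello"
message.key         # => "user:123"
message.topic       # => "salutations"
message.compressed? # => false; attributes default to 0, i.e. no compression codec
```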