codeclimate-poseidon 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0
@@ -0,0 +1,94 @@
1
module Poseidon
  # Encapsulates what we know about brokers, topics and partitions
  # from Metadata API calls.
  #
  # @api private
  class ClusterMetadata
    attr_reader :brokers, :last_refreshed_at, :topic_metadata

    # Starts with no known brokers or topics and no refresh timestamp.
    def initialize
      @brokers = {}
      @topic_metadata = {}
      @last_refreshed_at = nil
    end

    # Update what we know about the cluster based on a MetadataResponse.
    # Brokers are stored by id; topics are stored by name, and only
    # when they report that they exist.
    #
    # @param [MetadataResponse] topic_metadata_response
    # @return nil
    def update(topic_metadata_response)
      update_brokers(topic_metadata_response.brokers)
      update_topics(topic_metadata_response.topics)

      @last_refreshed_at = Time.now
      nil
    end

    # Do we have metadata for these topics already?
    #
    # @param [Enumerable<String>] topic_names A set of topics.
    # @return [Boolean] true if we have metadata for all +topic_names+, otherwise false.
    def have_metadata_for_topics?(topic_names)
      topic_names.all? { |topic| @topic_metadata[topic] }
    end

    # Provides metadata for each topic. Topics we hold no metadata for
    # map to nil.
    #
    # @param [Enumerable<String>] topic_names Topics we should return metadata for
    # @return [Hash<String,TopicMetadata>]
    def metadata_for_topics(topic_names)
      Hash[topic_names.map { |name| [name, @topic_metadata[name]] }]
    end

    # Provides a Broker object for +broker_id+. This corresponds to the
    # broker ids in the TopicMetadata objects.
    #
    # @param [Integer] broker_id Broker id
    # @return the stored broker, or nil when the id is unknown
    def broker(broker_id)
      @brokers[broker_id]
    end

    # Return lead broker for topic and partition.
    #
    # @param [String] topic_name
    # @param [Integer] partition
    # @return the lead broker, or nil when the topic is unknown, the
    #   partition has no leader, or the leader is not a known broker
    def lead_broker_for_partition(topic_name, partition)
      metadata = @topic_metadata[topic_name]
      # An unknown topic has no leader; answer nil rather than failing
      # with NoMethodError on nil.
      return nil if metadata.nil?

      broker_id = metadata.partition_leader(partition)
      broker_id ? @brokers[broker_id] : nil
    end

    # @return [Array<String>] names of all topics we hold metadata for
    def topics
      @topic_metadata.keys
    end

    # Human readable dump: for every topic a header line, a rule and the
    # topic metadata's own +to_s+, each on its own line.
    #
    # @return [String]
    def to_s
      out = ""
      @topic_metadata.each do |topic, metadata|
        out << "Topic: #{topic}\n"
        out << "-------------------------\n"
        out << metadata.to_s << "\n"
      end
      out
    end

    # Forget all brokers and topics. +last_refreshed_at+ is left untouched.
    def reset
      @brokers = {}
      @topic_metadata = {}
    end

    private

    # Stores every topic that reports +exists?+, keyed by name.
    def update_topics(topics)
      topics.each do |topic|
        @topic_metadata[topic.name] = topic if topic.exists?
      end
    end

    # Stores every broker keyed by its id, replacing older entries.
    def update_brokers(brokers)
      brokers.each do |broker|
        @brokers[broker.id] = broker
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Poseidon
  # Pairs a compressed payload with the id of the codec that produced it
  # and decompresses it lazily.
  #
  # @api private
  class CompressedValue
    # @param [String] value compressed payload
    # @param [Integer] codec_id id passed to Compression.find_codec
    def initialize(value, codec_id)
      @value = value
      @codec_id = codec_id
    end

    # Decompressed value. The result is computed on first use and
    # memoized.
    #
    # Propagates whatever Compression.find_codec raises when the
    # compression codec is unknown.
    #
    # @return [String] decompressed value
    def decompressed
      @decompressed ||= decompress
    end

    # Looks up the codec registered for this value's codec id.
    #
    # @return the codec returned by Compression.find_codec
    def compression_codec
      Compression.find_codec(@codec_id)
    end

    private

    # Runs the stored payload through the codec's +decompress+.
    def decompress
      compression_codec.decompress(@value)
    end
  end
end
@@ -0,0 +1,30 @@
1
module Poseidon
  # Registry of the compression codecs this library can use.
  #
  # @api private
  module Compression
    # Raised by find_codec when no codec is registered for an id.
    class UnrecognizedCompressionCodec < StandardError; end

    require "poseidon/compression/gzip_codec"
    require "poseidon/compression/snappy_codec"

    # Codec modules keyed by codec id.
    CODECS = {
      #0 => no codec
      1 => GzipCodec,
      2 => SnappyCodec
    }

    # Looks up the codec module registered for +codec_id+.
    # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-Compression
    #
    # @param [Integer] codec_id codec id as defined by the Kafka Protocol
    # @return [Module] codec module for codec_id
    # @raise [UnrecognizedCompressionCodec] when no codec is registered
    #   for +codec_id+
    #
    # @private
    def self.find_codec(codec_id)
      found = CODECS[codec_id]
      return found unless found.nil?

      raise UnrecognizedCompressionCodec, codec_id
    end
  end
end
@@ -0,0 +1,23 @@
1
module Poseidon
  module Compression
    # Gzip codec built on Zlib. Relies on Zlib and StringIO having been
    # loaded already.
    module GzipCodec
      # @return [Integer] codec id of this codec (1)
      def self.codec_id
        1
      end

      # Gzips +s+ with Zlib's default compression level and strategy.
      #
      # @param [String] s data to compress
      # @return [String] gzip stream, built in a binary-encoded buffer
      def self.compress(s)
        io = StringIO.new
        io.set_encoding(Encoding::BINARY)
        gz = Zlib::GzipWriter.new io, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY
        begin
          gz.write s
        ensure
          # Closing finishes the gzip stream; doing it in +ensure+ also
          # releases the zlib stream if the write fails.
          gz.close
        end
        io.string
      end

      # Gunzips +s+.
      #
      # @param [String] s gzip stream
      # @return [String] decompressed data
      # @raise [Zlib::GzipFile::Error] when +s+ is not in gzip format
      def self.decompress(s)
        gz = Zlib::GzipReader.new(StringIO.new(s))
        begin
          gz.read
        ensure
          # Close explicitly so the zlib stream is released here instead
          # of whenever the reader gets garbage collected.
          gz.close
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Poseidon
  module Compression
    # Snappy codec. The 'snappy' gem is loaded lazily, the first time a
    # payload is compressed or decompressed.
    module SnappyCodec
      class << self
        # @return [Integer] codec id of this codec (2)
        def codec_id
          2
        end

        # Compresses +s+ with Snappy.deflate.
        #
        # @param [String] s data to compress
        # @raise [RuntimeError] when the 'snappy' gem cannot be loaded
        def compress(s)
          check!
          Snappy.deflate(s)
        end

        # Decompresses +s+ by reading it through a Snappy::Reader.
        #
        # @param [String] s compressed data
        # @raise [RuntimeError] when the 'snappy' gem cannot be loaded
        def decompress(s)
          check!
          Snappy::Reader.new(StringIO.new(s)).read
        end

        # Makes sure the 'snappy' gem is loaded. A successful load is
        # remembered so later calls return immediately.
        #
        # @return [true]
        # @raise [RuntimeError] when the 'snappy' gem cannot be loaded
        def check!
          return @checked if @checked

          require 'snappy'
          @checked = true
        rescue LoadError
          raise "Snappy compression is not available, please install the 'snappy' gem"
        end
      end
    end
  end
end
@@ -0,0 +1,169 @@
1
module Poseidon
  # High level internal interface to a remote broker. Provides access to
  # the broker API.
  #
  # The TCP socket is opened lazily by the first request. Whenever a
  # request fails at the socket level the socket is closed and dropped,
  # so the next request opens a fresh one.
  #
  # @api private
  class Connection
    include Protocol

    # Raised for every connect, read or write failure handled by this class.
    class ConnectionFailedError < StandardError; end

    # Signals that the socket did not become readable/writable within the
    # socket timeout. It is raised and rescued inside this class only
    # (and turned into ConnectionFailedError), so it is an ordinary
    # StandardError rather than a direct Exception subclass.
    class TimeoutException < StandardError; end

    API_VERSION = 0
    REPLICA_ID = -1 # Replica id is always -1 for non-brokers

    # Builds a connection, yields it and closes it afterwards, also when
    # the block raises.
    #
    # @yieldparam [Connection]
    # @return the value of the block
    def self.open(host, port, client_id, socket_timeout_ms, &block)
      connection = new(host, port, client_id, socket_timeout_ms)

      yield connection
    ensure
      # +connection+ is still nil when construction itself failed; do not
      # mask that error with a NoMethodError.
      connection.close if connection
    end

    attr_reader :host, :port

    # Create a new connection. No socket is opened until the first request.
    #
    # @param [String] host Host to connect to
    # @param [Integer] port Port broker listens on
    # @param [String] client_id Unique across processes?
    # @param [Integer] socket_timeout_ms
    #   Milliseconds to wait for the socket to become readable/writable
    def initialize(host, port, client_id, socket_timeout_ms)
      @host = host
      @port = port

      @client_id = client_id
      @socket_timeout_ms = socket_timeout_ms
    end

    # Close broker connection
    def close
      @socket && @socket.close
    end

    # Execute a produce call
    #
    # @param [Integer] required_acks
    # @param [Integer] timeout
    # @param [Array<Protocol::MessagesForTopics>] messages_for_topics Messages to send
    # @return [ProduceResponse, true]
    #   the broker's response, or +true+ when +required_acks+ is 0 and
    #   therefore no response is read
    # @raise [ConnectionFailedError]
    def produce(required_acks, timeout, messages_for_topics)
      ensure_connected
      req = ProduceRequest.new( request_common(:produce),
                                required_acks,
                                timeout,
                                messages_for_topics)
      send_request(req)
      if required_acks != 0
        read_response(ProduceResponse)
      else
        true
      end
    end

    # Execute a fetch call
    #
    # @param [Integer] max_wait_time
    # @param [Integer] min_bytes
    # @param topic_fetches passed through unchanged to FetchRequest
    # @return [FetchResponse]
    # @raise [ConnectionFailedError]
    def fetch(max_wait_time, min_bytes, topic_fetches)
      ensure_connected
      req = FetchRequest.new( request_common(:fetch),
                              REPLICA_ID,
                              max_wait_time,
                              min_bytes,
                              topic_fetches)
      send_request(req)
      read_response(FetchResponse)
    end

    # Execute an offset call
    #
    # @param offset_topic_requests passed through unchanged to OffsetRequest
    # @return the +topic_offset_responses+ of the OffsetResponse
    # @raise [ConnectionFailedError]
    def offset(offset_topic_requests)
      ensure_connected
      req = OffsetRequest.new(request_common(:offset),
                              REPLICA_ID,
                              offset_topic_requests)
      send_request(req)
      read_response(OffsetResponse).topic_offset_responses
    end

    # Fetch metadata for +topic_names+
    #
    # @param [Enumerable<String>] topic_names
    #   A list of topics to retrieve metadata for
    # @return [MetadataResponse] metadata for the topics
    # @raise [ConnectionFailedError]
    def topic_metadata(topic_names)
      ensure_connected
      req = MetadataRequest.new( request_common(:metadata),
                                 topic_names)
      send_request(req)
      read_response(MetadataResponse)
    end

    private

    # Opens the socket unless a usable one is already held.
    def ensure_connected
      if @socket.nil? || @socket.closed?
        begin
          @socket = TCPSocket.new(@host, @port)
        rescue SystemCallError
          raise_connection_failed_error
        end
      end
    end

    # Reads one response: a 4 byte big-endian size prefix followed by
    # that many bytes, which are handed to +response_class.read+.
    def read_response(response_class)
      size_prefix = read_exactly(4)
      payload = read_exactly(size_prefix.unpack("N").first)
      response_class.read(Protocol::ResponseBuffer.new(payload))
    rescue Errno::ECONNRESET, SocketError, TimeoutException
      discard_socket
      raise_connection_failed_error
    end

    # Reads +length+ bytes. IO#read answers nil or a shorter string when
    # the peer closes the stream early; the socket is useless after that,
    # so it is dropped and the failure is reported instead of handing a
    # truncated payload to the parser.
    def read_exactly(length)
      data = ensure_read_or_timeout(length)
      if data.nil? || data.bytesize < length
        discard_socket
        raise_connection_failed_error
      end
      data
    end

    # Waits up to the socket timeout for the socket to become readable,
    # then reads. The wait only covers readiness; the read itself blocks.
    def ensure_read_or_timeout(maxlen)
      if IO.select([@socket], nil, nil, @socket_timeout_ms / 1000.0)
        @socket.read(maxlen)
      else
        raise TimeoutException.new
      end
    end

    # Serializes +request+ and writes it prefixed with its 4 byte
    # big-endian size.
    def send_request(request)
      buffer = Protocol::RequestBuffer.new
      request.write(buffer)
      payload = buffer.to_s
      ensure_write_or_timeout([payload.bytesize].pack("N") + payload)
    rescue Errno::EPIPE, Errno::ECONNRESET, TimeoutException
      discard_socket
      raise_connection_failed_error
    end

    # Waits up to the socket timeout for the socket to become writable,
    # then writes +data+.
    def ensure_write_or_timeout(data)
      if IO.select(nil, [@socket], nil, @socket_timeout_ms / 1000.0)
        @socket.write(data)
      else
        raise TimeoutException.new
      end
    end

    # Closes and forgets the current socket so the descriptor is released
    # right away and the next request reconnects.
    def discard_socket
      @socket.close if @socket && !@socket.closed?
    rescue IOError, SystemCallError
      # Best effort: the socket is being thrown away anyway.
    ensure
      @socket = nil
    end

    # Header fields shared by every request.
    def request_common(request_type)
      RequestCommon.new(
        API_KEYS[request_type],
        API_VERSION,
        next_correlation_id,
        @client_id
      )
    end

    # Per-connection request counter; the first id handed out is 1.
    def next_correlation_id
      @correlation_id ||= 0
      @correlation_id += 1
    end

    def raise_connection_failed_error
      raise ConnectionFailedError, "Failed to connect to #{@host}:#{@port}"
    end
  end
end
@@ -0,0 +1,37 @@
1
module Poseidon

  # A message fetched from a Kafka broker.
  #
  # ```
  # fetched_messages = consumer.fetch
  # fetched_messages.each do |fm|
  #   puts "Topic: #{fm.topic}"
  #   puts "Value #{fm.value}"
  #   puts "Key: #{fm.key}"
  #   puts "Offset: #{fm.offset}"
  # end
  # ```
  #
  # Plain read-only value holder: every constructor argument is stored
  # as given and exposed through the reader of the same name.
  #
  # @api public
  class FetchedMessage
    attr_reader :value, :key, :topic, :offset

    # @param topic  stored as-is, returned by #topic
    # @param value  stored as-is, returned by #value
    # @param key    stored as-is, returned by #key
    # @param offset stored as-is, returned by #offset
    def initialize(topic, value, key, offset)
      @topic, @value, @key, @offset = topic, value, key, offset
    end
  end
end
@@ -0,0 +1,151 @@
1
module Poseidon
  # The Message class is used by both Producer and Consumer classes.
  #
  # = Basic usage
  #
  #   message = Poseidon::Message.new(:value => "hello",
  #                                   :key => "user:123",
  #                                   :topic => "salutations")
  #
  # = Sending a message
  #
  # When sending a message you must set the topic for the message, this
  # can be done during creation or afterwards.
  #
  # = Compression
  #
  # In normal usage you should never have to worry about compressed
  # Message objects. When producing the producer takes care of
  # compressing the messages and when fetching the fetcher will
  # return them decompressed.
  #
  # @api private
  class Message
    # Last 3 bits are used to indicate compression
    COMPRESSION_MASK = 0x7
    MAGIC_TYPE = 0

    # Build a new Message object from its binary representation
    #
    # @param [ResponseBuffer] buffer
    #   a response buffer containing binary data representing a message.
    #
    # @return [Message, nil] the message, or nil if it is truncated
    # @raise [Errors::ChecksumError] if the message's checksum check failed
    def self.read(buffer)
      m = Message.new
      m.struct = Protocol::MessageWithOffsetStruct.read(buffer)

      # Return nil if the message is truncated.
      return nil if m.struct.message.truncated?

      raise Errors::ChecksumError if m.struct.message.checksum_failed?

      m
    end

    attr_accessor :struct, :topic

    # Create a new message object. The +options+ hash is not modified.
    #
    # @param [Hash] options
    #
    # @option options [String] :value (nil)
    #   The messages value. Optional.
    #
    # @option options [String] :key (nil)
    #   The messages key. Optional.
    #
    # @option options [String] :topic (nil)
    #   The topic we should send this message to. Optional.
    #
    # @option options [Integer] :attributes (0)
    #   Attributes field for the message. Currently only indicates
    #   whether or not the message is compressed (see COMPRESSION_MASK).
    #
    # @option options [Integer] :offset (0)
    #   The messages offset. Optional.
    #
    # @raise [ArgumentError] if +options+ contains any other key
    def initialize(options = {})
      # Recognized keys are consumed with +delete+ so leftovers can be
      # reported; work on a copy so the caller's hash stays intact (and
      # may even be frozen).
      remaining = options.dup
      build_struct(remaining)

      @topic = remaining.delete(:topic)

      unless remaining.empty?
        raise ArgumentError, "Unknown options: #{remaining.keys.inspect}"
      end
    end

    # Same as #eql?
    def ==(other)
      eql?(other)
    end

    # Two messages are equal when their structs are equal; the topic is
    # not compared. Anything that is not a Message is never equal.
    #
    # @return [Boolean]
    def eql?(other)
      other.is_a?(Message) && struct.eql?(other.struct)
    end

    # Delegates to the underlying struct.
    def objects_with_errors
      struct.objects_with_errors
    end

    # Write a binary representation of the message to buffer
    #
    # @param [RequestBuffer] buffer
    # @return [nil]
    def write(buffer)
      @struct.write(buffer)
      nil
    end

    # @return [String] the Message's key
    def key
      @struct.message.key
    end

    # @return [String] the Message's value
    def value
      @struct.message.value
    end

    # @return [Integer] the Message's offset
    def offset
      @struct.offset
    end

    # Is the value compressed?
    #
    # @return [Boolean]
    def compressed?
      compression_codec_id > 0
    end

    # Decompressed value. Only meaningful when #compressed? is true:
    # the codec is looked up through Compression.find_codec, whose
    # errors propagate.
    #
    # @return [String] decompressed value
    def decompressed_value
      compression_codec.decompress(value)
    end

    private

    def attributes
      @struct.message.attributes
    end

    def compression_codec
      Compression.find_codec(compression_codec_id)
    end

    # The codec id lives in the bits of the attributes field selected by
    # COMPRESSION_MASK.
    def compression_codec_id
      attributes & COMPRESSION_MASK
    end

    # Builds the message struct and its offset wrapper, consuming the
    # :attributes, :key, :value and :offset entries of +options+.
    def build_struct(options)
      message_struct = Protocol::MessageStruct.new(
        MAGIC_TYPE,
        options.delete(:attributes) || 0,
        options.delete(:key),
        options.delete(:value)
      )
      self.struct = Protocol::MessageWithOffsetStruct.new(options.delete(:offset) || 0, message_struct)
    end
  end
end