codeclimate-poseidon 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +14 -0
  5. data/.yardopts +8 -0
  6. data/CHANGES.md +31 -0
  7. data/Gemfile +13 -0
  8. data/LICENSE.txt +22 -0
  9. data/README.md +72 -0
  10. data/Rakefile +20 -0
  11. data/TODO.md +27 -0
  12. data/examples/consumer.rb +18 -0
  13. data/examples/producer.rb +9 -0
  14. data/lib/poseidon.rb +120 -0
  15. data/lib/poseidon/broker_pool.rb +86 -0
  16. data/lib/poseidon/cluster_metadata.rb +94 -0
  17. data/lib/poseidon/compressed_value.rb +23 -0
  18. data/lib/poseidon/compression.rb +30 -0
  19. data/lib/poseidon/compression/gzip_codec.rb +23 -0
  20. data/lib/poseidon/compression/snappy_codec.rb +29 -0
  21. data/lib/poseidon/connection.rb +169 -0
  22. data/lib/poseidon/fetched_message.rb +37 -0
  23. data/lib/poseidon/message.rb +151 -0
  24. data/lib/poseidon/message_conductor.rb +86 -0
  25. data/lib/poseidon/message_set.rb +80 -0
  26. data/lib/poseidon/message_to_send.rb +33 -0
  27. data/lib/poseidon/messages_for_broker.rb +56 -0
  28. data/lib/poseidon/messages_to_send.rb +47 -0
  29. data/lib/poseidon/messages_to_send_batch.rb +27 -0
  30. data/lib/poseidon/partition_consumer.rb +225 -0
  31. data/lib/poseidon/producer.rb +199 -0
  32. data/lib/poseidon/producer_compression_config.rb +37 -0
  33. data/lib/poseidon/protocol.rb +122 -0
  34. data/lib/poseidon/protocol/protocol_struct.rb +256 -0
  35. data/lib/poseidon/protocol/request_buffer.rb +77 -0
  36. data/lib/poseidon/protocol/response_buffer.rb +72 -0
  37. data/lib/poseidon/sync_producer.rb +161 -0
  38. data/lib/poseidon/topic_metadata.rb +89 -0
  39. data/lib/poseidon/version.rb +4 -0
  40. data/log/.gitkeep +0 -0
  41. data/poseidon.gemspec +27 -0
  42. data/spec/integration/multiple_brokers/consumer_spec.rb +45 -0
  43. data/spec/integration/multiple_brokers/metadata_failures_spec.rb +144 -0
  44. data/spec/integration/multiple_brokers/rebalance_spec.rb +69 -0
  45. data/spec/integration/multiple_brokers/round_robin_spec.rb +41 -0
  46. data/spec/integration/multiple_brokers/spec_helper.rb +60 -0
  47. data/spec/integration/simple/compression_spec.rb +23 -0
  48. data/spec/integration/simple/connection_spec.rb +35 -0
  49. data/spec/integration/simple/multiple_brokers_spec.rb +10 -0
  50. data/spec/integration/simple/simple_producer_and_consumer_spec.rb +121 -0
  51. data/spec/integration/simple/spec_helper.rb +16 -0
  52. data/spec/integration/simple/truncated_messages_spec.rb +46 -0
  53. data/spec/integration/simple/unavailable_broker_spec.rb +72 -0
  54. data/spec/spec_helper.rb +32 -0
  55. data/spec/test_cluster.rb +211 -0
  56. data/spec/unit/broker_pool_spec.rb +98 -0
  57. data/spec/unit/cluster_metadata_spec.rb +46 -0
  58. data/spec/unit/compression/gzip_codec_spec.rb +34 -0
  59. data/spec/unit/compression/snappy_codec_spec.rb +49 -0
  60. data/spec/unit/compression_spec.rb +17 -0
  61. data/spec/unit/connection_spec.rb +4 -0
  62. data/spec/unit/fetched_message_spec.rb +11 -0
  63. data/spec/unit/message_conductor_spec.rb +164 -0
  64. data/spec/unit/message_set_spec.rb +42 -0
  65. data/spec/unit/message_spec.rb +129 -0
  66. data/spec/unit/message_to_send_spec.rb +10 -0
  67. data/spec/unit/messages_for_broker_spec.rb +54 -0
  68. data/spec/unit/messages_to_send_batch_spec.rb +25 -0
  69. data/spec/unit/messages_to_send_spec.rb +63 -0
  70. data/spec/unit/partition_consumer_spec.rb +142 -0
  71. data/spec/unit/producer_compression_config_spec.rb +42 -0
  72. data/spec/unit/producer_spec.rb +51 -0
  73. data/spec/unit/protocol/request_buffer_spec.rb +16 -0
  74. data/spec/unit/protocol_spec.rb +54 -0
  75. data/spec/unit/sync_producer_spec.rb +156 -0
  76. data/spec/unit/topic_metadata_spec.rb +43 -0
  77. metadata +225 -0
@@ -0,0 +1,77 @@
module Poseidon
  module Protocol
    # RequestBuffer accumulates the binary payload for a Kafka API request.
    #
    # The writer methods mirror the primitive wire types described on the
    # Kafka protocol wiki, plus helpers that reserve room for a leading
    # size or CRC32 checksum.
    # (https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-ProtocolPrimitiveTypes)
    class RequestBuffer
      def initialize
        @buffer = ''.encode(Encoding::BINARY)
      end

      # Append raw bytes, coerced to the binary encoding.
      #
      # @param [String] chunk
      # @return [nil]
      def append(chunk)
        @buffer << chunk.dup.force_encoding(Encoding::BINARY)
        nil
      end

      def int8(value)
        append([value].pack("C"))
      end

      def int16(value)
        append([value].pack("s>"))
      end

      def int32(value)
        append([value].pack("l>"))
      end

      def int64(value)
        append([value].pack("q>"))
      end

      # Add a length-prefixed string; nil is encoded as length -1.
      #
      # @param [String] value
      def string(value)
        if value.nil?
          int16(-1)
        else
          int16(value.bytesize)
          append(value)
        end
      end

      # Add a length-prefixed byte sequence; nil is encoded as length -1.
      #
      # @param [String] value
      def bytes(value)
        if value.nil?
          int32(-1)
        else
          int32(value.bytesize)
          append(value)
        end
      end

      # Reserve a one-byte placeholder, run the block, then overwrite the
      # placeholder with the big-endian CRC32 of everything the block
      # appended. (Single-character assignment grows the string in place
      # from 1 byte to the 4-byte checksum.)
      #
      # @return [nil]
      def prepend_crc32
        mark = @buffer.bytesize
        @buffer += " "
        yield
        @buffer[mark] = [Zlib::crc32(@buffer[(mark + 1)..-1])].pack("N")
        nil
      end

      # Reserve a one-byte placeholder, run the block, then overwrite the
      # placeholder with the big-endian count of bytes the block appended.
      #
      # @return [nil]
      def prepend_size
        mark = @buffer.bytesize
        @buffer += " "
        yield
        @buffer[mark] = [(@buffer.bytesize - 1) - mark].pack("N")
        nil
      end

      def to_s
        @buffer
      end
    end
  end
end
@@ -0,0 +1,72 @@
module Poseidon
  module Protocol
    # Sequentially decodes Kafka wire-protocol primitives from a raw
    # response string. Multi-byte integers are big-endian; int16/32/64
    # are signed, int8 is read unsigned ("C").
    #
    # @api private
    class ResponseBuffer
      # @param [String] response raw bytes received from a broker
      def initialize(response)
        @s = response
        @pos = 0
      end

      # @return [Integer] next byte, unsigned
      def int8
        byte = @s.byteslice(@pos, 1).unpack("C").first
        @pos += 1
        byte
      end

      # @return [Integer] next 2 bytes as a signed big-endian short
      def int16
        short = @s.byteslice(@pos, 2).unpack("s>").first
        @pos += 2
        short
      end

      # @return [Integer] next 4 bytes as a signed big-endian int
      def int32
        int = @s.byteslice(@pos, 4).unpack("l>").first
        @pos += 4
        int
      end

      # @return [Integer] next 8 bytes as a signed big-endian long
      def int64
        long = @s.byteslice(@pos, 8).unpack("q>").first
        @pos += 8
        long
      end

      # Read a length-prefixed (int16) string.
      #
      # The Kafka protocol encodes a null string as length -1; return nil
      # in that case instead of corrupting the read position with a
      # negative byteslice/offset.
      #
      # @return [String,nil]
      def string
        len = int16
        return nil if len == -1

        string = @s.byteslice(@pos, len)
        @pos += len
        string
      end

      # Consume and return the next +bytes+ bytes.
      #
      # @param [Integer] bytes
      # @return [String]
      def read(bytes)
        data = @s.byteslice(@pos, bytes)
        @pos += bytes
        data
      end

      # Look at the next +bytes+ bytes without consuming them.
      def peek(bytes)
        @s.byteslice(@pos, bytes)
      end

      # Read a length-prefixed (int32) byte sequence; -1 encodes nil.
      #
      # @return [String,nil]
      def bytes
        n = int32
        if n == -1
          return nil
        else
          read(n)
        end
      end

      # @return [Integer] bytes left to consume
      def bytes_remaining
        @s.bytesize - @pos
      end

      # @return [Boolean] true when the whole response has been consumed
      def eof?
        @pos == @s.bytesize
      end

      def to_s
        @s
      end
    end
  end
end
@@ -0,0 +1,161 @@
module Poseidon
  # Used by +Producer+ for sending messages to the kafka cluster.
  #
  # You should not use this interface directly
  #
  # Fetches metadata at appropriate times.
  # Builds MessagesToSend
  # Handle MessageBatchToSend lifecycle
  #
  # Who is responsible for fetching metadata from broker seed list?
  # Do we want to be fetching from real live brokers eventually?
  #
  # @api private
  class SyncProducer
    OPTION_DEFAULTS = {
      :compression_codec => nil,
      :compressed_topics => nil,
      :metadata_refresh_interval_ms => 600_000,
      :partitioner => nil,
      :max_send_retries => 3,
      :retry_backoff_ms => 100,
      :required_acks => 0,
      :ack_timeout_ms => 1500,
      :socket_timeout_ms => 10_000
    }

    attr_reader :client_id, :retry_backoff_ms, :max_send_retries,
      :metadata_refresh_interval_ms, :required_acks, :ack_timeout_ms, :socket_timeout_ms

    # @param [String] client_id identifies this producer to the brokers
    # @param [Array<String>] seed_brokers "host:port" entries used for the
    #   initial metadata fetch
    # @param [Hash] options see OPTION_DEFAULTS for recognized keys;
    #   unknown keys raise ArgumentError
    def initialize(client_id, seed_brokers, options = {})
      @client_id = client_id

      handle_options(options.dup)

      @cluster_metadata = ClusterMetadata.new
      @message_conductor = MessageConductor.new(@cluster_metadata, @partitioner)
      @broker_pool = BrokerPool.new(client_id, seed_brokers, socket_timeout_ms)
    end

    # Send messages to their lead brokers, retrying failures up to
    # +max_send_retries+ times with +retry_backoff_ms+ between attempts.
    #
    # @param [Array] messages
    # @return [true] when every message was delivered (nil for empty input)
    # @raise [RuntimeError] when messages remain unsent after all retries
    def send_messages(messages)
      return if messages.empty?

      messages_to_send = MessagesToSend.new(messages, @cluster_metadata)

      # Periodically refresh even on the happy path so we notice leader moves.
      if refresh_interval_elapsed?
        refresh_metadata(messages_to_send.topic_set)
      end

      ensure_metadata_available_for_topics(messages_to_send)

      (@max_send_retries+1).times do
        messages_to_send.messages_for_brokers(@message_conductor).each do |messages_for_broker|
          if sent = send_to_broker(messages_for_broker)
            messages_to_send.successfully_sent(sent)
          end
        end

        if !messages_to_send.pending_messages? || @max_send_retries == 0
          break
        else
          # Back off, then rebuild metadata from scratch before retrying.
          Kernel.sleep retry_backoff_ms / 1000.0
          reset_metadata
          ensure_metadata_available_for_topics(messages_to_send)
        end
      end

      if messages_to_send.pending_messages?
        raise "Failed to send all messages: #{messages_to_send.messages} remaining"
      else
        true
      end
    end

    # Release all broker connections.
    def close
      @broker_pool.close
    end

    alias_method :shutdown, :close

    private

    # Fetch metadata (up to three attempts, sleeping 5s between) until the
    # topics we need are known; raises when the cluster never answers.
    def ensure_metadata_available_for_topics(messages_to_send)
      return if !messages_to_send.needs_metadata?

      Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set.inspect}. (Attempt 1)" }
      refresh_metadata(messages_to_send.topic_set)
      return if !messages_to_send.needs_metadata?

      2.times do |n|
        sleep 5

        Poseidon.logger.debug { "Fetching metadata for #{messages_to_send.topic_set.inspect}. (Attempt #{n+2})" }
        refresh_metadata(messages_to_send.topic_set)
        return if !messages_to_send.needs_metadata?
      end
      raise Errors::UnableToFetchMetadata
    end

    # Consume recognized keys from +options+, falling back to
    # OPTION_DEFAULTS; any leftover key is an error.
    def handle_options(options)
      @ack_timeout_ms = handle_option(options, :ack_timeout_ms)
      @socket_timeout_ms = handle_option(options, :socket_timeout_ms)
      @retry_backoff_ms = handle_option(options, :retry_backoff_ms)

      @metadata_refresh_interval_ms =
        handle_option(options, :metadata_refresh_interval_ms)

      @required_acks = handle_option(options, :required_acks)
      @max_send_retries = handle_option(options, :max_send_retries)

      @compression_config = ProducerCompressionConfig.new(
        handle_option(options, :compression_codec),
        handle_option(options, :compressed_topics))

      @partitioner = handle_option(options, :partitioner)

      raise ArgumentError, "Unknown options: #{options.keys.inspect}" if options.keys.any?
    end

    def handle_option(options, sym)
      options.delete(sym) || OPTION_DEFAULTS[sym]
    end

    def refresh_interval_elapsed?
      @cluster_metadata.last_refreshed_at.nil? ||
        (Time.now - @cluster_metadata.last_refreshed_at) * 1000 > metadata_refresh_interval_ms
    end

    # Refresh metadata for +topics+ plus every topic we already track, then
    # sync the broker pool with the freshly discovered brokers.
    def refresh_metadata(topics)
      topics_to_refresh = topics.dup

      @cluster_metadata.topics.each do |topic|
        topics_to_refresh.add(topic)
      end

      @cluster_metadata.update(@broker_pool.fetch_metadata(topics_to_refresh))
      @broker_pool.update_known_brokers(@cluster_metadata.brokers)
    end

    def reset_metadata
      # Fixed typo in log message ("metdata" -> "metadata").
      Poseidon.logger.debug { "Resetting metadata" }
      @cluster_metadata.reset
      @broker_pool.close
    end

    # Deliver one broker's batch. Returns the messages considered sent
    # (all of them when required_acks == 0), or false on connection failure
    # or when no leader is known (broker_id == -1).
    def send_to_broker(messages_for_broker)
      return false if messages_for_broker.broker_id == -1
      to_send = messages_for_broker.build_protocol_objects(@compression_config)

      Poseidon.logger.debug { "Sending messages to broker #{messages_for_broker.broker_id}" }
      response = @broker_pool.execute_api_call(messages_for_broker.broker_id, :produce,
                                               required_acks, ack_timeout_ms,
                                               to_send)
      if required_acks == 0
        messages_for_broker.messages
      else
        messages_for_broker.successfully_sent(response)
      end
    rescue Connection::ConnectionFailedError
      false
    end
  end
end
@@ -0,0 +1,89 @@
module Poseidon
  # Wraps the protocol-level topic metadata struct with convenience
  # accessors for partition availability and leadership.
  #
  # @api private
  class TopicMetadata
    # Build a new TopicMetadata object from its binary representation
    #
    # @param [ResponseBuffer] buffer
    # @return [TopicMetadata]
    def self.read(buffer)
      topic_metadata = TopicMetadata.new
      topic_metadata.struct = Protocol::TopicMetadataStruct.read(buffer)
      topic_metadata
    end

    attr_accessor :struct

    def initialize(struct = nil)
      self.struct = struct
    end

    # Write a binary representation of the TopicMetadata to buffer
    #
    # @param [RequestBuffer] buffer
    # @return [nil]
    def write(buffer)
      struct.write(buffer)
      nil
    end

    # @return [String] the topic name
    def name
      struct.name
    end

    def ==(other)
      eql?(other)
    end

    def eql?(other)
      struct.eql?(other.struct)
    end

    # True when the broker reported no error for this topic.
    def exists?
      struct.error == Errors::NO_ERROR_CODE
    end

    def objects_with_errors
      struct.objects_with_errors
    end

    def leader_available?
      struct.error_class != Errors::LeaderNotAvailable
    end

    def partition_count
      @partition_count ||= struct.partitions.count
    end

    # Partitions that are currently usable: error-free (a missing replica
    # is tolerated) and with an elected leader.
    def available_partitions
      @available_partitions ||= struct.partitions.select do |partition|
        (partition.error == Errors::NO_ERROR_CODE || partition.error_class == Errors::ReplicaNotAvailable) && partition.leader != -1
      end
    end

    def available_partition_count
      available_partitions.count
    end

    # @param [Integer] partition_id
    # @return [Integer,nil] broker id of the partition's leader, or nil
    #   when the partition id is unknown
    def partition_leader(partition_id)
      partition = partitions_by_id[partition_id]
      partition && partition.leader
    end

    def to_s
      struct.partitions.map { |partition| partition.inspect }.join("\n")
    end

    private

    def partitions_by_id
      @partitions_by_id ||= Hash[partitions.map { |partition| [partition.id, partition] }]
    end

    def partitions
      struct.partitions
    end
  end
end
@@ -0,0 +1,4 @@
module Poseidon
  # Unstable! API May Change!
  #
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.8".freeze
end
data/log/.gitkeep ADDED
File without changes
data/poseidon.gemspec ADDED
@@ -0,0 +1,27 @@
# -*- encoding: utf-8 -*-
# Make the gem's lib/ directory loadable so we can read the version constant.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'poseidon/version'

Gem::Specification.new do |spec|
  spec.name          = "codeclimate-poseidon"
  spec.version       = Poseidon::VERSION
  spec.authors       = ["Bob Potter"]
  spec.email         = ["bobby.potter@gmail.com"]
  spec.description   = "A Kafka (http://kafka.apache.org/) producer and consumer"
  spec.summary       = "Poseidon is a producer and consumer implementation for Kafka >= 0.8"
  spec.homepage      = "https://github.com/bpot/poseidon"
  spec.licenses      = ["MIT"]
  spec.required_ruby_version = '>= 1.9.3'

  # Package everything git tracks; derive executables and tests from it.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rspec", '>= 3'
  spec.add_development_dependency "yard"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "snappy"
  spec.add_development_dependency "timecop"
end
@@ -0,0 +1,45 @@
require 'integration/multiple_brokers/spec_helper'

RSpec.describe "consuming with multiple brokers", :type => :request do
  include_context "a multiple broker cluster"

  before(:each) do
    # Requesting metadata for the topic triggers its auto-creation.
    connection = Connection.new("localhost", 9092, "metadata_fetcher", 10_000)
    connection.topic_metadata(["test"])
    sleep 1
  end

  it "finds the lead broker for each partition" do
    leaders = Set.new
    (0..2).each do |partition|
      consumer = PartitionConsumer.consumer_for_partition("test_client",
                                                          ["localhost:9092"],
                                                          "test", partition,
                                                          :earliest_offset)

      leaders.add("#{consumer.host}:#{consumer.port}")
    end
    expect(leaders.size).to eq(3)
  end

  it "consumes from all partitions" do
    @producer = Producer.new(["localhost:9092","localhost:9093","localhost:9094"], "test",
                             :required_acks => 1)

    expected = (0...24).map { |n| "hello_#{n}" }
    expected.each do |message|
      @producer.send_messages([MessageToSend.new("test", message)])
    end

    received = []
    (0..2).each do |partition|
      consumer = PartitionConsumer.consumer_for_partition("test_client",
                                                          ["localhost:9092"],
                                                          "test", partition,
                                                          :earliest_offset)
      received.concat(consumer.fetch)
    end
    expect(received.map(&:value).sort).to eq(expected.sort)
  end
end