fluent-plugin-kafka 0.0.8 → 0.0.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b1253b7f59126580e11cc6eb1d6f40318f9367bd
4
- data.tar.gz: dc75b9c99e96455add399e4645dacf5f26358810
3
+ metadata.gz: 9cad342ed5984ebc424872a660379b6e471a135d
4
+ data.tar.gz: 73ea1a4a10f3584f719c45582d54f52b67b52c72
5
5
  SHA512:
6
- metadata.gz: de391f877d6db11e0ccbd1a35646043274ef5aeab91015ff09981571fc356d24965289c93f599887d12958549114ef6441a4bce4b856728e8e00ca7dc4f1c011
7
- data.tar.gz: 8c95e61c6617890c7be10dae81f963b4cdb5257829b8e5618cf4c5dfcda618456429a08d8347bc088520174c0bb63ed674528f41d887e3aa3793334ec9f978f4
6
+ metadata.gz: 82ff85d1bc2353109a05092f8cda28237bb4b24cec7af7cce539c55c8d4125e3a9a96e540e43ad33bf2dfe63c1120585b4c3ba7430d70812b973894a92cfc79b
7
+ data.tar.gz: ff574d372eee407d78e199809551bd1ac397c611b018b665416b0c8812cfe3f442b48e75aa320e66933b72a166172d9c09cc0fca913e5141c202d18a0cdd794a
data/README.md CHANGED
@@ -35,7 +35,11 @@ Or install it yourself as:
35
35
 
36
36
  <match *.**>
37
37
  type kafka
38
- brokers <broker1_host>:<broker1_ip>,<broker2_host>:<broker2_ip>,..
38
+
39
+ # Brokers: you can choose either brokers or zookeeper.
40
+ brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
41
+ zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
42
+
39
43
  default_topic <output topic>
40
44
  output_data_type (json|ltsv|msgpack|attr:<record name>)
41
45
  output_include_tag (true|false) :default => false
@@ -57,7 +61,11 @@ See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Posei
57
61
 
58
62
  <match *.**>
59
63
  type kafka_buffered
60
- brokers <broker1_host>:<broker1_ip>,<broker2_host>:<broker2_ip>,..
64
+
65
+ # Brokers: you can choose either brokers or zookeeper.
66
+ brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
67
+ zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
68
+
61
69
  default_topic <output topic>
62
70
  flush_interval <flush interval (sec) :default => 60>
63
71
  buffer_type (file|memory)
@@ -12,10 +12,11 @@ Gem::Specification.new do |gem|
12
12
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
13
13
  gem.name = "fluent-plugin-kafka"
14
14
  gem.require_paths = ["lib"]
15
- gem.version = '0.0.8'
15
+ gem.version = '0.0.9'
16
16
  gem.add_dependency 'fluentd'
17
17
  gem.add_dependency 'poseidon'
18
18
  gem.add_dependency 'ltsv'
19
19
  gem.add_dependency 'yajl-ruby'
20
20
  gem.add_dependency 'msgpack'
21
+ gem.add_dependency 'zookeeper'
21
22
  end
@@ -7,6 +7,7 @@ class Fluent::KafkaOutput < Fluent::Output
7
7
  end
8
8
 
9
9
  config_param :brokers, :string, :default => 'localhost:9092'
10
+ config_param :zookeeper, :string, :default => nil
10
11
  config_param :default_topic, :string, :default => nil
11
12
  config_param :default_partition, :integer, :default => 0
12
13
  config_param :client_id, :string, :default => 'kafka'
@@ -24,7 +25,20 @@ class Fluent::KafkaOutput < Fluent::Output
24
25
 
25
26
  def configure(conf)
26
27
  super
27
- @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
28
+ if @zookeeper
29
+ require 'zookeeper'
30
+ require 'yajl'
31
+ @seed_brokers = []
32
+ z = Zookeeper.new(@zookeeper)
33
+ z.get_children(:path => '/brokers/ids')[:children].each do |id|
34
+ broker = Yajl.load(z.get(:path => "/brokers/ids/#{id}")[:data])
35
+ @seed_brokers.push("#{broker['host']}:#{broker['port']}")
36
+ end
37
+ log.info "brokers has been set via Zookeeper: #{@seed_brokers}"
38
+ else
39
+ @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
40
+ log.info "brokers has been set directly: #{@seed_brokers}"
41
+ end
28
42
  @producers = {} # keyed by topic:partition
29
43
  case @output_data_type
30
44
  when 'json'
@@ -8,12 +8,14 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
8
8
  end
9
9
 
10
10
  config_param :brokers, :string, :default => 'localhost:9092'
11
+ config_param :zookeeper, :string, :default => nil
11
12
  config_param :default_topic, :string, :default => nil
12
13
  config_param :default_partition, :integer, :default => 0
13
14
  config_param :client_id, :string, :default => 'kafka'
14
15
  config_param :output_data_type, :string, :default => 'json'
15
16
  config_param :output_include_tag, :bool, :default => false
16
17
  config_param :output_include_time, :bool, :default => false
18
+ config_param :kafka_agg_max_bytes, :size, :default => 4*1024 #4k
17
19
 
18
20
  # poseidon producer options
19
21
  config_param :max_send_retries, :integer, :default => 3
@@ -23,10 +25,27 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
23
25
  attr_accessor :output_data_type
24
26
  attr_accessor :field_separator
25
27
 
28
+ unless method_defined?(:log)
29
+ define_method("log") { $log }
30
+ end
31
+
26
32
  def configure(conf)
27
33
  super
28
- @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
29
- @producers = {} # keyed by topic:partition
34
+ if @zookeeper
35
+ require 'zookeeper'
36
+ require 'yajl'
37
+ @seed_brokers = []
38
+ z = Zookeeper.new(@zookeeper)
39
+ z.get_children(:path => '/brokers/ids')[:children].each do |id|
40
+ broker = Yajl.load(z.get(:path => "/brokers/ids/#{id}")[:data])
41
+ @seed_brokers.push("#{broker['host']}:#{broker['port']}")
42
+ end
43
+ log.info "brokers has been set via Zookeeper: #{@seed_brokers}"
44
+ else
45
+ @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
46
+ log.info "brokers has been set directly: #{@seed_brokers}"
47
+ end
48
+
30
49
  case @output_data_type
31
50
  when 'json'
32
51
  require 'yajl'
@@ -58,6 +77,8 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
58
77
 
59
78
  def start
60
79
  super
80
+ @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms)
81
+ log.info "initialized producer #{@client_id}"
61
82
  end
62
83
 
63
84
  def shutdown
@@ -91,26 +112,34 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
91
112
 
92
113
  def write(chunk)
93
114
  records_by_topic = {}
115
+ bytes_by_topic = {}
116
+ messages = []
117
+ messages_bytes = 0
94
118
  chunk.msgpack_each { |tag, time, record|
95
119
  record['time'] = time if @output_include_time
96
120
  record['tag'] = tag if @output_include_tag
97
- topic = record['topic'] || self.default_topic || tag
98
- partition = record['partition'] || self.default_partition
99
- message = Poseidon::MessageToSend.new(topic, parse_record(record))
100
- records_by_topic[topic] ||= []
101
- records_by_topic[topic][partition] ||= []
102
- records_by_topic[topic][partition] << message
103
- }
104
- publish(records_by_topic)
105
- end
121
+ topic = record['topic'] || @default_topic || tag
106
122
 
107
- def publish(records_by_topic)
108
- records_by_topic.each { |topic, partitions|
109
- partitions.each_with_index { |messages, partition|
110
- next if not messages
111
- @producers[topic] ||= Poseidon::Producer.new(@seed_brokers, self.client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms)
112
- @producers[topic].send_messages(messages)
113
- }
123
+ records_by_topic[topic] ||= 0
124
+ bytes_by_topic[topic] ||= 0
125
+
126
+ record_buf = parse_record(record)
127
+ record_buf_bytes = record_buf.bytesize
128
+ if messages.length > 0 and messages_bytes + record_buf_bytes > @kafka_agg_max_bytes
129
+ @producer.send_messages(messages)
130
+ messages = []
131
+ messages_bytes = 0
132
+ end
133
+ messages << Poseidon::MessageToSend.new(topic, record_buf)
134
+ messages_bytes += record_buf_bytes
135
+
136
+ records_by_topic[topic] += 1
137
+ bytes_by_topic[topic] += record_buf_bytes
114
138
  }
139
+ if messages.length > 0
140
+ @producer.send_messages(messages)
141
+ end
142
+ log.debug "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})"
115
143
  end
144
+
116
145
  end
metadata CHANGED
@@ -1,83 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-15 00:00:00.000000000 Z
11
+ date: 2015-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poseidon
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: ltsv
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: yajl-ruby
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: msgpack
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: zookeeper
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
97
  description: Fluentd plugin for Apache Kafka > 0.8
@@ -106,17 +120,17 @@ require_paths:
106
120
  - lib
107
121
  required_ruby_version: !ruby/object:Gem::Requirement
108
122
  requirements:
109
- - - '>='
123
+ - - ">="
110
124
  - !ruby/object:Gem::Version
111
125
  version: '0'
112
126
  required_rubygems_version: !ruby/object:Gem::Requirement
113
127
  requirements:
114
- - - '>='
128
+ - - ">="
115
129
  - !ruby/object:Gem::Version
116
130
  version: '0'
117
131
  requirements: []
118
132
  rubyforge_project:
119
- rubygems_version: 2.0.14
133
+ rubygems_version: 2.2.2
120
134
  signing_key:
121
135
  specification_version: 4
122
136
  summary: Fluentd plugin for Apache Kafka > 0.8