fluent-plugin-kafka 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b1253b7f59126580e11cc6eb1d6f40318f9367bd
4
- data.tar.gz: dc75b9c99e96455add399e4645dacf5f26358810
3
+ metadata.gz: 9cad342ed5984ebc424872a660379b6e471a135d
4
+ data.tar.gz: 73ea1a4a10f3584f719c45582d54f52b67b52c72
5
5
  SHA512:
6
- metadata.gz: de391f877d6db11e0ccbd1a35646043274ef5aeab91015ff09981571fc356d24965289c93f599887d12958549114ef6441a4bce4b856728e8e00ca7dc4f1c011
7
- data.tar.gz: 8c95e61c6617890c7be10dae81f963b4cdb5257829b8e5618cf4c5dfcda618456429a08d8347bc088520174c0bb63ed674528f41d887e3aa3793334ec9f978f4
6
+ metadata.gz: 82ff85d1bc2353109a05092f8cda28237bb4b24cec7af7cce539c55c8d4125e3a9a96e540e43ad33bf2dfe63c1120585b4c3ba7430d70812b973894a92cfc79b
7
+ data.tar.gz: ff574d372eee407d78e199809551bd1ac397c611b018b665416b0c8812cfe3f442b48e75aa320e66933b72a166172d9c09cc0fca913e5141c202d18a0cdd794a
data/README.md CHANGED
@@ -35,7 +35,11 @@ Or install it yourself as:
35
35
 
36
36
  <match *.**>
37
37
  type kafka
38
- brokers <broker1_host>:<broker1_ip>,<broker2_host>:<broker2_ip>,..
38
+
39
+ # Brokers: you can choose either brokers or zookeeper.
40
+ brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
41
+ zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
42
+
39
43
  default_topic <output topic>
40
44
  output_data_type (json|ltsv|msgpack|attr:<record name>)
41
45
  output_include_tag (true|false) :default => false
@@ -57,7 +61,11 @@ See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Posei
57
61
 
58
62
  <match *.**>
59
63
  type kafka_buffered
60
- brokers <broker1_host>:<broker1_ip>,<broker2_host>:<broker2_ip>,..
64
+
65
+ # Brokers: you can choose either brokers or zookeeper.
66
+ brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
67
+ zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
68
+
61
69
  default_topic <output topic>
62
70
  flush_interval <flush interval (sec) :default => 60>
63
71
  buffer_type (file|memory)
@@ -12,10 +12,11 @@ Gem::Specification.new do |gem|
12
12
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
13
13
  gem.name = "fluent-plugin-kafka"
14
14
  gem.require_paths = ["lib"]
15
- gem.version = '0.0.8'
15
+ gem.version = '0.0.9'
16
16
  gem.add_dependency 'fluentd'
17
17
  gem.add_dependency 'poseidon'
18
18
  gem.add_dependency 'ltsv'
19
19
  gem.add_dependency 'yajl-ruby'
20
20
  gem.add_dependency 'msgpack'
21
+ gem.add_dependency 'zookeeper'
21
22
  end
@@ -7,6 +7,7 @@ class Fluent::KafkaOutput < Fluent::Output
7
7
  end
8
8
 
9
9
  config_param :brokers, :string, :default => 'localhost:9092'
10
+ config_param :zookeeper, :string, :default => nil
10
11
  config_param :default_topic, :string, :default => nil
11
12
  config_param :default_partition, :integer, :default => 0
12
13
  config_param :client_id, :string, :default => 'kafka'
@@ -24,7 +25,20 @@ class Fluent::KafkaOutput < Fluent::Output
24
25
 
25
26
  def configure(conf)
26
27
  super
27
- @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
28
+ if @zookeeper
29
+ require 'zookeeper'
30
+ require 'yajl'
31
+ @seed_brokers = []
32
+ z = Zookeeper.new(@zookeeper)
33
+ z.get_children(:path => '/brokers/ids')[:children].each do |id|
34
+ broker = Yajl.load(z.get(:path => "/brokers/ids/#{id}")[:data])
35
+ @seed_brokers.push("#{broker['host']}:#{broker['port']}")
36
+ end
37
+ log.info "brokers has been set via Zookeeper: #{@seed_brokers}"
38
+ else
39
+ @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
40
+ log.info "brokers has been set directly: #{@seed_brokers}"
41
+ end
28
42
  @producers = {} # keyed by topic:partition
29
43
  case @output_data_type
30
44
  when 'json'
@@ -8,12 +8,14 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
8
8
  end
9
9
 
10
10
  config_param :brokers, :string, :default => 'localhost:9092'
11
+ config_param :zookeeper, :string, :default => nil
11
12
  config_param :default_topic, :string, :default => nil
12
13
  config_param :default_partition, :integer, :default => 0
13
14
  config_param :client_id, :string, :default => 'kafka'
14
15
  config_param :output_data_type, :string, :default => 'json'
15
16
  config_param :output_include_tag, :bool, :default => false
16
17
  config_param :output_include_time, :bool, :default => false
18
+ config_param :kafka_agg_max_bytes, :size, :default => 4*1024 #4k
17
19
 
18
20
  # poseidon producer options
19
21
  config_param :max_send_retries, :integer, :default => 3
@@ -23,10 +25,27 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
23
25
  attr_accessor :output_data_type
24
26
  attr_accessor :field_separator
25
27
 
28
+ unless method_defined?(:log)
29
+ define_method("log") { $log }
30
+ end
31
+
26
32
  def configure(conf)
27
33
  super
28
- @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
29
- @producers = {} # keyed by topic:partition
34
+ if @zookeeper
35
+ require 'zookeeper'
36
+ require 'yajl'
37
+ @seed_brokers = []
38
+ z = Zookeeper.new(@zookeeper)
39
+ z.get_children(:path => '/brokers/ids')[:children].each do |id|
40
+ broker = Yajl.load(z.get(:path => "/brokers/ids/#{id}")[:data])
41
+ @seed_brokers.push("#{broker['host']}:#{broker['port']}")
42
+ end
43
+ log.info "brokers has been set via Zookeeper: #{@seed_brokers}"
44
+ else
45
+ @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
46
+ log.info "brokers has been set directly: #{@seed_brokers}"
47
+ end
48
+
30
49
  case @output_data_type
31
50
  when 'json'
32
51
  require 'yajl'
@@ -58,6 +77,8 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
58
77
 
59
78
  def start
60
79
  super
80
+ @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms)
81
+ log.info "initialized producer #{@client_id}"
61
82
  end
62
83
 
63
84
  def shutdown
@@ -91,26 +112,34 @@ class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
91
112
 
92
113
  def write(chunk)
93
114
  records_by_topic = {}
115
+ bytes_by_topic = {}
116
+ messages = []
117
+ messages_bytes = 0
94
118
  chunk.msgpack_each { |tag, time, record|
95
119
  record['time'] = time if @output_include_time
96
120
  record['tag'] = tag if @output_include_tag
97
- topic = record['topic'] || self.default_topic || tag
98
- partition = record['partition'] || self.default_partition
99
- message = Poseidon::MessageToSend.new(topic, parse_record(record))
100
- records_by_topic[topic] ||= []
101
- records_by_topic[topic][partition] ||= []
102
- records_by_topic[topic][partition] << message
103
- }
104
- publish(records_by_topic)
105
- end
121
+ topic = record['topic'] || @default_topic || tag
106
122
 
107
- def publish(records_by_topic)
108
- records_by_topic.each { |topic, partitions|
109
- partitions.each_with_index { |messages, partition|
110
- next if not messages
111
- @producers[topic] ||= Poseidon::Producer.new(@seed_brokers, self.client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms)
112
- @producers[topic].send_messages(messages)
113
- }
123
+ records_by_topic[topic] ||= 0
124
+ bytes_by_topic[topic] ||= 0
125
+
126
+ record_buf = parse_record(record)
127
+ record_buf_bytes = record_buf.bytesize
128
+ if messages.length > 0 and messages_bytes + record_buf_bytes > @kafka_agg_max_bytes
129
+ @producer.send_messages(messages)
130
+ messages = []
131
+ messages_bytes = 0
132
+ end
133
+ messages << Poseidon::MessageToSend.new(topic, record_buf)
134
+ messages_bytes += record_buf_bytes
135
+
136
+ records_by_topic[topic] += 1
137
+ bytes_by_topic[topic] += record_buf_bytes
114
138
  }
139
+ if messages.length > 0
140
+ @producer.send_messages(messages)
141
+ end
142
+ log.debug "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})"
115
143
  end
144
+
116
145
  end
metadata CHANGED
@@ -1,83 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidemasa Togashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-15 00:00:00.000000000 Z
11
+ date: 2015-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poseidon
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: ltsv
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: yajl-ruby
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: msgpack
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: zookeeper
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
97
  description: Fluentd plugin for Apache Kafka > 0.8
@@ -106,17 +120,17 @@ require_paths:
106
120
  - lib
107
121
  required_ruby_version: !ruby/object:Gem::Requirement
108
122
  requirements:
109
- - - '>='
123
+ - - ">="
110
124
  - !ruby/object:Gem::Version
111
125
  version: '0'
112
126
  required_rubygems_version: !ruby/object:Gem::Requirement
113
127
  requirements:
114
- - - '>='
128
+ - - ">="
115
129
  - !ruby/object:Gem::Version
116
130
  version: '0'
117
131
  requirements: []
118
132
  rubyforge_project:
119
- rubygems_version: 2.0.14
133
+ rubygems_version: 2.2.2
120
134
  signing_key:
121
135
  specification_version: 4
122
136
  summary: Fluentd plugin for Apache Kafka > 0.8