fluent-plugin-kafka-zendesk 0.1.4
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +196 -0
- data/Rakefile +11 -0
- data/fluent-plugin-kafka-0.1.4.gem +0 -0
- data/fluent-plugin-kafka.gemspec +22 -0
- data/lib/fluent/plugin/in_kafka.rb +280 -0
- data/lib/fluent/plugin/in_kafka_group.rb +176 -0
- data/lib/fluent/plugin/out_kafka.rb +165 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +187 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +33 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 37289c076743d4a6760c53274a86604647975e04
  data.tar.gz: 8a1317287c6d1e4ac03c34f8aa032d1b143320f4
SHA512:
  metadata.gz: 28c1cdaf406bef12651ca162f5b3638f7f98eb77c9251848c064a9738d7674b6048d79d96b0e1a2cabe14b22935c1d457e646c4a1f31a586a349843ee058b7d0
  data.tar.gz: 5e6bbd8cba77e7a9bbe090a36e65f852a6cf46212b585c001854a3260d7a1924e05c5e3d0a2804bce861a2f647f7ce7a69e89d2eaff36ab8b27610bde3cac900
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
Copyright (c) 2014 htgc

MIT License

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,196 @@
# Fluent::Plugin::Kafka

TODO: Write a gem description
TODO: Also, I need to write tests

## Installation

Add this line to your application's Gemfile:

    gem 'fluent-plugin-kafka'

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install fluent-plugin-kafka

## Usage

### Input plugin (@type 'kafka')

    <source>
      @type kafka
      host <broker host>
      port <broker port: default=9092>
      topics <listening topics(separate with comma',')>
      format <input text type (text|json|ltsv|msgpack)>
      message_key <key (Optional, for text format only, default is message)>
      add_prefix <tag prefix (Optional)>
      add_suffix <tag suffix (Optional)>
      max_bytes (integer) :default => nil (Use default of Poseidon)
      max_wait_ms (integer) :default => nil (Use default of Poseidon)
      min_bytes (integer) :default => nil (Use default of Poseidon)
      socket_timeout_ms (integer) :default => nil (Use default of Poseidon)
    </source>

Supports the following Poseidon::PartitionConsumer options:

- max_bytes - default: 1048576 (1MB) - Maximum number of bytes to fetch
- max_wait_ms - default: 100 (100ms) - How long to block until the server sends us data.
- min_bytes - default: 1 (Send us data as soon as it is ready) - Smallest amount of data the server should send us.
- socket_timeout_ms - default: 10000 (10s) - How long to wait for a reply from the server. Should be higher than max_wait_ms.

Processing can also start from an assigned offset for specific topics:

    <source>
      @type kafka
      host <broker host>
      port <broker port: default=9092>
      format <input text type (text|json|ltsv|msgpack)>
      <topic>
        topic <listening topic>
        partition <listening partition: default=0>
        offset <listening start offset: default=-1>
      </topic>
      <topic>
        topic <listening topic>
        partition <listening partition: default=0>
        offset <listening start offset: default=-1>
      </topic>
    </source>

See also [Poseidon::PartitionConsumer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/PartitionConsumer) for more detailed documentation about Poseidon.
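
A minimal concrete configuration might look like the following (the broker address and topic names are illustrative):

    <source>
      @type kafka
      host localhost
      port 9092
      topics app_log,access_log
      format json
      add_prefix kafka
    </source>

With `add_prefix kafka`, events read from the topic `app_log` are emitted with the tag `kafka.app_log`.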

### Input plugin (@type 'kafka_group', supports kafka group)

    <source>
      @type kafka_group
      brokers <list of broker-host:port, separate with comma, must set>
      zookeepers <list of zookeeper-host:port, separate with comma, must set>
      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for brokers
      consumer_group <consumer group name, must set>
      topics <listening topics(separate with comma',')>
      format <input text type (text|json|ltsv|msgpack)>
      message_key <key (Optional, for text format only, default is message)>
      add_prefix <tag prefix (Optional)>
      add_suffix <tag suffix (Optional)>
      max_bytes (integer) :default => nil (Use default of Poseidon)
      max_wait_ms (integer) :default => nil (Use default of Poseidon)
      min_bytes (integer) :default => nil (Use default of Poseidon)
      socket_timeout_ms (integer) :default => nil (Use default of Poseidon)
    </source>

Supports the following Poseidon::PartitionConsumer options:

- max_bytes - default: 1048576 (1MB) - Maximum number of bytes to fetch
- max_wait_ms - default: 100 (100ms) - How long to block until the server sends us data.
- min_bytes - default: 1 (Send us data as soon as it is ready) - Smallest amount of data the server should send us.
- socket_timeout_ms - default: 10000 (10s) - How long to wait for a reply from the server. Should be higher than max_wait_ms.

See also [Poseidon::PartitionConsumer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/PartitionConsumer) for more detailed documentation about Poseidon.
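
For example (host names, group name, and topic are illustrative):

    <source>
      @type kafka_group
      brokers kafka01:9092,kafka02:9092
      zookeepers zk01:2181,zk02:2181
      consumer_group fluentd_group
      topics app_log
      format json
    </source>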

### Output plugin (non-buffered)

    <match *.**>
      @type kafka

      # Brokers: you can choose either brokers or zookeeper.
      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
      default_topic <output topic>
      default_partition_key (string) :default => nil
      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>)
      output_include_tag (true|false) :default => false
      output_include_time (true|false) :default => false
      max_send_retries (integer) :default => 3
      required_acks (integer) :default => 0
      ack_timeout_ms (integer) :default => 1500
      compression_codec (none|gzip|snappy) :default => none
    </match>

Supports the following Poseidon::Producer options:

- max_send_retries - default: 3 - Number of times to retry sending of messages to a leader.
- required_acks - default: 0 - The number of acks required per request.
- ack_timeout_ms - default: 1500 - How long the producer waits for acks.
- compression_codec - default: none - The codec the producer uses to compress messages.

See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/Producer) for more detailed documentation about Poseidon.

This plugin also supports the "snappy" compression codec. Install the snappy gem before using snappy compression:

    $ gem install snappy

#### Load balancing

By default, Poseidon sends messages to brokers in a round-robin manner, but you can set `default_partition_key` in the config file to route messages to a specific broker.
If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.

|default_partition_key|partition_key|behavior|
|-|-|-|
|Not set|Not exists|All messages are sent round-robin|
|Set|Not exists|All messages are sent to the specific broker|
|Not set|Exists|Messages with a partition_key record are sent to the specific broker; others are sent round-robin|
|Set|Exists|Messages with a partition_key record are sent to the specific broker with partition_key; others are sent to the specific broker with default_partition_key|
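
For example, with the following configuration (broker address, topic, and key are illustrative), every record is routed using the key `app_shard_1`, while a record carrying its own `partition_key` field, such as `{"message":"hello","partition_key":"user_42"}`, is routed with `user_42` instead:

    <match app.**>
      @type kafka
      brokers kafka01:9092
      default_topic app_log
      default_partition_key app_shard_1
      output_data_type json
    </match>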

### Buffered output plugin

    <match *.**>
      @type kafka_buffered

      # Brokers: you can choose either brokers or zookeeper.
      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
      default_topic <output topic>
      default_partition_key (string) :default => nil
      flush_interval <flush interval (sec) :default => 60>
      buffer_type (file|memory)
      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>)
      output_include_tag (true|false) :default => false
      output_include_time (true|false) :default => false
      max_send_retries (integer) :default => 3
      required_acks (integer) :default => 0
      ack_timeout_ms (integer) :default => 1500
      compression_codec (none|gzip|snappy) :default => none
    </match>

Supports the following Poseidon::Producer options:

- max_send_retries - default: 3 - Number of times to retry sending of messages to a leader.
- required_acks - default: 0 - The number of acks required per request.
- ack_timeout_ms - default: 1500 - How long the producer waits for acks.
- compression_codec - default: none - The codec the producer uses to compress messages.

See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/Producer) for more detailed documentation about Poseidon.

This plugin also supports the "snappy" compression codec. Install the snappy gem before using snappy compression:

    $ gem install snappy

#### Load balancing

By default, Poseidon sends messages to brokers in a round-robin manner, but you can set `default_partition_key` in the config file to route messages to a specific broker.
If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.

|default_partition_key|partition_key|behavior|
|-|-|-|
|Not set|Not exists|All messages are sent round-robin|
|Set|Not exists|All messages are sent to the specific broker|
|Not set|Exists|Messages with a partition_key record are sent to the specific broker; others are sent round-robin|
|Set|Exists|Messages with a partition_key record are sent to the specific broker with partition_key; others are sent to the specific broker with default_partition_key|
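
A file-buffered example (paths and values are illustrative; `buffer_path` is a standard Fluentd buffer parameter rather than one specific to this plugin):

    <match app.**>
      @type kafka_buffered
      brokers kafka01:9092
      default_topic app_log
      buffer_type file
      buffer_path /var/log/fluentd/kafka_buffered.*.buffer
      flush_interval 10
      output_data_type json
    </match>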

## Contributing

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Added some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request

data/Rakefile
ADDED
data/fluent-plugin-kafka-0.1.4.gem
ADDED
Binary file
data/fluent-plugin-kafka.gemspec
ADDED
@@ -0,0 +1,22 @@
# -*- encoding: utf-8 -*-

Gem::Specification.new do |gem|
  gem.authors       = ["Hidemasa Togashi"]
  gem.email         = ["togachiro@gmail.com"]
  gem.description   = %q{Fluentd plugin for Apache Kafka > 0.8}
  gem.summary       = %q{Fluentd plugin for Apache Kafka > 0.8}
  gem.homepage      = "https://github.com/khouse/fluent-plugin-kafka"

  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "fluent-plugin-kafka-zendesk"
  gem.require_paths = ["lib"]
  gem.version       = '0.1.4'
  gem.add_dependency 'fluentd'
  gem.add_dependency 'poseidon_cluster'
  gem.add_dependency 'ltsv'
  gem.add_dependency 'yajl-ruby'
  gem.add_dependency 'msgpack'
  gem.add_dependency 'zookeeper'
end

data/lib/fluent/plugin/in_kafka.rb
ADDED
@@ -0,0 +1,280 @@
module Fluent

class KafkaInput < Input
  Plugin.register_input('kafka', self)

  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :host, :string, :default => 'localhost',
               :desc => "Broker host"
  config_param :port, :integer, :default => 9092,
               :desc => "Broker port"
  config_param :interval, :integer, :default => 1, # seconds
               :desc => "Interval (Unit: seconds)"
  config_param :topics, :string, :default => nil,
               :desc => "Listening topics(separate with comma',')"
  config_param :client_id, :string, :default => 'kafka'
  config_param :partition, :integer, :default => 0,
               :desc => "Listening partition"
  config_param :offset, :integer, :default => -1,
               :desc => "Listening start offset"
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix"
  config_param :add_offset_in_record, :bool, :default => false

  config_param :offset_zookeeper, :string, :default => nil
  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'

  # poseidon PartitionConsumer options
  config_param :max_bytes, :integer, :default => nil,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_ms, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."
  config_param :socket_timeout_ms, :integer, :default => nil,
               :desc => "How long to wait for a reply from the server. Should be higher than max_wait_ms."

  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'poseidon'
    require 'zookeeper'
  end

  def configure(conf)
    super

    @topic_list = []
    if @topics
      @topic_list = @topics.split(',').map { |topic|
        TopicEntry.new(topic.strip, @partition, @offset)
      }
    else
      conf.elements.select { |element| element.name == 'topic' }.each do |element|
        unless element.has_key?('topic')
          raise ConfigError, "kafka: 'topic' is a required parameter in a 'topic' element."
        end
        partition = element.has_key?('partition') ? element['partition'].to_i : 0
        offset = element.has_key?('offset') ? element['offset'].to_i : -1
        @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
      end
    end

    if @topic_list.empty?
      raise ConfigError, "kafka: 'topics' or a 'topic' element is a required parameter"
    end

    case @format
    when 'json'
      require 'yajl'
    when 'ltsv'
      require 'ltsv'
    when 'msgpack'
      require 'msgpack'
    end
  end

  def start
    @loop = Coolio::Loop.new
    opt = {}
    opt[:max_bytes] = @max_bytes if @max_bytes
    opt[:max_wait_ms] = @max_wait_ms if @max_wait_ms
    opt[:min_bytes] = @min_bytes if @min_bytes
    opt[:socket_timeout_ms] = @socket_timeout_ms if @socket_timeout_ms

    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper

    @topic_watchers = @topic_list.map {|topic_entry|
      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
      TopicWatcher.new(
        topic_entry,
        @host,
        @port,
        @client_id,
        interval,
        @format,
        @message_key,
        @add_offset_in_record,
        @add_prefix,
        @add_suffix,
        offset_manager,
        router,
        opt)
    }
    @topic_watchers.each {|tw|
      tw.attach(@loop)
    }
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    @loop.stop
    @zookeeper.close! if @zookeeper
  end

  def run
    @loop.run
  rescue
    $log.error "unexpected error", :error=>$!.to_s
    $log.error_backtrace
  end

  class TopicWatcher < Coolio::TimerWatcher
    def initialize(topic_entry, host, port, client_id, interval, format, message_key, add_offset_in_record, add_prefix, add_suffix, offset_manager, router, options={})
      @topic_entry = topic_entry
      @host = host
      @port = port
      @client_id = client_id
      @callback = method(:consume)
      @format = format
      @message_key = message_key
      @add_offset_in_record = add_offset_in_record
      @add_prefix = add_prefix
      @add_suffix = add_suffix
      @options = options
      @offset_manager = offset_manager
      @router = router

      @next_offset = @topic_entry.offset
      if @topic_entry.offset == -1 && offset_manager
        @next_offset = offset_manager.next_offset
      end
      @consumer = create_consumer(@next_offset)

      super(interval, true)
    end

    def on_timer
      @callback.call
    rescue
      # TODO log?
      $log.error $!.to_s
      $log.error_backtrace
    end

    def consume
      es = MultiEventStream.new
      tag = @topic_entry.topic
      tag = @add_prefix + "." + tag if @add_prefix
      tag = tag + "." + @add_suffix if @add_suffix

      if @offset_manager && @consumer.next_offset != @next_offset
        @consumer = create_consumer(@next_offset)
      end

      @consumer.fetch.each { |msg|
        begin
          msg_record = parse_line(msg.value)
          msg_record = decorate_offset(msg_record, msg.offset) if @add_offset_in_record
          es.add(Engine.now, msg_record)
        rescue
          $log.warn msg_record.to_s, :error=>$!.to_s
          $log.debug_backtrace
        end
      }

      unless es.empty?
        @router.emit_stream(tag, es)

        if @offset_manager
          next_offset = @consumer.next_offset
          @offset_manager.save_offset(next_offset)
          @next_offset = next_offset
        end
      end
    end

    def create_consumer(offset)
      @consumer.close if @consumer
      Poseidon::PartitionConsumer.new(
        @client_id,             # client_id
        @host,                  # host
        @port,                  # port
        @topic_entry.topic,     # topic
        @topic_entry.partition, # partition
        offset,                 # offset
        @options                # options
      )
    end

    def parse_line(record)
      case @format
      when 'json'
        Yajl::Parser.parse(record)
      when 'ltsv'
        LTSV.parse(record)
      when 'msgpack'
        MessagePack.unpack(record)
      when 'text'
        {@message_key => record}
      end
    end

    def decorate_offset(record, offset)
      case @format
      when 'json'
        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
      when 'ltsv'
        record.each { |line|
          add_offset_in_hash(line, @topic_entry.topic, @topic_entry.partition, offset)
        }
      when 'msgpack'
        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
      when 'text'
        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
      end
      record
    end

    def add_offset_in_hash(hash, topic, partition, offset)
      hash['kafka_topic'] = topic
      hash['kafka_partition'] = partition
      hash['kafka_offset'] = offset
    end
  end

  class TopicEntry
    def initialize(topic, partition, offset)
      @topic = topic
      @partition = partition
      @offset = offset
    end
    attr_reader :topic, :partition, :offset
  end

  class OffsetManager
    def initialize(topic_entry, zookeeper, zk_root_node)
      @zookeeper = zookeeper
      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
    end

    def create_node(zk_path, topic, partition)
      path = ""
      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
        path = path + dir
        @zookeeper.create(:path => "#{path}")
      }
      $log.trace "use zk offset node : #{path}"
    end

    def next_offset
      @zookeeper.get(:path => @zk_path)[:data].to_i
    end

    def save_offset(offset)
      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
      $log.trace "update zk offset node : #{offset.to_s}"
    end
  end
end

end
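
Note: the input plugin above can also persist its consumption position in Zookeeper via the `offset_zookeeper` and `offset_zk_root_node` parameters, which appear in the code but not in the README. A minimal sketch of such a configuration, assuming an illustrative Zookeeper address:

    <source>
      @type kafka
      host localhost
      topics app_log
      format json
      offset_zookeeper zk01:2181
      offset_zk_root_node /fluent-plugin-kafka
    </source>

With this set, OffsetManager stores the next offset under `/fluent-plugin-kafka/<topic>/<partition>/next_offset`, and a topic configured with `offset -1` resumes from that saved position.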

data/lib/fluent/plugin/in_kafka_group.rb
ADDED
@@ -0,0 +1,176 @@
module Fluent

class KafkaGroupInput < Input
  Plugin.register_input('kafka_group', self)

  config_param :brokers, :string,
               :desc => "List of broker-host:port, separate with comma, must set."
  config_param :zookeepers, :string,
               :desc => "List of zookeeper-host:port, separate with comma, must set."
  config_param :consumer_group, :string, :default => nil,
               :desc => "Consumer group name, must set."
  config_param :topics, :string,
               :desc => "Listening topics(separate with comma',')."
  config_param :interval, :integer, :default => 1, # seconds
               :desc => "Interval (Unit: seconds)"
  config_param :format, :string, :default => 'json',
               :desc => "Supported format: (json|text|ltsv|msgpack)"
  config_param :message_key, :string, :default => 'message',
               :desc => "For 'text' format only."
  config_param :add_prefix, :string, :default => nil,
               :desc => "Tag prefix (Optional)"
  config_param :add_suffix, :string, :default => nil,
               :desc => "Tag suffix (Optional)"

  # poseidon PartitionConsumer options
  config_param :max_bytes, :integer, :default => nil,
               :desc => "Maximum number of bytes to fetch."
  config_param :max_wait_ms, :integer, :default => nil,
               :desc => "How long to block until the server sends us data."
  config_param :min_bytes, :integer, :default => nil,
               :desc => "Smallest amount of data the server should send us."
  config_param :socket_timeout_ms, :integer, :default => nil,
               :desc => "How long to wait for a reply from the server. Should be higher than max_wait_ms."

  unless method_defined?(:router)
    define_method("router") { Fluent::Engine }
  end

  def initialize
    super
    require 'poseidon_cluster'
  end

  def _config_to_array(config)
    config_array = config.split(',').map {|k| k.strip }
    if config_array.empty?
      raise ConfigError, "kafka_group: '#{config}' is a required parameter"
    end
    config_array
  end

  private :_config_to_array

  def configure(conf)
    super
    @broker_list = _config_to_array(@brokers)
    @zookeeper_list = _config_to_array(@zookeepers)
    @topic_list = _config_to_array(@topics)

    unless @consumer_group
      raise ConfigError, "kafka_group: 'consumer_group' is a required parameter"
    end
    $log.info "Will watch for topics #{@topic_list} at brokers " \
              "#{@broker_list}, zookeepers #{@zookeeper_list} and group " \
              "'#{@consumer_group}'"

    case @format
    when 'json'
      require 'yajl'
    when 'ltsv'
      require 'ltsv'
    when 'msgpack'
      require 'msgpack'
    end
  end

  def start
    @loop = Coolio::Loop.new
    opt = {}
    opt[:max_bytes] = @max_bytes if @max_bytes
    opt[:max_wait_ms] = @max_wait_ms if @max_wait_ms
    opt[:min_bytes] = @min_bytes if @min_bytes
    opt[:socket_timeout_ms] = @socket_timeout_ms if @socket_timeout_ms

    @topic_watchers = @topic_list.map {|topic|
      TopicWatcher.new(topic, @broker_list, @zookeeper_list, @consumer_group,
                       interval, @format, @message_key, @add_prefix,
                       @add_suffix, router, opt)
    }
    @topic_watchers.each {|tw|
      tw.attach(@loop)
    }
    @thread = Thread.new(&method(:run))
  end

  def shutdown
    @loop.stop
  end

  def run
    @loop.run
  rescue
    $log.error "unexpected error", :error=>$!.to_s
    $log.error_backtrace
  end

  class TopicWatcher < Coolio::TimerWatcher
    def initialize(topic, broker_list, zookeeper_list, consumer_group,
                   interval, format, message_key, add_prefix, add_suffix,
                   router, options)
      @topic = topic
      @callback = method(:consume)
      @format = format
      @message_key = message_key
      @add_prefix = add_prefix
      @add_suffix = add_suffix
      @router = router

      @consumer = Poseidon::ConsumerGroup.new(
        consumer_group,
        broker_list,
        zookeeper_list,
        topic,
        options
      )

      super(interval, true)
    end

    def on_timer
      @callback.call
    rescue
      # TODO log?
      $log.error $!.to_s
      $log.error_backtrace
    end

    def consume
      es = MultiEventStream.new
      tag = @topic
      tag = @add_prefix + "." + tag if @add_prefix
      tag = tag + "." + @add_suffix if @add_suffix

      @consumer.fetch do |partition, bulk|
        bulk.each do |msg|
          begin
            msg_record = parse_line(msg.value)
            es.add(Engine.now, msg_record)
          rescue
            $log.warn msg_record.to_s, :error=>$!.to_s
            $log.debug_backtrace
          end
        end
      end

      unless es.empty?
        @router.emit_stream(tag, es)
      end
    end

    def parse_line(record)
      case @format
      when 'json'
        Yajl::Parser.parse(record)
      when 'ltsv'
        LTSV.parse(record)
      when 'msgpack'
        MessagePack.unpack(record)
      when 'text'
        {@message_key => record}
      end
    end
  end
end

end

data/lib/fluent/plugin/out_kafka.rb
ADDED
@@ -0,0 +1,165 @@
class Fluent::KafkaOutput < Fluent::Output
  Fluent::Plugin.register_output('kafka', self)

  def initialize
    super
    require 'poseidon'
  end

  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => <<-DESC
Set brokers directly
<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
Note that you can choose to use either brokers or zookeeper.
DESC
  config_param :zookeeper, :string, :default => nil,
               :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
  config_param :zookeeper_path, :string, :default => '/brokers/ids',
               :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
  config_param :default_topic, :string, :default => nil,
               :desc => "Output topic."
  config_param :default_partition_key, :string, :default => nil
  config_param :client_id, :string, :default => 'kafka'
  config_param :output_data_type, :string, :default => 'json',
               :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
  config_param :output_include_tag, :bool, :default => false
  config_param :output_include_time, :bool, :default => false

  # poseidon producer options
  config_param :max_send_retries, :integer, :default => 3,
               :desc => "Number of times to retry sending of messages to a leader."
  config_param :required_acks, :integer, :default => 0,
               :desc => "The number of acks required per request."
  config_param :ack_timeout_ms, :integer, :default => 1500,
               :desc => "How long the producer waits for acks."
  config_param :compression_codec, :string, :default => 'none',
               :desc => "The codec the producer uses to compress messages."

  attr_accessor :output_data_type
  attr_accessor :field_separator

  @seed_brokers = []

  unless method_defined?(:log)
    define_method("log") { $log }
  end

  def refresh_producer()
    if @zookeeper
      @seed_brokers = []
      z = Zookeeper.new(@zookeeper)
      z.get_children(:path => @zookeeper_path)[:children].each do |id|
        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
      end
      z.close
      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
    end
    begin
      if @seed_brokers.length > 0
        @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms, :compression_codec => @compression_codec.to_sym)
        log.info "initialized producer #{@client_id}"
      else
        log.warn "No brokers found on Zookeeper"
      end
    rescue Exception => e
      log.error e
    end
  end

  def configure(conf)
    super
    if @zookeeper
      require 'zookeeper'
      require 'yajl'
    else
      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
      log.info "brokers have been set directly: #{@seed_brokers}"
    end
    if @compression_codec == 'snappy'
      require 'snappy'
    end
    case @output_data_type
    when 'json'
      require 'yajl'
    when 'ltsv'
      require 'ltsv'
    when 'msgpack'
      require 'msgpack'
    end

    @f_separator = case @field_separator
                   when /SPACE/i then ' '
                   when /COMMA/i then ','
                   when /SOH/i then "\x01"
                   else "\t"
                   end

    @custom_attributes = if @output_data_type == 'json'
                           nil
                         elsif @output_data_type == 'ltsv'
                           nil
                         elsif @output_data_type == 'msgpack'
                           nil
                         elsif @output_data_type =~ /^attr:(.*)$/
                           $1.split(',').map(&:strip).reject(&:empty?)
                         else
                           @formatter = Fluent::Plugin.new_formatter(@output_data_type)
                           @formatter.configure(conf)
                           nil
                         end
  end

  def start
    super
    refresh_producer()
  end

  def shutdown
    super
  end

  def parse_record(record)
    if @custom_attributes.nil?
      case @output_data_type
      when 'json'
        Yajl::Encoder.encode(record)
      when 'ltsv'
        LTSV.dump(record)
      when 'msgpack'
        record.to_msgpack
      else
        record.to_s
      end
    else
      @custom_attributes.unshift('time') if @output_include_time
      @custom_attributes.unshift('tag') if @output_include_tag
      @custom_attributes.map { |attr|
        record[attr].nil? ? '' : record[attr].to_s
      }.join(@f_separator)
    end
  end

  def emit(tag, es, chain)
    begin
      chain.next
      es.each do |time,record|
        record['time'] = time if @output_include_time
        record['tag'] = tag if @output_include_tag
        topic = record['topic'] || self.default_topic || tag
        partition_key = record['partition_key'] || @default_partition_key
        value = @formatter.nil? ? parse_record(record) : @formatter.format(tag, time, record)
        log.trace("message send to #{topic} with key: #{partition_key} and value: #{value}.")
        message = Poseidon::MessageToSend.new(topic, value, partition_key)
        @producer.send_messages([message])
      end
    rescue Exception => e
      log.warn("Send exception occurred: #{e}")
      @producer.close if @producer
      refresh_producer()
      raise e
    end
  end

end

data/lib/fluent/plugin/out_kafka_buffered.rb
ADDED
@@ -0,0 +1,187 @@
# encoding: utf-8
class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
  Fluent::Plugin.register_output('kafka_buffered', self)

  def initialize
    super
    require 'poseidon'
  end

  config_param :brokers, :string, :default => 'localhost:9092',
               :desc => <<-DESC
Set brokers directly:
<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
Brokers: you can choose to use either brokers or zookeeper.
DESC
  config_param :zookeeper, :string, :default => nil,
               :desc => <<-DESC
Set brokers via Zookeeper:
<zookeeper_host>:<zookeeper_port>
DESC
  config_param :zookeeper_path, :string, :default => '/brokers/ids',
               :desc => "Path in Zookeeper for broker ids. Defaults to /brokers/ids"
  config_param :default_topic, :string, :default => nil,
               :desc => "Output topic"
  config_param :default_partition_key, :string, :default => nil
  config_param :client_id, :string, :default => 'kafka'
  config_param :output_data_type, :string, :default => 'json',
               :desc => <<-DESC
Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
DESC
  config_param :output_include_tag, :bool, :default => false
  config_param :output_include_time, :bool, :default => false
  config_param :kafka_agg_max_bytes, :size, :default => 4*1024 # 4k

  # poseidon producer options
  config_param :max_send_retries, :integer, :default => 3,
               :desc => "Number of times to retry sending of messages to a leader."
  config_param :required_acks, :integer, :default => 0,
               :desc => "The number of acks required per request."
  config_param :ack_timeout_ms, :integer, :default => 1500,
               :desc => "How long the producer waits for acks."
  config_param :compression_codec, :string, :default => 'none',
               :desc => <<-DESC
The codec the producer uses to compress messages.
Supported codecs: (none|gzip|snappy)
DESC

  attr_accessor :output_data_type
  attr_accessor :field_separator

  unless method_defined?(:log)
    define_method("log") { $log }
  end

  @seed_brokers = []

  def refresh_producer()
    if @zookeeper
      @seed_brokers = []
      z = Zookeeper.new(@zookeeper)
      z.get_children(:path => @zookeeper_path)[:children].each do |id|
        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
      end
      z.close
      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
    end
    begin
      if @seed_brokers.length > 0
        @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms, :compression_codec => @compression_codec.to_sym)
        log.info "initialized producer #{@client_id}"
      else
        log.warn "No brokers found on Zookeeper"
      end
    rescue Exception => e
      log.error e
    end
  end

  def configure(conf)
    super
    if @zookeeper
      require 'zookeeper'
      require 'yajl'
    else
      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
      log.info "brokers have been set directly: #{@seed_brokers}"
    end
    if @compression_codec == 'snappy'
      require 'snappy'
    end

    @f_separator = case @field_separator
                   when /SPACE/i then ' '
                   when /COMMA/i then ','
                   when /SOH/i then "\x01"
                   else "\t"
                   end

    @formatter_proc = setup_formatter(conf)
  end

  def start
    super
    refresh_producer()
  end

  def shutdown
    super
  end

  def format(tag, time, record)
    [tag, time, record].to_msgpack
  end

  def setup_formatter(conf)
    if @output_data_type == 'json'
      require 'yajl'
      Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
    elsif @output_data_type == 'ltsv'
      require 'ltsv'
      Proc.new { |tag, time, record| LTSV.dump(record) }
    elsif @output_data_type == 'msgpack'
      require 'msgpack'
      Proc.new { |tag, time, record| record.to_msgpack }
    elsif @output_data_type =~ /^attr:(.*)$/
      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
      @custom_attributes.unshift('time') if @output_include_time
      @custom_attributes.unshift('tag') if @output_include_tag
      Proc.new { |tag, time, record|
        @custom_attributes.map { |attr|
          record[attr].nil? ? '' : record[attr].to_s
        }.join(@f_separator)
      }
    else
      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
      @formatter.configure(conf)
      Proc.new { |tag, time, record|
        @formatter.format(tag, time, record)
      }
    end
  end

  def write(chunk)
    records_by_topic = {}
    bytes_by_topic = {}
    messages = []
    messages_bytes = 0
    begin
      chunk.msgpack_each { |tag, time, record|
        record['time'] = time if @output_include_time
        record['tag'] = tag if @output_include_tag
        topic = record['topic'] || @default_topic || tag
        partition_key = record['partition_key'] || @default_partition_key

        records_by_topic[topic] ||= 0
        bytes_by_topic[topic] ||= 0

        record_buf = @formatter_proc.call(tag, time, record)
        record_buf_bytes = record_buf.bytesize
        if messages.length > 0 and messages_bytes + record_buf_bytes > @kafka_agg_max_bytes
          log.on_trace { log.trace("#{messages.length} messages send.") }
          @producer.send_messages(messages)
          messages = []
          messages_bytes = 0
        end
        log.on_trace { log.trace("message will send to #{topic} with key: #{partition_key} and value: #{record_buf}.") }
        messages << Poseidon::MessageToSend.new(topic, record_buf, partition_key)
        messages_bytes += record_buf_bytes

        records_by_topic[topic] += 1
        bytes_by_topic[topic] += record_buf_bytes
      }
      if messages.length > 0
        log.trace("#{messages.length} messages send.")
        @producer.send_messages(messages)
      end
      log.debug "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})"
    end
  rescue Exception => e
    log.warn "Send exception occurred: #{e}"
    @producer.close if @producer
    refresh_producer()
    # Raise exception to retry sending messages
    raise e
  end
end
data/test/helper.rb
ADDED
@@ -0,0 +1,27 @@
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'fluent/test'
unless ENV.has_key?('VERBOSE')
  nulllogger = Object.new
  nulllogger.instance_eval {|obj|
    def method_missing(method, *args)
    end
  }
  $log = nulllogger
end

require 'fluent/plugin/out_kafka'

class Test::Unit::TestCase
end

data/test/plugin/test_out_kafka.rb
ADDED
@@ -0,0 +1,33 @@
require 'helper'

class KafkaOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    default_topic kitagawakeiko
    brokers localhost:9092
  ]

  def create_driver(conf = CONFIG, tag='test')
    Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
  end

  def test_configure
    d = create_driver
    assert_equal 'kitagawakeiko', d.instance.default_topic
    assert_equal 'localhost:9092', d.instance.brokers
  end

  def test_format
    d = create_driver
  end

  def test_write
    d = create_driver
    time = Time.parse("2011-01-02 13:14:15 UTC").to_i
    d.emit({"a"=>1}, time)
    d.emit({"a"=>2}, time)
  end
end
metadata
ADDED
@@ -0,0 +1,141 @@
--- !ruby/object:Gem::Specification
name: fluent-plugin-kafka-zendesk
version: !ruby/object:Gem::Version
  version: 0.1.4
platform: ruby
authors:
- Hidemasa Togashi
autorequire:
bindir: bin
cert_chain: []
date: 2016-05-05 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: fluentd
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: poseidon_cluster
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: ltsv
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: yajl-ruby
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: msgpack
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: zookeeper
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
description: Fluentd plugin for Apache Kafka > 0.8
email:
- togachiro@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- Gemfile
- LICENSE
- README.md
- Rakefile
- fluent-plugin-kafka-0.1.4.gem
- fluent-plugin-kafka.gemspec
- lib/fluent/plugin/in_kafka.rb
- lib/fluent/plugin/in_kafka_group.rb
- lib/fluent/plugin/out_kafka.rb
- lib/fluent/plugin/out_kafka_buffered.rb
- test/helper.rb
- test/plugin/test_out_kafka.rb
homepage: https://github.com/khouse/fluent-plugin-kafka
licenses: []
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.4.5.1
signing_key:
specification_version: 4
summary: Fluentd plugin for Apache Kafka > 0.8
test_files:
- test/helper.rb
- test/plugin/test_out_kafka.rb