fluent-plugin-kafka-zendesk 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +196 -0
- data/Rakefile +11 -0
- data/fluent-plugin-kafka-0.1.4.gem +0 -0
- data/fluent-plugin-kafka.gemspec +22 -0
- data/lib/fluent/plugin/in_kafka.rb +280 -0
- data/lib/fluent/plugin/in_kafka_group.rb +176 -0
- data/lib/fluent/plugin/out_kafka.rb +165 -0
- data/lib/fluent/plugin/out_kafka_buffered.rb +187 -0
- data/test/helper.rb +27 -0
- data/test/plugin/test_out_kafka.rb +33 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 37289c076743d4a6760c53274a86604647975e04
+  data.tar.gz: 8a1317287c6d1e4ac03c34f8aa032d1b143320f4
+SHA512:
+  metadata.gz: 28c1cdaf406bef12651ca162f5b3638f7f98eb77c9251848c064a9738d7674b6048d79d96b0e1a2cabe14b22935c1d457e646c4a1f31a586a349843ee058b7d0
+  data.tar.gz: 5e6bbd8cba77e7a9bbe090a36e65f852a6cf46212b585c001854a3260d7a1924e05c5e3d0a2804bce861a2f647f7ce7a69e89d2eaff36ab8b27610bde3cac900
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2014 htgc
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,196 @@
+# Fluent::Plugin::Kafka
+
+TODO: Write a gem description
+TODO: Also, I need to write tests
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'fluent-plugin-kafka'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install fluent-plugin-kafka
+
+## Usage
+
+### Input plugin (@type 'kafka')
+
+    <source>
+      @type kafka
+      host <broker host>
+      port <broker port: default=9092>
+      topics <listening topics(separate with comma',')>
+      format <input text type (text|json|ltsv|msgpack)>
+      message_key <key (Optional, for text format only, default is message)>
+      add_prefix <tag prefix (Optional)>
+      add_suffix <tag suffix (Optional)>
+      max_bytes (integer) :default => nil (Use default of Poseidon)
+      max_wait_ms (integer) :default => nil (Use default of Poseidon)
+      min_bytes (integer) :default => nil (Use default of Poseidon)
+      socket_timeout_ms (integer) :default => nil (Use default of Poseidon)
+    </source>
+
+Supports the following Poseidon::PartitionConsumer options:
+
+- max_bytes — default: 1048576 (1MB) — Maximum number of bytes to fetch
+- max_wait_ms — default: 100 (100ms) — How long to block until the server sends us data.
+- min_bytes — default: 1 (Send us data as soon as it is ready) — Smallest amount of data the server should send us.
+- socket_timeout_ms — default: 10000 (10s) — How long to wait for a reply from the server. Should be higher than max_wait_ms.
+
+Also supports starting processing from an assigned offset for specific topics:
+
+    <source>
+      @type kafka
+      host <broker host>
+      port <broker port: default=9092>
+      format <input text type (text|json|ltsv|msgpack)>
+      <topic>
+        topic <listening topic>
+        partition <listening partition: default=0>
+        offset <listening start offset: default=-1>
+      </topic>
+      <topic>
+        topic <listening topic>
+        partition <listening partition: default=0>
+        offset <listening start offset: default=-1>
+      </topic>
+    </source>
+
+See also [Poseidon::PartitionConsumer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/PartitionConsumer) for more detailed documentation about Poseidon.
+
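As a concrete illustration of the template above, a minimal source that reads JSON records from a single topic (the broker address and topic name here are hypothetical):

    <source>
      @type kafka
      host localhost
      port 9092
      topics app_logs
      format json
      add_prefix kafka
    </source>

With `add_prefix kafka`, events from topic `app_logs` are emitted with tag `kafka.app_logs`.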
+### Input plugin (@type 'kafka_group', supports kafka group)
+
+    <source>
+      @type kafka_group
+      brokers <list of broker-host:port, separate with comma, must set>
+      zookeepers <list of zookeeper-host:port, separate with comma, must set>
+      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for brokers
+      consumer_group <consumer group name, must set>
+      topics <listening topics(separate with comma',')>
+      format <input text type (text|json|ltsv|msgpack)>
+      message_key <key (Optional, for text format only, default is message)>
+      add_prefix <tag prefix (Optional)>
+      add_suffix <tag suffix (Optional)>
+      max_bytes (integer) :default => nil (Use default of Poseidon)
+      max_wait_ms (integer) :default => nil (Use default of Poseidon)
+      min_bytes (integer) :default => nil (Use default of Poseidon)
+      socket_timeout_ms (integer) :default => nil (Use default of Poseidon)
+    </source>
+
+Supports the following Poseidon::PartitionConsumer options:
+
+- max_bytes — default: 1048576 (1MB) — Maximum number of bytes to fetch
+- max_wait_ms — default: 100 (100ms) — How long to block until the server sends us data.
+- min_bytes — default: 1 (Send us data as soon as it is ready) — Smallest amount of data the server should send us.
+- socket_timeout_ms — default: 10000 (10s) — How long to wait for a reply from the server. Should be higher than max_wait_ms.
+
+See also [Poseidon::PartitionConsumer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/PartitionConsumer) for more detailed documentation about Poseidon.
+
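A concrete kafka_group variant, with hypothetical broker, ZooKeeper, and group names:

    <source>
      @type kafka_group
      brokers kafka01:9092,kafka02:9092
      zookeepers zk01:2181,zk02:2181
      consumer_group fluentd_consumers
      topics app_logs,audit_logs
      format json
    </source>

Each listed topic gets its own watcher, and poseidon_cluster coordinates offsets through ZooKeeper, so several Fluentd instances sharing one `consumer_group` can divide partitions among themselves.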
+### Output plugin (non-buffered)
+
+    <match *.**>
+      @type kafka
+
+      # Brokers: you can choose either brokers or zookeeper.
+      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+      default_topic <output topic>
+      default_partition_key (string) :default => nil
+      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+      output_include_tag (true|false) :default => false
+      output_include_time (true|false) :default => false
+      max_send_retries (integer) :default => 3
+      required_acks (integer) :default => 0
+      ack_timeout_ms (integer) :default => 1500
+      compression_codec (none|gzip|snappy) :default => none
+    </match>
+
+Supports the following Poseidon::Producer options:
+
+- max_send_retries — default: 3 — Number of times to retry sending of messages to a leader.
+- required_acks — default: 0 — The number of acks required per request.
+- ack_timeout_ms — default: 1500 — How long the producer waits for acks.
+- compression_codec — default: none — The codec the producer uses to compress messages.
+
+See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/Producer) for more detailed documentation about Poseidon.
+
+This plugin also supports the "snappy" compression codec.
+Install the snappy gem before using snappy compression:
+
+    $ gem install snappy
+
+#### Load balancing
+
+By default Poseidon sends messages to brokers in a round-robin manner, but you can set `default_partition_key` in the config file to route messages to a specific broker.
+If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.
+
+|default_partition_key|partition_key|behavior|
+|-|-|-|
+|Not set|Not exists|All messages are sent in round-robin|
+|Set|Not exists|All messages are sent to a specific broker|
+|Not set|Exists|Messages which have a partition_key record are sent to a specific broker, others are sent in round-robin|
+|Set|Exists|Messages which have a partition_key record are sent to a specific broker with partition_key, others are sent to a specific broker with default_partition_key|
+
+
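To make the table concrete: with `default_partition_key` unset, a record such as the following (values hypothetical) is routed by its own key, while records without `partition_key` stay round-robin:

    {"message": "user signed in", "partition_key": "user-1234"}

The plugin reads `record['partition_key']` per event (see `emit` in out_kafka.rb below), so routing can differ message by message.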
+### Buffered output plugin
+
+    <match *.**>
+      @type kafka_buffered
+
+      # Brokers: you can choose either brokers or zookeeper.
+      brokers <broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,.. # Set brokers directly
+      zookeeper <zookeeper_host>:<zookeeper_port> # Set brokers via Zookeeper
+      zookeeper_path <broker path in zookeeper> :default => /brokers/ids # Set path in zookeeper for kafka
+      default_topic <output topic>
+      default_partition_key (string) :default => nil
+      flush_interval <flush interval (sec) :default => 60>
+      buffer_type (file|memory)
+      output_data_type (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+      output_include_tag (true|false) :default => false
+      output_include_time (true|false) :default => false
+      max_send_retries (integer) :default => 3
+      required_acks (integer) :default => 0
+      ack_timeout_ms (integer) :default => 1500
+      compression_codec (none|gzip|snappy) :default => none
+    </match>
+
+Supports the following Poseidon::Producer options:
+
+- max_send_retries — default: 3 — Number of times to retry sending of messages to a leader.
+- required_acks — default: 0 — The number of acks required per request.
+- ack_timeout_ms — default: 1500 — How long the producer waits for acks.
+- compression_codec — default: none — The codec the producer uses to compress messages.
+
+See also [Poseidon::Producer](http://www.rubydoc.info/github/bpot/poseidon/Poseidon/Producer) for more detailed documentation about Poseidon.
+
+This plugin also supports the "snappy" compression codec.
+Install the snappy gem before using snappy compression:
+
+    $ gem install snappy
+
+#### Load balancing
+
+By default Poseidon sends messages to brokers in a round-robin manner, but you can set `default_partition_key` in the config file to route messages to a specific broker.
+If a key named `partition_key` exists in a message, this plugin uses its value as the partition key.
+
+|default_partition_key|partition_key|behavior|
+|-|-|-|
+|Not set|Not exists|All messages are sent in round-robin|
+|Set|Not exists|All messages are sent to a specific broker|
+|Not set|Exists|Messages which have a partition_key record are sent to a specific broker, others are sent in round-robin|
+|Set|Exists|Messages which have a partition_key record are sent to a specific broker with partition_key, others are sent to a specific broker with default_partition_key|
+
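A concrete buffered configuration with hypothetical values; note that `buffer_type file` also needs a `buffer_path`, a standard Fluentd buffer option not listed in the template above:

    <match app.**>
      @type kafka_buffered
      brokers kafka01:9092,kafka02:9092
      default_topic app_logs
      buffer_type file
      buffer_path /var/log/td-agent/buffer/kafka
      flush_interval 10
      output_data_type json
    </match>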
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Added some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
data/Rakefile
ADDED

data/fluent-plugin-kafka-0.1.4.gem
ADDED
Binary file

data/fluent-plugin-kafka.gemspec
ADDED
@@ -0,0 +1,22 @@
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |gem|
+  gem.authors       = ["Hidemasa Togashi"]
+  gem.email         = ["togachiro@gmail.com"]
+  gem.description   = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.summary       = %q{Fluentd plugin for Apache Kafka > 0.8}
+  gem.homepage      = "https://github.com/khouse/fluent-plugin-kafka"
+
+  gem.files         = `git ls-files`.split($\)
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name          = "fluent-plugin-kafka-zendesk"
+  gem.require_paths = ["lib"]
+  gem.version       = '0.1.4'
+  gem.add_dependency 'fluentd'
+  gem.add_dependency 'poseidon_cluster'
+  gem.add_dependency 'ltsv'
+  gem.add_dependency 'yajl-ruby'
+  gem.add_dependency 'msgpack'
+  gem.add_dependency 'zookeeper'
+end
data/lib/fluent/plugin/in_kafka.rb
ADDED
@@ -0,0 +1,280 @@
+module Fluent
+
+class KafkaInput < Input
+  Plugin.register_input('kafka', self)
+
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :host, :string, :default => 'localhost',
+               :desc => "Broker host"
+  config_param :port, :integer, :default => 9092,
+               :desc => "Broker port"
+  config_param :interval, :integer, :default => 1, # seconds
+               :desc => "Interval (Unit: seconds)"
+  config_param :topics, :string, :default => nil,
+               :desc => "Listening topics (separate with comma ',')"
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :partition, :integer, :default => 0,
+               :desc => "Listening partition"
+  config_param :offset, :integer, :default => -1,
+               :desc => "Listening start offset"
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix"
+  config_param :add_offset_in_record, :bool, :default => false
+
+  config_param :offset_zookeeper, :string, :default => nil
+  config_param :offset_zk_root_node, :string, :default => '/fluent-plugin-kafka'
+
+  # poseidon PartitionConsumer options
+  config_param :max_bytes, :integer, :default => nil,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_ms, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+  config_param :socket_timeout_ms, :integer, :default => nil,
+               :desc => "How long to wait for a reply from the server. Should be higher than max_wait_ms."
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'poseidon'
+    require 'zookeeper'
+  end
+
+  def configure(conf)
+    super
+
+    @topic_list = []
+    if @topics
+      @topic_list = @topics.split(',').map { |topic|
+        TopicEntry.new(topic.strip, @partition, @offset)
+      }
+    else
+      conf.elements.select { |element| element.name == 'topic' }.each do |element|
+        unless element.has_key?('topic')
+          raise ConfigError, "kafka: 'topic' is a required parameter in 'topic element'."
+        end
+        partition = element.has_key?('partition') ? element['partition'].to_i : 0
+        offset = element.has_key?('offset') ? element['offset'].to_i : -1
+        @topic_list.push(TopicEntry.new(element['topic'], partition, offset))
+      end
+    end
+
+    if @topic_list.empty?
+      raise ConfigError, "kafka: 'topics' or 'topic element' is a required parameter"
+    end
+
+    case @format
+    when 'json'
+      require 'yajl'
+    when 'ltsv'
+      require 'ltsv'
+    when 'msgpack'
+      require 'msgpack'
+    end
+  end
+
+  def start
+    @loop = Coolio::Loop.new
+    opt = {}
+    opt[:max_bytes] = @max_bytes if @max_bytes
+    opt[:max_wait_ms] = @max_wait_ms if @max_wait_ms
+    opt[:min_bytes] = @min_bytes if @min_bytes
+    opt[:socket_timeout_ms] = @socket_timeout_ms if @socket_timeout_ms
+
+    @zookeeper = Zookeeper.new(@offset_zookeeper) if @offset_zookeeper
+
+    @topic_watchers = @topic_list.map {|topic_entry|
+      offset_manager = OffsetManager.new(topic_entry, @zookeeper, @offset_zk_root_node) if @offset_zookeeper
+      TopicWatcher.new(
+        topic_entry,
+        @host,
+        @port,
+        @client_id,
+        interval,
+        @format,
+        @message_key,
+        @add_offset_in_record,
+        @add_prefix,
+        @add_suffix,
+        offset_manager,
+        router,
+        opt)
+    }
+    @topic_watchers.each {|tw|
+      tw.attach(@loop)
+    }
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    @loop.stop
+    @zookeeper.close! if @zookeeper
+  end
+
+  def run
+    @loop.run
+  rescue
+    $log.error "unexpected error", :error=>$!.to_s
+    $log.error_backtrace
+  end
+
+  class TopicWatcher < Coolio::TimerWatcher
+    def initialize(topic_entry, host, port, client_id, interval, format, message_key, add_offset_in_record, add_prefix, add_suffix, offset_manager, router, options={})
+      @topic_entry = topic_entry
+      @host = host
+      @port = port
+      @client_id = client_id
+      @callback = method(:consume)
+      @format = format
+      @message_key = message_key
+      @add_offset_in_record = add_offset_in_record
+      @add_prefix = add_prefix
+      @add_suffix = add_suffix
+      @options = options
+      @offset_manager = offset_manager
+      @router = router
+
+      @next_offset = @topic_entry.offset
+      if @topic_entry.offset == -1 && offset_manager
+        @next_offset = offset_manager.next_offset
+      end
+      @consumer = create_consumer(@next_offset)
+
+      super(interval, true)
+    end
+
+    def on_timer
+      @callback.call
+    rescue
+      # TODO log?
+      $log.error $!.to_s
+      $log.error_backtrace
+    end
+
+    def consume
+      es = MultiEventStream.new
+      tag = @topic_entry.topic
+      tag = @add_prefix + "." + tag if @add_prefix
+      tag = tag + "." + @add_suffix if @add_suffix
+
+      if @offset_manager && @consumer.next_offset != @next_offset
+        @consumer = create_consumer(@next_offset)
+      end
+
+      @consumer.fetch.each { |msg|
+        begin
+          msg_record = parse_line(msg.value)
+          msg_record = decorate_offset(msg_record, msg.offset) if @add_offset_in_record
+          es.add(Engine.now, msg_record)
+        rescue
+          $log.warn msg_record.to_s, :error=>$!.to_s
+          $log.debug_backtrace
+        end
+      }
+
+      unless es.empty?
+        @router.emit_stream(tag, es)
+
+        if @offset_manager
+          next_offset = @consumer.next_offset
+          @offset_manager.save_offset(next_offset)
+          @next_offset = next_offset
+        end
+      end
+    end
+
+    def create_consumer(offset)
+      @consumer.close if @consumer
+      Poseidon::PartitionConsumer.new(
+        @client_id,             # client_id
+        @host,                  # host
+        @port,                  # port
+        @topic_entry.topic,     # topic
+        @topic_entry.partition, # partition
+        offset,                 # offset
+        @options                # options
+      )
+    end
+
+    def parse_line(record)
+      case @format
+      when 'json'
+        Yajl::Parser.parse(record)
+      when 'ltsv'
+        LTSV.parse(record)
+      when 'msgpack'
+        MessagePack.unpack(record)
+      when 'text'
+        {@message_key => record}
+      end
+    end
+
+    def decorate_offset(record, offset)
+      case @format
+      when 'json'
+        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
+      when 'ltsv'
+        record.each { |line|
+          add_offset_in_hash(line, @topic_entry.topic, @topic_entry.partition, offset)
+        }
+      when 'msgpack'
+        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
+      when 'text'
+        add_offset_in_hash(record, @topic_entry.topic, @topic_entry.partition, offset)
+      end
+      record
+    end
+
+    def add_offset_in_hash(hash, topic, partition, offset)
+      hash['kafka_topic'] = topic
+      hash['kafka_partition'] = partition
+      hash['kafka_offset'] = offset
+    end
+  end
+
+  class TopicEntry
+    def initialize(topic, partition, offset)
+      @topic = topic
+      @partition = partition
+      @offset = offset
+    end
+    attr_reader :topic, :partition, :offset
+  end
+
+  class OffsetManager
+    def initialize(topic_entry, zookeeper, zk_root_node)
+      @zookeeper = zookeeper
+      @zk_path = "#{zk_root_node}/#{topic_entry.topic}/#{topic_entry.partition}/next_offset"
+      create_node(@zk_path, topic_entry.topic, topic_entry.partition)
+    end
+
+    def create_node(zk_path, topic, partition)
+      path = ""
+      zk_path.split(/(\/[^\/]+)/).reject(&:empty?).each { |dir|
+        path = path + dir
+        @zookeeper.create(:path => "#{path}")
+      }
+      $log.trace "use zk offset node : #{path}"
+    end
+
+    def next_offset
+      @zookeeper.get(:path => @zk_path)[:data].to_i
+    end
+
+    def save_offset(offset)
+      @zookeeper.set(:path => @zk_path, :data => offset.to_s)
+      $log.trace "update zk offset node : #{offset.to_s}"
+    end
+  end
+end
+
+end
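For reference, a quick sketch of what `parse_line` above returns for each `format`, assuming the yajl-ruby, ltsv, and msgpack gems from the gemspec are installed (the sample payloads are hypothetical):

    require 'yajl'
    require 'ltsv'
    require 'msgpack'

    # 'json': one hash per message
    Yajl::Parser.parse('{"user":"alice","level":"info"}')
    # => {"user"=>"alice", "level"=>"info"}

    # 'ltsv': LTSV.parse returns an array of hashes, one per line,
    # which is why decorate_offset iterates over the record in its 'ltsv' branch
    LTSV.parse("user:alice\tlevel:info")
    # => [{:user=>"alice", :level=>"info"}]

    # 'msgpack': round-trips a hash
    MessagePack.unpack({'user' => 'alice'}.to_msgpack)
    # => {"user"=>"alice"}

    # 'text': the raw payload is wrapped under message_key (default 'message')
    {'message' => 'raw line'}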
data/lib/fluent/plugin/in_kafka_group.rb
ADDED
@@ -0,0 +1,176 @@
+module Fluent
+
+class KafkaGroupInput < Input
+  Plugin.register_input('kafka_group', self)
+
+  config_param :brokers, :string,
+               :desc => "List of broker-host:port, separate with comma, must set."
+  config_param :zookeepers, :string,
+               :desc => "List of zookeeper-host:port, separate with comma, must set."
+  config_param :consumer_group, :string, :default => nil,
+               :desc => "Consumer group name, must set."
+  config_param :topics, :string,
+               :desc => "Listening topics (separate with comma ',')."
+  config_param :interval, :integer, :default => 1, # seconds
+               :desc => "Interval (Unit: seconds)"
+  config_param :format, :string, :default => 'json',
+               :desc => "Supported format: (json|text|ltsv|msgpack)"
+  config_param :message_key, :string, :default => 'message',
+               :desc => "For 'text' format only."
+  config_param :add_prefix, :string, :default => nil,
+               :desc => "Tag prefix (Optional)"
+  config_param :add_suffix, :string, :default => nil,
+               :desc => "Tag suffix (Optional)"
+
+  # poseidon PartitionConsumer options
+  config_param :max_bytes, :integer, :default => nil,
+               :desc => "Maximum number of bytes to fetch."
+  config_param :max_wait_ms, :integer, :default => nil,
+               :desc => "How long to block until the server sends us data."
+  config_param :min_bytes, :integer, :default => nil,
+               :desc => "Smallest amount of data the server should send us."
+  config_param :socket_timeout_ms, :integer, :default => nil,
+               :desc => "How long to wait for a reply from the server. Should be higher than max_wait_ms."
+
+  unless method_defined?(:router)
+    define_method("router") { Fluent::Engine }
+  end
+
+  def initialize
+    super
+    require 'poseidon_cluster'
+  end
+
+  def _config_to_array(config)
+    config_array = config.split(',').map {|k| k.strip }
+    if config_array.empty?
+      raise ConfigError, "kafka_group: '#{config}' is a required parameter"
+    end
+    config_array
+  end
+
+  private :_config_to_array
+
+  def configure(conf)
+    super
+    @broker_list = _config_to_array(@brokers)
+    @zookeeper_list = _config_to_array(@zookeepers)
+    @topic_list = _config_to_array(@topics)
+
+    unless @consumer_group
+      raise ConfigError, "kafka_group: 'consumer_group' is a required parameter"
+    end
+    $log.info "Will watch for topics #{@topic_list} at brokers " \
+              "#{@broker_list}, zookeepers #{@zookeeper_list} and group " \
+              "'#{@consumer_group}'"
+
+    case @format
+    when 'json'
+      require 'yajl'
+    when 'ltsv'
+      require 'ltsv'
+    when 'msgpack'
+      require 'msgpack'
+    end
+  end
+
+  def start
+    @loop = Coolio::Loop.new
+    opt = {}
+    opt[:max_bytes] = @max_bytes if @max_bytes
+    opt[:max_wait_ms] = @max_wait_ms if @max_wait_ms
+    opt[:min_bytes] = @min_bytes if @min_bytes
+    opt[:socket_timeout_ms] = @socket_timeout_ms if @socket_timeout_ms
+
+    @topic_watchers = @topic_list.map {|topic|
+      TopicWatcher.new(topic, @broker_list, @zookeeper_list, @consumer_group,
+                       interval, @format, @message_key, @add_prefix,
+                       @add_suffix, router, opt)
+    }
+    @topic_watchers.each {|tw|
+      tw.attach(@loop)
+    }
+    @thread = Thread.new(&method(:run))
+  end
+
+  def shutdown
+    @loop.stop
+  end
+
+  def run
+    @loop.run
+  rescue
+    $log.error "unexpected error", :error=>$!.to_s
+    $log.error_backtrace
+  end
+
+  class TopicWatcher < Coolio::TimerWatcher
+    def initialize(topic, broker_list, zookeeper_list, consumer_group,
+                   interval, format, message_key, add_prefix, add_suffix,
+                   router, options)
+      @topic = topic
+      @callback = method(:consume)
+      @format = format
+      @message_key = message_key
+      @add_prefix = add_prefix
+      @add_suffix = add_suffix
+      @router = router
+
+      @consumer = Poseidon::ConsumerGroup.new(
+        consumer_group,
+        broker_list,
+        zookeeper_list,
+        topic,
+        options
+      )
+
+      super(interval, true)
+    end
+
+    def on_timer
+      @callback.call
+    rescue
+      # TODO log?
+      $log.error $!.to_s
+      $log.error_backtrace
+    end
+
+    def consume
+      es = MultiEventStream.new
+      tag = @topic
+      tag = @add_prefix + "." + tag if @add_prefix
+      tag = tag + "." + @add_suffix if @add_suffix
+
+      @consumer.fetch do |partition, bulk|
+        bulk.each do |msg|
+          begin
+            msg_record = parse_line(msg.value)
+            es.add(Engine.now, msg_record)
+          rescue
+            $log.warn msg_record.to_s, :error=>$!.to_s
+            $log.debug_backtrace
+          end
+        end
+      end
+
+      unless es.empty?
+        @router.emit_stream(tag, es)
+      end
+    end
+
+    def parse_line(record)
+      case @format
+      when 'json'
+        Yajl::Parser.parse(record)
+      when 'ltsv'
+        LTSV.parse(record)
+      when 'msgpack'
+        MessagePack.unpack(record)
+      when 'text'
+        {@message_key => record}
+      end
+    end
+  end
+end
+
+end
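A quick sketch of how `_config_to_array` above splits the comma-separated `brokers`, `zookeepers`, and `topics` strings (the host and topic names are hypothetical):

    # Mirrors the plugin's private _config_to_array helper
    def config_to_array(config)
      config.split(',').map { |k| k.strip }
    end

    config_to_array('kafka01:9092, kafka02:9092')  # => ["kafka01:9092", "kafka02:9092"]
    config_to_array('app_logs,audit_logs')         # => ["app_logs", "audit_logs"]

Whitespace around the commas is tolerated, since each element is stripped.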
data/lib/fluent/plugin/out_kafka.rb
ADDED
@@ -0,0 +1,165 @@
+class Fluent::KafkaOutput < Fluent::Output
+  Fluent::Plugin.register_output('kafka', self)
+
+  def initialize
+    super
+    require 'poseidon'
+  end
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Note that you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => "Set brokers via Zookeeper: <zookeeper_host>:<zookeeper_port>"
+  config_param :zookeeper_path, :string, :default => '/brokers/ids',
+               :desc => "Path in zookeeper for broker ids. Defaults to /brokers/ids"
+  config_param :default_topic, :string, :default => nil,
+               :desc => "Output topic."
+  config_param :default_partition_key, :string, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => "Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)"
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+
+  # poseidon producer options
+  config_param :max_send_retries, :integer, :default => 3,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => 0,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout_ms, :integer, :default => 1500,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => 'none',
+               :desc => "The codec the producer uses to compress messages."
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  @seed_brokers = []
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  def refresh_producer()
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+      end
+      z.close
+      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms, :compression_codec => @compression_codec.to_sym)
+        log.info "initialized producer #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      log.error e
+    end
+  end
+
+  def configure(conf)
+    super
+    if @zookeeper
+      require 'zookeeper'
+      require 'yajl'
+    else
+      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+      log.info "brokers have been set directly: #{@seed_brokers}"
+    end
+    if @compression_codec == 'snappy'
+      require 'snappy'
+    end
+    case @output_data_type
+    when 'json'
+      require 'yajl'
+    when 'ltsv'
+      require 'ltsv'
+    when 'msgpack'
+      require 'msgpack'
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @custom_attributes = if @output_data_type == 'json'
+                           nil
+                         elsif @output_data_type == 'ltsv'
+                           nil
+                         elsif @output_data_type == 'msgpack'
+                           nil
+                         elsif @output_data_type =~ /^attr:(.*)$/
+                           $1.split(',').map(&:strip).reject(&:empty?)
+                         else
+                           @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+                           @formatter.configure(conf)
+                           nil
+                         end
+
+  end
+
+  def start
+    super
+    refresh_producer()
+  end
+
+  def shutdown
+    super
+  end
+
+  def parse_record(record)
+    if @custom_attributes.nil?
+      case @output_data_type
+      when 'json'
+        Yajl::Encoder.encode(record)
+      when 'ltsv'
+        LTSV.dump(record)
+      when 'msgpack'
+        record.to_msgpack
+      else
+        record.to_s
+      end
+    else
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      @custom_attributes.map { |attr|
+        record[attr].nil? ? '' : record[attr].to_s
+      }.join(@f_separator)
+    end
+  end
+
+  def emit(tag, es, chain)
+    begin
+      chain.next
+      es.each do |time,record|
+        record['time'] = time if @output_include_time
+        record['tag'] = tag if @output_include_tag
+        topic = record['topic'] || self.default_topic || tag
+        partition_key = record['partition_key'] || @default_partition_key
+        value = @formatter.nil? ? parse_record(record) : @formatter.format(tag, time, record)
+        log.trace("message send to #{topic} with key: #{partition_key} and value: #{value}.")
+        message = Poseidon::MessageToSend.new(topic, value, partition_key)
+        @producer.send_messages([message])
+      end
+    rescue Exception => e
+      log.warn("Send exception occurred: #{e}")
+      @producer.close if @producer
+      refresh_producer()
+      raise e
+    end
+  end
+
+end
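refresh_producer above discovers brokers from the JSON that Kafka 0.8 brokers register under /brokers/ids in ZooKeeper. A standalone sketch of that lookup, with a hypothetical ZooKeeper address:

    require 'zookeeper'
    require 'yajl'

    z = Zookeeper.new('zk01:2181')
    # Each child of /brokers/ids holds a JSON blob like {"host":"kafka01","port":9092,...}
    seed_brokers = z.get_children(:path => '/brokers/ids')[:children].map do |id|
      broker = Yajl.load(z.get(:path => "/brokers/ids/#{id}")[:data])
      "#{broker['host']}:#{broker['port']}"
    end
    z.close
    # seed_brokers => e.g. ["kafka01:9092", "kafka02:9092"]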
data/lib/fluent/plugin/out_kafka_buffered.rb
ADDED
@@ -0,0 +1,187 @@
+# encoding: utf-8
+class Fluent::KafkaOutputBuffered < Fluent::BufferedOutput
+  Fluent::Plugin.register_output('kafka_buffered', self)
+
+  def initialize
+    super
+    require 'poseidon'
+  end
+
+  config_param :brokers, :string, :default => 'localhost:9092',
+               :desc => <<-DESC
+Set brokers directly:
+<broker1_host>:<broker1_port>,<broker2_host>:<broker2_port>,..
+Brokers: you can choose to use either brokers or zookeeper.
+DESC
+  config_param :zookeeper, :string, :default => nil,
+               :desc => <<-DESC
+Set brokers via Zookeeper:
+<zookeeper_host>:<zookeeper_port>
+DESC
+  config_param :zookeeper_path, :string, :default => '/brokers/ids',
+               :desc => "Path in zookeeper for broker ids. Defaults to /brokers/ids"
+  config_param :default_topic, :string, :default => nil,
+               :desc => "Output topic"
+  config_param :default_partition_key, :string, :default => nil
+  config_param :client_id, :string, :default => 'kafka'
+  config_param :output_data_type, :string, :default => 'json',
+               :desc => <<-DESC
+Supported format: (json|ltsv|msgpack|attr:<record name>|<formatter name>)
+DESC
+  config_param :output_include_tag, :bool, :default => false
+  config_param :output_include_time, :bool, :default => false
+  config_param :kafka_agg_max_bytes, :size, :default => 4*1024  # 4k
+
+  # poseidon producer options
+  config_param :max_send_retries, :integer, :default => 3,
+               :desc => "Number of times to retry sending of messages to a leader."
+  config_param :required_acks, :integer, :default => 0,
+               :desc => "The number of acks required per request."
+  config_param :ack_timeout_ms, :integer, :default => 1500,
+               :desc => "How long the producer waits for acks."
+  config_param :compression_codec, :string, :default => 'none',
+               :desc => <<-DESC
+The codec the producer uses to compress messages.
+Supported codecs: (none|gzip|snappy)
+DESC
+
+  attr_accessor :output_data_type
+  attr_accessor :field_separator
+
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
+
+  @seed_brokers = []
+
+  def refresh_producer()
+    if @zookeeper
+      @seed_brokers = []
+      z = Zookeeper.new(@zookeeper)
+      z.get_children(:path => @zookeeper_path)[:children].each do |id|
+        broker = Yajl.load(z.get(:path => @zookeeper_path + "/#{id}")[:data])
+        @seed_brokers.push("#{broker['host']}:#{broker['port']}")
+      end
+      z.close
+      log.info "brokers have been refreshed via Zookeeper: #{@seed_brokers}"
+    end
+    begin
+      if @seed_brokers.length > 0
+        @producer = Poseidon::Producer.new(@seed_brokers, @client_id, :max_send_retries => @max_send_retries, :required_acks => @required_acks, :ack_timeout_ms => @ack_timeout_ms, :compression_codec => @compression_codec.to_sym)
+        log.info "initialized producer #{@client_id}"
+      else
+        log.warn "No brokers found on Zookeeper"
+      end
+    rescue Exception => e
+      log.error e
+    end
+  end
+
+  def configure(conf)
+    super
+    if @zookeeper
+      require 'zookeeper'
+      require 'yajl'
+    else
+      @seed_brokers = @brokers.match(",").nil? ? [@brokers] : @brokers.split(",")
+      log.info "brokers have been set directly: #{@seed_brokers}"
+    end
+    if @compression_codec == 'snappy'
+      require 'snappy'
+    end
+
+    @f_separator = case @field_separator
+                   when /SPACE/i then ' '
+                   when /COMMA/i then ','
+                   when /SOH/i then "\x01"
+                   else "\t"
+                   end
+
+    @formatter_proc = setup_formatter(conf)
+  end
+
+  def start
+    super
+    refresh_producer()
+  end
+
+  def shutdown
+    super
+  end
+
+  def format(tag, time, record)
+    [tag, time, record].to_msgpack
+  end
+
+  def setup_formatter(conf)
+    if @output_data_type == 'json'
+      require 'yajl'
+      Proc.new { |tag, time, record| Yajl::Encoder.encode(record) }
+    elsif @output_data_type == 'ltsv'
+      require 'ltsv'
+      Proc.new { |tag, time, record| LTSV.dump(record) }
+    elsif @output_data_type == 'msgpack'
+      require 'msgpack'
+      Proc.new { |tag, time, record| record.to_msgpack }
+    elsif @output_data_type =~ /^attr:(.*)$/
+      @custom_attributes = $1.split(',').map(&:strip).reject(&:empty?)
+      @custom_attributes.unshift('time') if @output_include_time
+      @custom_attributes.unshift('tag') if @output_include_tag
+      Proc.new { |tag, time, record|
+        @custom_attributes.map { |attr|
+          record[attr].nil? ? '' : record[attr].to_s
+        }.join(@f_separator)
+      }
+    else
+      @formatter = Fluent::Plugin.new_formatter(@output_data_type)
+      @formatter.configure(conf)
+      Proc.new { |tag, time, record|
+        @formatter.format(tag, time, record)
+      }
+    end
+  end
+
+  def write(chunk)
+    records_by_topic = {}
+    bytes_by_topic = {}
+    messages = []
+    messages_bytes = 0
+    begin
+      chunk.msgpack_each { |tag, time, record|
+        record['time'] = time if @output_include_time
+        record['tag'] = tag if @output_include_tag
+        topic = record['topic'] || @default_topic || tag
+        partition_key = record['partition_key'] || @default_partition_key
+
+        records_by_topic[topic] ||= 0
+        bytes_by_topic[topic] ||= 0
+
+        record_buf = @formatter_proc.call(tag, time, record)
+        record_buf_bytes = record_buf.bytesize
+        if messages.length > 0 and messages_bytes + record_buf_bytes > @kafka_agg_max_bytes
+          log.on_trace { log.trace("#{messages.length} messages sent.") }
+          @producer.send_messages(messages)
+          messages = []
+          messages_bytes = 0
+        end
+        log.on_trace { log.trace("message will be sent to #{topic} with key: #{partition_key} and value: #{record_buf}.") }
+        messages << Poseidon::MessageToSend.new(topic, record_buf, partition_key)
+        messages_bytes += record_buf_bytes
+
+        records_by_topic[topic] += 1
+        bytes_by_topic[topic] += record_buf_bytes
+      }
+      if messages.length > 0
+        log.trace("#{messages.length} messages sent.")
+        @producer.send_messages(messages)
+      end
+      log.debug "(records|bytes) (#{records_by_topic}|#{bytes_by_topic})"
+    end
+  rescue Exception => e
+    log.warn "Send exception occurred: #{e}"
+    @producer.close if @producer
+    refresh_producer()
+    # Raise exception to retry sending messages
+    raise e
+  end
+end
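The write method above batches formatted records until adding one more would exceed kafka_agg_max_bytes. A minimal standalone sketch of that accumulation loop (the payloads are hypothetical):

    KAFKA_AGG_MAX_BYTES = 4 * 1024

    def send_batch(messages)
      puts "sending #{messages.length} messages"  # stands in for @producer.send_messages
    end

    messages = []
    messages_bytes = 0
    ['{"a":1}', '{"b":2}', 'x' * 5000].each do |record_buf|
      # Flush first if this record would push the batch past the byte budget
      if messages.length > 0 && messages_bytes + record_buf.bytesize > KAFKA_AGG_MAX_BYTES
        send_batch(messages)
        messages = []
        messages_bytes = 0
      end
      messages << record_buf
      messages_bytes += record_buf.bytesize
    end
    send_batch(messages) unless messages.empty?

As in the plugin, a single record larger than the budget is still enqueued and sent on its own.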
data/test/helper.rb
ADDED
@@ -0,0 +1,27 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'test/unit'
+
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'fluent/test'
+unless ENV.has_key?('VERBOSE')
+  nulllogger = Object.new
+  nulllogger.instance_eval {|obj|
+    def method_missing(method, *args)
+    end
+  }
+  $log = nulllogger
+end
+
+require 'fluent/plugin/out_kafka'
+
+class Test::Unit::TestCase
+end
data/test/plugin/test_out_kafka.rb
ADDED
@@ -0,0 +1,33 @@
+require 'helper'
+
+class KafkaOutputTest < Test::Unit::TestCase
+  def setup
+    Fluent::Test.setup
+  end
+
+  CONFIG = %[
+    default_topic kitagawakeiko
+    brokers localhost:9092
+  ]
+
+  def create_driver(conf = CONFIG, tag='test')
+    Fluent::Test::BufferedOutputTestDriver.new(Fluent::KafkaOutput, tag).configure(conf)
+  end
+
+  def test_configure
+    d = create_driver
+    assert_equal 'kitagawakeiko', d.instance.default_topic
+    assert_equal 'localhost:9092', d.instance.brokers
+  end
+
+  def test_format
+    d = create_driver
+  end
+
+  def test_write
+    d = create_driver
+    time = Time.parse("2011-01-02 13:14:15 UTC").to_i
+    d.emit({"a"=>1}, time)
+    d.emit({"a"=>2}, time)
+  end
+end
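Assuming the development dependencies are installed with Bundler, the test file above can be run directly with Ruby's standard test runner:

    $ bundle exec ruby -Ilib -Itest test/plugin/test_out_kafka.rb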
metadata
ADDED
@@ -0,0 +1,141 @@
+--- !ruby/object:Gem::Specification
+name: fluent-plugin-kafka-zendesk
+version: !ruby/object:Gem::Version
+  version: 0.1.4
+platform: ruby
+authors:
+- Hidemasa Togashi
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2016-05-05 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: fluentd
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: poseidon_cluster
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: ltsv
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: yajl-ruby
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: msgpack
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: zookeeper
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Fluentd plugin for Apache Kafka > 0.8
+email:
+- togachiro@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- fluent-plugin-kafka-0.1.4.gem
+- fluent-plugin-kafka.gemspec
+- lib/fluent/plugin/in_kafka.rb
+- lib/fluent/plugin/in_kafka_group.rb
+- lib/fluent/plugin/out_kafka.rb
+- lib/fluent/plugin/out_kafka_buffered.rb
+- test/helper.rb
+- test/plugin/test_out_kafka.rb
+homepage: https://github.com/khouse/fluent-plugin-kafka
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.5.1
+signing_key:
+specification_version: 4
+summary: Fluentd plugin for Apache Kafka > 0.8
+test_files:
+- test/helper.rb
+- test/plugin/test_out_kafka.rb