fluent-plugin-webhdfs 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -10
- data/Appraisals +0 -2
- data/README.md +55 -45
- data/fluent-plugin-webhdfs.gemspec +4 -4
- data/lib/fluent/plugin/out_webhdfs.rb +193 -100
- data/lib/fluent/plugin/webhdfs_compressor_bzip2.rb +3 -7
- data/lib/fluent/plugin/webhdfs_compressor_gzip.rb +3 -3
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +3 -3
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_text.rb +2 -2
- data/test/helper.rb +5 -0
- data/test/plugin/test_compressor.rb +3 -3
- data/test/plugin/test_out_webhdfs.rb +179 -105
- metadata +18 -24
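
The headline change, visible in every hunk below, is the port from Fluentd's v0.12 plugin API (`Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput`) to the v0.14 API (`Fluent::Plugin::WebHDFSOutput < Output`). The rewritten out_webhdfs.rb itself (+193 -100) is not shown in this extract; the sketch below is only the general v0.14 plugin shape implied by those class declarations, with an illustrative class name, registration key, and write body.

```ruby
require 'fluent/plugin/output'

module Fluent::Plugin
  # Illustrative v0.14-style output plugin skeleton; not the gem's code.
  class ExampleWebhdfsLikeOutput < Output
    Fluent::Plugin.register_output('example_webhdfs_like', self)

    config_param :host, :string
    config_param :path, :string

    # Time-slicing moved out of the plugin class (v0.12 TimeSlicedOutput)
    # into the core <buffer time> section; the plugin now just writes
    # each flushed chunk.
    def write(chunk)
      log.info "would append chunk to WebHDFS", host: @host, path: @path, bytes: chunk.bytesize
    end
  end
end
```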
data/lib/fluent/plugin/webhdfs_compressor_bzip2.rb
CHANGED
@@ -1,14 +1,10 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class Bzip2Compressor < Compressor
       WebHDFSOutput.register_compressor('bzip2', self)
 
       def initialize(options = {})
-        begin
-          require "bzip2/ffi"
-        rescue LoadError
-          raise Fluent::ConfigError, "Install bzip2-ffi before use bzip2 compressor"
-        end
+        require "bzip2/ffi"
      end
 
      def ext
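
A behavioral note on this hunk: 0.7.1 caught a missing bzip2-ffi gem at compressor construction and re-raised it as a configuration error, while 1.0.0 lets the bare require fail with the original LoadError. Side by side (this mirrors the deleted and added lines; it assumes fluentd is loaded so Fluent::ConfigError is defined):

```ruby
# 0.7.1: missing gem surfaced as Fluent::ConfigError
begin
  require "bzip2/ffi"
rescue LoadError
  raise Fluent::ConfigError, "Install bzip2-ffi before use bzip2 compressor"
end

# 1.0.0: missing gem surfaces directly as LoadError
require "bzip2/ffi"
```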
data/lib/fluent/plugin/webhdfs_compressor_gzip.rb
CHANGED
@@ -1,5 +1,5 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class GzipCompressor < Compressor
       WebHDFSOutput.register_compressor('gzip', self)
 
@@ -14,7 +14,7 @@ module Fluent
       def compress(chunk, tmp)
         w = Zlib::GzipWriter.new(tmp)
         chunk.write_to(w)
-        w.finish
+        w.close
       end
     end
   end
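
The `w.finish` to `w.close` switch is not cosmetic: per Ruby's zlib documentation, `Zlib::GzipWriter#finish` writes the gzip trailer but never closes the associated IO, while `#close` flushes and closes it as well. A self-contained illustration:

```ruby
require 'zlib'
require 'stringio'

io = StringIO.new
w = Zlib::GzipWriter.new(io)
w.write("hello")
w.finish                # gzip trailer written, io left open
puts io.closed?         # => false

io2 = StringIO.new
w2 = Zlib::GzipWriter.new(io2)
w2.write("hello")
w2.close                # trailer written and io2 closed too
puts io2.closed?        # => true
```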
data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
CHANGED
@@ -1,9 +1,9 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class LZOCommandCompressor < Compressor
       WebHDFSOutput.register_compressor('lzo_command', self)
 
-      config_param :command_parameter, :string, :default => '-qf1'
+      config_param :command_parameter, :string, default: '-qf1'
 
       def configure(conf)
         super
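
Besides the namespace change, the only line touched here rewrites the `config_param` default from 1.8-era hash-rocket syntax to keyword-argument style; the declared parameter is identical. For context, `-qf1` reads as lzop's quiet (`-q`), force (`-f`), fastest level (`-1`) flags; a compressor like this one can be expected to interpolate it into a command line roughly as below (the exact invocation is an assumption, not shown in this diff):

```ruby
command_parameter = '-qf1'  # default from the config_param above
# Hypothetical shell-out; the real code lives in the compressor's compress method.
system("lzop #{command_parameter} -o /tmp/chunk.lzo /tmp/chunk") or
  raise "lzop exited with #{$?.exitstatus}"
```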
data/test/helper.rb
CHANGED
@@ -8,10 +8,13 @@ rescue Bundler::BundlerError => e
   exit e.status_code
 end
 require 'test/unit'
+require 'test/unit/rr'
 
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 require 'fluent/test'
+require 'fluent/test/helpers'
+require 'fluent/test/driver/output'
 unless ENV.has_key?('VERBOSE')
   nulllogger = Object.new
   nulllogger.instance_eval {|obj|
@@ -22,6 +25,8 @@ unless ENV.has_key?('VERBOSE')
   $log = nulllogger
 end
 
+include Fluent::Test::Helpers
+
 require 'fluent/plugin/out_webhdfs'
 
 class Test::Unit::TestCase
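
The new requires pull in the Fluentd v0.14 test harness: `test/unit/rr` adds rr mocking (used by the `mock(...)` call in test_out_webhdfs.rb below), and `fluent/test/helpers` provides `config_element` and `event_time`, which the rewritten tests use everywhere. A minimal sketch of the new driver in use (host and path values are illustrative):

```ruby
require 'fluent/test'
require 'fluent/test/helpers'
require 'fluent/test/driver/output'
require 'fluent/plugin/out_webhdfs'

include Fluent::Test::Helpers

conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
d = Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
puts d.instance.path  # => "/hdfs/path/file.%Y%m%d.log"
```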
data/test/plugin/test_compressor.rb
CHANGED
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
   def setup
     omit unless Object.const_defined?(:Snappy)
     Fluent::Test.setup
-    @compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
+    @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
   end
 
-  def create_driver(conf=CONFIG, tag='test')
-    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+  def create_driver(conf = CONFIG)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
   end
 
   def test_ext
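
The `omit unless Object.const_defined?(:Snappy)` guard survives the rewrite: it is plain test-unit, skipping every test in the case when the optional snappy gem is not installed. In isolation:

```ruby
require 'test/unit'

class OptionalDependencyTest < Test::Unit::TestCase
  def setup
    # Skips the whole test case unless the snappy gem was loaded somewhere.
    omit("snappy not installed") unless Object.const_defined?(:Snappy)
  end

  def test_roundtrip
    assert_equal "data", Snappy.inflate(Snappy.deflate("data"))
  end
end
```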
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -1,169 +1,243 @@
 require 'helper'
 
 class WebHDFSOutputTest < Test::Unit::TestCase
-  CONFIG = %[
-host namenode.local
-path /hdfs/path/file.%Y%m%d.log
-]
+  CONFIG_DEFAULT = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
+
+  CONFIG_COMPAT = config_element(
+    "ROOT", "", {
+      "output_data_type" => "",
+      "host" => "namenode.local",
+      "path" => "/hdfs/path/file.%Y%m%d.log"
+    })
+
   def setup
     Fluent::Test.setup
   end
 
-  def create_driver(conf=CONFIG, tag='test')
-    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+  def create_driver(conf)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
   end
 
-
-
-    d = create_driver
+  sub_test_case "default configuration" do
+    test 'configured with standard out_file format with specified hdfs info' do
+      d = create_driver(CONFIG_DEFAULT)
+      assert_true d.instance.instance_eval{ @using_formatter_config }
+
       assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 50070, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
-      assert_equal '%Y%m%d', d.instance.time_slice_format
       assert_equal false, d.instance.httpfs
       assert_nil d.instance.username
       assert_equal false, d.instance.ignore_start_check_error
 
-      assert_equal
-      assert_equal true, d.instance.
-
+      assert_equal 'Fluent::Plugin::OutFileFormatter', d.instance.formatter.class.to_s
+      assert_equal true, d.instance.end_with_newline
+
+      # deprecated params
+      assert_nil d.instance.instance_eval{ @output_include_time }
+      assert_nil d.instance.instance_eval{ @output_include_tag }
       assert_nil d.instance.remove_prefix
-
-
-
+      assert_nil d.instance.instance_eval{ @header_separator }
+      assert_nil d.instance.default_tag
+    end
+  end
+
+  sub_test_case "flat configuration" do
+    def test_default_for_traditional_config
+      d = create_driver(CONFIG_COMPAT)
+      assert_false d.instance.instance_eval{ @using_formatter_config }
+
+      assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
+      assert_equal 50070, d.instance.instance_eval{ @namenode_port }
+      assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+      assert_equal false, d.instance.httpfs
+      assert_nil d.instance.username
+      assert_equal false, d.instance.ignore_start_check_error
+
+      assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
+      assert_equal true, d.instance.end_with_newline
+
+      assert_equal true, d.instance.instance_eval{ @output_include_time }
+      assert_equal true, d.instance.instance_eval{ @output_include_tag }
+      assert_nil d.instance.instance_eval{ @remove_prefix }
+      assert_equal "\t", d.instance.instance_eval{ @header_separator }
+      assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
     end
 
     def test_httpfs
-      d = create_driver(%[
-namenode server.local:14000
-path /hdfs/path/file.%Y%m%d.%H%M.log
-httpfs yes
-username hdfs_user
-])
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "httpfs" => "yes",
+          "username" => "hdfs_user"
+        })
+      d = create_driver(conf)
+
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.httpfs
       assert_equal 'hdfs_user', d.instance.username
     end
 
     def test_ssl
-      d = create_driver(%[
-namenode server.local:14000
-path /hdfs/path/file.%Y%m%d.%H%M.log
-ssl true
-ssl_ca_file /path/to/ca_file.pem
-ssl_verify_mode peer
-kerberos true
-kerberos_keytab /path/to/kerberos.keytab
-])
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "ssl" => true,
+          "ssl_ca_file" => "/path/to/ca_file.pem",
+          "ssl_verify_mode" => "peer",
+          "kerberos" => true
+        })
+      d = create_driver(conf)
 
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.ssl
       assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
       assert_equal :peer, d.instance.ssl_verify_mode
       assert_equal true, d.instance.kerberos
-      assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
     end
 
-    data(gzip: [:gzip, Fluent::WebHDFSOutput::GzipCompressor],
-         bzip2: [:bzip2, Fluent::WebHDFSOutput::Bzip2Compressor],
-         snappy: [:snappy, Fluent::WebHDFSOutput::SnappyCompressor],
-         lzo: [:lzo_command, Fluent::WebHDFSOutput::LZOCommandCompressor])
+    data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
+         bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
+         snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+         lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
     def test_compress(data)
       compress_type, compressor_class = data
       begin
-        d = create_driver(%[
-namenode server.local:14000
-path /hdfs/path/file.%Y%m%d.%H%M.log
-compress #{compress_type}
-])
+        conf = config_element(
+          "ROOT", "", {
+            "namenode" => "server.local:14000",
+            "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+            "compress" => compress_type
+          })
+        d = create_driver(conf)
       rescue Fluent::ConfigError => ex
         omit ex.message
       end
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal compress_type, d.instance.compress
       assert_equal compressor_class, d.instance.compressor.class
     end
 
-    def test_placeholders
-      d = create_driver(%[
-hostname testing.node.local
-namenode server.local:50070
-path /hdfs/${hostname}/file.%Y%m%d%H.log
-])
+    def test_placeholders_old_style
+      conf = config_element(
+        "ROOT", "", {
+          "hostname" => "testing.node.local",
+          "namenode" => "server.local:50070",
+          "path" => "/hdfs/${hostname}/file.%Y%m%d%H.log"
+        })
+      d = create_driver(conf)
       assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
     end
 
-
-
-
-
-
-
-
+    data("%Y%m%d" => ["/hdfs/path/file.%Y%m%d.log", "/hdfs/path/file.20120718.log"],
+         "%Y%m%d.%H%M" => ["/hdfs/path/file.%Y%m%d.%H%M.log", "/hdfs/path/file.20120718.1503.log"])
+    test "generate_path" do |(path, expected)|
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => path
+        })
+      d = create_driver(conf)
+      formatter = Fluent::Timezone.formatter("+0900", path)
+      mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal expected, d.instance.generate_path(chunk)
+    end
 
-
-
-
-
-
-
-
-
+    data(path: { "append" => false },
+         ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
+         compress: { "compress" => "invalid" })
+    test "invalid" do |attr|
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
+        })
+      conf += config_element("", "", attr)
+      assert_raise Fluent::ConfigError do
+        create_driver(conf)
       end
     end
+  end
 
-
-
-
-
-
-
-
-
-
-
+  sub_test_case "sub section configuration" do
+    def test_time_key
+      conf = config_element(
+        "ROOT", "", {
+          "host" => "namenode.local",
+          "path" => "/hdfs/path/file.%Y%m%d.log"
+        }, [
+          config_element(
+            "buffer", "time", {
+              "timekey" => 1
+            })
+        ]
+      )
+      d = create_driver(conf)
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal 1, d.instance.buffer_config.timekey
+      assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
+    end
+  end
 
-
-
-
-
-
-
-
-
-
-    end
+  sub_test_case "using format subsection" do
+    test "blank format means default format 'out_file' with UTC timezone" do
+      format_section = config_element("format", "", {}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+    end
 
-
-
-
-
-
-
-
-
-    end
+    test "specifying timezone works well in format section" do
+      format_section = config_element("format", "", {"timezone" => "+0100"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T21:10:30+01:00\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+    end
 
-
-
-
-
-
-
-
-
-
-
-
+    test "specifying formatter type LTSV for records, without tag and timezone" do
+      format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "message:yay\tname:tagomoris\n", line
+    end
+
+    test "specifying formatter type LTSV for records, with inject section to insert tag and time" do
+      inject_section = config_element("inject", "", {"tag_key" => "tag", "time_key" => "time", "time_type" => "string", "localtime" => "false"})
+      format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [inject_section, format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "message:yay\tname:tagomoris\ttag:test.now\ttime:2017-01-24T20:10:30Z\n", line
+    end
+  end
+
+  sub_test_case "using older configuration" do
+    test "output_data_type json is same with out_file with UTC timezone" do
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log", "output_data_type" => "json"}, [])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
     end
   end
 end
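
Taken together, the rewritten tests document the 1.0.0 configuration surface: flat v0.12-style parameters keep working through the compatibility path, while `<buffer>`, `<format>`, and `<inject>` subsections drive the new v0.14 behavior. The shapes the tests build with `config_element`, collected into one sketch (values illustrative):

```ruby
include Fluent::Test::Helpers  # provides config_element

buffer_section = config_element("buffer", "time", {"timekey" => 1})
format_section = config_element("format", "", {"@type" => "ltsv"})
inject_section = config_element("inject", "", {"tag_key" => "tag",
                                               "time_key" => "time",
                                               "time_type" => "string"})

conf = config_element("match", "", {
  "host" => "namenode.local",
  "path" => "/hdfs/path/file.%Y%m%d.log"
}, [buffer_section, inject_section, format_section])
```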