fluent-plugin-webhdfs 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lib/fluent/plugin/webhdfs_compressor_bzip2.rb
@@ -1,14 +1,10 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class Bzip2Compressor < Compressor
       WebHDFSOutput.register_compressor('bzip2', self)
 
       def initialize(options = {})
-        begin
-          require "bzip2/ffi"
-        rescue LoadError
-          raise Fluent::ConfigError, "Install bzip2-ffi before use bzip2 compressor"
-        end
+        require "bzip2/ffi"
       end
 
       def ext
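
All five compressor shims below get the same mechanical change: the plugin moves from Fluentd's v0.12-era `Fluent::TimeSlicedOutput` base class to the v0.14 `Fluent::Plugin::Output` API, so the reopened class becomes `Fluent::Plugin::WebHDFSOutput`. A minimal before/after sketch of just that migration (class and superclass names are taken from this diff; the `require 'fluent/plugin/output'` line is Fluentd's usual entry point for the new API and does not appear in these hunks):

    # 0.7.1: v0.12-era plugin API
    module Fluent
      class WebHDFSOutput < Fluent::TimeSlicedOutput
      end
    end

    # 1.0.0: v0.14 plugin API
    require 'fluent/plugin/output'

    module Fluent::Plugin
      class WebHDFSOutput < Output
      end
    end

Note also that the begin/rescue around `require "bzip2/ffi"` is dropped, so a missing bzip2-ffi gem now surfaces as a plain LoadError instead of being rewrapped as a Fluent::ConfigError.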
lib/fluent/plugin/webhdfs_compressor_gzip.rb
@@ -1,5 +1,5 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class GzipCompressor < Compressor
       WebHDFSOutput.register_compressor('gzip', self)
 
@@ -14,7 +14,7 @@ module Fluent
       def compress(chunk, tmp)
         w = Zlib::GzipWriter.new(tmp)
         chunk.write_to(w)
-        w.finish
+        w.close
       end
     end
   end
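
The one behavioral change here is `w.finish` → `w.close`. In Ruby's standard zlib, `Zlib::GzipWriter#finish` writes the gzip trailer but deliberately leaves the underlying IO open, while `#close` finishes the stream and closes the IO as well. A standalone sketch of the difference (standard-library behavior, not code from this gem):

    require 'zlib'
    require 'stringio'

    io = StringIO.new
    w  = Zlib::GzipWriter.new(io)
    w.write("log line\n")

    w.finish          # gzip trailer written; io remains open
    io.closed?        # => false, caller must close io separately

    # w.close would write the trailer AND close io in one step

Presumably the intent is to ensure the temporary file passed in as `tmp` is flushed and closed together with the writer.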
lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
@@ -1,9 +1,9 @@
-module Fluent
-  class WebHDFSOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class LZOCommandCompressor < Compressor
       WebHDFSOutput.register_compressor('lzo_command', self)
 
-      config_param :command_parameter, :string, :default => '-qf1'
+      config_param :command_parameter, :string, default: '-qf1'
 
       def configure(conf)
         super
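
Two details in this hunk: the 0.7.1 file reopened `class WebHDFSOutput` without restating a superclass, which 1.0.0 normalizes to `< Output` like the other compressors, and the `config_param` change is purely syntactic, since both forms pass the same options hash:

    config_param :command_parameter, :string, :default => '-qf1'  # hash-rocket style (0.7.1)
    config_param :command_parameter, :string, default: '-qf1'     # Ruby 1.9 keyword style (1.0.0)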
lib/fluent/plugin/webhdfs_compressor_snappy.rb
@@ -1,5 +1,5 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class SnappyCompressor < Compressor
       WebHDFSOutput.register_compressor('snappy', self)
 
lib/fluent/plugin/webhdfs_compressor_text.rb
@@ -1,5 +1,5 @@
-module Fluent
-  class WebHDFSOutput < Fluent::TimeSlicedOutput
+module Fluent::Plugin
+  class WebHDFSOutput < Output
     class TextCompressor < Compressor
       WebHDFSOutput.register_compressor('text', self)
 
test/helper.rb
@@ -8,10 +8,13 @@ rescue Bundler::BundlerError => e
   exit e.status_code
 end
 require 'test/unit'
+require 'test/unit/rr'
 
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 require 'fluent/test'
+require 'fluent/test/helpers'
+require 'fluent/test/driver/output'
 unless ENV.has_key?('VERBOSE')
   nulllogger = Object.new
   nulllogger.instance_eval {|obj|
@@ -22,6 +25,8 @@ unless ENV.has_key?('VERBOSE')
   $log = nulllogger
 end
 
+include Fluent::Test::Helpers
+
 require 'fluent/plugin/out_webhdfs'
 
 class Test::Unit::TestCase
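
The helper now wires up the Fluentd v0.14 test framework: `fluent/test/driver/output` supplies the driver that replaces the old `Fluent::Test::OutputTestDriver`, `fluent/test/helpers` provides `config_element` and `event_time` (hence the top-level `include Fluent::Test::Helpers`), and `test/unit/rr` enables the `mock(...)` stubbing used in the path tests below. As a hedged sketch, the new driver is typically exercised like this (the `run`/`feed` calls follow Fluentd v0.14's test-driver API and are not part of this diff):

    d = Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
    d.run(default_tag: 'test') do
      d.feed(event_time("2017-01-24 13:10:30 -0700"), {"message" => "yay"})
    end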
test/plugin/test_compressor.rb
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
   def setup
     omit unless Object.const_defined?(:Snappy)
     Fluent::Test.setup
-    @compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
+    @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
   end
 
-  def create_driver(conf=CONFIG,tag='test')
-    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+  def create_driver(conf = CONFIG)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
   end
 
   def test_ext
test/plugin/test_out_webhdfs.rb
@@ -1,169 +1,243 @@
 require 'helper'
 
 class WebHDFSOutputTest < Test::Unit::TestCase
-  CONFIG = %[
-    host namenode.local
-    path /hdfs/path/file.%Y%m%d.log
-  ]
+  CONFIG_DEFAULT = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
+
+  CONFIG_COMPAT = config_element(
+    "ROOT", "", {
+      "output_data_type" => "",
+      "host" => "namenode.local",
+      "path" => "/hdfs/path/file.%Y%m%d.log"
+    })
+
   def setup
     Fluent::Test.setup
   end
 
-  def create_driver(conf=CONFIG,tag='test')
-    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+  def create_driver(conf)
+    Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
   end
 
-  class ConfigureTest < self
-    def test_default
-      d = create_driver
+  sub_test_case "default configuration" do
+    test 'configured with standard out_file format with specified hdfs info' do
+      d = create_driver(CONFIG_DEFAULT)
+      assert_true d.instance.instance_eval{ @using_formatter_config }
+
       assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 50070, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
-      assert_equal '%Y%m%d', d.instance.time_slice_format
       assert_equal false, d.instance.httpfs
       assert_nil d.instance.username
       assert_equal false, d.instance.ignore_start_check_error
 
-      assert_equal true, d.instance.output_include_time
-      assert_equal true, d.instance.output_include_tag
-      assert_equal 'json', d.instance.output_data_type
+      assert_equal 'Fluent::Plugin::OutFileFormatter', d.instance.formatter.class.to_s
+      assert_equal true, d.instance.end_with_newline
+
+      # deprecated params
+      assert_nil d.instance.instance_eval{ @output_include_time }
+      assert_nil d.instance.instance_eval{ @output_include_tag }
       assert_nil d.instance.remove_prefix
-      assert_equal 'TAB', d.instance.field_separator
-      assert_equal true, d.instance.add_newline
-      assert_equal 'tag_missing', d.instance.default_tag
+      assert_nil d.instance.instance_eval{ @header_separator }
+      assert_nil d.instance.default_tag
+    end
+  end
+
+  sub_test_case "flat configuration" do
+    def test_default_for_traditional_config
+      d = create_driver(CONFIG_COMPAT)
+      assert_false d.instance.instance_eval{ @using_formatter_config }
+
+      assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
+      assert_equal 50070, d.instance.instance_eval{ @namenode_port }
+      assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+      assert_equal false, d.instance.httpfs
+      assert_nil d.instance.username
+      assert_equal false, d.instance.ignore_start_check_error
+
+      assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
+      assert_equal true, d.instance.end_with_newline
+
+      assert_equal true, d.instance.instance_eval{ @output_include_time }
+      assert_equal true, d.instance.instance_eval{ @output_include_tag }
+      assert_nil d.instance.instance_eval{ @remove_prefix }
+      assert_equal "\t", d.instance.instance_eval{ @header_separator }
+      assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
     end
 
     def test_httpfs
-      d = create_driver %[
-        namenode server.local:14000
-        path /hdfs/path/file.%Y%m%d.%H%M.log
-        httpfs yes
-        username hdfs_user
-      ]
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "httpfs" => "yes",
+          "username" => "hdfs_user"
+        })
+      d = create_driver(conf)
+
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.httpfs
       assert_equal 'hdfs_user', d.instance.username
     end
 
     def test_ssl
-      d = create_driver %[
-        namenode server.local:14000
-        path /hdfs/path/file.%Y%m%d.%H%M.log
-        ssl true
-        ssl_ca_file /path/to/ca_file.pem
-        ssl_verify_mode peer
-        kerberos true
-        kerberos_keytab /path/to/kerberos.keytab
-      ]
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+          "ssl" => true,
+          "ssl_ca_file" => "/path/to/ca_file.pem",
+          "ssl_verify_mode" => "peer",
+          "kerberos" => true
+        })
+      d = create_driver(conf)
 
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
      assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.ssl
       assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
       assert_equal :peer, d.instance.ssl_verify_mode
       assert_equal true, d.instance.kerberos
-      assert_equal '/path/to/kerberos.keytab', d.instance.kerberos_keytab
     end
 
-    data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
-         bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
-         snappy: ['snappy', Fluent::WebHDFSOutput::SnappyCompressor],
-         lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
+    data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
+         bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
+         snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+         lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
    def test_compress(data)
      compress_type, compressor_class = data
      begin
-        d = create_driver %[
-          namenode server.local:14000
-          path /hdfs/path/file.%Y%m%d.%H%M.log
-          compress #{compress_type}
-        ]
+        conf = config_element(
+          "ROOT", "", {
+            "namenode" => "server.local:14000",
+            "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+            "compress" => compress_type
+          })
+        d = create_driver(conf)
      rescue Fluent::ConfigError => ex
        omit ex.message
      end
      assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
      assert_equal 14000, d.instance.instance_eval{ @namenode_port }
      assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
      assert_equal compress_type, d.instance.compress
      assert_equal compressor_class, d.instance.compressor.class
    end
 
-    def test_placeholders
-      d = create_driver %[
-        hostname testing.node.local
-        namenode server.local:50070
-        path /hdfs/${hostname}/file.%Y%m%d%H.log
-      ]
+    def test_placeholders_old_style
+      conf = config_element(
+        "ROOT", "", {
+          "hostname" => "testing.node.local",
+          "namenode" => "server.local:50070",
+          "path" => "/hdfs/${hostname}/file.%Y%m%d%H.log"
+        })
+      d = create_driver(conf)
      assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
    end
 
-    class PathFormatTest < self
-      def test_default
-        d = create_driver
-        assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
-        assert_equal '%Y%m%d', d.instance.time_slice_format
-        assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
-      end
+    data("%Y%m%d" => ["/hdfs/path/file.%Y%m%d.log", "/hdfs/path/file.20120718.log"],
+         "%Y%m%d.%H%M" => ["/hdfs/path/file.%Y%m%d.%H%M.log", "/hdfs/path/file.20120718.1503.log"])
+    test "generate_path" do |(path, expected)|
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => path
+        })
+      d = create_driver(conf)
+      formatter = Fluent::Timezone.formatter("+0900", path)
+      mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal expected, d.instance.generate_path(chunk)
+    end
 
-      def test_time_slice_format
-        d = create_driver %[
-          namenode server.local:14000
-          path /hdfs/path/file.%Y%m%d.%H%M.log
-        ]
-        assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
-        assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
-        assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+    data(path: { "append" => false },
+         ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
+         compress: { "compress" => "invalid" })
+    test "invalid" do |attr|
+      conf = config_element(
+        "ROOT", "", {
+          "namenode" => "server.local:14000",
+          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
+        })
+      conf += config_element("", "", attr)
+      assert_raise Fluent::ConfigError do
+        create_driver(conf)
      end
    end
+  end
 
-    class InvalidTest < self
-      def test_path
-        assert_raise Fluent::ConfigError do
-          d = create_driver %[
-            namenode server.local:14000
-            path /hdfs/path/file.%Y%m%d.%H%M.log
-            append false
-          ]
-        end
-      end
+  sub_test_case "sub section configuration" do
+    def test_time_key
+      conf = config_element(
+        "ROOT", "", {
+          "host" => "namenode.local",
+          "path" => "/hdfs/path/file.%Y%m%d.log"
+        }, [
+          config_element(
+            "buffer", "time", {
+              "timekey" => 1
+            })
+        ]
+      )
+      d = create_driver(conf)
+      time = event_time("2012-07-18 15:03:00 +0900")
+      metadata = d.instance.metadata("test", time, {})
+      chunk = d.instance.buffer.generate_chunk(metadata)
+      assert_equal 1, d.instance.buffer_config.timekey
+      assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
+    end
+  end
 
-      def test_ssl
-        assert_raise Fluent::ConfigError do
-          create_driver %[
-            namenode server.local:14000
-            path /hdfs/path/file.%Y%m%d.%H%M.log
-            ssl true
-            ssl_verify_mode invalid
-          ]
-        end
-      end
+  sub_test_case "using format subsection" do
+    test "blank format means default format 'out_file' with UTC timezone" do
+      format_section = config_element("format", "", {}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+    end
 
-      def test_invalid_compress
-        assert_raise Fluent::ConfigError do
-          create_driver %[
-            namenode server.local:14000
-            path /hdfs/path/file.%Y%m%d.%H%M.log
-            compress invalid
-          ]
-        end
-      end
+    test "specifying timezone works well in format section" do
+      format_section = config_element("format", "", {"timezone" => "+0100"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T21:10:30+01:00\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+    end
 
-      data("%{uuid:hostname}" => "%{uuid:hostname}",
-           "%{uuid:timestamp}" => "%{uuid:timestamp}")
-      def test_obsoleted_placeholders(placeholder)
-        assert_raise Fluent::ConfigError do
-          create_driver %[
-            namenode server.local:14000
-            path /hdfs/path/#{placeholder}/file.%Y%m%d.%H%M.log
-            append false
-          ]
-        end
-      end
+    test "specifying formatter type LTSV for records, without tag and timezone" do
+      format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "message:yay\tname:tagomoris\n", line
+    end
+
+    test "specifying formatter type LTSV for records, with inject section to insert tag and time" do
+      inject_section = config_element("inject", "", {"tag_key" => "tag", "time_key" => "time", "time_type" => "string", "localtime" => "false"})
+      format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [inject_section, format_section])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "message:yay\tname:tagomoris\ttag:test.now\ttime:2017-01-24T20:10:30Z\n", line
+    end
+  end
+
+  sub_test_case "using older configuration" do
+    test "output_data_type json is same with out_file with UTC timezone" do
+      conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log", "output_data_type" => "json"}, [])
+      d = create_driver(conf)
+      time = event_time("2017-01-24 13:10:30 -0700")
+      line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+      assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
     end
   end
 end
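
For reference, the `config_element` trees built in these tests mirror what a user writes in fluentd configuration. The buffer, format, and inject cases above correspond roughly to a config like the following (the `@type webhdfs` line and the match pattern are illustrative assumptions, not part of the tests):

    <match hdfs.**>
      @type webhdfs
      host namenode.local
      path /hdfs/path/file.%Y%m%d.log
      <buffer time>
        timekey 1
      </buffer>
      <format>
        @type ltsv
      </format>
      <inject>
        tag_key tag
        time_key time
        time_type string
        localtime false
      </inject>
    </match>

The "using older configuration" case covers the compatibility path: a flat `output_data_type json` with no `<format>` section still configures, and produces the same out_file-style time/tag/JSON lines asserted for the default format.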