fluent-plugin-webhdfs 0.5.3 → 0.6.0rc1

Release diff. The headline change: the plugin is rewritten from the Fluentd v0.12 Fluent::TimeSlicedOutput base (plus formatter/placeholder mixins) onto the Fluentd v0.14 Fluent::Plugin::Output API, with buffering and formatting configured through <buffer> and <format> sections; the old flat parameters keep working through a deprecated compatibility path.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 81d8b4440f1fcf4a086791d26527cb0b8ef86cc4
- data.tar.gz: 99b1b82fca276cd88da3af26ef07843c6d3e6751
+ metadata.gz: e92e9863c1ee9908d2ee0076b3651b7198062511
+ data.tar.gz: b4cf49b2c59fd07f9cb5a3f7faecf165d60723b7
  SHA512:
- metadata.gz: 2b41f320743c71ac0cb14e4599a2111cde800237c9512885966e25a041231bd70472497467d994cc67865b432b86cca2b959d48185edad80333dd471d5b489ca
- data.tar.gz: d0ade845373ff150904de373480bdeb2fcfe01ad8e376e6f4850f233d25d5094a544a35fc4057c2a54588d85c122d6e63265156fda7397c1a15890d0f43b294e
+ metadata.gz: f9c1ea8130b845859ea7c9dd2504f03c176e6162d24636ca4ee27c12d36ac2e32dbcf962aaca77e394e3384d007a5328d974742ae258555130ff8ecf9384b2a6
+ data.tar.gz: 7afef45b797ba03cea394895dc3c6dd327b30339f825d3e2291dec5097b126dd803cd476e1b8420183141ff3f268da602623b0945a71c2f5261fb9b8a18062eb
.travis.yml CHANGED
@@ -2,10 +2,9 @@ sudo: false
  language: ruby
 
  rvm:
- - 2.0.0
  - 2.1
  - 2.2
- - 2.3.0
+ - 2.3.1
 
  branches:
  only:
@@ -23,12 +22,4 @@ script: bundle exec rake test
 
  gemfile:
  - Gemfile
- - gemfiles/fluentd_v0.12.gemfile
  - gemfiles/fluentd_v0.14.gemfile
-
- matrix:
- exclude:
- - rvm: 2.0.0
- gemfile: Gemfile
- - rvm: 2.0.0
- gemfile: gemfiles/fluentd_v0.14.gemfile
fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@
 
  Gem::Specification.new do |gem|
  gem.name = "fluent-plugin-webhdfs"
- gem.version = "0.5.3"
+ gem.version = "0.6.0rc1"
  gem.authors = ["TAGOMORI Satoshi"]
  gem.email = ["tagomoris@gmail.com"]
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -17,11 +17,10 @@ Gem::Specification.new do |gem|
 
  gem.add_development_dependency "rake"
  gem.add_development_dependency "test-unit"
+ gem.add_development_dependency "test-unit-rr"
  gem.add_development_dependency "appraisal"
  gem.add_development_dependency "snappy", '>= 0.0.13'
- gem.add_runtime_dependency "fluentd", ['>= 0.10.59', "< 0.14.0"]
- gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
- gem.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
+ gem.add_runtime_dependency "fluentd", '>= 0.14.4'
  gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
  gem.add_runtime_dependency "bzip2-ffi"
  end
lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -1,139 +1,150 @@
  # -*- coding: utf-8 -*-
 
- require 'tempfile'
+ require 'fluent/plugin/output'
+ require 'fluent/config/element'
 
- require 'fluent/mixin/config_placeholders'
- require 'fluent/mixin/plaintextformatter'
+ require 'webhdfs'
+ require 'tempfile'
+ require 'securerandom'
 
- class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
+ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
  Fluent::Plugin.register_output('webhdfs', self)
 
- config_set_default :buffer_type, 'memory'
- config_set_default :time_slice_format, '%Y%m%d'
-
- # For fluentd v0.12.16 or earlier
- class << self
- unless method_defined?(:desc)
- def desc(description)
- end
- end
- end
+ helpers :inject, :formatter, :compat_parameters
 
  desc 'WebHDFS/HttpFs host'
- config_param :host, :string, :default => nil
+ config_param :host, :string, default: nil
  desc 'WebHDFS/HttpFs port'
- config_param :port, :integer, :default => 50070
+ config_param :port, :integer, default: 50070
  desc 'Namenode (host:port)'
- config_param :namenode, :string, :default => nil # host:port
+ config_param :namenode, :string, default: nil # host:port
  desc 'Standby namenode for Namenode HA (host:port)'
- config_param :standby_namenode, :string, :default => nil # host:port
+ config_param :standby_namenode, :string, default: nil # host:port
 
  desc 'Ignore errors on start up'
- config_param :ignore_start_check_error, :bool, :default => false
-
- include Fluent::Mixin::ConfigPlaceholders
+ config_param :ignore_start_check_error, :bool, default: false
 
  desc 'Output file path on HDFS'
  config_param :path, :string
  desc 'User name for pseudo authentication'
- config_param :username, :string, :default => nil
+ config_param :username, :string, default: nil
 
  desc 'Store data over HttpFs instead of WebHDFS'
- config_param :httpfs, :bool, :default => false
+ config_param :httpfs, :bool, default: false
 
  desc 'Number of seconds to wait for the connection to open'
- config_param :open_timeout, :integer, :default => 30 # from ruby net/http default
+ config_param :open_timeout, :integer, default: 30 # from ruby net/http default
  desc 'Number of seconds to wait for one block to be read'
- config_param :read_timeout, :integer, :default => 60 # from ruby net/http default
+ config_param :read_timeout, :integer, default: 60 # from ruby net/http default
 
  desc 'Retry automatically when known errors of HDFS are occurred'
- config_param :retry_known_errors, :bool, :default => false
+ config_param :retry_known_errors, :bool, default: false
  desc 'Retry interval'
- config_param :retry_interval, :integer, :default => nil
+ config_param :retry_interval, :integer, default: nil
  desc 'The number of retries'
- config_param :retry_times, :integer, :default => nil
+ config_param :retry_times, :integer, default: nil
 
  # how many times of write failure before switch to standby namenode
  # by default it's 11 times that costs 1023 seconds inside fluentd,
  # which is considered enough to exclude the scenes that caused by temporary network fail or single datanode fail
  desc 'How many times of write failure before switch to standby namenode'
- config_param :failures_before_use_standby, :integer, :default => 11
-
- include Fluent::Mixin::PlainTextFormatter
+ config_param :failures_before_use_standby, :integer, default: 11
 
- config_param :default_tag, :string, :default => 'tag_missing'
+ config_param :end_with_newline, :bool, default: true
 
  desc 'Append data or not'
- config_param :append, :bool, :default => true
+ config_param :append, :bool, default: true
 
  desc 'Use SSL or not'
- config_param :ssl, :bool, :default => false
+ config_param :ssl, :bool, default: false
  desc 'OpenSSL certificate authority file'
- config_param :ssl_ca_file, :string, :default => nil
+ config_param :ssl_ca_file, :string, default: nil
  desc 'OpenSSL verify mode (none,peer)'
- config_param :ssl_verify_mode, :default => nil do |val|
- case val
- when 'none'
- :none
- when 'peer'
- :peer
- else
- raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
- end
- end
+ config_param :ssl_verify_mode, :enum, list: [:none, :peer], default: :none
 
  desc 'Use kerberos authentication or not'
- config_param :kerberos, :bool, :default => false
+ config_param :kerberos, :bool, default: false
 
- SUPPORTED_COMPRESS = ['gzip', 'bzip2', 'snappy', 'lzo_command', 'text']
+ SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
  desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
- config_param :compress, :default => nil do |val|
- unless SUPPORTED_COMPRESS.include? val
- raise Fluent::ConfigError, "unsupported compress: #{val}"
- end
- val
- end
+ config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
+
+ config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
+ config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
+ config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
+ config_param :suppress_log_broken_string, :bool, default: false, deprecated: "use @log_level for plugin to suppress such info logs"
 
  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
 
- attr_reader :compressor
+ config_section :buffer do
+ config_set_default :chunk_keys, ["time"]
+ end
+
+ config_section :format do
+ config_set_default :@type, 'out_file'
+ config_set_default :localtime, false # default timezone is UTC
+ end
+
+ attr_reader :formatter, :compressor
 
  def initialize
  super
- require 'net/http'
- require 'time'
- require 'webhdfs'
-
  @compressor = nil
- end
-
- # Define `log` method for v0.10.42 or earlier
- unless method_defined?(:log)
- define_method("log") { $log }
+ @standby_namenode_host = nil
+ @output_include_tag = @output_include_time = nil # TODO: deprecated
+ @header_separator = @field_separator = nil # TODO: deprecated
  end
 
  def configure(conf)
- if conf['path']
- if conf['path'].index('%S')
- conf['time_slice_format'] = '%Y%m%d%H%M%S'
- elsif conf['path'].index('%M')
- conf['time_slice_format'] = '%Y%m%d%H%M'
- elsif conf['path'].index('%H')
- conf['time_slice_format'] = '%Y%m%d%H'
- end
+ compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+
+ timekey = case conf["path"]
+ when /%S/ then 1
+ when /%M/ then 60
+ when /%H/ then 3600
+ else 86400
+ end
+ if conf.elements(name: "buffer").empty?
+ e = Fluent::Config::Element.new("buffer", "time", {}, [])
+ conf.elements << e
  end
+ buffer_config = conf.elements(name: "buffer").first
+ buffer_config["timekey"] = timekey unless buffer_config["timekey"]
+
+ compat_parameters_convert_plaintextformatter(conf)
 
  super
 
- begin
- @compressor = COMPRESSOR_REGISTRY.lookup(@compress || 'text').new
- rescue Fluent::ConfigError
- raise
- rescue
- $log.warn "#{@comress} not found. Use 'text' instead"
- @compressor = COMPRESSOR_REGISTRY.lookup('text').new
+ @formatter = formatter_create
+
+ if @using_formatter_config
+ @null_value = nil
+ else
+ @formatter.delimiter = "\x01" if @formatter.respond_to?(:delimiter) && @formatter.delimiter == 'SOH'
+ @null_value ||= 'NULL'
  end
 
+ if @default_tag.nil? && !@using_formatter_config && @output_include_tag
+ @default_tag = "tag_missing"
+ end
+ if @remove_prefix
+ @remove_prefix_actual = @remove_prefix + "."
+ @remove_prefix_actual_length = @remove_prefix_actual.length
+ end
+
+ verify_config_placeholders_in_path!(conf)
+ @replace_random_uuid = @path.include?('%{uuid}') || @path.include?('%{uuid_flush}')
+ if @replace_random_uuid
+ # to check SecureRandom.uuid is available or not (NotImplementedError raised in such environment)
+ begin
+ SecureRandom.uuid
+ rescue
+ raise Fluent::ConfigError, "uuid feature (SecureRandom) is unavailable in this environment"
+ end
+ end
+
+ @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
+
  if @host
  @namenode_host = @host
  @namenode_port = @port
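
The timekey derivation in configure above maps the finest strftime directive found in path to a buffer flush granularity in seconds. A minimal standalone sketch of the same mapping (plain Ruby, no Fluentd required; path_to_timekey is a hypothetical helper name, not part of the plugin):

    # The finest time directive in the path pattern decides how often
    # buffer chunks are flushed into separate files.
    def path_to_timekey(path)
      case path
      when /%S/ then 1      # per-second files
      when /%M/ then 60     # per-minute files
      when /%H/ then 3600   # hourly files
      else 86400            # daily files (default)
      end
    end

    path_to_timekey("/hdfs/path/file.%Y%m%d.%H%M.log") # => 60
    path_to_timekey("/hdfs/path/file.%Y%m%d.log")      # => 86400
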
@@ -167,7 +178,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  @client_standby = nil
  end
 
- if not @append
+ unless @append
  if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
  raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
  end
@@ -230,14 +241,6 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  end
  end
 
- def shutdown
- super
- end
-
- def path_format(chunk_key)
- Time.strptime(chunk_key, @time_slice_format).strftime(@path)
- end
-
  def is_standby_exception(e)
  e.is_a?(WebHDFS::IOError) && e.message.match(/org\.apache\.hadoop\.ipc\.StandbyException/)
  end
@@ -249,12 +252,6 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  end
  end
 
- def chunk_unique_id_to_str(unique_id)
- unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
- end
-
- # TODO check conflictions
-
  def send_data(path, data)
  if @append
  begin
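
The removed chunk_unique_id_to_str helper simply hex-encoded a binary chunk id; the rewrite relies on the unique-id dump used in generate_path below instead. For reference, the encoding it performed is a one-liner:

    # Hex-encode a binary chunk id, byte by byte (behavior of the removed helper).
    "\x01\xab".unpack('C*').map { |x| x.to_s(16).rjust(2, '0') }.join # => "01ab"
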
@@ -267,13 +264,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  end
  end
 
+ HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
+ UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
+ UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
+
+ def verify_config_placeholders_in_path!(conf)
+ return unless conf.has_key?('path')
+
+ path = conf['path']
+
+ # check @path for ${hostname}, %{hostname} and __HOSTNAME__ to warn to use #{Socket.gethostbyname}
+ if HOSTNAME_PLACEHOLDERS_DEPRECATED.any?{|ph| path.include?(ph) }
+ log.warn "hostname placeholder is now deprecated. use '\#\{Socket.gethostname\}' instead."
+ hostname = conf['hostname'] || Socket.gethostname
+ HOSTNAME_PLACEHOLDERS_DEPRECATED.each do |ph|
+ path.gsub!(ph, hostname)
+ end
+ end
+
+ if UUID_RANDOM_PLACEHOLDERS_DEPRECATED.any?{|ph| path.include?(ph) }
+ log.warn "random uuid placeholders are now deprecated. use %{uuid} (or %{uuid_flush}) instead."
+ UUID_RANDOM_PLACEHOLDERS_DEPRECATED.each do |ph|
+ path.gsub!(ph, '%{uuid}')
+ end
+ end
+
+ if UUID_OTHER_PLACEHOLDERS_OBSOLETED.any?{|ph| path.include?(ph) }
+ UUID_OTHER_PLACEHOLDERS_OBSOLETED.each do |ph|
+ if path.include?(ph)
+ log.error "configuration placeholder #{ph} is now unsupported by webhdfs output plugin."
+ end
+ end
+ raise ConfigError, "there are unsupported placeholders in path."
+ end
+ end
+
  def generate_path(chunk)
  hdfs_path = if @append
- path_format(chunk.key)
+ extract_placeholders(@path, chunk.metadata)
  else
- path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
+ extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id(chunk.unique_id))
  end
  hdfs_path = "#{hdfs_path}#{@compressor.ext}"
+ if @replace_random_uuid
+ uuid_random = SecureRandom.uuid
+ hdfs_path.gsub!('%{uuid}', uuid_random).gsub!('%{uuid_flush}', uuid_random)
+ end
  hdfs_path
  end
 
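With %{uuid} or %{uuid_flush} in the path, generate_path substitutes one random UUID per generated file. A minimal sketch of that substitution (hypothetical standalone function mirroring the @replace_random_uuid branch above):

    require 'securerandom'

    # One UUID per flush, substituted for both placeholder spellings.
    def fill_uuid_placeholders(hdfs_path)
      uuid = SecureRandom.uuid
      hdfs_path.gsub('%{uuid}', uuid).gsub('%{uuid_flush}', uuid)
    end

    fill_uuid_placeholders("/hdfs/path/file.20120718.%{uuid}.log")
    # => e.g. "/hdfs/path/file.20120718.4f0c....log" (random per call)

The sketch uses non-mutating gsub on purpose: the chained gsub! in the diff returns nil when the first placeholder is absent, so the released code assumes a path never contains %{uuid_flush} without %{uuid}.
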
@@ -288,6 +324,48 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  end
  end
 
+ def format(tag, time, record)
+ if @remove_prefix # TODO: remove when it's obsoleted
+ if tag.start_with?(@remove_prefix_actual)
+ if tag.length > @remove_prefix_actual_length
+ tag = tag[@remove_prefix_actual_length..-1]
+ else
+ tag = @default_tag
+ end
+ elsif tag.start_with?(@remove_prefix)
+ if tag == @remove_prefix
+ tag = @default_tag
+ else
+ tag = tag.sub(@remove_prefix, '')
+ end
+ end
+ end
+
+ if @null_value # TODO: remove when it's obsoleted
+ check_keys = (record.keys + @null_convert_keys).uniq
+ check_keys.each do |key|
+ record[key] = @null_value if record[key].nil?
+ end
+ end
+
+ if @using_formatter_config
+ record = inject_values_to_record(tag, time, record)
+ line = @formatter.format(tag, time, record)
+ else # TODO: remove when it's obsoleted
+ time_str = @output_include_time ? @time_formatter.call(time) + @header_separator : ''
+ tag_str = @output_include_tag ? tag + @header_separator : ''
+ record_str = @formatter.format(tag, time, record)
+ line = time_str + tag_str + record_str
+ end
+ line << "\n" if @end_with_newline && !line.end_with?("\n")
+ line
+ rescue => e # remove this clause when @suppress_log_broken_string is obsoleted
+ unless @suppress_log_broken_string
+ log.info "unexpected error while formatting events, ignored", tag: tag, record: record, error: e
+ end
+ ''
+ end
+
  def write(chunk)
  hdfs_path = generate_path(chunk)
 
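In the new format path above, records configured with a <format> section go through inject_values_to_record and the formatter plugin; without one, the legacy layout is reproduced: time and tag prefixes joined by the header separator, then the formatted record. A sketch of the legacy line shape (values illustrative only):

    header_separator = "\t"
    time_str = "2017-01-24T20:10:30Z"
    tag = "test.now"
    record_str = '{"message":"yay"}'
    line = time_str + header_separator + tag + header_separator + record_str + "\n"
    # => "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\"}\n"
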
@@ -318,6 +396,72 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
  hdfs_path
  end
 
+ def compat_parameters_convert_plaintextformatter(conf)
+ if !conf.elements('format').empty? || !conf['output_data_type']
+ @using_formatter_config = true
+ @null_convert_keys = []
+ return
+ end
+
+ log.warn "webhdfs output plugin is working with old configuration parameters. use <inject>/<format> sections instead for further releases."
+ @using_formatter_config = false
+ @null_convert_keys = []
+
+ @header_separator = case conf['field_separator']
+ when nil then "\t"
+ when 'SPACE' then ' '
+ when 'TAB' then "\t"
+ when 'COMMA' then ','
+ when 'SOH' then "\x01"
+ else conf['field_separator']
+ end
+
+ format_section = Fluent::Config::Element.new('format', '', {}, [])
+ case conf['output_data_type']
+ when '', 'json' # blank value is for compatibility reason (especially in testing)
+ format_section['@type'] = 'json'
+ when 'ltsv'
+ format_section['@type'] = 'ltsv'
+ else
+ unless conf['output_data_type'].start_with?('attr:')
+ raise Fluent::ConfigError, "output_data_type is invalid: #{conf['output_data_type']}"
+ end
+ format_section['@format'] = 'tsv'
+ keys_part = conf['output_data_type'].sub(/^attr:/, '')
+ @null_convert_keys = keys_part.split(',')
+ format_section['keys'] = keys_part
+ format_section['delimiter'] = case conf['field_separator']
+ when nil then '\t'
+ when 'SPACE' then ' '
+ when 'TAB' then '\t'
+ when 'COMMA' then ','
+ when 'SOH' then 'SOH' # fixed later
+ else conf['field_separator']
+ end
+ end
+
+ conf.elements << format_section
+
+ @output_include_time = conf.has_key?('output_include_time') ? Fluent::Config.bool_value(conf['output_include_time']) : true
+ @output_include_tag = conf.has_key?('output_include_tag') ? Fluent::Config.bool_value(conf['output_include_tag']) : true
+
+ if @output_include_time
+ # default timezone is UTC
+ using_localtime = if !conf.has_key?('utc') && !conf.has_key?('localtime')
+ false
+ elsif conf.has_key?('localtime') && conf.has_key?('utc')
+ raise Fluent::ConfigError, "specify either 'localtime' or 'utc'"
+ elsif conf.has_key?('localtime')
+ Fluent::Config.bool_value('localtime')
+ else
+ Fluent::Config.bool_value('utc')
+ end
+ @time_formatter = Fluent::TimeFormatter.new(conf['time_format'], using_localtime)
+ else
+ @time_formatter = nil
+ end
+ end
+
  class Compressor
  include Fluent::Configurable
 
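compat_parameters_convert_plaintextformatter above synthesizes a <format> element from the old flat parameters. A condensed sketch of just the output_data_type mapping (hypothetical helper, matching the case analysis above; the real method also handles delimiters and time/tag prefixes):

    # Map the legacy output_data_type value to <format> attributes.
    def legacy_format_attrs(output_data_type)
      case output_data_type
      when "", "json" then { "@type" => "json" }
      when "ltsv"     then { "@type" => "ltsv" }
      when /\Aattr:/  then { "@format" => "tsv", "keys" => output_data_type.sub(/\Aattr:/, "") }
      else raise ArgumentError, "output_data_type is invalid: #{output_data_type}"
      end
    end

    legacy_format_attrs("attr:message,name")
    # => {"@format"=>"tsv", "keys"=>"message,name"}
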
lib/fluent/plugin/webhdfs_compressor_bzip2.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent
- class WebHDFSOutput < Fluent::TimeSlicedOutput
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
  class Bzip2Compressor < Compressor
  WebHDFSOutput.register_compressor('bzip2', self)
 
lib/fluent/plugin/webhdfs_compressor_gzip.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent
- class WebHDFSOutput < Fluent::TimeSlicedOutput
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
  class GzipCompressor < Compressor
  WebHDFSOutput.register_compressor('gzip', self)
 
lib/fluent/plugin/webhdfs_compressor_lzo_command.rb CHANGED
@@ -1,9 +1,9 @@
- module Fluent
- class WebHDFSOutput
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
  class LZOCommandCompressor < Compressor
  WebHDFSOutput.register_compressor('lzo_command', self)
 
- config_param :command_parameter, :string, :default => '-qf1'
+ config_param :command_parameter, :string, default: '-qf1'
 
  def configure(conf)
  super
lib/fluent/plugin/webhdfs_compressor_snappy.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent
- class WebHDFSOutput < Fluent::TimeSlicedOutput
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
  class SnappyCompressor < Compressor
  WebHDFSOutput.register_compressor('snappy', self)
 
lib/fluent/plugin/webhdfs_compressor_text.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent
- class WebHDFSOutput < Fluent::TimeSlicedOutput
+ module Fluent::Plugin
+ class WebHDFSOutput < Output
  class TextCompressor < Compressor
  WebHDFSOutput.register_compressor('text', self)
 
test/helper.rb CHANGED
@@ -8,10 +8,13 @@ rescue Bundler::BundlerError => e
  exit e.status_code
  end
  require 'test/unit'
+ require 'test/unit/rr'
 
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
  require 'fluent/test'
+ require 'fluent/test/helpers'
+ require 'fluent/test/driver/output'
  unless ENV.has_key?('VERBOSE')
  nulllogger = Object.new
  nulllogger.instance_eval {|obj|
@@ -22,6 +25,8 @@ unless ENV.has_key?('VERBOSE')
  $log = nulllogger
  end
 
+ include Fluent::Test::Helpers
+
  require 'fluent/plugin/out_webhdfs'
 
  class Test::Unit::TestCase
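
With these requires in place, tests drive the plugin through the v0.14 test driver. A minimal sketch of the pattern the test files below use:

    include Fluent::Test::Helpers

    conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
    driver = Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
    driver.instance.path # => "/hdfs/path/file.%Y%m%d.log"
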
test/plugin/test_compressor.rb CHANGED
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
  def setup
  omit unless Object.const_defined?(:Snappy)
  Fluent::Test.setup
- @compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
+ @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
  end
 
- def create_driver(conf=CONFIG,tag='test')
- Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+ def create_driver(conf = CONFIG)
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
  end
 
  def test_ext
test/plugin/test_out_webhdfs.rb CHANGED
@@ -1,154 +1,243 @@
  require 'helper'
 
  class WebHDFSOutputTest < Test::Unit::TestCase
- CONFIG = %[
- host namenode.local
- path /hdfs/path/file.%Y%m%d.log
- ]
+ CONFIG_DEFAULT = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
+
+ CONFIG_COMPAT = config_element(
+ "ROOT", "", {
+ "output_data_type" => "",
+ "host" => "namenode.local",
+ "path" => "/hdfs/path/file.%Y%m%d.log"
+ })
+
  def setup
  Fluent::Test.setup
  end
 
- def create_driver(conf=CONFIG,tag='test')
- Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
+ def create_driver(conf)
+ Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
  end
 
- class ConfigureTest < self
- def test_default
- d = create_driver
+ sub_test_case "default configuration" do
+ test 'configured with standard out_file format with specified hdfs info' do
+ d = create_driver(CONFIG_DEFAULT)
+ assert_true d.instance.instance_eval{ @using_formatter_config }
+
  assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
  assert_equal 50070, d.instance.instance_eval{ @namenode_port }
  assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
- assert_equal '%Y%m%d', d.instance.time_slice_format
  assert_equal false, d.instance.httpfs
  assert_nil d.instance.username
  assert_equal false, d.instance.ignore_start_check_error
 
- assert_equal true, d.instance.output_include_time
- assert_equal true, d.instance.output_include_tag
- assert_equal 'json', d.instance.output_data_type
+ assert_equal 'Fluent::Plugin::OutFileFormatter', d.instance.formatter.class.to_s
+ assert_equal true, d.instance.end_with_newline
+
+ # deprecated params
+ assert_nil d.instance.instance_eval{ @output_include_time }
+ assert_nil d.instance.instance_eval{ @output_include_tag }
  assert_nil d.instance.remove_prefix
- assert_equal 'TAB', d.instance.field_separator
- assert_equal true, d.instance.add_newline
- assert_equal 'tag_missing', d.instance.default_tag
+ assert_nil d.instance.instance_eval{ @header_separator }
+ assert_nil d.instance.default_tag
+ end
+ end
+
+ sub_test_case "flat configuration" do
+ def test_default_for_traditional_config
+ d = create_driver(CONFIG_COMPAT)
+ assert_false d.instance.instance_eval{ @using_formatter_config }
+
+ assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
+ assert_equal 50070, d.instance.instance_eval{ @namenode_port }
+ assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+ assert_equal false, d.instance.httpfs
+ assert_nil d.instance.username
+ assert_equal false, d.instance.ignore_start_check_error
+
+ assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
+ assert_equal true, d.instance.end_with_newline
+
+ assert_equal true, d.instance.instance_eval{ @output_include_time }
+ assert_equal true, d.instance.instance_eval{ @output_include_tag }
+ assert_nil d.instance.instance_eval{ @remove_prefix }
+ assert_equal "\t", d.instance.instance_eval{ @header_separator }
+ assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
  end
 
  def test_httpfs
- d = create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- httpfs yes
- username hdfs_user
- ]
+ conf = config_element(
+ "ROOT", "", {
+ "namenode" => "server.local:14000",
+ "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+ "httpfs" => "yes",
+ "username" => "hdfs_user"
+ })
+ d = create_driver(conf)
+
  assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
  assert_equal 14000, d.instance.instance_eval{ @namenode_port }
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
- assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
  assert_equal true, d.instance.httpfs
  assert_equal 'hdfs_user', d.instance.username
  end
 
  def test_ssl
- d = create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- ssl true
- ssl_ca_file /path/to/ca_file.pem
- ssl_verify_mode peer
- kerberos true
- ]
+ conf = config_element(
+ "ROOT", "", {
+ "namenode" => "server.local:14000",
+ "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+ "ssl" => true,
+ "ssl_ca_file" => "/path/to/ca_file.pem",
+ "ssl_verify_mode" => "peer",
+ "kerberos" => true
+ })
+ d = create_driver(conf)
+
  assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
  assert_equal 14000, d.instance.instance_eval{ @namenode_port }
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
- assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
  assert_equal true, d.instance.ssl
  assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
  assert_equal :peer, d.instance.ssl_verify_mode
  assert_equal true, d.instance.kerberos
  end
 
- data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
- bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
- snappy: ['snappy', Fluent::WebHDFSOutput::SnappyCompressor],
- lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
+ data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
+ bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
+ snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
+ lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
  def test_compress(data)
  compress_type, compressor_class = data
  begin
- d = create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- compress #{compress_type}
- ]
+ conf = config_element(
+ "ROOT", "", {
+ "namenode" => "server.local:14000",
+ "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
+ "compress" => compress_type
+ })
+ d = create_driver(conf)
  rescue Fluent::ConfigError => ex
  omit ex.message
  end
  assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
  assert_equal 14000, d.instance.instance_eval{ @namenode_port }
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
- assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
  assert_equal compress_type, d.instance.compress
  assert_equal compressor_class, d.instance.compressor.class
  end
 
- def test_placeholders
- d = create_driver %[
- hostname testing.node.local
- namenode server.local:50070
- path /hdfs/${hostname}/file.%Y%m%d%H.log
- ]
+ def test_placeholders_old_style
+ conf = config_element(
+ "ROOT", "", {
+ "hostname" => "testing.node.local",
+ "namenode" => "server.local:50070",
+ "path" => "/hdfs/${hostname}/file.%Y%m%d%H.log"
+ })
+ d = create_driver(conf)
  assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
  end
 
- class PathFormatTest < self
- def test_default
- d = create_driver
- assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
- assert_equal '%Y%m%d', d.instance.time_slice_format
- assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
- end
+ data("%Y%m%d" => ["/hdfs/path/file.%Y%m%d.log", "/hdfs/path/file.20120718.log"],
+ "%Y%m%d.%H%M" => ["/hdfs/path/file.%Y%m%d.%H%M.log", "/hdfs/path/file.20120718.1503.log"])
+ test "generate_path" do |(path, expected)|
+ conf = config_element(
+ "ROOT", "", {
+ "namenode" => "server.local:14000",
+ "path" => path
+ })
+ d = create_driver(conf)
+ formatter = Fluent::Timezone.formatter("+0900", path)
+ mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
+ time = event_time("2012-07-18 15:03:00 +0900")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal expected, d.instance.generate_path(chunk)
+ end
 
- def test_time_slice_format
- d = create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- ]
- assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
- assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
- assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+ data(path: { "append" => false },
+ ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
+ compress: { "compress" => "invalid" })
+ test "invalid" do |attr|
+ conf = config_element(
+ "ROOT", "", {
+ "namenode" => "server.local:14000",
+ "path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
+ })
+ conf += config_element("", "", attr)
+ assert_raise Fluent::ConfigError do
+ create_driver(conf)
  end
  end
+ end
 
- class InvalidTest < self
- def test_path
- assert_raise Fluent::ConfigError do
- d = create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- append false
- ]
- end
- end
+ sub_test_case "sub section configuration" do
+ def test_time_key
+ conf = config_element(
+ "ROOT", "", {
+ "host" => "namenode.local",
+ "path" => "/hdfs/path/file.%Y%m%d.log"
+ }, [
+ config_element(
+ "buffer", "time", {
+ "timekey" => 1
+ })
+ ]
+ )
+ d = create_driver(conf)
+ time = event_time("2012-07-18 15:03:00 +0900")
+ metadata = d.instance.metadata("test", time, {})
+ chunk = d.instance.buffer.generate_chunk(metadata)
+ assert_equal 1, d.instance.buffer_config.timekey
+ assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
+ end
+ end
 
- def test_ssl
- assert_raise Fluent::ConfigError do
- create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- ssl true
- ssl_verify_mode invalid
- ]
- end
- end
+ sub_test_case "using format subsection" do
+ test "blank format means default format 'out_file' with UTC timezone" do
+ format_section = config_element("format", "", {}, [])
+ conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+ d = create_driver(conf)
+ time = event_time("2017-01-24 13:10:30 -0700")
+ line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+ assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+ end
 
- def test_invalid_compress
- assert_raise Fluent::ConfigError do
- create_driver %[
- namenode server.local:14000
- path /hdfs/path/file.%Y%m%d.%H%M.log
- compress invalid
- ]
- end
- end
+ test "specifying timezone works well in format section" do
+ format_section = config_element("format", "", {"timezone" => "+0100"}, [])
+ conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+ d = create_driver(conf)
+ time = event_time("2017-01-24 13:10:30 -0700")
+ line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+ assert_equal "2017-01-24T21:10:30+01:00\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+ end
+
+ test "specifying formatter type LTSV for records, without tag and timezone" do
+ format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+ conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
+ d = create_driver(conf)
+ time = event_time("2017-01-24 13:10:30 -0700")
+ line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+ assert_equal "message:yay\tname:tagomoris\n", line
+ end
+
+ test "specifying formatter type LTSV for records, with inject section to insert tag and time" do
+ inject_section = config_element("inject", "", {"tag_key" => "tag", "time_key" => "time", "time_type" => "string", "localtime" => "false"})
+ format_section = config_element("format", "", {"@type" => "ltsv"}, [])
+ conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [inject_section, format_section])
+ d = create_driver(conf)
+ time = event_time("2017-01-24 13:10:30 -0700")
+ line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+ assert_equal "message:yay\tname:tagomoris\ttag:test.now\ttime:2017-01-24T20:10:30Z\n", line
+ end
+ end
+
+ sub_test_case "using older configuration" do
+ test "output_data_type json is same with out_file with UTC timezone" do
+ conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log", "output_data_type" => "json"}, [])
+ d = create_driver(conf)
+ time = event_time("2017-01-24 13:10:30 -0700")
+ line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
+ assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
  end
  end
  end
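
For users, these tests spell out the migration: old flat parameters still work (converted internally, with deprecation warnings), and the "output_data_type json is same with out_file" test asserts the legacy default produces the same lines as the new default <format>. A hedged sketch of the two styles side by side (plain Fluentd config held in Ruby heredocs; the explicit <buffer> block is an assumption for illustration, since the plugin derives timekey from the path automatically):

    # Old flat style: still accepted, converted internally, warns.
    LEGACY_CONF = <<~CONF
      <match hdfs.**>
        @type webhdfs
        host namenode.local
        path /hdfs/path/file.%Y%m%d.log
        output_data_type json
      </match>
    CONF

    # New sectioned style: the default out_file format emits the same
    # time/tag/json lines; timekey would default to 86400 for a daily path.
    SECTIONED_CONF = <<~CONF
      <match hdfs.**>
        @type webhdfs
        host namenode.local
        path /hdfs/path/file.%Y%m%d.log
        <buffer time>
          timekey 1d
        </buffer>
      </match>
    CONF
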
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-webhdfs
  version: !ruby/object:Gem::Version
- version: 0.5.3
+ version: 0.6.0rc1
  platform: ruby
  authors:
  - TAGOMORI Satoshi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-01-23 00:00:00.000000000 Z
+ date: 2017-01-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -39,7 +39,7 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
- name: appraisal
+ name: test-unit-rr
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -53,67 +53,47 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
- name: snappy
+ name: appraisal
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.0.13
+ version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.0.13
- - !ruby/object:Gem::Dependency
- name: fluentd
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 0.10.59
- - - "<"
- - !ruby/object:Gem::Version
- version: 0.14.0
- type: :runtime
- prerelease: false
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- version: 0.10.59
- - - "<"
- - !ruby/object:Gem::Version
- version: 0.14.0
+ version: '0'
  - !ruby/object:Gem::Dependency
- name: fluent-mixin-plaintextformatter
+ name: snappy
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.2.1
- type: :runtime
+ version: 0.0.13
+ type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.2.1
+ version: 0.0.13
  - !ruby/object:Gem::Dependency
- name: fluent-mixin-config-placeholders
+ name: fluentd
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.14.4
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.14.4
  - !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
@@ -183,9 +163,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - ">"
  - !ruby/object:Gem::Version
- version: '0'
+ version: 1.3.1
  requirements: []
  rubyforge_project:
  rubygems_version: 2.6.8