fluent-plugin-webhdfs 0.6.0rc1 → 0.6.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: e92e9863c1ee9908d2ee0076b3651b7198062511
- data.tar.gz: b4cf49b2c59fd07f9cb5a3f7faecf165d60723b7
+ metadata.gz: 3001d6e76014cec6f043369f92db09fb9f6989d4
+ data.tar.gz: e7fdab6516fd7ef1ad2732369b002116e8b65b85
  SHA512:
- metadata.gz: f9c1ea8130b845859ea7c9dd2504f03c176e6162d24636ca4ee27c12d36ac2e32dbcf962aaca77e394e3384d007a5328d974742ae258555130ff8ecf9384b2a6
- data.tar.gz: 7afef45b797ba03cea394895dc3c6dd327b30339f825d3e2291dec5097b126dd803cd476e1b8420183141ff3f268da602623b0945a71c2f5261fb9b8a18062eb
+ metadata.gz: 898a6fa68810d9f94aefcb70e2b0e168b67b56922d2ae5f7757e1a8c9a38603d8ba0dc353ee49ae9247e42196fe3d8c88c31172773ac0df3919428bc25c84a5f
+ data.tar.gz: 37c6d7534dab627dd3a31bd4650f8642c983041cf1f06ff916cbc3107605d6070122ae8f1990cb36dd1f14ba19a3b2e5a65fbc0958553df4225c2228e81c29ca
.travis.yml CHANGED
@@ -2,9 +2,10 @@ sudo: false
  language: ruby

  rvm:
+   - 2.0.0
    - 2.1
    - 2.2
-   - 2.3.1
+   - 2.3.0

  branches:
    only:
@@ -22,4 +23,12 @@ script: bundle exec rake test

  gemfile:
    - Gemfile
+   - gemfiles/fluentd_v0.12.gemfile
    - gemfiles/fluentd_v0.14.gemfile
+
+ matrix:
+   exclude:
+     - rvm: 2.0.0
+       gemfile: Gemfile
+     - rvm: 2.0.0
+       gemfile: gemfiles/fluentd_v0.14.gemfile
fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@

  Gem::Specification.new do |gem|
    gem.name = "fluent-plugin-webhdfs"
-   gem.version = "0.6.0rc1"
+   gem.version = "0.6.0"
    gem.authors = ["TAGOMORI Satoshi"]
    gem.email = ["tagomoris@gmail.com"]
    gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -17,10 +17,10 @@ Gem::Specification.new do |gem|

    gem.add_development_dependency "rake"
    gem.add_development_dependency "test-unit"
-   gem.add_development_dependency "test-unit-rr"
    gem.add_development_dependency "appraisal"
    gem.add_development_dependency "snappy", '>= 0.0.13'
-   gem.add_runtime_dependency "fluentd", '>= 0.14.4'
+   gem.add_runtime_dependency "fluentd", ['>= 0.10.59', "< 0.14.0"]
+   gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
    gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
    gem.add_runtime_dependency "bzip2-ffi"
  end
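The dependency change above is the heart of this release: it re-pins the plugin to the fluentd v0.10/v0.12 API line and re-adds fluent-mixin-plaintextformatter for output formatting. A minimal sketch of an application Gemfile that satisfies these constraints (the exact fluentd version is whatever Bundler resolves inside the range; a 0.12.x release is an assumption):

    # Gemfile (sketch): Bundler must pick a fluentd older than 0.14,
    # e.g. a 0.12.x release, to satisfy the gemspec range above
    source "https://rubygems.org"
    gem "fluentd", ">= 0.10.59", "< 0.14.0"
    gem "fluent-plugin-webhdfs", "0.6.0"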
lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -1,138 +1,129 @@
  # -*- coding: utf-8 -*-

- require 'fluent/plugin/output'
- require 'fluent/config/element'
-
- require 'webhdfs'
  require 'tempfile'
  require 'securerandom'
+ require 'fluent/mixin/plaintextformatter'

- class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
+ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
    Fluent::Plugin.register_output('webhdfs', self)

-   helpers :inject, :formatter, :compat_parameters
+   config_set_default :buffer_type, 'memory'
+   config_set_default :time_slice_format, '%Y%m%d'
+
+   # For fluentd v0.12.16 or earlier
+   class << self
+     unless method_defined?(:desc)
+       def desc(description)
+       end
+     end
+   end

    desc 'WebHDFS/HttpFs host'
-   config_param :host, :string, default: nil
+   config_param :host, :string, :default => nil
    desc 'WebHDFS/HttpFs port'
-   config_param :port, :integer, default: 50070
+   config_param :port, :integer, :default => 50070
    desc 'Namenode (host:port)'
-   config_param :namenode, :string, default: nil # host:port
+   config_param :namenode, :string, :default => nil # host:port
    desc 'Standby namenode for Namenode HA (host:port)'
-   config_param :standby_namenode, :string, default: nil # host:port
+   config_param :standby_namenode, :string, :default => nil # host:port

    desc 'Ignore errors on start up'
-   config_param :ignore_start_check_error, :bool, default: false
+   config_param :ignore_start_check_error, :bool, :default => false

    desc 'Output file path on HDFS'
    config_param :path, :string
    desc 'User name for pseudo authentication'
-   config_param :username, :string, default: nil
+   config_param :username, :string, :default => nil

    desc 'Store data over HttpFs instead of WebHDFS'
-   config_param :httpfs, :bool, default: false
+   config_param :httpfs, :bool, :default => false

    desc 'Number of seconds to wait for the connection to open'
-   config_param :open_timeout, :integer, default: 30 # from ruby net/http default
+   config_param :open_timeout, :integer, :default => 30 # from ruby net/http default
    desc 'Number of seconds to wait for one block to be read'
-   config_param :read_timeout, :integer, default: 60 # from ruby net/http default
+   config_param :read_timeout, :integer, :default => 60 # from ruby net/http default

    desc 'Retry automatically when known HDFS errors occur'
-   config_param :retry_known_errors, :bool, default: false
+   config_param :retry_known_errors, :bool, :default => false
    desc 'Retry interval'
-   config_param :retry_interval, :integer, default: nil
+   config_param :retry_interval, :integer, :default => nil
    desc 'The number of retries'
-   config_param :retry_times, :integer, default: nil
+   config_param :retry_times, :integer, :default => nil

    # how many write failures before switching to the standby namenode;
    # by default it's 11 failures, which costs 1023 seconds inside fluentd
    # (exponential backoff: 1+2+4+...+512), considered long enough to exclude
    # failures caused by a temporary network problem or a single datanode failure
    desc 'How many times of write failure before switch to standby namenode'
-   config_param :failures_before_use_standby, :integer, default: 11
+   config_param :failures_before_use_standby, :integer, :default => 11
+
+   include Fluent::Mixin::PlainTextFormatter

-   config_param :end_with_newline, :bool, default: true
+   config_param :default_tag, :string, :default => 'tag_missing'

    desc 'Append data or not'
-   config_param :append, :bool, default: true
+   config_param :append, :bool, :default => true

    desc 'Use SSL or not'
-   config_param :ssl, :bool, default: false
+   config_param :ssl, :bool, :default => false
    desc 'OpenSSL certificate authority file'
-   config_param :ssl_ca_file, :string, default: nil
+   config_param :ssl_ca_file, :string, :default => nil
    desc 'OpenSSL verify mode (none,peer)'
-   config_param :ssl_verify_mode, :enum, list: [:none, :peer], default: :none
+   config_param :ssl_verify_mode, :default => nil do |val|
+     case val
+     when 'none'
+       :none
+     when 'peer'
+       :peer
+     else
+       raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
+     end
+   end

    desc 'Use kerberos authentication or not'
-   config_param :kerberos, :bool, default: false
+   config_param :kerberos, :bool, :default => false

-   SUPPORTED_COMPRESS = [:gzip, :bzip2, :snappy, :lzo_command, :text]
+   SUPPORTED_COMPRESS = ['gzip', 'bzip2', 'snappy', 'lzo_command', 'text']
    desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
-   config_param :compress, :enum, list: SUPPORTED_COMPRESS, default: :text
-
-   config_param :remove_prefix, :string, default: nil, deprecated: "use @label for routing"
-   config_param :default_tag, :string, default: nil, deprecated: "use @label for routing"
-   config_param :null_value, :string, default: nil, deprecated: "use filter plugins to convert null values into any specified string"
-   config_param :suppress_log_broken_string, :bool, default: false, deprecated: "use @log_level for plugin to suppress such info logs"
-
-   CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
-
-   config_section :buffer do
-     config_set_default :chunk_keys, ["time"]
+   config_param :compress, :default => nil do |val|
+     unless SUPPORTED_COMPRESS.include? val
+       raise Fluent::ConfigError, "unsupported compress: #{val}"
+     end
+     val
    end

-   config_section :format do
-     config_set_default :@type, 'out_file'
-     config_set_default :localtime, false # default timezone is UTC
-   end
+   CHUNK_ID_PLACE_HOLDER = '${chunk_id}'

-   attr_reader :formatter, :compressor
+   attr_reader :compressor

    def initialize
      super
+     require 'net/http'
+     require 'time'
+     require 'webhdfs'
+
      @compressor = nil
-     @standby_namenode_host = nil
-     @output_include_tag = @output_include_time = nil # TODO: deprecated
-     @header_separator = @field_separator = nil # TODO: deprecated
    end

-   def configure(conf)
-     compat_parameters_convert(conf, :buffer, default_chunk_key: "time")
+   # Define `log` method for v0.10.42 or earlier
+   unless method_defined?(:log)
+     define_method("log") { $log }
+   end

-     timekey = case conf["path"]
-               when /%S/ then 1
-               when /%M/ then 60
-               when /%H/ then 3600
-               else 86400
-               end
-     if conf.elements(name: "buffer").empty?
-       e = Fluent::Config::Element.new("buffer", "time", {}, [])
-       conf.elements << e
+   def configure(conf)
+     if conf['path']
+       if conf['path'].index('%S')
+         conf['time_slice_format'] = '%Y%m%d%H%M%S'
+       elsif conf['path'].index('%M')
+         conf['time_slice_format'] = '%Y%m%d%H%M'
+       elsif conf['path'].index('%H')
+         conf['time_slice_format'] = '%Y%m%d%H'
+       end
      end
-     buffer_config = conf.elements(name: "buffer").first
-     buffer_config["timekey"] = timekey unless buffer_config["timekey"]

-     compat_parameters_convert_plaintextformatter(conf)
+     verify_config_placeholders_in_path!(conf)

      super

-     @formatter = formatter_create
-
-     if @using_formatter_config
-       @null_value = nil
-     else
-       @formatter.delimiter = "\x01" if @formatter.respond_to?(:delimiter) && @formatter.delimiter == 'SOH'
-       @null_value ||= 'NULL'
-     end
-
-     if @default_tag.nil? && !@using_formatter_config && @output_include_tag
-       @default_tag = "tag_missing"
-     end
-     if @remove_prefix
-       @remove_prefix_actual = @remove_prefix + "."
-       @remove_prefix_actual_length = @remove_prefix_actual.length
-     end
-
-     verify_config_placeholders_in_path!(conf)
      @replace_random_uuid = @path.include?('%{uuid}') || @path.include?('%{uuid_flush}')
      if @replace_random_uuid
        # to check SecureRandom.uuid is available or not (NotImplementedError raised in such environment)
@@ -143,7 +134,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
        end
      end

-     @compressor = COMPRESSOR_REGISTRY.lookup(@compress.to_s).new
+     begin
+       @compressor = COMPRESSOR_REGISTRY.lookup(@compress || 'text').new
+     rescue Fluent::ConfigError
+       raise
+     rescue
+       $log.warn "#{@compress} not found. Use 'text' instead"
+       @compressor = COMPRESSOR_REGISTRY.lookup('text').new
+     end

      if @host
        @namenode_host = @host
@@ -178,7 +176,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
        @client_standby = nil
      end

-     unless @append
+     if not @append
        if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
          raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
        end
@@ -241,6 +239,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
      end
    end

+   def shutdown
+     super
+   end
+
+   def path_format(chunk_key)
+     Time.strptime(chunk_key, @time_slice_format).strftime(@path)
+   end
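The path_format helper just added is the v0.12-style replacement for extract_placeholders: it parses the chunk's time-slice key back into a Time and expands the % directives in the configured path. A worked example, runnable in irb (values borrowed from the tests near the end of this diff; Time.strptime comes from the stdlib 'time' library required in initialize):

    require 'time'
    # '%Y%m%d%H%M' is the time_slice_format configure derives from a path containing %M
    Time.strptime('201207181503', '%Y%m%d%H%M').strftime('/hdfs/path/file.%Y%m%d.%H%M.log')
    # => "/hdfs/path/file.20120718.1503.log"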
+
    def is_standby_exception(e)
      e.is_a?(WebHDFS::IOError) && e.message.match(/org\.apache\.hadoop\.ipc\.StandbyException/)
    end
@@ -252,6 +258,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
      end
    end

+   def chunk_unique_id_to_str(unique_id)
+     unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
+   end
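chunk_unique_id_to_str above hex-encodes a chunk's binary unique_id, two lowercase digits per byte, for substitution into the ${chunk_id} placeholder. The same expression applied to a literal byte string, as a quick sketch:

    "\x01\x0a\xff".unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
    # => "010aff"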
+
+   # TODO check conflictions
+
    def send_data(path, data)
      if @append
        begin
@@ -266,7 +278,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output

    HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
    UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
-   UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
+   UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']

    def verify_config_placeholders_in_path!(conf)
      return unless conf.has_key?('path')
@@ -295,20 +307,20 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
            log.error "configuration placeholder #{ph} is now unsupported by webhdfs output plugin."
          end
        end
-       raise ConfigError, "there are unsupported placeholders in path."
+       raise Fluent::ConfigError, "there are unsupported placeholders in path."
      end
    end

    def generate_path(chunk)
      hdfs_path = if @append
-                   extract_placeholders(@path, chunk.metadata)
+                   path_format(chunk.key)
                  else
-                   extract_placeholders(@path, chunk.metadata).gsub(CHUNK_ID_PLACE_HOLDER, dump_unique_id(chunk.unique_id))
+                   path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
                  end
      hdfs_path = "#{hdfs_path}#{@compressor.ext}"
      if @replace_random_uuid
        uuid_random = SecureRandom.uuid
-       hdfs_path.gsub!('%{uuid}', uuid_random).gsub!('%{uuid_flush}', uuid_random)
+       hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
      end
      hdfs_path
    end
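One behavioral fix hides in generate_path: String#gsub! returns nil when no substitution occurs, so the removed chained call raised NoMethodError whenever the path contained only one of the two uuid placeholders; the non-destructive gsub chain always returns a String. A minimal sketch of the failure mode (path value hypothetical):

    path = '/hdfs/%{uuid_flush}/file.log'
    path.gsub!('%{uuid}', 'x')  # => nil (nothing matched), so chaining .gsub!(...) here raises NoMethodError
    path.gsub('%{uuid}', 'x').gsub('%{uuid_flush}', 'x')  # safe: each gsub returns a String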
@@ -324,48 +336,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
      end
    end

-   def format(tag, time, record)
-     if @remove_prefix # TODO: remove when it's obsoleted
-       if tag.start_with?(@remove_prefix_actual)
-         if tag.length > @remove_prefix_actual_length
-           tag = tag[@remove_prefix_actual_length..-1]
-         else
-           tag = @default_tag
-         end
-       elsif tag.start_with?(@remove_prefix)
-         if tag == @remove_prefix
-           tag = @default_tag
-         else
-           tag = tag.sub(@remove_prefix, '')
-         end
-       end
-     end
-
-     if @null_value # TODO: remove when it's obsoleted
-       check_keys = (record.keys + @null_convert_keys).uniq
-       check_keys.each do |key|
-         record[key] = @null_value if record[key].nil?
-       end
-     end
-
-     if @using_formatter_config
-       record = inject_values_to_record(tag, time, record)
-       line = @formatter.format(tag, time, record)
-     else # TODO: remove when it's obsoleted
-       time_str = @output_include_time ? @time_formatter.call(time) + @header_separator : ''
-       tag_str = @output_include_tag ? tag + @header_separator : ''
-       record_str = @formatter.format(tag, time, record)
-       line = time_str + tag_str + record_str
-     end
-     line << "\n" if @end_with_newline && !line.end_with?("\n")
-     line
-   rescue => e # remove this clause when @suppress_log_broken_string is obsoleted
-     unless @suppress_log_broken_string
-       log.info "unexpected error while formatting events, ignored", tag: tag, record: record, error: e
-     end
-     ''
-   end
-
    def write(chunk)
      hdfs_path = generate_path(chunk)

@@ -396,72 +366,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
      hdfs_path
    end

-   def compat_parameters_convert_plaintextformatter(conf)
-     if !conf.elements('format').empty? || !conf['output_data_type']
-       @using_formatter_config = true
-       @null_convert_keys = []
-       return
-     end
-
-     log.warn "webhdfs output plugin is working with old configuration parameters. use <inject>/<format> sections instead for further releases."
-     @using_formatter_config = false
-     @null_convert_keys = []
-
-     @header_separator = case conf['field_separator']
-                         when nil then "\t"
-                         when 'SPACE' then ' '
-                         when 'TAB' then "\t"
-                         when 'COMMA' then ','
-                         when 'SOH' then "\x01"
-                         else conf['field_separator']
-                         end
-
-     format_section = Fluent::Config::Element.new('format', '', {}, [])
-     case conf['output_data_type']
-     when '', 'json' # blank value is for compatibility reason (especially in testing)
-       format_section['@type'] = 'json'
-     when 'ltsv'
-       format_section['@type'] = 'ltsv'
-     else
-       unless conf['output_data_type'].start_with?('attr:')
-         raise Fluent::ConfigError, "output_data_type is invalid: #{conf['output_data_type']}"
-       end
-       format_section['@format'] = 'tsv'
-       keys_part = conf['output_data_type'].sub(/^attr:/, '')
-       @null_convert_keys = keys_part.split(',')
-       format_section['keys'] = keys_part
-       format_section['delimiter'] = case conf['field_separator']
-                                     when nil then '\t'
-                                     when 'SPACE' then ' '
-                                     when 'TAB' then '\t'
-                                     when 'COMMA' then ','
-                                     when 'SOH' then 'SOH' # fixed later
-                                     else conf['field_separator']
-                                     end
-     end
-
-     conf.elements << format_section
-
-     @output_include_time = conf.has_key?('output_include_time') ? Fluent::Config.bool_value(conf['output_include_time']) : true
-     @output_include_tag = conf.has_key?('output_include_tag') ? Fluent::Config.bool_value(conf['output_include_tag']) : true
-
-     if @output_include_time
-       # default timezone is UTC
-       using_localtime = if !conf.has_key?('utc') && !conf.has_key?('localtime')
-                           false
-                         elsif conf.has_key?('localtime') && conf.has_key?('utc')
-                           raise Fluent::ConfigError, "specify either 'localtime' or 'utc'"
-                         elsif conf.has_key?('localtime')
-                           Fluent::Config.bool_value('localtime')
-                         else
-                           Fluent::Config.bool_value('utc')
-                         end
-       @time_formatter = Fluent::TimeFormatter.new(conf['time_format'], using_localtime)
-     else
-       @time_formatter = nil
-     end
-   end
-
    class Compressor
      include Fluent::Configurable

@@ -488,7 +392,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
        begin
          Open3.capture3("#{command} -V")
        rescue Errno::ENOENT
-         raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+         raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
        end
      end
    end
lib/fluent/plugin/webhdfs_compressor_bzip2.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent::Plugin
-   class WebHDFSOutput < Output
+ module Fluent
+   class WebHDFSOutput < Fluent::TimeSlicedOutput
      class Bzip2Compressor < Compressor
        WebHDFSOutput.register_compressor('bzip2', self)

lib/fluent/plugin/webhdfs_compressor_gzip.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent::Plugin
-   class WebHDFSOutput < Output
+ module Fluent
+   class WebHDFSOutput < Fluent::TimeSlicedOutput
      class GzipCompressor < Compressor
        WebHDFSOutput.register_compressor('gzip', self)

lib/fluent/plugin/webhdfs_compressor_lzo_command.rb CHANGED
@@ -1,9 +1,9 @@
- module Fluent::Plugin
-   class WebHDFSOutput < Output
+ module Fluent
+   class WebHDFSOutput
      class LZOCommandCompressor < Compressor
        WebHDFSOutput.register_compressor('lzo_command', self)

-       config_param :command_parameter, :string, default: '-qf1'
+       config_param :command_parameter, :string, :default => '-qf1'

        def configure(conf)
          super
lib/fluent/plugin/webhdfs_compressor_snappy.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent::Plugin
-   class WebHDFSOutput < Output
+ module Fluent
+   class WebHDFSOutput < Fluent::TimeSlicedOutput
      class SnappyCompressor < Compressor
        WebHDFSOutput.register_compressor('snappy', self)

lib/fluent/plugin/webhdfs_compressor_text.rb CHANGED
@@ -1,5 +1,5 @@
- module Fluent::Plugin
-   class WebHDFSOutput < Output
+ module Fluent
+   class WebHDFSOutput < Fluent::TimeSlicedOutput
      class TextCompressor < Compressor
        WebHDFSOutput.register_compressor('text', self)

test/helper.rb CHANGED
@@ -8,13 +8,10 @@ rescue Bundler::BundlerError => e
    exit e.status_code
  end
  require 'test/unit'
- require 'test/unit/rr'

  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))
  require 'fluent/test'
- require 'fluent/test/helpers'
- require 'fluent/test/driver/output'
  unless ENV.has_key?('VERBOSE')
    nulllogger = Object.new
    nulllogger.instance_eval {|obj|
@@ -25,8 +22,6 @@ unless ENV.has_key?('VERBOSE')
    $log = nulllogger
  end

- include Fluent::Test::Helpers
-
  require 'fluent/plugin/out_webhdfs'

  class Test::Unit::TestCase
test/plugin/test_compressor.rb CHANGED
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
    def setup
      omit unless Object.const_defined?(:Snappy)
      Fluent::Test.setup
-     @compressor = Fluent::Plugin::WebHDFSOutput::SnappyCompressor.new
+     @compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
    end

-   def create_driver(conf = CONFIG)
-     Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
+   def create_driver(conf=CONFIG,tag='test')
+     Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
    end

    def test_ext
test/plugin/test_out_webhdfs.rb CHANGED
@@ -1,243 +1,166 @@
  require 'helper'

  class WebHDFSOutputTest < Test::Unit::TestCase
-   CONFIG_DEFAULT = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d.log"})
-
-   CONFIG_COMPAT = config_element(
-     "ROOT", "", {
-       "output_data_type" => "",
-       "host" => "namenode.local",
-       "path" => "/hdfs/path/file.%Y%m%d.log"
-     })
-
+   CONFIG = %[
+     host namenode.local
+     path /hdfs/path/file.%Y%m%d.log
+   ]
    def setup
      Fluent::Test.setup
    end

-   def create_driver(conf)
-     Fluent::Test::Driver::Output.new(Fluent::Plugin::WebHDFSOutput).configure(conf)
+   def create_driver(conf=CONFIG,tag='test')
+     Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
    end

-   sub_test_case "default configuration" do
-     test 'configured with standard out_file format with specified hdfs info' do
-       d = create_driver(CONFIG_DEFAULT)
-       assert_true d.instance.instance_eval{ @using_formatter_config }
-
+   class ConfigureTest < self
+     def test_default
+       d = create_driver
        assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
        assert_equal 50070, d.instance.instance_eval{ @namenode_port }
        assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+       assert_equal '%Y%m%d', d.instance.time_slice_format
        assert_equal false, d.instance.httpfs
        assert_nil d.instance.username
        assert_equal false, d.instance.ignore_start_check_error

-       assert_equal 'Fluent::Plugin::OutFileFormatter', d.instance.formatter.class.to_s
-       assert_equal true, d.instance.end_with_newline
-
-       # deprecated params
-       assert_nil d.instance.instance_eval{ @output_include_time }
-       assert_nil d.instance.instance_eval{ @output_include_tag }
+       assert_equal true, d.instance.output_include_time
+       assert_equal true, d.instance.output_include_tag
+       assert_equal 'json', d.instance.output_data_type
        assert_nil d.instance.remove_prefix
-       assert_nil d.instance.instance_eval{ @header_separator }
-       assert_nil d.instance.default_tag
-     end
-   end
-
-   sub_test_case "flat configuration" do
-     def test_default_for_traditional_config
-       d = create_driver(CONFIG_COMPAT)
-       assert_false d.instance.instance_eval{ @using_formatter_config }
-
-       assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
-       assert_equal 50070, d.instance.instance_eval{ @namenode_port }
-       assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
-       assert_equal false, d.instance.httpfs
-       assert_nil d.instance.username
-       assert_equal false, d.instance.ignore_start_check_error
-
-       assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
-       assert_equal true, d.instance.end_with_newline
-
-       assert_equal true, d.instance.instance_eval{ @output_include_time }
-       assert_equal true, d.instance.instance_eval{ @output_include_tag }
-       assert_nil d.instance.instance_eval{ @remove_prefix }
-       assert_equal "\t", d.instance.instance_eval{ @header_separator }
-       assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
+       assert_equal 'TAB', d.instance.field_separator
+       assert_equal true, d.instance.add_newline
+       assert_equal 'tag_missing', d.instance.default_tag
      end

     def test_httpfs
-       conf = config_element(
-         "ROOT", "", {
-           "namenode" => "server.local:14000",
-           "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
-           "httpfs" => "yes",
-           "username" => "hdfs_user"
-         })
-       d = create_driver(conf)
-
+       d = create_driver %[
+         namenode server.local:14000
+         path /hdfs/path/file.%Y%m%d.%H%M.log
+         httpfs yes
+         username hdfs_user
+       ]
        assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
        assert_equal 14000, d.instance.instance_eval{ @namenode_port }
        assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+       assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
        assert_equal true, d.instance.httpfs
        assert_equal 'hdfs_user', d.instance.username
      end

      def test_ssl
-       conf = config_element(
-         "ROOT", "", {
-           "namenode" => "server.local:14000",
-           "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
-           "ssl" => true,
-           "ssl_ca_file" => "/path/to/ca_file.pem",
-           "ssl_verify_mode" => "peer",
-           "kerberos" => true
-         })
-       d = create_driver(conf)
-
+       d = create_driver %[
+         namenode server.local:14000
+         path /hdfs/path/file.%Y%m%d.%H%M.log
+         ssl true
+         ssl_ca_file /path/to/ca_file.pem
+         ssl_verify_mode peer
+         kerberos true
+       ]
        assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
        assert_equal 14000, d.instance.instance_eval{ @namenode_port }
        assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+       assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
        assert_equal true, d.instance.ssl
        assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
        assert_equal :peer, d.instance.ssl_verify_mode
        assert_equal true, d.instance.kerberos
      end

-     data(gzip: [:gzip, Fluent::Plugin::WebHDFSOutput::GzipCompressor],
-          bzip2: [:bzip2, Fluent::Plugin::WebHDFSOutput::Bzip2Compressor],
-          snappy: [:snappy, Fluent::Plugin::WebHDFSOutput::SnappyCompressor],
-          lzo: [:lzo_command, Fluent::Plugin::WebHDFSOutput::LZOCommandCompressor])
+     data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
+          bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
+          snappy: ['snappy', Fluent::WebHDFSOutput::SnappyCompressor],
+          lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
      def test_compress(data)
        compress_type, compressor_class = data
        begin
-         conf = config_element(
-           "ROOT", "", {
-             "namenode" => "server.local:14000",
-             "path" => "/hdfs/path/file.%Y%m%d.%H%M.log",
-             "compress" => compress_type
-           })
-         d = create_driver(conf)
+         d = create_driver %[
+           namenode server.local:14000
+           path /hdfs/path/file.%Y%m%d.%H%M.log
+           compress #{compress_type}
+         ]
        rescue Fluent::ConfigError => ex
          omit ex.message
        end
        assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
        assert_equal 14000, d.instance.instance_eval{ @namenode_port }
        assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+       assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
        assert_equal compress_type, d.instance.compress
        assert_equal compressor_class, d.instance.compressor.class
      end

-     def test_placeholders_old_style
-       conf = config_element(
-         "ROOT", "", {
-           "hostname" => "testing.node.local",
-           "namenode" => "server.local:50070",
-           "path" => "/hdfs/${hostname}/file.%Y%m%d%H.log"
-         })
-       d = create_driver(conf)
+     def test_placeholders
+       d = create_driver %[
+         hostname testing.node.local
+         namenode server.local:50070
+         path /hdfs/${hostname}/file.%Y%m%d%H.log
+       ]
        assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
      end

-     data("%Y%m%d" => ["/hdfs/path/file.%Y%m%d.log", "/hdfs/path/file.20120718.log"],
-          "%Y%m%d.%H%M" => ["/hdfs/path/file.%Y%m%d.%H%M.log", "/hdfs/path/file.20120718.1503.log"])
-     test "generate_path" do |(path, expected)|
-       conf = config_element(
-         "ROOT", "", {
-           "namenode" => "server.local:14000",
-           "path" => path
-         })
-       d = create_driver(conf)
-       formatter = Fluent::Timezone.formatter("+0900", path)
-       mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
-       time = event_time("2012-07-18 15:03:00 +0900")
-       metadata = d.instance.metadata("test", time, {})
-       chunk = d.instance.buffer.generate_chunk(metadata)
-       assert_equal expected, d.instance.generate_path(chunk)
-     end
-
-     data(path: { "append" => false },
-          ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
-          compress: { "compress" => "invalid" })
-     test "invalid" do |attr|
-       conf = config_element(
-         "ROOT", "", {
-           "namenode" => "server.local:14000",
-           "path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
-         })
-       conf += config_element("", "", attr)
-       assert_raise Fluent::ConfigError do
-         create_driver(conf)
+     class PathFormatTest < self
+       def test_default
+         d = create_driver
+         assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+         assert_equal '%Y%m%d', d.instance.time_slice_format
+         assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
        end
-     end
-   end
-
-   sub_test_case "sub section configuration" do
-     def test_time_key
-       conf = config_element(
-         "ROOT", "", {
-           "host" => "namenode.local",
-           "path" => "/hdfs/path/file.%Y%m%d.log"
-         }, [
-           config_element(
-             "buffer", "time", {
-               "timekey" => 1
-             })
-         ]
-       )
-       d = create_driver(conf)
-       time = event_time("2012-07-18 15:03:00 +0900")
-       metadata = d.instance.metadata("test", time, {})
-       chunk = d.instance.buffer.generate_chunk(metadata)
-       assert_equal 1, d.instance.buffer_config.timekey
-       assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
-     end
-   end

-   sub_test_case "using format subsection" do
-     test "blank format means default format 'out_file' with UTC timezone" do
-       format_section = config_element("format", "", {}, [])
-       conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
-       d = create_driver(conf)
-       time = event_time("2017-01-24 13:10:30 -0700")
-       line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
-       assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+       def test_time_slice_format
+         d = create_driver %[
+           namenode server.local:14000
+           path /hdfs/path/file.%Y%m%d.%H%M.log
+         ]
+         assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+         assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
+         assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+       end
      end

-     test "specifying timezone works well in format section" do
-       format_section = config_element("format", "", {"timezone" => "+0100"}, [])
-       conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
-       d = create_driver(conf)
-       time = event_time("2017-01-24 13:10:30 -0700")
-       line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
-       assert_equal "2017-01-24T21:10:30+01:00\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
-     end
+     class InvalidTest < self
+       def test_path
+         assert_raise Fluent::ConfigError do
+           d = create_driver %[
+             namenode server.local:14000
+             path /hdfs/path/file.%Y%m%d.%H%M.log
+             append false
+           ]
+         end
+       end

-     test "specifying formatter type LTSV for records, without tag and timezone" do
-       format_section = config_element("format", "", {"@type" => "ltsv"}, [])
-       conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [format_section])
-       d = create_driver(conf)
-       time = event_time("2017-01-24 13:10:30 -0700")
-       line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
-       assert_equal "message:yay\tname:tagomoris\n", line
-     end
+       def test_ssl
+         assert_raise Fluent::ConfigError do
+           create_driver %[
+             namenode server.local:14000
+             path /hdfs/path/file.%Y%m%d.%H%M.log
+             ssl true
+             ssl_verify_mode invalid
+           ]
+         end
+       end

-     test "specifying formatter type LTSV for records, with inject section to insert tag and time" do
-       inject_section = config_element("inject", "", {"tag_key" => "tag", "time_key" => "time", "time_type" => "string", "localtime" => "false"})
-       format_section = config_element("format", "", {"@type" => "ltsv"}, [])
-       conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log"}, [inject_section, format_section])
-       d = create_driver(conf)
-       time = event_time("2017-01-24 13:10:30 -0700")
-       line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
-       assert_equal "message:yay\tname:tagomoris\ttag:test.now\ttime:2017-01-24T20:10:30Z\n", line
-     end
-   end
+       def test_invalid_compress
+         assert_raise Fluent::ConfigError do
+           create_driver %[
+             namenode server.local:14000
+             path /hdfs/path/file.%Y%m%d.%H%M.log
+             compress invalid
+           ]
+         end
+       end

-   sub_test_case "using older configuration" do
-     test "output_data_type json is same with out_file with UTC timezone" do
-       conf = config_element("match", "", {"host" => "namenode.local", "path" => "/hdfs/path/file.%Y%m%d%H.log", "output_data_type" => "json"}, [])
-       d = create_driver(conf)
-       time = event_time("2017-01-24 13:10:30 -0700")
-       line = d.instance.format("test.now", time, {"message" => "yay", "name" => "tagomoris"})
-       assert_equal "2017-01-24T20:10:30Z\ttest.now\t{\"message\":\"yay\",\"name\":\"tagomoris\"}\n", line
+       data("%{uuid:hostname}" => "%{uuid:hostname}",
+            "%{uuid:timestamp}" => "%{uuid:timestamp}")
+       def test_obsoleted_placeholders(placeholder)
+         assert_raise Fluent::ConfigError do
+           create_driver %[
+             namenode server.local:14000
+             path /hdfs/path/#{placeholder}/file.%Y%m%d.%H%M.log
+             append false
+           ]
+         end
+       end
      end
    end
  end
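For orientation, a minimal match section wiring up the options these tests exercise (a sketch assembled from the config_params and test fixtures above, not taken from the gem's README; host and path values are the fixtures' own):

    <match hdfs.**>
      type webhdfs                          # v0.10/v0.12-style plugin type key
      host namenode.local
      port 50070
      path /hdfs/path/file.%Y%m%d.%H%M.log  # %M makes configure pick time_slice_format %Y%m%d%H%M
      compress gzip                         # one of gzip, bzip2, snappy, lzo_command, text
    </match>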
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-webhdfs
  version: !ruby/object:Gem::Version
- version: 0.6.0rc1
+ version: 0.6.0
  platform: ruby
  authors:
  - TAGOMORI Satoshi
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2017-01-24 00:00:00.000000000 Z
+ date: 2017-04-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rake
@@ -39,7 +39,7 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
- name: test-unit-rr
+ name: appraisal
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
@@ -53,47 +53,53 @@ dependencies:
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
- name: appraisal
+ name: snappy
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.0.13
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: '0'
+ version: 0.0.13
  - !ruby/object:Gem::Dependency
- name: snappy
+ name: fluentd
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.0.13
- type: :development
+ version: 0.10.59
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 0.14.0
+ type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.0.13
+ version: 0.10.59
+ - - "<"
+ - !ruby/object:Gem::Version
+ version: 0.14.0
  - !ruby/object:Gem::Dependency
- name: fluentd
+ name: fluent-mixin-plaintextformatter
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.2.1
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.14.4
+ version: 0.2.1
  - !ruby/object:Gem::Dependency
  name: webhdfs
  requirement: !ruby/object:Gem::Requirement
@@ -163,12 +169,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 1.3.1
+ version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.6.8
+ rubygems_version: 2.6.11
  signing_key:
  specification_version: 4
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting