fluent-plugin-webhdfs 0.6.0rc1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +10 -1
- data/fluent-plugin-webhdfs.gemspec +3 -3
- data/lib/fluent/plugin/out_webhdfs.rb +97 -193
- data/lib/fluent/plugin/webhdfs_compressor_bzip2.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_gzip.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +3 -3
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_text.rb +2 -2
- data/test/helper.rb +0 -5
- data/test/plugin/test_compressor.rb +3 -3
- data/test/plugin/test_out_webhdfs.rb +103 -180
- metadata +22 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3001d6e76014cec6f043369f92db09fb9f6989d4
|
4
|
+
data.tar.gz: e7fdab6516fd7ef1ad2732369b002116e8b65b85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 898a6fa68810d9f94aefcb70e2b0e168b67b56922d2ae5f7757e1a8c9a38603d8ba0dc353ee49ae9247e42196fe3d8c88c31172773ac0df3919428bc25c84a5f
|
7
|
+
data.tar.gz: 37c6d7534dab627dd3a31bd4650f8642c983041cf1f06ff916cbc3107605d6070122ae8f1990cb36dd1f14ba19a3b2e5a65fbc0958553df4225c2228e81c29ca
|
data/.travis.yml
CHANGED
@@ -2,9 +2,10 @@ sudo: false
|
|
2
2
|
language: ruby
|
3
3
|
|
4
4
|
rvm:
|
5
|
+
- 2.0.0
|
5
6
|
- 2.1
|
6
7
|
- 2.2
|
7
|
-
- 2.3.
|
8
|
+
- 2.3.0
|
8
9
|
|
9
10
|
branches:
|
10
11
|
only:
|
@@ -22,4 +23,12 @@ script: bundle exec rake test
|
|
22
23
|
|
23
24
|
gemfile:
|
24
25
|
- Gemfile
|
26
|
+
- gemfiles/fluentd_v0.12.gemfile
|
25
27
|
- gemfiles/fluentd_v0.14.gemfile
|
28
|
+
|
29
|
+
matrix:
|
30
|
+
exclude:
|
31
|
+
- rvm: 2.0.0
|
32
|
+
gemfile: Gemfile
|
33
|
+
- rvm: 2.0.0
|
34
|
+
gemfile: gemfiles/fluentd_v0.14.gemfile
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "0.6.0rc1"
|
5
|
+
gem.version = "0.6.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -17,10 +17,10 @@ Gem::Specification.new do |gem|
|
|
17
17
|
|
18
18
|
gem.add_development_dependency "rake"
|
19
19
|
gem.add_development_dependency "test-unit"
|
20
|
-
gem.add_development_dependency "test-unit-rr"
|
21
20
|
gem.add_development_dependency "appraisal"
|
22
21
|
gem.add_development_dependency "snappy", '>= 0.0.13'
|
23
|
-
gem.add_runtime_dependency "fluentd", '>= 0.14.
|
22
|
+
gem.add_runtime_dependency "fluentd", ['>= 0.10.59', "< 0.14.0"]
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
|
24
24
|
gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
|
25
25
|
gem.add_runtime_dependency "bzip2-ffi"
|
26
26
|
end
|
@@ -1,138 +1,129 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
|
-
require 'fluent/plugin/output'
|
4
|
-
require 'fluent/config/element'
|
5
|
-
|
6
|
-
require 'webhdfs'
|
7
3
|
require 'tempfile'
|
8
4
|
require 'securerandom'
|
5
|
+
require 'fluent/mixin/plaintextformatter'
|
9
6
|
|
10
|
-
class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
7
|
+
class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
11
8
|
Fluent::Plugin.register_output('webhdfs', self)
|
12
9
|
|
13
|
-
|
10
|
+
config_set_default :buffer_type, 'memory'
|
11
|
+
config_set_default :time_slice_format, '%Y%m%d'
|
12
|
+
|
13
|
+
# For fluentd v0.12.16 or earlier
|
14
|
+
class << self
|
15
|
+
unless method_defined?(:desc)
|
16
|
+
def desc(description)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
14
20
|
|
15
21
|
desc 'WebHDFS/HttpFs host'
|
16
|
-
config_param :host, :string, default
|
22
|
+
config_param :host, :string, :default => nil
|
17
23
|
desc 'WebHDFS/HttpFs port'
|
18
|
-
config_param :port, :integer, default
|
24
|
+
config_param :port, :integer, :default => 50070
|
19
25
|
desc 'Namenode (host:port)'
|
20
|
-
config_param :namenode, :string, default
|
26
|
+
config_param :namenode, :string, :default => nil # host:port
|
21
27
|
desc 'Standby namenode for Namenode HA (host:port)'
|
22
|
-
config_param :standby_namenode, :string, default
|
28
|
+
config_param :standby_namenode, :string, :default => nil # host:port
|
23
29
|
|
24
30
|
desc 'Ignore errors on start up'
|
25
|
-
config_param :ignore_start_check_error, :bool, default
|
31
|
+
config_param :ignore_start_check_error, :bool, :default => false
|
26
32
|
|
27
33
|
desc 'Output file path on HDFS'
|
28
34
|
config_param :path, :string
|
29
35
|
desc 'User name for pseudo authentication'
|
30
|
-
config_param :username, :string, default
|
36
|
+
config_param :username, :string, :default => nil
|
31
37
|
|
32
38
|
desc 'Store data over HttpFs instead of WebHDFS'
|
33
|
-
config_param :httpfs, :bool, default
|
39
|
+
config_param :httpfs, :bool, :default => false
|
34
40
|
|
35
41
|
desc 'Number of seconds to wait for the connection to open'
|
36
|
-
config_param :open_timeout, :integer, default
|
42
|
+
config_param :open_timeout, :integer, :default => 30 # from ruby net/http default
|
37
43
|
desc 'Number of seconds to wait for one block to be read'
|
38
|
-
config_param :read_timeout, :integer, default
|
44
|
+
config_param :read_timeout, :integer, :default => 60 # from ruby net/http default
|
39
45
|
|
40
46
|
desc 'Retry automatically when known errors of HDFS are occurred'
|
41
|
-
config_param :retry_known_errors, :bool, default
|
47
|
+
config_param :retry_known_errors, :bool, :default => false
|
42
48
|
desc 'Retry interval'
|
43
|
-
config_param :retry_interval, :integer, default
|
49
|
+
config_param :retry_interval, :integer, :default => nil
|
44
50
|
desc 'The number of retries'
|
45
|
-
config_param :retry_times, :integer, default
|
51
|
+
config_param :retry_times, :integer, :default => nil
|
46
52
|
|
47
53
|
# how many times of write failure before switch to standby namenode
|
48
54
|
# by default it's 11 times that costs 1023 seconds inside fluentd,
|
49
55
|
# which is considered enough to exclude the scenes that caused by temporary network fail or single datanode fail
|
50
56
|
desc 'How many times of write failure before switch to standby namenode'
|
51
|
-
config_param :failures_before_use_standby, :integer, default
|
57
|
+
config_param :failures_before_use_standby, :integer, :default => 11
|
58
|
+
|
59
|
+
include Fluent::Mixin::PlainTextFormatter
|
52
60
|
|
53
|
-
config_param :
|
61
|
+
config_param :default_tag, :string, :default => 'tag_missing'
|
54
62
|
|
55
63
|
desc 'Append data or not'
|
56
|
-
config_param :append, :bool, default
|
64
|
+
config_param :append, :bool, :default => true
|
57
65
|
|
58
66
|
desc 'Use SSL or not'
|
59
|
-
config_param :ssl, :bool, default
|
67
|
+
config_param :ssl, :bool, :default => false
|
60
68
|
desc 'OpenSSL certificate authority file'
|
61
|
-
config_param :ssl_ca_file, :string, default
|
69
|
+
config_param :ssl_ca_file, :string, :default => nil
|
62
70
|
desc 'OpenSSL verify mode (none,peer)'
|
63
|
-
config_param :ssl_verify_mode, :
|
71
|
+
config_param :ssl_verify_mode, :default => nil do |val|
|
72
|
+
case val
|
73
|
+
when 'none'
|
74
|
+
:none
|
75
|
+
when 'peer'
|
76
|
+
:peer
|
77
|
+
else
|
78
|
+
raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
|
79
|
+
end
|
80
|
+
end
|
64
81
|
|
65
82
|
desc 'Use kerberos authentication or not'
|
66
|
-
config_param :kerberos, :bool, default
|
83
|
+
config_param :kerberos, :bool, :default => false
|
67
84
|
|
68
|
-
SUPPORTED_COMPRESS = [
|
85
|
+
SUPPORTED_COMPRESS = ['gzip', 'bzip2', 'snappy', 'lzo_command', 'text']
|
69
86
|
desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
|
70
|
-
config_param :compress, :
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
config_param :suppress_log_broken_string, :bool, default: false, deprecated: "use @log_level for plugin to suppress such info logs"
|
76
|
-
|
77
|
-
CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
|
78
|
-
|
79
|
-
config_section :buffer do
|
80
|
-
config_set_default :chunk_keys, ["time"]
|
87
|
+
config_param :compress, :default => nil do |val|
|
88
|
+
unless SUPPORTED_COMPRESS.include? val
|
89
|
+
raise Fluent::ConfigError, "unsupported compress: #{val}"
|
90
|
+
end
|
91
|
+
val
|
81
92
|
end
|
82
93
|
|
83
|
-
|
84
|
-
config_set_default :@type, 'out_file'
|
85
|
-
config_set_default :localtime, false # default timezone is UTC
|
86
|
-
end
|
94
|
+
CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
|
87
95
|
|
88
|
-
attr_reader :
|
96
|
+
attr_reader :compressor
|
89
97
|
|
90
98
|
def initialize
|
91
99
|
super
|
100
|
+
require 'net/http'
|
101
|
+
require 'time'
|
102
|
+
require 'webhdfs'
|
103
|
+
|
92
104
|
@compressor = nil
|
93
|
-
@standby_namenode_host = nil
|
94
|
-
@output_include_tag = @output_include_time = nil # TODO: deprecated
|
95
|
-
@header_separator = @field_separator = nil # TODO: deprecated
|
96
105
|
end
|
97
106
|
|
98
|
-
|
99
|
-
|
107
|
+
# Define `log` method for v0.10.42 or earlier
|
108
|
+
unless method_defined?(:log)
|
109
|
+
define_method("log") { $log }
|
110
|
+
end
|
100
111
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
112
|
+
def configure(conf)
|
113
|
+
if conf['path']
|
114
|
+
if conf['path'].index('%S')
|
115
|
+
conf['time_slice_format'] = '%Y%m%d%H%M%S'
|
116
|
+
elsif conf['path'].index('%M')
|
117
|
+
conf['time_slice_format'] = '%Y%m%d%H%M'
|
118
|
+
elsif conf['path'].index('%H')
|
119
|
+
conf['time_slice_format'] = '%Y%m%d%H'
|
120
|
+
end
|
110
121
|
end
|
111
|
-
buffer_config = conf.elements(name: "buffer").first
|
112
|
-
buffer_config["timekey"] = timekey unless buffer_config["timekey"]
|
113
122
|
|
114
|
-
|
123
|
+
verify_config_placeholders_in_path!(conf)
|
115
124
|
|
116
125
|
super
|
117
126
|
|
118
|
-
@formatter = formatter_create
|
119
|
-
|
120
|
-
if @using_formatter_config
|
121
|
-
@null_value = nil
|
122
|
-
else
|
123
|
-
@formatter.delimiter = "\x01" if @formatter.respond_to?(:delimiter) && @formatter.delimiter == 'SOH'
|
124
|
-
@null_value ||= 'NULL'
|
125
|
-
end
|
126
|
-
|
127
|
-
if @default_tag.nil? && !@using_formatter_config && @output_include_tag
|
128
|
-
@default_tag = "tag_missing"
|
129
|
-
end
|
130
|
-
if @remove_prefix
|
131
|
-
@remove_prefix_actual = @remove_prefix + "."
|
132
|
-
@remove_prefix_actual_length = @remove_prefix_actual.length
|
133
|
-
end
|
134
|
-
|
135
|
-
verify_config_placeholders_in_path!(conf)
|
136
127
|
@replace_random_uuid = @path.include?('%{uuid}') || @path.include?('%{uuid_flush}')
|
137
128
|
if @replace_random_uuid
|
138
129
|
# to check SecureRandom.uuid is available or not (NotImplementedError raised in such environment)
|
@@ -143,7 +134,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
143
134
|
end
|
144
135
|
end
|
145
136
|
|
146
|
-
|
137
|
+
begin
|
138
|
+
@compressor = COMPRESSOR_REGISTRY.lookup(@compress || 'text').new
|
139
|
+
rescue Fluent::ConfigError
|
140
|
+
raise
|
141
|
+
rescue
|
142
|
+
$log.warn "#{@comress} not found. Use 'text' instead"
|
143
|
+
@compressor = COMPRESSOR_REGISTRY.lookup('text').new
|
144
|
+
end
|
147
145
|
|
148
146
|
if @host
|
149
147
|
@namenode_host = @host
|
@@ -178,7 +176,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
178
176
|
@client_standby = nil
|
179
177
|
end
|
180
178
|
|
181
|
-
|
179
|
+
if not @append
|
182
180
|
if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
|
183
181
|
raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
|
184
182
|
end
|
@@ -241,6 +239,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
241
239
|
end
|
242
240
|
end
|
243
241
|
|
242
|
+
def shutdown
|
243
|
+
super
|
244
|
+
end
|
245
|
+
|
246
|
+
def path_format(chunk_key)
|
247
|
+
Time.strptime(chunk_key, @time_slice_format).strftime(@path)
|
248
|
+
end
|
249
|
+
|
244
250
|
def is_standby_exception(e)
|
245
251
|
e.is_a?(WebHDFS::IOError) && e.message.match(/org\.apache\.hadoop\.ipc\.StandbyException/)
|
246
252
|
end
|
@@ -252,6 +258,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
252
258
|
end
|
253
259
|
end
|
254
260
|
|
261
|
+
def chunk_unique_id_to_str(unique_id)
|
262
|
+
unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
|
263
|
+
end
|
264
|
+
|
265
|
+
# TODO check conflictions
|
266
|
+
|
255
267
|
def send_data(path, data)
|
256
268
|
if @append
|
257
269
|
begin
|
@@ -266,7 +278,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
266
278
|
|
267
279
|
HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
|
268
280
|
UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
|
269
|
-
UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
|
281
|
+
UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
|
270
282
|
|
271
283
|
def verify_config_placeholders_in_path!(conf)
|
272
284
|
return unless conf.has_key?('path')
|
@@ -295,20 +307,20 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
295
307
|
log.error "configuration placeholder #{ph} is now unsupported by webhdfs output plugin."
|
296
308
|
end
|
297
309
|
end
|
298
|
-
raise ConfigError, "there are unsupported placeholders in path."
|
310
|
+
raise Fluent::ConfigError, "there are unsupported placeholders in path."
|
299
311
|
end
|
300
312
|
end
|
301
313
|
|
302
314
|
def generate_path(chunk)
|
303
315
|
hdfs_path = if @append
|
304
|
-
|
316
|
+
path_format(chunk.key)
|
305
317
|
else
|
306
|
-
|
318
|
+
path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
|
307
319
|
end
|
308
320
|
hdfs_path = "#{hdfs_path}#{@compressor.ext}"
|
309
321
|
if @replace_random_uuid
|
310
322
|
uuid_random = SecureRandom.uuid
|
311
|
-
hdfs_path.gsub
|
323
|
+
hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
|
312
324
|
end
|
313
325
|
hdfs_path
|
314
326
|
end
|
@@ -324,48 +336,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
324
336
|
end
|
325
337
|
end
|
326
338
|
|
327
|
-
def format(tag, time, record)
|
328
|
-
if @remove_prefix # TODO: remove when it's obsoleted
|
329
|
-
if tag.start_with?(@remove_prefix_actual)
|
330
|
-
if tag.length > @remove_prefix_actual_length
|
331
|
-
tag = tag[@remove_prefix_actual_length..-1]
|
332
|
-
else
|
333
|
-
tag = @default_tag
|
334
|
-
end
|
335
|
-
elsif tag.start_with?(@remove_prefix)
|
336
|
-
if tag == @remove_prefix
|
337
|
-
tag = @default_tag
|
338
|
-
else
|
339
|
-
tag = tag.sub(@remove_prefix, '')
|
340
|
-
end
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
|
-
if @null_value # TODO: remove when it's obsoleted
|
345
|
-
check_keys = (record.keys + @null_convert_keys).uniq
|
346
|
-
check_keys.each do |key|
|
347
|
-
record[key] = @null_value if record[key].nil?
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
351
|
-
if @using_formatter_config
|
352
|
-
record = inject_values_to_record(tag, time, record)
|
353
|
-
line = @formatter.format(tag, time, record)
|
354
|
-
else # TODO: remove when it's obsoleted
|
355
|
-
time_str = @output_include_time ? @time_formatter.call(time) + @header_separator : ''
|
356
|
-
tag_str = @output_include_tag ? tag + @header_separator : ''
|
357
|
-
record_str = @formatter.format(tag, time, record)
|
358
|
-
line = time_str + tag_str + record_str
|
359
|
-
end
|
360
|
-
line << "\n" if @end_with_newline && !line.end_with?("\n")
|
361
|
-
line
|
362
|
-
rescue => e # remove this clause when @suppress_log_broken_string is obsoleted
|
363
|
-
unless @suppress_log_broken_string
|
364
|
-
log.info "unexpected error while formatting events, ignored", tag: tag, record: record, error: e
|
365
|
-
end
|
366
|
-
''
|
367
|
-
end
|
368
|
-
|
369
339
|
def write(chunk)
|
370
340
|
hdfs_path = generate_path(chunk)
|
371
341
|
|
@@ -396,72 +366,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
396
366
|
hdfs_path
|
397
367
|
end
|
398
368
|
|
399
|
-
def compat_parameters_convert_plaintextformatter(conf)
|
400
|
-
if !conf.elements('format').empty? || !conf['output_data_type']
|
401
|
-
@using_formatter_config = true
|
402
|
-
@null_convert_keys = []
|
403
|
-
return
|
404
|
-
end
|
405
|
-
|
406
|
-
log.warn "webhdfs output plugin is working with old configuration parameters. use <inject>/<format> sections instead for further releases."
|
407
|
-
@using_formatter_config = false
|
408
|
-
@null_convert_keys = []
|
409
|
-
|
410
|
-
@header_separator = case conf['field_separator']
|
411
|
-
when nil then "\t"
|
412
|
-
when 'SPACE' then ' '
|
413
|
-
when 'TAB' then "\t"
|
414
|
-
when 'COMMA' then ','
|
415
|
-
when 'SOH' then "\x01"
|
416
|
-
else conf['field_separator']
|
417
|
-
end
|
418
|
-
|
419
|
-
format_section = Fluent::Config::Element.new('format', '', {}, [])
|
420
|
-
case conf['output_data_type']
|
421
|
-
when '', 'json' # blank value is for compatibility reason (especially in testing)
|
422
|
-
format_section['@type'] = 'json'
|
423
|
-
when 'ltsv'
|
424
|
-
format_section['@type'] = 'ltsv'
|
425
|
-
else
|
426
|
-
unless conf['output_data_type'].start_with?('attr:')
|
427
|
-
raise Fluent::ConfigError, "output_data_type is invalid: #{conf['output_data_type']}"
|
428
|
-
end
|
429
|
-
format_section['@format'] = 'tsv'
|
430
|
-
keys_part = conf['output_data_type'].sub(/^attr:/, '')
|
431
|
-
@null_convert_keys = keys_part.split(',')
|
432
|
-
format_section['keys'] = keys_part
|
433
|
-
format_section['delimiter'] = case conf['field_separator']
|
434
|
-
when nil then '\t'
|
435
|
-
when 'SPACE' then ' '
|
436
|
-
when 'TAB' then '\t'
|
437
|
-
when 'COMMA' then ','
|
438
|
-
when 'SOH' then 'SOH' # fixed later
|
439
|
-
else conf['field_separator']
|
440
|
-
end
|
441
|
-
end
|
442
|
-
|
443
|
-
conf.elements << format_section
|
444
|
-
|
445
|
-
@output_include_time = conf.has_key?('output_include_time') ? Fluent::Config.bool_value(conf['output_include_time']) : true
|
446
|
-
@output_include_tag = conf.has_key?('output_include_tag') ? Fluent::Config.bool_value(conf['output_include_tag']) : true
|
447
|
-
|
448
|
-
if @output_include_time
|
449
|
-
# default timezone is UTC
|
450
|
-
using_localtime = if !conf.has_key?('utc') && !conf.has_key?('localtime')
|
451
|
-
false
|
452
|
-
elsif conf.has_key?('localtime') && conf.has_key?('utc')
|
453
|
-
raise Fluent::ConfigError, "specify either 'localtime' or 'utc'"
|
454
|
-
elsif conf.has_key?('localtime')
|
455
|
-
Fluent::Config.bool_value('localtime')
|
456
|
-
else
|
457
|
-
Fluent::Config.bool_value('utc')
|
458
|
-
end
|
459
|
-
@time_formatter = Fluent::TimeFormatter.new(conf['time_format'], using_localtime)
|
460
|
-
else
|
461
|
-
@time_formatter = nil
|
462
|
-
end
|
463
|
-
end
|
464
|
-
|
465
369
|
class Compressor
|
466
370
|
include Fluent::Configurable
|
467
371
|
|
@@ -488,7 +392,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
488
392
|
begin
|
489
393
|
Open3.capture3("#{command} -V")
|
490
394
|
rescue Errno::ENOENT
|
491
|
-
raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
395
|
+
raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
|
492
396
|
end
|
493
397
|
end
|
494
398
|
end
|
@@ -1,9 +1,9 @@
|
|
1
|
-
module Fluent
|
2
|
-
class WebHDFSOutput
|
1
|
+
module Fluent
|
2
|
+
class WebHDFSOutput
|
3
3
|
class LZOCommandCompressor < Compressor
|
4
4
|
WebHDFSOutput.register_compressor('lzo_command', self)
|
5
5
|
|
6
|
-
config_param :command_parameter, :string, default
|
6
|
+
config_param :command_parameter, :string, :default => '-qf1'
|
7
7
|
|
8
8
|
def configure(conf)
|
9
9
|
super
|
data/test/helper.rb
CHANGED
@@ -8,13 +8,10 @@ rescue Bundler::BundlerError => e
|
|
8
8
|
exit e.status_code
|
9
9
|
end
|
10
10
|
require 'test/unit'
|
11
|
-
require 'test/unit/rr'
|
12
11
|
|
13
12
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
13
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
14
|
require 'fluent/test'
|
16
|
-
require 'fluent/test/helpers'
|
17
|
-
require 'fluent/test/driver/output'
|
18
15
|
unless ENV.has_key?('VERBOSE')
|
19
16
|
nulllogger = Object.new
|
20
17
|
nulllogger.instance_eval {|obj|
|
@@ -25,8 +22,6 @@ unless ENV.has_key?('VERBOSE')
|
|
25
22
|
$log = nulllogger
|
26
23
|
end
|
27
24
|
|
28
|
-
include Fluent::Test::Helpers
|
29
|
-
|
30
25
|
require 'fluent/plugin/out_webhdfs'
|
31
26
|
|
32
27
|
class Test::Unit::TestCase
|
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
|
|
16
16
|
def setup
|
17
17
|
omit unless Object.const_defined?(:Snappy)
|
18
18
|
Fluent::Test.setup
|
19
|
-
@compressor = Fluent::
|
19
|
+
@compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
|
20
20
|
end
|
21
21
|
|
22
|
-
def create_driver(conf
|
23
|
-
Fluent::Test::
|
22
|
+
def create_driver(conf=CONFIG,tag='test')
|
23
|
+
Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_ext
|
@@ -1,243 +1,166 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class WebHDFSOutputTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
"output_data_type" => "",
|
9
|
-
"host" => "namenode.local",
|
10
|
-
"path" => "/hdfs/path/file.%Y%m%d.log"
|
11
|
-
})
|
12
|
-
|
4
|
+
CONFIG = %[
|
5
|
+
host namenode.local
|
6
|
+
path /hdfs/path/file.%Y%m%d.log
|
7
|
+
]
|
13
8
|
def setup
|
14
9
|
Fluent::Test.setup
|
15
10
|
end
|
16
11
|
|
17
|
-
def create_driver(conf)
|
18
|
-
Fluent::Test::
|
12
|
+
def create_driver(conf=CONFIG,tag='test')
|
13
|
+
Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
|
19
14
|
end
|
20
15
|
|
21
|
-
|
22
|
-
|
23
|
-
d = create_driver
|
24
|
-
assert_true d.instance.instance_eval{ @using_formatter_config }
|
25
|
-
|
16
|
+
class ConfigureTest < self
|
17
|
+
def test_default
|
18
|
+
d = create_driver
|
26
19
|
assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
|
27
20
|
assert_equal 50070, d.instance.instance_eval{ @namenode_port }
|
28
21
|
assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
|
22
|
+
assert_equal '%Y%m%d', d.instance.time_slice_format
|
29
23
|
assert_equal false, d.instance.httpfs
|
30
24
|
assert_nil d.instance.username
|
31
25
|
assert_equal false, d.instance.ignore_start_check_error
|
32
26
|
|
33
|
-
assert_equal
|
34
|
-
assert_equal true, d.instance.
|
35
|
-
|
36
|
-
# deprecated params
|
37
|
-
assert_nil d.instance.instance_eval{ @output_include_time }
|
38
|
-
assert_nil d.instance.instance_eval{ @output_include_tag }
|
27
|
+
assert_equal true, d.instance.output_include_time
|
28
|
+
assert_equal true, d.instance.output_include_tag
|
29
|
+
assert_equal 'json', d.instance.output_data_type
|
39
30
|
assert_nil d.instance.remove_prefix
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
sub_test_case "flat configuration" do
|
46
|
-
def test_default_for_traditional_config
|
47
|
-
d = create_driver(CONFIG_COMPAT)
|
48
|
-
assert_false d.instance.instance_eval{ @using_formatter_config }
|
49
|
-
|
50
|
-
assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
|
51
|
-
assert_equal 50070, d.instance.instance_eval{ @namenode_port }
|
52
|
-
assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
|
53
|
-
assert_equal false, d.instance.httpfs
|
54
|
-
assert_nil d.instance.username
|
55
|
-
assert_equal false, d.instance.ignore_start_check_error
|
56
|
-
|
57
|
-
assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
|
58
|
-
assert_equal true, d.instance.end_with_newline
|
59
|
-
|
60
|
-
assert_equal true, d.instance.instance_eval{ @output_include_time }
|
61
|
-
assert_equal true, d.instance.instance_eval{ @output_include_tag }
|
62
|
-
assert_nil d.instance.instance_eval{ @remove_prefix }
|
63
|
-
assert_equal "\t", d.instance.instance_eval{ @header_separator }
|
64
|
-
assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
|
31
|
+
assert_equal 'TAB', d.instance.field_separator
|
32
|
+
assert_equal true, d.instance.add_newline
|
33
|
+
assert_equal 'tag_missing', d.instance.default_tag
|
65
34
|
end
|
66
35
|
|
67
36
|
def test_httpfs
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
})
|
75
|
-
d = create_driver(conf)
|
76
|
-
|
37
|
+
d = create_driver %[
|
38
|
+
namenode server.local:14000
|
39
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
40
|
+
httpfs yes
|
41
|
+
username hdfs_user
|
42
|
+
]
|
77
43
|
assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
|
78
44
|
assert_equal 14000, d.instance.instance_eval{ @namenode_port }
|
79
45
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
46
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
80
47
|
assert_equal true, d.instance.httpfs
|
81
48
|
assert_equal 'hdfs_user', d.instance.username
|
82
49
|
end
|
83
50
|
|
84
51
|
def test_ssl
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
})
|
94
|
-
d = create_driver(conf)
|
95
|
-
|
52
|
+
d = create_driver %[
|
53
|
+
namenode server.local:14000
|
54
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
55
|
+
ssl true
|
56
|
+
ssl_ca_file /path/to/ca_file.pem
|
57
|
+
ssl_verify_mode peer
|
58
|
+
kerberos true
|
59
|
+
]
|
96
60
|
assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
|
97
61
|
assert_equal 14000, d.instance.instance_eval{ @namenode_port }
|
98
62
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
63
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
99
64
|
assert_equal true, d.instance.ssl
|
100
65
|
assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
|
101
66
|
assert_equal :peer, d.instance.ssl_verify_mode
|
102
67
|
assert_equal true, d.instance.kerberos
|
103
68
|
end
|
104
69
|
|
105
|
-
data(gzip: [
|
106
|
-
bzip2: [
|
107
|
-
snappy: [
|
108
|
-
lzo: [
|
70
|
+
data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
|
71
|
+
bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
|
72
|
+
snappy: ['snappy', Fluent::WebHDFSOutput::SnappyCompressor],
|
73
|
+
lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
|
109
74
|
def test_compress(data)
|
110
75
|
compress_type, compressor_class = data
|
111
76
|
begin
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
})
|
118
|
-
d = create_driver(conf)
|
77
|
+
d = create_driver %[
|
78
|
+
namenode server.local:14000
|
79
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
80
|
+
compress #{compress_type}
|
81
|
+
]
|
119
82
|
rescue Fluent::ConfigError => ex
|
120
83
|
omit ex.message
|
121
84
|
end
|
122
85
|
assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
|
123
86
|
assert_equal 14000, d.instance.instance_eval{ @namenode_port }
|
124
87
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
88
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
125
89
|
assert_equal compress_type, d.instance.compress
|
126
90
|
assert_equal compressor_class, d.instance.compressor.class
|
127
91
|
end
|
128
92
|
|
129
|
-
def
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
})
|
136
|
-
d = create_driver(conf)
|
93
|
+
def test_placeholders
|
94
|
+
d = create_driver %[
|
95
|
+
hostname testing.node.local
|
96
|
+
namenode server.local:50070
|
97
|
+
path /hdfs/${hostname}/file.%Y%m%d%H.log
|
98
|
+
]
|
137
99
|
assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
|
138
100
|
end
|
139
101
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
"path" => path
|
147
|
-
})
|
148
|
-
d = create_driver(conf)
|
149
|
-
formatter = Fluent::Timezone.formatter("+0900", path)
|
150
|
-
mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
|
151
|
-
time = event_time("2012-07-18 15:03:00 +0900")
|
152
|
-
metadata = d.instance.metadata("test", time, {})
|
153
|
-
chunk = d.instance.buffer.generate_chunk(metadata)
|
154
|
-
assert_equal expected, d.instance.generate_path(chunk)
|
155
|
-
end
|
156
|
-
|
157
|
-
data(path: { "append" => false },
|
158
|
-
ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
|
159
|
-
compress: { "compress" => "invalid" })
|
160
|
-
test "invalid" do |attr|
|
161
|
-
conf = config_element(
|
162
|
-
"ROOT", "", {
|
163
|
-
"namenode" => "server.local:14000",
|
164
|
-
"path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
|
165
|
-
})
|
166
|
-
conf += config_element("", "", attr)
|
167
|
-
assert_raise Fluent::ConfigError do
|
168
|
-
create_driver(conf)
|
102
|
+
class PathFormatTest < self
|
103
|
+
def test_default
|
104
|
+
d = create_driver
|
105
|
+
assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
|
106
|
+
assert_equal '%Y%m%d', d.instance.time_slice_format
|
107
|
+
assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
|
169
108
|
end
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
sub_test_case "sub section configuration" do
|
174
|
-
def test_time_key
|
175
|
-
conf = config_element(
|
176
|
-
"ROOT", "", {
|
177
|
-
"host" => "namenode.local",
|
178
|
-
"path" => "/hdfs/path/file.%Y%m%d.log"
|
179
|
-
}, [
|
180
|
-
config_element(
|
181
|
-
"buffer", "time", {
|
182
|
-
"timekey" => 1
|
183
|
-
})
|
184
|
-
]
|
185
|
-
)
|
186
|
-
d = create_driver(conf)
|
187
|
-
time = event_time("2012-07-18 15:03:00 +0900")
|
188
|
-
metadata = d.instance.metadata("test", time, {})
|
189
|
-
chunk = d.instance.buffer.generate_chunk(metadata)
|
190
|
-
assert_equal 1, d.instance.buffer_config.timekey
|
191
|
-
assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
|
192
|
-
end
|
193
|
-
end
|
194
109
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
110
|
+
def test_time_slice_format
|
111
|
+
d = create_driver %[
|
112
|
+
namenode server.local:14000
|
113
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
114
|
+
]
|
115
|
+
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
116
|
+
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
117
|
+
assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
|
118
|
+
end
|
203
119
|
end
|
204
120
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
121
|
+
class InvalidTest < self
|
122
|
+
def test_path
|
123
|
+
assert_raise Fluent::ConfigError do
|
124
|
+
d = create_driver %[
|
125
|
+
namenode server.local:14000
|
126
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
127
|
+
append false
|
128
|
+
]
|
129
|
+
end
|
130
|
+
end
|
213
131
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
132
|
+
def test_ssl
|
133
|
+
assert_raise Fluent::ConfigError do
|
134
|
+
create_driver %[
|
135
|
+
namenode server.local:14000
|
136
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
137
|
+
ssl true
|
138
|
+
ssl_verify_mode invalid
|
139
|
+
]
|
140
|
+
end
|
141
|
+
end
|
222
142
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
end
|
143
|
+
def test_invalid_compress
|
144
|
+
assert_raise Fluent::ConfigError do
|
145
|
+
create_driver %[
|
146
|
+
namenode server.local:14000
|
147
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
148
|
+
compress invalid
|
149
|
+
]
|
150
|
+
end
|
151
|
+
end
|
233
152
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
153
|
+
data("%{uuid:hostname}" => "%{uuid:hostname}",
|
154
|
+
"%{uuid:timestamp}" => "%{uuid:timestamp}")
|
155
|
+
def test_obsoleted_placeholders(placeholder)
|
156
|
+
assert_raise Fluent::ConfigError do
|
157
|
+
create_driver %[
|
158
|
+
namenode server.local:14000
|
159
|
+
path /hdfs/path/#{placeholder}/file.%Y%m%d.%H%M.log
|
160
|
+
append false
|
161
|
+
]
|
162
|
+
end
|
163
|
+
end
|
241
164
|
end
|
242
165
|
end
|
243
166
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0rc1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: appraisal
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -53,47 +53,53 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: snappy
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.0.13
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 0.0.13
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: fluentd
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
76
|
-
|
75
|
+
version: 0.10.59
|
76
|
+
- - "<"
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 0.14.0
|
79
|
+
type: :runtime
|
77
80
|
prerelease: false
|
78
81
|
version_requirements: !ruby/object:Gem::Requirement
|
79
82
|
requirements:
|
80
83
|
- - ">="
|
81
84
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
85
|
+
version: 0.10.59
|
86
|
+
- - "<"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 0.14.0
|
83
89
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
90
|
+
name: fluent-mixin-plaintextformatter
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
86
92
|
requirements:
|
87
93
|
- - ">="
|
88
94
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
95
|
+
version: 0.2.1
|
90
96
|
type: :runtime
|
91
97
|
prerelease: false
|
92
98
|
version_requirements: !ruby/object:Gem::Requirement
|
93
99
|
requirements:
|
94
100
|
- - ">="
|
95
101
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
102
|
+
version: 0.2.1
|
97
103
|
- !ruby/object:Gem::Dependency
|
98
104
|
name: webhdfs
|
99
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,12 +169,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
163
169
|
version: '0'
|
164
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
165
171
|
requirements:
|
166
|
-
- - "
|
172
|
+
- - ">="
|
167
173
|
- !ruby/object:Gem::Version
|
168
|
-
version:
|
174
|
+
version: '0'
|
169
175
|
requirements: []
|
170
176
|
rubyforge_project:
|
171
|
-
rubygems_version: 2.6.
|
177
|
+
rubygems_version: 2.6.11
|
172
178
|
signing_key:
|
173
179
|
specification_version: 4
|
174
180
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|