fluent-plugin-webhdfs 0.6.0rc1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +10 -1
- data/fluent-plugin-webhdfs.gemspec +3 -3
- data/lib/fluent/plugin/out_webhdfs.rb +97 -193
- data/lib/fluent/plugin/webhdfs_compressor_bzip2.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_gzip.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb +3 -3
- data/lib/fluent/plugin/webhdfs_compressor_snappy.rb +2 -2
- data/lib/fluent/plugin/webhdfs_compressor_text.rb +2 -2
- data/test/helper.rb +0 -5
- data/test/plugin/test_compressor.rb +3 -3
- data/test/plugin/test_out_webhdfs.rb +103 -180
- metadata +22 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3001d6e76014cec6f043369f92db09fb9f6989d4
+  data.tar.gz: e7fdab6516fd7ef1ad2732369b002116e8b65b85
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 898a6fa68810d9f94aefcb70e2b0e168b67b56922d2ae5f7757e1a8c9a38603d8ba0dc353ee49ae9247e42196fe3d8c88c31172773ac0df3919428bc25c84a5f
+  data.tar.gz: 37c6d7534dab627dd3a31bd4650f8642c983041cf1f06ff916cbc3107605d6070122ae8f1990cb36dd1f14ba19a3b2e5a65fbc0958553df4225c2228e81c29ca
data/.travis.yml
CHANGED
@@ -2,9 +2,10 @@ sudo: false
 language: ruby
 
 rvm:
+  - 2.0.0
   - 2.1
   - 2.2
-  - 2.3.
+  - 2.3.0
 
 branches:
   only:
@@ -22,4 +23,12 @@ script: bundle exec rake test
 
 gemfile:
   - Gemfile
+  - gemfiles/fluentd_v0.12.gemfile
   - gemfiles/fluentd_v0.14.gemfile
+
+matrix:
+  exclude:
+    - rvm: 2.0.0
+      gemfile: Gemfile
+    - rvm: 2.0.0
+      gemfile: gemfiles/fluentd_v0.14.gemfile
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "0.6.0rc1"
+  gem.version = "0.6.0"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -17,10 +17,10 @@ Gem::Specification.new do |gem|
 
   gem.add_development_dependency "rake"
   gem.add_development_dependency "test-unit"
-  gem.add_development_dependency "test-unit-rr"
   gem.add_development_dependency "appraisal"
   gem.add_development_dependency "snappy", '>= 0.0.13'
-  gem.add_runtime_dependency "fluentd", '>= 0.14.
+  gem.add_runtime_dependency "fluentd", ['>= 0.10.59', "< 0.14.0"]
+  gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
   gem.add_runtime_dependency "webhdfs", '>= 0.6.0'
   gem.add_runtime_dependency "bzip2-ffi"
 end
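Note: the pinned dependency above is the heart of this release: 0.6.0 retreats from the fluentd v0.14 API back to the v0.10/v0.12 line. Passing an array to add_runtime_dependency ANDs both bounds; a quick illustration of how RubyGems evaluates the constraint (not part of the diff):

    require 'rubygems'
    req = Gem::Requirement.new(['>= 0.10.59', '< 0.14.0'])
    req.satisfied_by?(Gem::Version.new('0.12.35'))  # => true:  v0.12 line accepted
    req.satisfied_by?(Gem::Version.new('0.14.15'))  # => false: v0.14 line excluded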
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -1,138 +1,129 @@
 # -*- coding: utf-8 -*-
 
-require 'fluent/plugin/output'
-require 'fluent/config/element'
-
-require 'webhdfs'
 require 'tempfile'
 require 'securerandom'
+require 'fluent/mixin/plaintextformatter'
 
-class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
+class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
   Fluent::Plugin.register_output('webhdfs', self)
 
-
+  config_set_default :buffer_type, 'memory'
+  config_set_default :time_slice_format, '%Y%m%d'
+
+  # For fluentd v0.12.16 or earlier
+  class << self
+    unless method_defined?(:desc)
+      def desc(description)
+      end
+    end
+  end
 
   desc 'WebHDFS/HttpFs host'
-  config_param :host, :string, default
+  config_param :host, :string, :default => nil
   desc 'WebHDFS/HttpFs port'
-  config_param :port, :integer, default
+  config_param :port, :integer, :default => 50070
   desc 'Namenode (host:port)'
-  config_param :namenode, :string, default
+  config_param :namenode, :string, :default => nil # host:port
   desc 'Standby namenode for Namenode HA (host:port)'
-  config_param :standby_namenode, :string, default
+  config_param :standby_namenode, :string, :default => nil # host:port
 
   desc 'Ignore errors on start up'
-  config_param :ignore_start_check_error, :bool, default
+  config_param :ignore_start_check_error, :bool, :default => false
 
   desc 'Output file path on HDFS'
   config_param :path, :string
   desc 'User name for pseudo authentication'
-  config_param :username, :string, default
+  config_param :username, :string, :default => nil
 
   desc 'Store data over HttpFs instead of WebHDFS'
-  config_param :httpfs, :bool, default
+  config_param :httpfs, :bool, :default => false
 
   desc 'Number of seconds to wait for the connection to open'
-  config_param :open_timeout, :integer, default
+  config_param :open_timeout, :integer, :default => 30 # from ruby net/http default
   desc 'Number of seconds to wait for one block to be read'
-  config_param :read_timeout, :integer, default
+  config_param :read_timeout, :integer, :default => 60 # from ruby net/http default
 
   desc 'Retry automatically when known errors of HDFS are occurred'
-  config_param :retry_known_errors, :bool, default
+  config_param :retry_known_errors, :bool, :default => false
   desc 'Retry interval'
-  config_param :retry_interval, :integer, default
+  config_param :retry_interval, :integer, :default => nil
   desc 'The number of retries'
-  config_param :retry_times, :integer, default
+  config_param :retry_times, :integer, :default => nil
 
   # how many times of write failure before switch to standby namenode
   # by default it's 11 times that costs 1023 seconds inside fluentd,
   # which is considered enough to exclude the scenes that caused by temporary network fail or single datanode fail
   desc 'How many times of write failure before switch to standby namenode'
-  config_param :failures_before_use_standby, :integer, default
+  config_param :failures_before_use_standby, :integer, :default => 11
+
+  include Fluent::Mixin::PlainTextFormatter
 
-  config_param :
+  config_param :default_tag, :string, :default => 'tag_missing'
 
   desc 'Append data or not'
-  config_param :append, :bool, default
+  config_param :append, :bool, :default => true
 
   desc 'Use SSL or not'
-  config_param :ssl, :bool, default
+  config_param :ssl, :bool, :default => false
   desc 'OpenSSL certificate authority file'
-  config_param :ssl_ca_file, :string, default
+  config_param :ssl_ca_file, :string, :default => nil
   desc 'OpenSSL verify mode (none,peer)'
-  config_param :ssl_verify_mode, :
+  config_param :ssl_verify_mode, :default => nil do |val|
+    case val
+    when 'none'
+      :none
+    when 'peer'
+      :peer
+    else
+      raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
+    end
+  end
 
   desc 'Use kerberos authentication or not'
-  config_param :kerberos, :bool, default
+  config_param :kerberos, :bool, :default => false
 
-  SUPPORTED_COMPRESS = [
+  SUPPORTED_COMPRESS = ['gzip', 'bzip2', 'snappy', 'lzo_command', 'text']
   desc "Compress method (#{SUPPORTED_COMPRESS.join(',')})"
-  config_param :compress, :
-
-
-
-
-  config_param :suppress_log_broken_string, :bool, default: false, deprecated: "use @log_level for plugin to suppress such info logs"
-
-  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
-
-  config_section :buffer do
-    config_set_default :chunk_keys, ["time"]
+  config_param :compress, :default => nil do |val|
+    unless SUPPORTED_COMPRESS.include? val
+      raise Fluent::ConfigError, "unsupported compress: #{val}"
+    end
+    val
   end
 
-
-    config_set_default :@type, 'out_file'
-    config_set_default :localtime, false # default timezone is UTC
-  end
+  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
 
-  attr_reader :
+  attr_reader :compressor
 
   def initialize
     super
+    require 'net/http'
+    require 'time'
+    require 'webhdfs'
+
     @compressor = nil
-    @standby_namenode_host = nil
-    @output_include_tag = @output_include_time = nil # TODO: deprecated
-    @header_separator = @field_separator = nil # TODO: deprecated
   end
 
-
-
+  # Define `log` method for v0.10.42 or earlier
+  unless method_defined?(:log)
+    define_method("log") { $log }
+  end
 
-
-
-
-
-
-
-
-
-
+  def configure(conf)
+    if conf['path']
+      if conf['path'].index('%S')
+        conf['time_slice_format'] = '%Y%m%d%H%M%S'
+      elsif conf['path'].index('%M')
+        conf['time_slice_format'] = '%Y%m%d%H%M'
+      elsif conf['path'].index('%H')
+        conf['time_slice_format'] = '%Y%m%d%H'
+      end
     end
-    buffer_config = conf.elements(name: "buffer").first
-    buffer_config["timekey"] = timekey unless buffer_config["timekey"]
 
-
+    verify_config_placeholders_in_path!(conf)
 
     super
 
-    @formatter = formatter_create
-
-    if @using_formatter_config
-      @null_value = nil
-    else
-      @formatter.delimiter = "\x01" if @formatter.respond_to?(:delimiter) && @formatter.delimiter == 'SOH'
-      @null_value ||= 'NULL'
-    end
-
-    if @default_tag.nil? && !@using_formatter_config && @output_include_tag
-      @default_tag = "tag_missing"
-    end
-    if @remove_prefix
-      @remove_prefix_actual = @remove_prefix + "."
-      @remove_prefix_actual_length = @remove_prefix_actual.length
-    end
-
-    verify_config_placeholders_in_path!(conf)
     @replace_random_uuid = @path.include?('%{uuid}') || @path.include?('%{uuid_flush}')
     if @replace_random_uuid
      # to check SecureRandom.uuid is available or not (NotImplementedError raised in such environment)
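Note: in place of v0.14 buffer sections, the restored v0.12-style configure above derives time_slice_format from the finest strftime placeholder appearing in path. The mapping, extracted into a standalone sketch (hypothetical helper name, logic copied from the hunk):

    def derive_time_slice_format(path)
      return '%Y%m%d%H%M%S' if path.index('%S')
      return '%Y%m%d%H%M'   if path.index('%M')
      return '%Y%m%d%H'     if path.index('%H')
      '%Y%m%d' # falls back to the config_set_default at the top of the class
    end

    derive_time_slice_format('/hdfs/path/file.%Y%m%d.%H%M.log') # => "%Y%m%d%H%M"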
@@ -143,7 +134,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       end
     end
 
-
+    begin
+      @compressor = COMPRESSOR_REGISTRY.lookup(@compress || 'text').new
+    rescue Fluent::ConfigError
+      raise
+    rescue
+      $log.warn "#{@comress} not found. Use 'text' instead"
+      @compressor = COMPRESSOR_REGISTRY.lookup('text').new
+    end
 
     if @host
       @namenode_host = @host
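Note: the begin/rescue added above separates two failure modes: an unsupported compress value raises Fluent::ConfigError and aborts startup, while a compressor whose optional library is unavailable (snappy, for instance) degrades to the plain 'text' compressor with a warning. For context, a compressor registers itself in the registry and reports a filename extension; a hypothetical minimal one, modeled on the LZOCommandCompressor shown later in this diff:

    module Fluent
      class WebHDFSOutput
        class ExampleCompressor < Compressor  # illustrative; not shipped in the gem
          WebHDFSOutput.register_compressor('example', self)

          def ext
            '.example' # appended to the generated HDFS path by generate_path
          end
        end
      end
    end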
@@ -178,7 +176,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       @client_standby = nil
     end
 
-
+    if not @append
       if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
         raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
       end
@@ -241,6 +239,14 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
   end
 
+  def shutdown
+    super
+  end
+
+  def path_format(chunk_key)
+    Time.strptime(chunk_key, @time_slice_format).strftime(@path)
+  end
+
   def is_standby_exception(e)
     e.is_a?(WebHDFS::IOError) && e.message.match(/org\.apache\.hadoop\.ipc\.StandbyException/)
   end
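Note: path_format above is the return trip of the time slicer: the buffer chunk key is a string shaped by time_slice_format, re-parsed into a Time and rendered through path. Worked example matching the tests near the end of this diff:

    require 'time'
    # with time_slice_format '%Y%m%d' and path '/hdfs/path/file.%Y%m%d.log':
    Time.strptime('20120718', '%Y%m%d').strftime('/hdfs/path/file.%Y%m%d.log')
    # => "/hdfs/path/file.20120718.log"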
@@ -252,6 +258,12 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
   end
 
+  def chunk_unique_id_to_str(unique_id)
+    unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
+  end
+
+  # TODO check conflictions
+
   def send_data(path, data)
     if @append
       begin
|
|
266
278
|
|
267
279
|
HOSTNAME_PLACEHOLDERS_DEPRECATED = ['${hostname}', '%{hostname}', '__HOSTNAME__']
|
268
280
|
UUID_RANDOM_PLACEHOLDERS_DEPRECATED = ['${uuid}', '${uuid:random}', '__UUID__', '__UUID_RANDOM__']
|
269
|
-
UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
|
281
|
+
UUID_OTHER_PLACEHOLDERS_OBSOLETED = ['${uuid:hostname}', '%{uuid:hostname}', '__UUID_HOSTNAME__', '${uuid:timestamp}', '%{uuid:timestamp}', '__UUID_TIMESTAMP__']
|
270
282
|
|
271
283
|
def verify_config_placeholders_in_path!(conf)
|
272
284
|
return unless conf.has_key?('path')
|
@@ -295,20 +307,20 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
|
|
295
307
|
log.error "configuration placeholder #{ph} is now unsupported by webhdfs output plugin."
|
296
308
|
end
|
297
309
|
end
|
298
|
-
raise ConfigError, "there are unsupported placeholders in path."
|
310
|
+
raise Fluent::ConfigError, "there are unsupported placeholders in path."
|
299
311
|
end
|
300
312
|
end
|
301
313
|
|
302
314
|
def generate_path(chunk)
|
303
315
|
hdfs_path = if @append
|
304
|
-
|
316
|
+
path_format(chunk.key)
|
305
317
|
else
|
306
|
-
|
318
|
+
path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
|
307
319
|
end
|
308
320
|
hdfs_path = "#{hdfs_path}#{@compressor.ext}"
|
309
321
|
if @replace_random_uuid
|
310
322
|
uuid_random = SecureRandom.uuid
|
311
|
-
hdfs_path.gsub
|
323
|
+
hdfs_path = hdfs_path.gsub('%{uuid}', uuid_random).gsub('%{uuid_flush}', uuid_random)
|
312
324
|
end
|
313
325
|
hdfs_path
|
314
326
|
end
|
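Note: generate_path therefore composes the final file name in three steps. A sketch under an assumed configuration ('01abff' stands in for a real hex chunk id, and '.gz' assumes the gzip compressor's extension):

    # path '/log/file.%Y%m%d.${chunk_id}.log', append false, compress gzip
    # path_format('20120718')        # => "/log/file.20120718.${chunk_id}.log"
    # .gsub('${chunk_id}', '01abff') # => "/log/file.20120718.01abff.log"
    # + @compressor.ext              # => "/log/file.20120718.01abff.log.gz"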
@@ -324,48 +336,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     end
   end
 
-  def format(tag, time, record)
-    if @remove_prefix # TODO: remove when it's obsoleted
-      if tag.start_with?(@remove_prefix_actual)
-        if tag.length > @remove_prefix_actual_length
-          tag = tag[@remove_prefix_actual_length..-1]
-        else
-          tag = @default_tag
-        end
-      elsif tag.start_with?(@remove_prefix)
-        if tag == @remove_prefix
-          tag = @default_tag
-        else
-          tag = tag.sub(@remove_prefix, '')
-        end
-      end
-    end
-
-    if @null_value # TODO: remove when it's obsoleted
-      check_keys = (record.keys + @null_convert_keys).uniq
-      check_keys.each do |key|
-        record[key] = @null_value if record[key].nil?
-      end
-    end
-
-    if @using_formatter_config
-      record = inject_values_to_record(tag, time, record)
-      line = @formatter.format(tag, time, record)
-    else # TODO: remove when it's obsoleted
-      time_str = @output_include_time ? @time_formatter.call(time) + @header_separator : ''
-      tag_str = @output_include_tag ? tag + @header_separator : ''
-      record_str = @formatter.format(tag, time, record)
-      line = time_str + tag_str + record_str
-    end
-    line << "\n" if @end_with_newline && !line.end_with?("\n")
-    line
-  rescue => e # remove this clause when @suppress_log_broken_string is obsoleted
-    unless @suppress_log_broken_string
-      log.info "unexpected error while formatting events, ignored", tag: tag, record: record, error: e
-    end
-    ''
-  end
-
   def write(chunk)
     hdfs_path = generate_path(chunk)
 
@@ -396,72 +366,6 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
     hdfs_path
   end
 
-  def compat_parameters_convert_plaintextformatter(conf)
-    if !conf.elements('format').empty? || !conf['output_data_type']
-      @using_formatter_config = true
-      @null_convert_keys = []
-      return
-    end
-
-    log.warn "webhdfs output plugin is working with old configuration parameters. use <inject>/<format> sections instead for further releases."
-    @using_formatter_config = false
-    @null_convert_keys = []
-
-    @header_separator = case conf['field_separator']
-                        when nil then "\t"
-                        when 'SPACE' then ' '
-                        when 'TAB' then "\t"
-                        when 'COMMA' then ','
-                        when 'SOH' then "\x01"
-                        else conf['field_separator']
-                        end
-
-    format_section = Fluent::Config::Element.new('format', '', {}, [])
-    case conf['output_data_type']
-    when '', 'json' # blank value is for compatibility reason (especially in testing)
-      format_section['@type'] = 'json'
-    when 'ltsv'
-      format_section['@type'] = 'ltsv'
-    else
-      unless conf['output_data_type'].start_with?('attr:')
-        raise Fluent::ConfigError, "output_data_type is invalid: #{conf['output_data_type']}"
-      end
-      format_section['@format'] = 'tsv'
-      keys_part = conf['output_data_type'].sub(/^attr:/, '')
-      @null_convert_keys = keys_part.split(',')
-      format_section['keys'] = keys_part
-      format_section['delimiter'] = case conf['field_separator']
-                                    when nil then '\t'
-                                    when 'SPACE' then ' '
-                                    when 'TAB' then '\t'
-                                    when 'COMMA' then ','
-                                    when 'SOH' then 'SOH' # fixed later
-                                    else conf['field_separator']
-                                    end
-    end
-
-    conf.elements << format_section
-
-    @output_include_time = conf.has_key?('output_include_time') ? Fluent::Config.bool_value(conf['output_include_time']) : true
-    @output_include_tag = conf.has_key?('output_include_tag') ? Fluent::Config.bool_value(conf['output_include_tag']) : true
-
-    if @output_include_time
-      # default timezone is UTC
-      using_localtime = if !conf.has_key?('utc') && !conf.has_key?('localtime')
-                          false
-                        elsif conf.has_key?('localtime') && conf.has_key?('utc')
-                          raise Fluent::ConfigError, "specify either 'localtime' or 'utc'"
-                        elsif conf.has_key?('localtime')
-                          Fluent::Config.bool_value('localtime')
-                        else
-                          Fluent::Config.bool_value('utc')
-                        end
-      @time_formatter = Fluent::TimeFormatter.new(conf['time_format'], using_localtime)
-    else
-      @time_formatter = nil
-    end
-  end
-
   class Compressor
     include Fluent::Configurable
 
@@ -488,7 +392,7 @@ class Fluent::Plugin::WebHDFSOutput < Fluent::Plugin::Output
       begin
         Open3.capture3("#{command} -V")
       rescue Errno::ENOENT
-        raise ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
+        raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
       end
     end
   end
data/lib/fluent/plugin/webhdfs_compressor_lzo_command.rb
CHANGED
@@ -1,9 +1,9 @@
-module Fluent
-  class WebHDFSOutput
+module Fluent
+  class WebHDFSOutput
     class LZOCommandCompressor < Compressor
       WebHDFSOutput.register_compressor('lzo_command', self)
 
-      config_param :command_parameter, :string, default
+      config_param :command_parameter, :string, :default => '-qf1'
 
       def configure(conf)
         super
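Note: the default → :default => change repeated throughout this diff is the same mechanical edit everywhere: 0.6.0 reverts to the pre-rc, v0.12-compatible code base, which spells config_param options with hash rockets (the rc1 lines, truncated in this extract, apparently used the 1.9+ keyword shorthand). The two spellings build the identical options hash:

    config_param :command_parameter, :string, :default => '-qf1'  # 0.6.0 (hash rocket)
    config_param :command_parameter, :string, default: '-qf1'     # equivalent 1.9+ shorthand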
data/test/helper.rb
CHANGED
@@ -8,13 +8,10 @@ rescue Bundler::BundlerError => e
   exit e.status_code
 end
 require 'test/unit'
-require 'test/unit/rr'
 
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 require 'fluent/test'
-require 'fluent/test/helpers'
-require 'fluent/test/driver/output'
 unless ENV.has_key?('VERBOSE')
   nulllogger = Object.new
   nulllogger.instance_eval {|obj|
@@ -25,8 +22,6 @@ unless ENV.has_key?('VERBOSE')
   $log = nulllogger
 end
 
-include Fluent::Test::Helpers
-
 require 'fluent/plugin/out_webhdfs'
 
 class Test::Unit::TestCase
data/test/plugin/test_compressor.rb
CHANGED
@@ -16,11 +16,11 @@ class CompressorTest < Test::Unit::TestCase
   def setup
     omit unless Object.const_defined?(:Snappy)
     Fluent::Test.setup
-    @compressor = Fluent::
+    @compressor = Fluent::WebHDFSOutput::SnappyCompressor.new
   end
 
-  def create_driver(conf
-    Fluent::Test::
+  def create_driver(conf=CONFIG,tag='test')
+    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
   end
 
   def test_ext
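Note: both test files now drive the plugin through fluentd v0.12's Fluent::Test::OutputTestDriver instead of the v0.14 Fluent::Test::Driver::Output dropped from helper.rb above. Typical usage, mirroring the create_driver helpers (illustrative, assuming the gem and fluentd v0.12 are installed):

    require 'fluent/test'
    require 'fluent/plugin/out_webhdfs'

    Fluent::Test.setup
    conf = %[
      host namenode.local
      path /hdfs/path/file.%Y%m%d.log
    ]
    d = Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, 'test').configure(conf)
    d.instance.path # => "/hdfs/path/file.%Y%m%d.log"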
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -1,243 +1,166 @@
 require 'helper'
 
 class WebHDFSOutputTest < Test::Unit::TestCase
-
-
-
-
-    "output_data_type" => "",
-    "host" => "namenode.local",
-    "path" => "/hdfs/path/file.%Y%m%d.log"
-  })
-
+  CONFIG = %[
+    host namenode.local
+    path /hdfs/path/file.%Y%m%d.log
+  ]
   def setup
     Fluent::Test.setup
   end
 
-  def create_driver(conf)
-    Fluent::Test::
+  def create_driver(conf=CONFIG,tag='test')
+    Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
   end
 
-
-
-    d = create_driver
-    assert_true d.instance.instance_eval{ @using_formatter_config }
-
+  class ConfigureTest < self
+    def test_default
+      d = create_driver
       assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 50070, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+      assert_equal '%Y%m%d', d.instance.time_slice_format
       assert_equal false, d.instance.httpfs
       assert_nil d.instance.username
       assert_equal false, d.instance.ignore_start_check_error
 
-      assert_equal
-      assert_equal true, d.instance.
-
-      # deprecated params
-      assert_nil d.instance.instance_eval{ @output_include_time }
-      assert_nil d.instance.instance_eval{ @output_include_tag }
+      assert_equal true, d.instance.output_include_time
+      assert_equal true, d.instance.output_include_tag
+      assert_equal 'json', d.instance.output_data_type
       assert_nil d.instance.remove_prefix
-
-
-
-  end
-
-  sub_test_case "flat configuration" do
-    def test_default_for_traditional_config
-      d = create_driver(CONFIG_COMPAT)
-      assert_false d.instance.instance_eval{ @using_formatter_config }
-
-      assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
-      assert_equal 50070, d.instance.instance_eval{ @namenode_port }
-      assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
-      assert_equal false, d.instance.httpfs
-      assert_nil d.instance.username
-      assert_equal false, d.instance.ignore_start_check_error
-
-      assert_equal 'Fluent::Plugin::JSONFormatter', d.instance.formatter.class.to_s
-      assert_equal true, d.instance.end_with_newline
-
-      assert_equal true, d.instance.instance_eval{ @output_include_time }
-      assert_equal true, d.instance.instance_eval{ @output_include_tag }
-      assert_nil d.instance.instance_eval{ @remove_prefix }
-      assert_equal "\t", d.instance.instance_eval{ @header_separator }
-      assert_equal 'tag_missing', d.instance.instance_eval{ @default_tag }
+      assert_equal 'TAB', d.instance.field_separator
+      assert_equal true, d.instance.add_newline
+      assert_equal 'tag_missing', d.instance.default_tag
     end
 
     def test_httpfs
-
-
-
-
-
-
-      })
-      d = create_driver(conf)
-
+      d = create_driver %[
+        namenode server.local:14000
+        path /hdfs/path/file.%Y%m%d.%H%M.log
+        httpfs yes
+        username hdfs_user
+      ]
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.httpfs
       assert_equal 'hdfs_user', d.instance.username
     end
 
     def test_ssl
-
-
-
-
-
-
-
-
-      })
-      d = create_driver(conf)
-
+      d = create_driver %[
+        namenode server.local:14000
+        path /hdfs/path/file.%Y%m%d.%H%M.log
+        ssl true
+        ssl_ca_file /path/to/ca_file.pem
+        ssl_verify_mode peer
+        kerberos true
+      ]
       assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
       assert_equal 14000, d.instance.instance_eval{ @namenode_port }
       assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
       assert_equal true, d.instance.ssl
       assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
       assert_equal :peer, d.instance.ssl_verify_mode
       assert_equal true, d.instance.kerberos
     end
 
-    data(gzip: [
-         bzip2: [
-         snappy: [
-         lzo: [
+    data(gzip: ['gzip', Fluent::WebHDFSOutput::GzipCompressor],
+         bzip2: ['bzip2', Fluent::WebHDFSOutput::Bzip2Compressor],
+         snappy: ['snappy', Fluent::WebHDFSOutput::SnappyCompressor],
+         lzo: ['lzo_command', Fluent::WebHDFSOutput::LZOCommandCompressor])
     def test_compress(data)
       compress_type, compressor_class = data
       begin
-
-
-
-
-
-        })
-        d = create_driver(conf)
+        d = create_driver %[
+          namenode server.local:14000
+          path /hdfs/path/file.%Y%m%d.%H%M.log
+          compress #{compress_type}
+        ]
      rescue Fluent::ConfigError => ex
        omit ex.message
      end
      assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
      assert_equal 14000, d.instance.instance_eval{ @namenode_port }
      assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+      assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
      assert_equal compress_type, d.instance.compress
      assert_equal compressor_class, d.instance.compressor.class
    end
 
-    def
-
-
-
-
-
-      })
-      d = create_driver(conf)
+    def test_placeholders
+      d = create_driver %[
+        hostname testing.node.local
+        namenode server.local:50070
+        path /hdfs/${hostname}/file.%Y%m%d%H.log
+      ]
      assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
    end
 
-
-
-
-
-
-
-      "path" => path
-    })
-      d = create_driver(conf)
-      formatter = Fluent::Timezone.formatter("+0900", path)
-      mock(Fluent::Timezone).formatter(Time.now.strftime("%z"), path) { formatter }
-      time = event_time("2012-07-18 15:03:00 +0900")
-      metadata = d.instance.metadata("test", time, {})
-      chunk = d.instance.buffer.generate_chunk(metadata)
-      assert_equal expected, d.instance.generate_path(chunk)
-    end
-
-    data(path: { "append" => false },
-         ssl: { "ssl" => true, "ssl_verify_mode" => "invalid" },
-         compress: { "compress" => "invalid" })
-    test "invalid" do |attr|
-      conf = config_element(
-        "ROOT", "", {
-          "namenode" => "server.local:14000",
-          "path" => "/hdfs/path/file.%Y%m%d.%H%M.log"
-        })
-      conf += config_element("", "", attr)
-      assert_raise Fluent::ConfigError do
-        create_driver(conf)
+    class PathFormatTest < self
+      def test_default
+        d = create_driver
+        assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
+        assert_equal '%Y%m%d', d.instance.time_slice_format
+        assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
       end
-    end
-  end
-
-  sub_test_case "sub section configuration" do
-    def test_time_key
-      conf = config_element(
-        "ROOT", "", {
-          "host" => "namenode.local",
-          "path" => "/hdfs/path/file.%Y%m%d.log"
-        }, [
-          config_element(
-            "buffer", "time", {
-              "timekey" => 1
-            })
-        ]
-      )
-      d = create_driver(conf)
-      time = event_time("2012-07-18 15:03:00 +0900")
-      metadata = d.instance.metadata("test", time, {})
-      chunk = d.instance.buffer.generate_chunk(metadata)
-      assert_equal 1, d.instance.buffer_config.timekey
-      assert_equal "/hdfs/path/file.20120718.log", d.instance.generate_path(chunk)
-    end
-  end
 
-
-
-
-
-
-
-
-
+      def test_time_slice_format
+        d = create_driver %[
+          namenode server.local:14000
+          path /hdfs/path/file.%Y%m%d.%H%M.log
+        ]
+        assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
+        assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
+        assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+      end
     end
 
-
-
-
-
-
-
-
-
+    class InvalidTest < self
+      def test_path
+        assert_raise Fluent::ConfigError do
+          d = create_driver %[
+            namenode server.local:14000
+            path /hdfs/path/file.%Y%m%d.%H%M.log
+            append false
+          ]
+        end
+      end
 
-
-
-
-
-
-
-
-
+      def test_ssl
+        assert_raise Fluent::ConfigError do
+          create_driver %[
+            namenode server.local:14000
+            path /hdfs/path/file.%Y%m%d.%H%M.log
+            ssl true
+            ssl_verify_mode invalid
+          ]
+        end
+      end
 
-
-
-
-
-
-
-
-
-
-    end
+      def test_invalid_compress
+        assert_raise Fluent::ConfigError do
+          create_driver %[
+            namenode server.local:14000
+            path /hdfs/path/file.%Y%m%d.%H%M.log
+            compress invalid
+          ]
+        end
+      end
 
-
-
-
-
-
-
-
+      data("%{uuid:hostname}" => "%{uuid:hostname}",
+           "%{uuid:timestamp}" => "%{uuid:timestamp}")
+      def test_obsoleted_placeholders(placeholder)
+        assert_raise Fluent::ConfigError do
+          create_driver %[
+            namenode server.local:14000
+            path /hdfs/path/#{placeholder}/file.%Y%m%d.%H%M.log
+            append false
+          ]
+        end
+      end
     end
   end
 end
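Note: the data(...) blocks in the tests above are test-unit's data-driven tests: each hash entry becomes a labeled run whose value is handed to the test method. Minimal standalone form (illustrative):

    require 'test/unit'

    class DataDrivenExample < Test::Unit::TestCase
      data("positive one" => 1,
           "positive two" => 2) # one labeled run per entry
      def test_positive(value)
        assert value > 0
      end
    end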
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 0.6.0rc1
+  version: 0.6.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-
+date: 2017-04-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -39,7 +39,7 @@ dependencies:
   - !ruby/object:Gem::Version
     version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: appraisal
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -53,47 +53,53 @@ dependencies:
   - !ruby/object:Gem::Version
     version: '0'
 - !ruby/object:Gem::Dependency
-  name:
+  name: snappy
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
    - !ruby/object:Gem::Version
-      version:
+      version: 0.0.13
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
    - !ruby/object:Gem::Version
-      version:
+      version: 0.0.13
 - !ruby/object:Gem::Dependency
-  name:
+  name: fluentd
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
    - !ruby/object:Gem::Version
-      version: 0.
-
+      version: 0.10.59
+    - - "<"
+    - !ruby/object:Gem::Version
+      version: 0.14.0
+  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
    - !ruby/object:Gem::Version
-      version: 0.
+      version: 0.10.59
+    - - "<"
+    - !ruby/object:Gem::Version
+      version: 0.14.0
 - !ruby/object:Gem::Dependency
-  name:
+  name: fluent-mixin-plaintextformatter
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
    - !ruby/object:Gem::Version
-      version: 0.
+      version: 0.2.1
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
    - !ruby/object:Gem::Version
-      version: 0.
+      version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: webhdfs
   requirement: !ruby/object:Gem::Requirement
@@ -163,12 +169,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
     version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "
+  - - ">="
   - !ruby/object:Gem::Version
-    version:
+    version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting