fluent-plugin-webhdfs 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/fluent-plugin-webhdfs.gemspec +3 -1
- data/lib/fluent/plugin/out_webhdfs.rb +4 -20
- metadata +34 -3
- data/lib/fluent/plugin/ext_mixin.rb +0 -207
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-webhdfs"
|
4
|
-
gem.version = "0.0.2"
|
4
|
+
gem.version = "0.0.3"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -15,7 +15,9 @@ Gem::Specification.new do |gem|
|
|
15
15
|
|
16
16
|
gem.add_development_dependency "rake"
|
17
17
|
gem.add_development_dependency "fluentd"
|
18
|
+
gem.add_development_dependency "fluent-mixin-plaintextformatter"
|
18
19
|
gem.add_development_dependency "webhdfs", '>= 0.5.0'
|
19
20
|
gem.add_runtime_dependency "fluentd"
|
21
|
+
gem.add_runtime_dependency "fluent-mixin-plaintextformatter"
|
20
22
|
gem.add_runtime_dependency "webhdfs", '>= 0.5.0'
|
21
23
|
end
|
@@ -1,12 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
|
-
|
3
|
+
require 'fluent/mixin/plaintextformatter'
|
4
4
|
|
5
5
|
class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
6
6
|
Fluent::Plugin.register_output('webhdfs', self)
|
7
7
|
|
8
|
-
WEBHDFS_VERSION = 'v1'
|
9
|
-
|
10
8
|
config_set_default :buffer_type, 'memory'
|
11
9
|
config_set_default :time_slice_format, '%Y%m%d'
|
12
10
|
|
@@ -16,13 +14,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
16
14
|
|
17
15
|
config_param :httpfs, :bool, :default => false
|
18
16
|
|
19
|
-
include FluentExt::PlainTextFormatterMixin
|
20
|
-
config_set_default :output_include_time, true
|
21
|
-
config_set_default :output_include_tag, true
|
22
|
-
config_set_default :output_data_type, 'json'
|
23
|
-
config_set_default :field_separator, "\t"
|
24
|
-
config_set_default :add_newline, true
|
25
|
-
config_set_default :remove_prefix, nil
|
17
|
+
include Fluent::Mixin::PlainTextFormatter
|
26
18
|
|
27
19
|
def initialize
|
28
20
|
super
|
@@ -54,12 +46,6 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
54
46
|
end
|
55
47
|
@conn = nil
|
56
48
|
|
57
|
-
@f_separator = case @field_separator
|
58
|
-
when 'SPACE' then ' '
|
59
|
-
when 'COMMA' then ','
|
60
|
-
else "\t"
|
61
|
-
end
|
62
|
-
|
63
49
|
# path => cached_url
|
64
50
|
# @cached_datanode_urls = {}
|
65
51
|
@client = WebHDFS::Client.new(@namenode_host, @namenode_port, @username)
|
@@ -91,10 +77,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
91
77
|
record.to_json
|
92
78
|
end
|
93
79
|
|
94
|
-
def format(tag, time, record)
|
95
|
-
|
96
|
-
time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
|
97
|
-
end
|
80
|
+
# def format(tag, time, record)
|
81
|
+
# end
|
98
82
|
|
99
83
|
def path_format(chunk_key)
|
100
84
|
Time.strptime(chunk_key, @time_slice_format).strftime(@path)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: fluent-mixin-plaintextformatter
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
- !ruby/object:Gem::Dependency
|
47
63
|
name: webhdfs
|
48
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +91,22 @@ dependencies:
|
|
75
91
|
- - ! '>='
|
76
92
|
- !ruby/object:Gem::Version
|
77
93
|
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: fluent-mixin-plaintextformatter
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
78
110
|
- !ruby/object:Gem::Dependency
|
79
111
|
name: webhdfs
|
80
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -104,7 +136,6 @@ files:
|
|
104
136
|
- README.md
|
105
137
|
- Rakefile
|
106
138
|
- fluent-plugin-webhdfs.gemspec
|
107
|
-
- lib/fluent/plugin/ext_mixin.rb
|
108
139
|
- lib/fluent/plugin/out_webhdfs.rb
|
109
140
|
homepage: https://github.com/tagomoris/fluent-plugin-webhdfs
|
110
141
|
licenses: []
|
@@ -1,207 +0,0 @@
|
|
1
|
-
module FluentExt; end
|
2
|
-
|
3
|
-
module FluentExt::PlainTextFormatterMixin
|
4
|
-
#TODO: tests!
|
5
|
-
|
6
|
-
# config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'
|
7
|
-
|
8
|
-
attr_accessor :output_include_time, :output_include_tag, :output_data_type
|
9
|
-
attr_accessor :add_newline, :field_separator
|
10
|
-
attr_accessor :remove_prefix, :default_tag
|
11
|
-
|
12
|
-
attr_accessor :f_separator
|
13
|
-
|
14
|
-
def configure(conf)
|
15
|
-
super
|
16
|
-
|
17
|
-
@output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
|
18
|
-
@output_include_time = true if @output_include_time.nil?
|
19
|
-
|
20
|
-
@output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
|
21
|
-
@output_include_tag = true if @output_include_tag.nil?
|
22
|
-
|
23
|
-
@output_data_type = conf['output_data_type']
|
24
|
-
@output_data_type = 'json' if @output_data_type.nil?
|
25
|
-
|
26
|
-
@f_separator = case conf['field_separator']
|
27
|
-
when 'SPACE' then ' '
|
28
|
-
when 'COMMA' then ','
|
29
|
-
else "\t"
|
30
|
-
end
|
31
|
-
@add_newline = Fluent::Config.bool_value(conf['add_newline'])
|
32
|
-
if @add_newline.nil?
|
33
|
-
@add_newline = true
|
34
|
-
end
|
35
|
-
|
36
|
-
@remove_prefix = conf['remove_prefix']
|
37
|
-
if @remove_prefix
|
38
|
-
@removed_prefix_string = @remove_prefix + '.'
|
39
|
-
@removed_length = @removed_prefix_string.length
|
40
|
-
end
|
41
|
-
if @output_include_tag and @remove_prefix and @remove_prefix.length > 0
|
42
|
-
@default_tag = conf['default_tag']
|
43
|
-
if @default_tag.nil? or @default_tag.length < 1
|
44
|
-
raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
# default timezone: utc
|
49
|
-
if conf['localtime'].nil? and conf['utc'].nil?
|
50
|
-
@utc = true
|
51
|
-
@localtime = false
|
52
|
-
elsif not @localtime and not @utc
|
53
|
-
@utc = true
|
54
|
-
@localtime = false
|
55
|
-
end
|
56
|
-
# mix-in default time formatter (or you can overwrite @timef on your own configure)
|
57
|
-
@timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil
|
58
|
-
|
59
|
-
@custom_attributes = []
|
60
|
-
if @output_data_type == 'json'
|
61
|
-
self.instance_eval {
|
62
|
-
def stringify_record(record)
|
63
|
-
record.to_json
|
64
|
-
end
|
65
|
-
}
|
66
|
-
elsif @output_data_type =~ /^attr:(.*)$/
|
67
|
-
@custom_attributes = $1.split(',')
|
68
|
-
if @custom_attributes.size > 1
|
69
|
-
self.instance_eval {
|
70
|
-
def stringify_record(record)
|
71
|
-
@custom_attributes.map{|attr| (record[attr] || 'NULL').to_s}.join(@f_separator)
|
72
|
-
end
|
73
|
-
}
|
74
|
-
elsif @custom_attributes.size == 1
|
75
|
-
self.instance_eval {
|
76
|
-
def stringify_record(record)
|
77
|
-
(record[@custom_attributes[0]] || 'NULL').to_s
|
78
|
-
end
|
79
|
-
}
|
80
|
-
else
|
81
|
-
raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
|
82
|
-
end
|
83
|
-
else
|
84
|
-
raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
|
85
|
-
end
|
86
|
-
|
87
|
-
if @output_include_time and @output_include_tag
|
88
|
-
if @add_newline and @remove_prefix
|
89
|
-
self.instance_eval {
|
90
|
-
def format(tag,time,record)
|
91
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
92
|
-
tag == @remove_prefix
|
93
|
-
tag = tag[@removed_length..-1] || @default_tag
|
94
|
-
end
|
95
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
|
96
|
-
end
|
97
|
-
}
|
98
|
-
elsif @add_newline
|
99
|
-
self.instance_eval {
|
100
|
-
def format(tag,time,record)
|
101
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
|
102
|
-
end
|
103
|
-
}
|
104
|
-
elsif @remove_prefix
|
105
|
-
self.instance_eval {
|
106
|
-
def format(tag,time,record)
|
107
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
108
|
-
tag == @remove_prefix
|
109
|
-
tag = tag[@removed_length..-1] || @default_tag
|
110
|
-
end
|
111
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
|
112
|
-
end
|
113
|
-
}
|
114
|
-
else
|
115
|
-
self.instance_eval {
|
116
|
-
def format(tag,time,record)
|
117
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
|
118
|
-
end
|
119
|
-
}
|
120
|
-
end
|
121
|
-
elsif @output_include_time
|
122
|
-
if @add_newline
|
123
|
-
self.instance_eval {
|
124
|
-
def format(tag,time,record);
|
125
|
-
@timef.format(time) + @f_separator + stringify_record(record) + "\n"
|
126
|
-
end
|
127
|
-
}
|
128
|
-
else
|
129
|
-
self.instance_eval {
|
130
|
-
def format(tag,time,record);
|
131
|
-
@timef.format(time) + @f_separator + stringify_record(record)
|
132
|
-
end
|
133
|
-
}
|
134
|
-
end
|
135
|
-
elsif @output_include_tag
|
136
|
-
if @add_newline and @remove_prefix
|
137
|
-
self.instance_eval {
|
138
|
-
def format(tag,time,record)
|
139
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
140
|
-
tag == @remove_prefix
|
141
|
-
tag = tag[@removed_length..-1] || @default_tag
|
142
|
-
end
|
143
|
-
tag + @f_separator + stringify_record(record) + "\n"
|
144
|
-
end
|
145
|
-
}
|
146
|
-
elsif @add_newline
|
147
|
-
self.instance_eval {
|
148
|
-
def format(tag,time,record)
|
149
|
-
tag + @f_separator + stringify_record(record) + "\n"
|
150
|
-
end
|
151
|
-
}
|
152
|
-
elsif @remove_prefix
|
153
|
-
self.instance_eval {
|
154
|
-
def format(tag,time,record)
|
155
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
156
|
-
tag == @remove_prefix
|
157
|
-
tag = tag[@removed_length..-1] || @default_tag
|
158
|
-
end
|
159
|
-
tag + @f_separator + stringify_record(record)
|
160
|
-
end
|
161
|
-
}
|
162
|
-
else
|
163
|
-
self.instance_eval {
|
164
|
-
def format(tag,time,record)
|
165
|
-
tag + @f_separator + stringify_record(record)
|
166
|
-
end
|
167
|
-
}
|
168
|
-
end
|
169
|
-
else # without time, tag
|
170
|
-
if @add_newline
|
171
|
-
self.instance_eval {
|
172
|
-
def format(tag,time,record);
|
173
|
-
stringify_record(record) + "\n"
|
174
|
-
end
|
175
|
-
}
|
176
|
-
else
|
177
|
-
self.instance_eval {
|
178
|
-
def format(tag,time,record);
|
179
|
-
stringify_record(record)
|
180
|
-
end
|
181
|
-
}
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
def stringify_record(record)
|
187
|
-
record.to_json
|
188
|
-
end
|
189
|
-
|
190
|
-
def format(tag, time, record)
|
191
|
-
if tag == @remove_prefix or (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length)
|
192
|
-
tag = tag[@removed_length..-1] || @default_tag
|
193
|
-
end
|
194
|
-
time_str = if @output_include_time
|
195
|
-
@timef.format(time) + @f_separator
|
196
|
-
else
|
197
|
-
''
|
198
|
-
end
|
199
|
-
tag_str = if @output_include_tag
|
200
|
-
tag + @f_separator
|
201
|
-
else
|
202
|
-
''
|
203
|
-
end
|
204
|
-
time_str + tag_str + stringify_record(record) + "\n"
|
205
|
-
end
|
206
|
-
|
207
|
-
end
|