fluent-plugin-webhdfs 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/fluent-plugin-webhdfs.gemspec +3 -1
- data/lib/fluent/plugin/out_webhdfs.rb +4 -20
- metadata +34 -3
- data/lib/fluent/plugin/ext_mixin.rb +0 -207
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-webhdfs"
|
4
|
-
gem.version = "0.0.2"
|
4
|
+
gem.version = "0.0.3"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -15,7 +15,9 @@ Gem::Specification.new do |gem|
|
|
15
15
|
|
16
16
|
gem.add_development_dependency "rake"
|
17
17
|
gem.add_development_dependency "fluentd"
|
18
|
+
gem.add_development_dependency "fluent-mixin-plaintextformatter"
|
18
19
|
gem.add_development_dependency "webhdfs", '>= 0.5.0'
|
19
20
|
gem.add_runtime_dependency "fluentd"
|
21
|
+
gem.add_runtime_dependency "fluent-mixin-plaintextformatter"
|
20
22
|
gem.add_runtime_dependency "webhdfs", '>= 0.5.0'
|
21
23
|
end
|
@@ -1,12 +1,10 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
|
-
|
3
|
+
require 'fluent/mixin/plaintextformatter'
|
4
4
|
|
5
5
|
class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
6
6
|
Fluent::Plugin.register_output('webhdfs', self)
|
7
7
|
|
8
|
-
WEBHDFS_VERSION = 'v1'
|
9
|
-
|
10
8
|
config_set_default :buffer_type, 'memory'
|
11
9
|
config_set_default :time_slice_format, '%Y%m%d'
|
12
10
|
|
@@ -16,13 +14,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
16
14
|
|
17
15
|
config_param :httpfs, :bool, :default => false
|
18
16
|
|
19
|
-
include FluentExt::PlainTextFormatterMixin
|
20
|
-
config_set_default :output_include_time, true
|
21
|
-
config_set_default :output_include_tag, true
|
22
|
-
config_set_default :output_data_type, 'json'
|
23
|
-
config_set_default :field_separator, "\t"
|
24
|
-
config_set_default :add_newline, true
|
25
|
-
config_set_default :remove_prefix, nil
|
17
|
+
include Fluent::Mixin::PlainTextFormatter
|
26
18
|
|
27
19
|
def initialize
|
28
20
|
super
|
@@ -54,12 +46,6 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
54
46
|
end
|
55
47
|
@conn = nil
|
56
48
|
|
57
|
-
@f_separator = case @field_separator
|
58
|
-
when 'SPACE' then ' '
|
59
|
-
when 'COMMA' then ','
|
60
|
-
else "\t"
|
61
|
-
end
|
62
|
-
|
63
49
|
# path => cached_url
|
64
50
|
# @cached_datanode_urls = {}
|
65
51
|
@client = WebHDFS::Client.new(@namenode_host, @namenode_port, @username)
|
@@ -91,10 +77,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
91
77
|
record.to_json
|
92
78
|
end
|
93
79
|
|
94
|
-
def format(tag, time, record)
|
95
|
-
|
96
|
-
time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
|
97
|
-
end
|
80
|
+
# def format(tag, time, record)
|
81
|
+
# end
|
98
82
|
|
99
83
|
def path_format(chunk_key)
|
100
84
|
Time.strptime(chunk_key, @time_slice_format).strftime(@path)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: fluent-mixin-plaintextformatter
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
- !ruby/object:Gem::Dependency
|
47
63
|
name: webhdfs
|
48
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +91,22 @@ dependencies:
|
|
75
91
|
- - ! '>='
|
76
92
|
- !ruby/object:Gem::Version
|
77
93
|
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: fluent-mixin-plaintextformatter
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
78
110
|
- !ruby/object:Gem::Dependency
|
79
111
|
name: webhdfs
|
80
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -104,7 +136,6 @@ files:
|
|
104
136
|
- README.md
|
105
137
|
- Rakefile
|
106
138
|
- fluent-plugin-webhdfs.gemspec
|
107
|
-
- lib/fluent/plugin/ext_mixin.rb
|
108
139
|
- lib/fluent/plugin/out_webhdfs.rb
|
109
140
|
homepage: https://github.com/tagomoris/fluent-plugin-webhdfs
|
110
141
|
licenses: []
|
@@ -1,207 +0,0 @@
|
|
1
|
-
module FluentExt; end
|
2
|
-
|
3
|
-
module FluentExt::PlainTextFormatterMixin
|
4
|
-
#TODO: tests!
|
5
|
-
|
6
|
-
# config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'
|
7
|
-
|
8
|
-
attr_accessor :output_include_time, :output_include_tag, :output_data_type
|
9
|
-
attr_accessor :add_newline, :field_separator
|
10
|
-
attr_accessor :remove_prefix, :default_tag
|
11
|
-
|
12
|
-
attr_accessor :f_separator
|
13
|
-
|
14
|
-
def configure(conf)
|
15
|
-
super
|
16
|
-
|
17
|
-
@output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
|
18
|
-
@output_include_time = true if @output_include_time.nil?
|
19
|
-
|
20
|
-
@output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
|
21
|
-
@output_include_tag = true if @output_include_tag.nil?
|
22
|
-
|
23
|
-
@output_data_type = conf['output_data_type']
|
24
|
-
@output_data_type = 'json' if @output_data_type.nil?
|
25
|
-
|
26
|
-
@f_separator = case conf['field_separator']
|
27
|
-
when 'SPACE' then ' '
|
28
|
-
when 'COMMA' then ','
|
29
|
-
else "\t"
|
30
|
-
end
|
31
|
-
@add_newline = Fluent::Config.bool_value(conf['add_newline'])
|
32
|
-
if @add_newline.nil?
|
33
|
-
@add_newline = true
|
34
|
-
end
|
35
|
-
|
36
|
-
@remove_prefix = conf['remove_prefix']
|
37
|
-
if @remove_prefix
|
38
|
-
@removed_prefix_string = @remove_prefix + '.'
|
39
|
-
@removed_length = @removed_prefix_string.length
|
40
|
-
end
|
41
|
-
if @output_include_tag and @remove_prefix and @remove_prefix.length > 0
|
42
|
-
@default_tag = conf['default_tag']
|
43
|
-
if @default_tag.nil? or @default_tag.length < 1
|
44
|
-
raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
# default timezone: utc
|
49
|
-
if conf['localtime'].nil? and conf['utc'].nil?
|
50
|
-
@utc = true
|
51
|
-
@localtime = false
|
52
|
-
elsif not @localtime and not @utc
|
53
|
-
@utc = true
|
54
|
-
@localtime = false
|
55
|
-
end
|
56
|
-
# mix-in default time formatter (or you can overwrite @timef on your own configure)
|
57
|
-
@timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil
|
58
|
-
|
59
|
-
@custom_attributes = []
|
60
|
-
if @output_data_type == 'json'
|
61
|
-
self.instance_eval {
|
62
|
-
def stringify_record(record)
|
63
|
-
record.to_json
|
64
|
-
end
|
65
|
-
}
|
66
|
-
elsif @output_data_type =~ /^attr:(.*)$/
|
67
|
-
@custom_attributes = $1.split(',')
|
68
|
-
if @custom_attributes.size > 1
|
69
|
-
self.instance_eval {
|
70
|
-
def stringify_record(record)
|
71
|
-
@custom_attributes.map{|attr| (record[attr] || 'NULL').to_s}.join(@f_separator)
|
72
|
-
end
|
73
|
-
}
|
74
|
-
elsif @custom_attributes.size == 1
|
75
|
-
self.instance_eval {
|
76
|
-
def stringify_record(record)
|
77
|
-
(record[@custom_attributes[0]] || 'NULL').to_s
|
78
|
-
end
|
79
|
-
}
|
80
|
-
else
|
81
|
-
raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
|
82
|
-
end
|
83
|
-
else
|
84
|
-
raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
|
85
|
-
end
|
86
|
-
|
87
|
-
if @output_include_time and @output_include_tag
|
88
|
-
if @add_newline and @remove_prefix
|
89
|
-
self.instance_eval {
|
90
|
-
def format(tag,time,record)
|
91
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
92
|
-
tag == @remove_prefix
|
93
|
-
tag = tag[@removed_length..-1] || @default_tag
|
94
|
-
end
|
95
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
|
96
|
-
end
|
97
|
-
}
|
98
|
-
elsif @add_newline
|
99
|
-
self.instance_eval {
|
100
|
-
def format(tag,time,record)
|
101
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
|
102
|
-
end
|
103
|
-
}
|
104
|
-
elsif @remove_prefix
|
105
|
-
self.instance_eval {
|
106
|
-
def format(tag,time,record)
|
107
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
108
|
-
tag == @remove_prefix
|
109
|
-
tag = tag[@removed_length..-1] || @default_tag
|
110
|
-
end
|
111
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
|
112
|
-
end
|
113
|
-
}
|
114
|
-
else
|
115
|
-
self.instance_eval {
|
116
|
-
def format(tag,time,record)
|
117
|
-
@timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
|
118
|
-
end
|
119
|
-
}
|
120
|
-
end
|
121
|
-
elsif @output_include_time
|
122
|
-
if @add_newline
|
123
|
-
self.instance_eval {
|
124
|
-
def format(tag,time,record);
|
125
|
-
@timef.format(time) + @f_separator + stringify_record(record) + "\n"
|
126
|
-
end
|
127
|
-
}
|
128
|
-
else
|
129
|
-
self.instance_eval {
|
130
|
-
def format(tag,time,record);
|
131
|
-
@timef.format(time) + @f_separator + stringify_record(record)
|
132
|
-
end
|
133
|
-
}
|
134
|
-
end
|
135
|
-
elsif @output_include_tag
|
136
|
-
if @add_newline and @remove_prefix
|
137
|
-
self.instance_eval {
|
138
|
-
def format(tag,time,record)
|
139
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
140
|
-
tag == @remove_prefix
|
141
|
-
tag = tag[@removed_length..-1] || @default_tag
|
142
|
-
end
|
143
|
-
tag + @f_separator + stringify_record(record) + "\n"
|
144
|
-
end
|
145
|
-
}
|
146
|
-
elsif @add_newline
|
147
|
-
self.instance_eval {
|
148
|
-
def format(tag,time,record)
|
149
|
-
tag + @f_separator + stringify_record(record) + "\n"
|
150
|
-
end
|
151
|
-
}
|
152
|
-
elsif @remove_prefix
|
153
|
-
self.instance_eval {
|
154
|
-
def format(tag,time,record)
|
155
|
-
if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
|
156
|
-
tag == @remove_prefix
|
157
|
-
tag = tag[@removed_length..-1] || @default_tag
|
158
|
-
end
|
159
|
-
tag + @f_separator + stringify_record(record)
|
160
|
-
end
|
161
|
-
}
|
162
|
-
else
|
163
|
-
self.instance_eval {
|
164
|
-
def format(tag,time,record)
|
165
|
-
tag + @f_separator + stringify_record(record)
|
166
|
-
end
|
167
|
-
}
|
168
|
-
end
|
169
|
-
else # without time, tag
|
170
|
-
if @add_newline
|
171
|
-
self.instance_eval {
|
172
|
-
def format(tag,time,record);
|
173
|
-
stringify_record(record) + "\n"
|
174
|
-
end
|
175
|
-
}
|
176
|
-
else
|
177
|
-
self.instance_eval {
|
178
|
-
def format(tag,time,record);
|
179
|
-
stringify_record(record)
|
180
|
-
end
|
181
|
-
}
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
def stringify_record(record)
|
187
|
-
record.to_json
|
188
|
-
end
|
189
|
-
|
190
|
-
def format(tag, time, record)
|
191
|
-
if tag == @remove_prefix or (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length)
|
192
|
-
tag = tag[@removed_length..-1] || @default_tag
|
193
|
-
end
|
194
|
-
time_str = if @output_include_time
|
195
|
-
@timef.format(time) + @f_separator
|
196
|
-
else
|
197
|
-
''
|
198
|
-
end
|
199
|
-
tag_str = if @output_include_tag
|
200
|
-
tag + @f_separator
|
201
|
-
else
|
202
|
-
''
|
203
|
-
end
|
204
|
-
time_str + tag_str + stringify_record(record) + "\n"
|
205
|
-
end
|
206
|
-
|
207
|
-
end
|