fluent-plugin-webhdfs 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-webhdfs"
4
- gem.version = "0.0.2"
4
+ gem.version = "0.0.3"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -15,7 +15,9 @@ Gem::Specification.new do |gem|
15
15
 
16
16
  gem.add_development_dependency "rake"
17
17
  gem.add_development_dependency "fluentd"
18
+ gem.add_development_dependency "fluent-mixin-plaintextformatter"
18
19
  gem.add_development_dependency "webhdfs", '>= 0.5.0'
19
20
  gem.add_runtime_dependency "fluentd"
21
+ gem.add_runtime_dependency "fluent-mixin-plaintextformatter"
20
22
  gem.add_runtime_dependency "webhdfs", '>= 0.5.0'
21
23
  end
@@ -1,12 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
2
 
3
- require_relative 'ext_mixin'
3
+ require 'fluent/mixin/plaintextformatter'
4
4
 
5
5
  class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
6
6
  Fluent::Plugin.register_output('webhdfs', self)
7
7
 
8
- WEBHDFS_VERSION = 'v1'
9
-
10
8
  config_set_default :buffer_type, 'memory'
11
9
  config_set_default :time_slice_format, '%Y%m%d'
12
10
 
@@ -16,13 +14,7 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
16
14
 
17
15
  config_param :httpfs, :bool, :default => false
18
16
 
19
- include FluentExt::PlainTextFormatterMixin
20
- config_set_default :output_include_time, true
21
- config_set_default :output_include_tag, true
22
- config_set_default :output_data_type, 'json'
23
- config_set_default :field_separator, "\t"
24
- config_set_default :add_newline, true
25
- config_set_default :remove_prefix, nil
17
+ include Fluent::Mixin::PlainTextFormatter
26
18
 
27
19
  def initialize
28
20
  super
@@ -54,12 +46,6 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
54
46
  end
55
47
  @conn = nil
56
48
 
57
- @f_separator = case @field_separator
58
- when 'SPACE' then ' '
59
- when 'COMMA' then ','
60
- else "\t"
61
- end
62
-
63
49
  # path => cached_url
64
50
  # @cached_datanode_urls = {}
65
51
  @client = WebHDFS::Client.new(@namenode_host, @namenode_port, @username)
@@ -91,10 +77,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
91
77
  record.to_json
92
78
  end
93
79
 
94
- def format(tag, time, record)
95
- time_str = @timef.format(time)
96
- time_str + @f_separator + tag + @f_separator + record_to_string(record) + @line_end
97
- end
80
+ # def format(tag, time, record)
81
+ # end
98
82
 
99
83
  def path_format(chunk_key)
100
84
  Time.strptime(chunk_key, @time_slice_format).strftime(@path)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-18 00:00:00.000000000 Z
12
+ date: 2012-07-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: fluent-mixin-plaintextformatter
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: webhdfs
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -75,6 +91,22 @@ dependencies:
75
91
  - - ! '>='
76
92
  - !ruby/object:Gem::Version
77
93
  version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: fluent-mixin-plaintextformatter
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
78
110
  - !ruby/object:Gem::Dependency
79
111
  name: webhdfs
80
112
  requirement: !ruby/object:Gem::Requirement
@@ -104,7 +136,6 @@ files:
104
136
  - README.md
105
137
  - Rakefile
106
138
  - fluent-plugin-webhdfs.gemspec
107
- - lib/fluent/plugin/ext_mixin.rb
108
139
  - lib/fluent/plugin/out_webhdfs.rb
109
140
  homepage: https://github.com/tagomoris/fluent-plugin-webhdfs
110
141
  licenses: []
@@ -1,207 +0,0 @@
1
- module FluentExt; end
2
-
3
- module FluentExt::PlainTextFormatterMixin
4
- #TODO: tests!
5
-
6
- # config_param :output_data_type, :string, :default => 'json' # or 'attr:field' or 'attr:field1,field2,field3(...)'
7
-
8
- attr_accessor :output_include_time, :output_include_tag, :output_data_type
9
- attr_accessor :add_newline, :field_separator
10
- attr_accessor :remove_prefix, :default_tag
11
-
12
- attr_accessor :f_separator
13
-
14
- def configure(conf)
15
- super
16
-
17
- @output_include_time = Fluent::Config.bool_value(conf['output_include_time'])
18
- @output_include_time = true if @output_include_time.nil?
19
-
20
- @output_include_tag = Fluent::Config.bool_value(conf['output_include_tag'])
21
- @output_include_tag = true if @output_include_tag.nil?
22
-
23
- @output_data_type = conf['output_data_type']
24
- @output_data_type = 'json' if @output_data_type.nil?
25
-
26
- @f_separator = case conf['field_separator']
27
- when 'SPACE' then ' '
28
- when 'COMMA' then ','
29
- else "\t"
30
- end
31
- @add_newline = Fluent::Config.bool_value(conf['add_newline'])
32
- if @add_newline.nil?
33
- @add_newline = true
34
- end
35
-
36
- @remove_prefix = conf['remove_prefix']
37
- if @remove_prefix
38
- @removed_prefix_string = @remove_prefix + '.'
39
- @removed_length = @removed_prefix_string.length
40
- end
41
- if @output_include_tag and @remove_prefix and @remove_prefix.length > 0
42
- @default_tag = conf['default_tag']
43
- if @default_tag.nil? or @default_tag.length < 1
44
- raise Fluent::ConfigError, "Missing 'default_tag' with output_include_tag and remove_prefix."
45
- end
46
- end
47
-
48
- # default timezone: utc
49
- if conf['localtime'].nil? and conf['utc'].nil?
50
- @utc = true
51
- @localtime = false
52
- elsif not @localtime and not @utc
53
- @utc = true
54
- @localtime = false
55
- end
56
- # mix-in default time formatter (or you can overwrite @timef on your own configure)
57
- @timef = @output_include_time ? Fluent::TimeFormatter.new(@time_format, @localtime) : nil
58
-
59
- @custom_attributes = []
60
- if @output_data_type == 'json'
61
- self.instance_eval {
62
- def stringify_record(record)
63
- record.to_json
64
- end
65
- }
66
- elsif @output_data_type =~ /^attr:(.*)$/
67
- @custom_attributes = $1.split(',')
68
- if @custom_attributes.size > 1
69
- self.instance_eval {
70
- def stringify_record(record)
71
- @custom_attributes.map{|attr| (record[attr] || 'NULL').to_s}.join(@f_separator)
72
- end
73
- }
74
- elsif @custom_attributes.size == 1
75
- self.instance_eval {
76
- def stringify_record(record)
77
- (record[@custom_attributes[0]] || 'NULL').to_s
78
- end
79
- }
80
- else
81
- raise Fluent::ConfigError, "Invalid attributes specification: '#{@output_data_type}', needs one or more attributes."
82
- end
83
- else
84
- raise Fluent::ConfigError, "Invalid output_data_type: '#{@output_data_type}'. specify 'json' or 'attr:ATTRIBUTE_NAME' or 'attr:ATTR1,ATTR2,...'"
85
- end
86
-
87
- if @output_include_time and @output_include_tag
88
- if @add_newline and @remove_prefix
89
- self.instance_eval {
90
- def format(tag,time,record)
91
- if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
92
- tag == @remove_prefix
93
- tag = tag[@removed_length..-1] || @default_tag
94
- end
95
- @timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
96
- end
97
- }
98
- elsif @add_newline
99
- self.instance_eval {
100
- def format(tag,time,record)
101
- @timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record) + "\n"
102
- end
103
- }
104
- elsif @remove_prefix
105
- self.instance_eval {
106
- def format(tag,time,record)
107
- if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
108
- tag == @remove_prefix
109
- tag = tag[@removed_length..-1] || @default_tag
110
- end
111
- @timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
112
- end
113
- }
114
- else
115
- self.instance_eval {
116
- def format(tag,time,record)
117
- @timef.format(time) + @f_separator + tag + @f_separator + stringify_record(record)
118
- end
119
- }
120
- end
121
- elsif @output_include_time
122
- if @add_newline
123
- self.instance_eval {
124
- def format(tag,time,record);
125
- @timef.format(time) + @f_separator + stringify_record(record) + "\n"
126
- end
127
- }
128
- else
129
- self.instance_eval {
130
- def format(tag,time,record);
131
- @timef.format(time) + @f_separator + stringify_record(record)
132
- end
133
- }
134
- end
135
- elsif @output_include_tag
136
- if @add_newline and @remove_prefix
137
- self.instance_eval {
138
- def format(tag,time,record)
139
- if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
140
- tag == @remove_prefix
141
- tag = tag[@removed_length..-1] || @default_tag
142
- end
143
- tag + @f_separator + stringify_record(record) + "\n"
144
- end
145
- }
146
- elsif @add_newline
147
- self.instance_eval {
148
- def format(tag,time,record)
149
- tag + @f_separator + stringify_record(record) + "\n"
150
- end
151
- }
152
- elsif @remove_prefix
153
- self.instance_eval {
154
- def format(tag,time,record)
155
- if (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length) or
156
- tag == @remove_prefix
157
- tag = tag[@removed_length..-1] || @default_tag
158
- end
159
- tag + @f_separator + stringify_record(record)
160
- end
161
- }
162
- else
163
- self.instance_eval {
164
- def format(tag,time,record)
165
- tag + @f_separator + stringify_record(record)
166
- end
167
- }
168
- end
169
- else # without time, tag
170
- if @add_newline
171
- self.instance_eval {
172
- def format(tag,time,record);
173
- stringify_record(record) + "\n"
174
- end
175
- }
176
- else
177
- self.instance_eval {
178
- def format(tag,time,record);
179
- stringify_record(record)
180
- end
181
- }
182
- end
183
- end
184
- end
185
-
186
- def stringify_record(record)
187
- record.to_json
188
- end
189
-
190
- def format(tag, time, record)
191
- if tag == @remove_prefix or (tag[0, @removed_length] == @removed_prefix_string and tag.length > @removed_length)
192
- tag = tag[@removed_length..-1] || @default_tag
193
- end
194
- time_str = if @output_include_time
195
- @timef.format(time) + @f_separator
196
- else
197
- ''
198
- end
199
- tag_str = if @output_include_tag
200
- tag + @f_separator
201
- else
202
- ''
203
- end
204
- time_str + tag_str + stringify_record(record) + "\n"
205
- end
206
-
207
- end