fluent-plugin-webhdfs 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +25 -2
- data/fluent-plugin-webhdfs.gemspec +3 -1
- data/lib/fluent/plugin/out_webhdfs.rb +3 -0
- data/test/plugin/test_out_webhdfs.rb +9 -0
- metadata +34 -2
data/README.md
CHANGED
@@ -63,10 +63,33 @@ Store data as TSV (TAB separated values) of specified keys, without time, with t
|
|
63
63
|
|
64
64
|
If a message doesn't have a specified attribute, fluent-plugin-webhdfs outputs 'NULL' instead of the value.
|
65
65
|
|
66
|
+
### Performance notifications
|
67
|
+
|
68
|
+
Writing data to a single HDFS file from 2 or more fluentd nodes creates many bad blocks in HDFS. If you want to run 2 or more fluentd nodes with fluent-plugin-webhdfs, you should configure a distinct 'path' for each node.
|
69
|
+
You can use '${hostname}' or '${uuid:random}' placeholders in configuration for this purpose.
|
70
|
+
|
71
|
+
For hostname:
|
72
|
+
|
73
|
+
<match access.**>
|
74
|
+
type webhdfs
|
75
|
+
host namenode.your.cluster.local
|
76
|
+
port 50070
|
77
|
+
path /log/access/%Y%m%d/${hostname}.log
|
78
|
+
</match>
|
79
|
+
|
80
|
+
Or with a random filename (only to avoid duplicated file names):
|
81
|
+
|
82
|
+
<match access.**>
|
83
|
+
type webhdfs
|
84
|
+
host namenode.your.cluster.local
|
85
|
+
port 50070
|
86
|
+
path /log/access/%Y%m%d/${uuid:random}.log
|
87
|
+
</match>
|
88
|
+
|
89
|
+
With the configurations above, you can handle all files matching '/log/access/20120820/*' as the access logs for the specified timeslice.
|
90
|
+
|
66
91
|
## TODO
|
67
92
|
|
68
|
-
* long run test
|
69
|
-
* over webhdfs and httpfs
|
70
93
|
* patches welcome!
|
71
94
|
|
72
95
|
## Copyright
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-webhdfs"
|
4
|
-
gem.version = "0.0.5"
|
4
|
+
gem.version = "0.1.0"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -16,8 +16,10 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.add_development_dependency "rake"
|
17
17
|
gem.add_development_dependency "fluentd"
|
18
18
|
gem.add_development_dependency "fluent-mixin-plaintextformatter"
|
19
|
+
gem.add_development_dependency "fluent-mixin-config-placeholders"
|
19
20
|
gem.add_development_dependency "webhdfs", '>= 0.5.1'
|
20
21
|
gem.add_runtime_dependency "fluentd"
|
21
22
|
gem.add_runtime_dependency "fluent-mixin-plaintextformatter"
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-config-placeholders"
|
22
24
|
gem.add_runtime_dependency "webhdfs", '>= 0.5.1'
|
23
25
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
|
+
require 'fluent/mixin/config_placeholders'
|
3
4
|
require 'fluent/mixin/plaintextformatter'
|
4
5
|
|
5
6
|
class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
@@ -12,6 +13,8 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
12
13
|
config_param :port, :integer, :default => 50070
|
13
14
|
config_param :namenode, :string, :default => nil # host:port
|
14
15
|
|
16
|
+
include Fluent::Mixin::ConfigPlaceholders
|
17
|
+
|
15
18
|
config_param :path, :string
|
16
19
|
config_param :username, :string, :default => nil
|
17
20
|
|
@@ -41,6 +41,15 @@ username hdfs_user
|
|
41
41
|
assert_equal 'hdfs_user', d.instance.username
|
42
42
|
end
|
43
43
|
|
44
|
+
def test_configure_placeholders
|
45
|
+
d = create_driver %[
|
46
|
+
hostname testing.node.local
|
47
|
+
namenode server.local:50070
|
48
|
+
path /hdfs/${hostname}/file.%Y%m%d%H.log
|
49
|
+
]
|
50
|
+
assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
|
51
|
+
end
|
52
|
+
|
44
53
|
def test_path_format
|
45
54
|
d = create_driver
|
46
55
|
assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: fluent-mixin-config-placeholders
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
- !ruby/object:Gem::Dependency
|
63
79
|
name: webhdfs
|
64
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -107,6 +123,22 @@ dependencies:
|
|
107
123
|
- - ! '>='
|
108
124
|
- !ruby/object:Gem::Version
|
109
125
|
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: fluent-mixin-config-placeholders
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
110
142
|
- !ruby/object:Gem::Dependency
|
111
143
|
name: webhdfs
|
112
144
|
requirement: !ruby/object:Gem::Requirement
|