fluent-plugin-webhdfs 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +13 -0
- data/README.md +15 -0
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +28 -5
- data/test/plugin/test_out_webhdfs.rb +8 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56b8e7efe377002f3f092f79152826975098b9f8
|
4
|
+
data.tar.gz: 77ef93b880901d5136980b63125f0a36b620fd9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8f2660f0c17f7956fba570deea3ed4a3301e8ea54f9b3f0e67232c5f24ba508949cfd259398eb22d4a20be1d4a83e1637ddf68c730919e619872b2382d5eac2
|
7
|
+
data.tar.gz: 3d834d669fab4f4986e46a9e439afe983b53264dd7555db1c48c3565ac4a3cbf82b3d5ebb5c90d0259edde0463b0cfadffde2b93f8a9020e877fe9160e3cc52b
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -153,6 +153,21 @@ If you were using unstable NameNodes and have wanted to ignore NameNode errors
|
|
153
153
|
ignore_start_check_error true
|
154
154
|
</match>
|
155
155
|
|
156
|
+
### For unstable Datanodes
|
157
|
+
|
158
|
+
With unstable datanodes that frequently go down, appending over WebHDFS may produce broken files. In such cases, specify `append no` and include the `${chunk_id}` placeholder in `path`.
|
159
|
+
|
160
|
+
<match access.**>
|
161
|
+
type webhdfs
|
162
|
+
host namenode.your.cluster.local
|
163
|
+
port 50070
|
164
|
+
|
165
|
+
append no
|
166
|
+
path /log/access/%Y%m%d/${hostname}.${chunk_id}.log
|
167
|
+
</match>
|
168
|
+
|
169
|
+
`out_webhdfs` creates a new file on HDFS for each flush of fluentd, with the chunk id in its name. You no longer need to worry about files broken by append operations.
|
170
|
+
|
156
171
|
## TODO
|
157
172
|
|
158
173
|
* configuration example for Hadoop Namenode HA
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-webhdfs"
|
5
|
-
gem.version = "0.2.0"
|
5
|
+
gem.version = "0.2.1"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
|
@@ -39,6 +39,10 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
39
39
|
|
40
40
|
config_param :default_tag, :string, :default => 'tag_missing'
|
41
41
|
|
42
|
+
config_param :append, :bool, :default => true
|
43
|
+
|
44
|
+
CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
|
45
|
+
|
42
46
|
def initialize
|
43
47
|
super
|
44
48
|
require 'net/http'
|
@@ -91,6 +95,12 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
91
95
|
else
|
92
96
|
@client_standby = nil
|
93
97
|
end
|
98
|
+
|
99
|
+
if not @append
|
100
|
+
if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
|
101
|
+
raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
|
102
|
+
end
|
103
|
+
end
|
94
104
|
end
|
95
105
|
|
96
106
|
def prepare_client(host, port, username)
|
@@ -160,18 +170,31 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
|
|
160
170
|
end
|
161
171
|
end
|
162
172
|
|
173
|
+
def chunk_unique_id_to_str(unique_id)
|
174
|
+
unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
|
175
|
+
end
|
176
|
+
|
163
177
|
# TODO check conflictions
|
164
178
|
|
165
179
|
def send_data(path, data)
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
180
|
+
if @append
|
181
|
+
begin
|
182
|
+
@client.append(path, data)
|
183
|
+
rescue WebHDFS::FileNotFoundError
|
184
|
+
@client.create(path, data)
|
185
|
+
end
|
186
|
+
else
|
187
|
+
@client.create(path, data, {'overwrite' => 'true'})
|
170
188
|
end
|
171
189
|
end
|
172
190
|
|
173
191
|
def write(chunk)
|
174
|
-
hdfs_path =
|
192
|
+
hdfs_path = if @append
|
193
|
+
path_format(chunk.key)
|
194
|
+
else
|
195
|
+
path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
|
196
|
+
end
|
197
|
+
|
175
198
|
failovered = false
|
176
199
|
begin
|
177
200
|
send_data(hdfs_path, chunk.read)
|
@@ -64,5 +64,13 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
|
|
64
64
|
assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
|
65
65
|
assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
|
66
66
|
assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
|
67
|
+
|
68
|
+
assert_raise Fluent::ConfigError do
|
69
|
+
d = create_driver %[
|
70
|
+
namenode server.local:14000
|
71
|
+
path /hdfs/path/file.%Y%m%d.%H%M.log
|
72
|
+
append false
|
73
|
+
]
|
74
|
+
end
|
67
75
|
end
|
68
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-webhdfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -88,6 +88,7 @@ extensions: []
|
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
90
|
- .gitignore
|
91
|
+
- .travis.yml
|
91
92
|
- Gemfile
|
92
93
|
- LICENSE.txt
|
93
94
|
- README.md
|
@@ -115,10 +116,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
116
|
version: '0'
|
116
117
|
requirements: []
|
117
118
|
rubyforge_project:
|
118
|
-
rubygems_version: 2.0.
|
119
|
+
rubygems_version: 2.0.2
|
119
120
|
signing_key:
|
120
121
|
specification_version: 4
|
121
122
|
summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
|
122
123
|
test_files:
|
123
124
|
- test/helper.rb
|
124
125
|
- test/plugin/test_out_webhdfs.rb
|
126
|
+
has_rdoc:
|