fluent-plugin-webhdfs 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +13 -0
- data/README.md +15 -0
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +28 -5
- data/test/plugin/test_out_webhdfs.rb +8 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 56b8e7efe377002f3f092f79152826975098b9f8
+  data.tar.gz: 77ef93b880901d5136980b63125f0a36b620fd9c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d8f2660f0c17f7956fba570deea3ed4a3301e8ea54f9b3f0e67232c5f24ba508949cfd259398eb22d4a20be1d4a83e1637ddf68c730919e619872b2382d5eac2
+  data.tar.gz: 3d834d669fab4f4986e46a9e439afe983b53264dd7555db1c48c3565ac4a3cbf82b3d5ebb5c90d0259edde0463b0cfadffde2b93f8a9020e877fe9160e3cc52b
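The SHA1 and SHA512 entries above let a consumer verify the two archives packed inside the .gem file. A minimal verification sketch in Ruby (not part of the gem; it assumes metadata.gz and data.tar.gz have already been extracted from the .gem archive into the current directory alongside checksums.yaml):

    require 'digest'
    require 'yaml'

    checksums = YAML.load_file('checksums.yaml')
    %w[metadata.gz data.tar.gz].each do |name|
      actual = Digest::SHA512.hexdigest(File.binread(name))
      status = actual == checksums['SHA512'][name] ? 'OK' : 'MISMATCH'
      puts "#{name}: #{status}"
    end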
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -153,6 +153,21 @@ If you were usging unstable NameNodes and have wanted to ignore NameNode errors
       ignore_start_check_error true
     </match>
 
+### For unstable Datanodes
+
+With unstable datanodes that frequently downs, appending over WebHDFS may produce broken files. In such cases, specify `append no` and `${chunk_id}` parameter.
+
+    <match access.**>
+      type webhdfs
+      host namenode.your.cluster.local
+      port 50070
+
+      append no
+      path /log/access/%Y%m%d/${hostname}.${chunk_id}.log
+    </match>
+
+`out_webhdfs` creates new files on hdfs per flush of fluentd, with chunk id. You shouldn't care broken files from append operations.
+
 ## TODO
 
 * configuration example for Hadoop Namenode HA
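To make the new README section concrete: with `append no`, every flush writes to its own file because `${chunk_id}` in the path is replaced by a hex string derived from the buffer chunk's unique id. A rough illustration of the resulting file name (the hostname and chunk id values are invented, and the substitution below only mimics what the plugin does internally):

    path = '/log/access/%Y%m%d/${hostname}.${chunk_id}.log'
    resolved = Time.utc(2013, 7, 23).strftime(path)
                 .sub('${hostname}', 'web01')
                 .sub('${chunk_id}', '4f1e2d3c4b5a69788796a5b4c3d2e1f0')
    puts resolved
    # => /log/access/20130723/web01.4f1e2d3c4b5a69788796a5b4c3d2e1f0.log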
data/fluent-plugin-webhdfs.gemspec
CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "0.2.0"
+  gem.version = "0.2.1"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
data/lib/fluent/plugin/out_webhdfs.rb
CHANGED
@@ -39,6 +39,10 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
 
   config_param :default_tag, :string, :default => 'tag_missing'
 
+  config_param :append, :bool, :default => true
+
+  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
+
   def initialize
     super
     require 'net/http'
@@ -91,6 +95,12 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
     else
       @client_standby = nil
     end
+
+    if not @append
+      if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
+        raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
+      end
+    end
   end
 
   def prepare_client(host, port, username)
@@ -160,18 +170,31 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
     end
   end
 
+  def chunk_unique_id_to_str(unique_id)
+    unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
+  end
+
   # TODO check conflictions
 
   def send_data(path, data)
-    begin
-      @client.append(path, data)
-    rescue WebHDFS::FileNotFoundError
-      @client.create(path, data)
+    if @append
+      begin
+        @client.append(path, data)
+      rescue WebHDFS::FileNotFoundError
+        @client.create(path, data)
+      end
+    else
+      @client.create(path, data, {'overwrite' => 'true'})
     end
   end
 
   def write(chunk)
-    hdfs_path = path_format(chunk.key)
+    hdfs_path = if @append
+                  path_format(chunk.key)
+                else
+                  path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
+                end
+
     failovered = false
     begin
       send_data(hdfs_path, chunk.read)
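The core of the new code path is `chunk_unique_id_to_str`, which hex-encodes the raw bytes of a Fluentd buffer chunk id so it can appear in a file name. A self-contained sketch of that conversion (the sample bytes are invented; in the plugin, Fluentd supplies the real unique_id for each chunk):

    CHUNK_ID_PLACE_HOLDER = '${chunk_id}'

    def chunk_unique_id_to_str(unique_id)
      unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
    end

    unique_id = "\x0f\xa0\x42\xff".b    # four sample bytes
    puts chunk_unique_id_to_str(unique_id)
    # => "0fa042ff"

    path = "/log/access/#{CHUNK_ID_PLACE_HOLDER}.log"
    puts path.gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(unique_id))
    # => "/log/access/0fa042ff.log"

Because a retried flush reuses the same chunk id, the `create` with `'overwrite' => 'true'` in `send_data` rewrites the same per-chunk file rather than appending to a possibly broken one.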
data/test/plugin/test_out_webhdfs.rb
CHANGED
@@ -64,5 +64,13 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
     assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
     assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
     assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+
+    assert_raise Fluent::ConfigError do
+      d = create_driver %[
+        namenode server.local:14000
+        path /hdfs/path/file.%Y%m%d.%H%M.log
+        append false
+      ]
+    end
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -88,6 +88,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- .travis.yml
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -115,10 +116,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.
+rubygems_version: 2.0.2
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
 test_files:
 - test/helper.rb
 - test/plugin/test_out_webhdfs.rb
+has_rdoc: