fluent-plugin-webhdfs 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b109af994a373ce05a964a0b1560bfad64b08f6d
4
- data.tar.gz: 3bf1cc0de4b54d399158ead361f90825d4f8ba92
3
+ metadata.gz: 56b8e7efe377002f3f092f79152826975098b9f8
4
+ data.tar.gz: 77ef93b880901d5136980b63125f0a36b620fd9c
5
5
  SHA512:
6
- metadata.gz: e1ca791673549cc32e874360d07c57005af2a2c88e1d7b3ad6a5bd491cfa01d1724f196a793b8294eca3b8448792d3c2b148006ac61ad24b8e583050bcd2a789
7
- data.tar.gz: ad02485500aa4af1b1f8bff78e91722ebebad6387bbbe4c53e89ba7c094119693afe8ff0fa42018f9a75ac206d689c1b5fcae8b6e215305d1a92db22f476c23d
6
+ metadata.gz: d8f2660f0c17f7956fba570deea3ed4a3301e8ea54f9b3f0e67232c5f24ba508949cfd259398eb22d4a20be1d4a83e1637ddf68c730919e619872b2382d5eac2
7
+ data.tar.gz: 3d834d669fab4f4986e46a9e439afe983b53264dd7555db1c48c3565ac4a3cbf82b3d5ebb5c90d0259edde0463b0cfadffde2b93f8a9020e877fe9160e3cc52b
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - 2.0.0
7
+ - rbx-19mode
8
+
9
+ branches:
10
+ only:
11
+ - master
12
+
13
+ script: bundle exec rake test
data/README.md CHANGED
@@ -153,6 +153,21 @@ If you were using unstable NameNodes and have wanted to ignore NameNode errors
153
153
  ignore_start_check_error true
154
154
  </match>
155
155
 
156
+ ### For unstable Datanodes
157
+
158
+ With unstable datanodes that frequently go down, appending over WebHDFS may produce broken files. In such cases, specify `append no` and the `${chunk_id}` parameter.
159
+
160
+ <match access.**>
161
+ type webhdfs
162
+ host namenode.your.cluster.local
163
+ port 50070
164
+
165
+ append no
166
+ path /log/access/%Y%m%d/${hostname}.${chunk_id}.log
167
+ </match>
168
+
169
+ `out_webhdfs` creates new files on hdfs per flush of fluentd, with the chunk id in the file name. You don't need to worry about broken files caused by append operations.
170
+
156
171
  ## TODO
157
172
 
158
173
  * configuration example for Hadoop Namenode HA
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-webhdfs"
5
- gem.version = "0.2.0"
5
+ gem.version = "0.2.1"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
@@ -39,6 +39,10 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
39
39
 
40
40
  config_param :default_tag, :string, :default => 'tag_missing'
41
41
 
42
+ config_param :append, :bool, :default => true
43
+
44
+ CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
45
+
42
46
  def initialize
43
47
  super
44
48
  require 'net/http'
@@ -91,6 +95,12 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
91
95
  else
92
96
  @client_standby = nil
93
97
  end
98
+
99
+ if not @append
100
+ if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
101
+ raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
102
+ end
103
+ end
94
104
  end
95
105
 
96
106
  def prepare_client(host, port, username)
@@ -160,18 +170,31 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
160
170
  end
161
171
  end
162
172
 
173
+ def chunk_unique_id_to_str(unique_id)
174
+ unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
175
+ end
176
+
163
177
  # TODO check conflictions
164
178
 
165
179
  def send_data(path, data)
166
- begin
167
- @client.append(path, data)
168
- rescue WebHDFS::FileNotFoundError
169
- @client.create(path, data)
180
+ if @append
181
+ begin
182
+ @client.append(path, data)
183
+ rescue WebHDFS::FileNotFoundError
184
+ @client.create(path, data)
185
+ end
186
+ else
187
+ @client.create(path, data, {'overwrite' => 'true'})
170
188
  end
171
189
  end
172
190
 
173
191
  def write(chunk)
174
- hdfs_path = path_format(chunk.key)
192
+ hdfs_path = if @append
193
+ path_format(chunk.key)
194
+ else
195
+ path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
196
+ end
197
+
175
198
  failovered = false
176
199
  begin
177
200
  send_data(hdfs_path, chunk.read)
@@ -64,5 +64,13 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
64
64
  assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
65
65
  assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
66
66
  assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
67
+
68
+ assert_raise Fluent::ConfigError do
69
+ d = create_driver %[
70
+ namenode server.local:14000
71
+ path /hdfs/path/file.%Y%m%d.%H%M.log
72
+ append false
73
+ ]
74
+ end
67
75
  end
68
76
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-15 00:00:00.000000000 Z
11
+ date: 2013-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -88,6 +88,7 @@ extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
90
  - .gitignore
91
+ - .travis.yml
91
92
  - Gemfile
92
93
  - LICENSE.txt
93
94
  - README.md
@@ -115,10 +116,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
116
  version: '0'
116
117
  requirements: []
117
118
  rubyforge_project:
118
- rubygems_version: 2.0.0
119
+ rubygems_version: 2.0.2
119
120
  signing_key:
120
121
  specification_version: 4
121
122
  summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
122
123
  test_files:
123
124
  - test/helper.rb
124
125
  - test/plugin/test_out_webhdfs.rb
126
+ has_rdoc: