fluent-plugin-webhdfs 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b109af994a373ce05a964a0b1560bfad64b08f6d
-  data.tar.gz: 3bf1cc0de4b54d399158ead361f90825d4f8ba92
+  metadata.gz: 56b8e7efe377002f3f092f79152826975098b9f8
+  data.tar.gz: 77ef93b880901d5136980b63125f0a36b620fd9c
 SHA512:
-  metadata.gz: e1ca791673549cc32e874360d07c57005af2a2c88e1d7b3ad6a5bd491cfa01d1724f196a793b8294eca3b8448792d3c2b148006ac61ad24b8e583050bcd2a789
-  data.tar.gz: ad02485500aa4af1b1f8bff78e91722ebebad6387bbbe4c53e89ba7c094119693afe8ff0fa42018f9a75ac206d689c1b5fcae8b6e215305d1a92db22f476c23d
+  metadata.gz: d8f2660f0c17f7956fba570deea3ed4a3301e8ea54f9b3f0e67232c5f24ba508949cfd259398eb22d4a20be1d4a83e1637ddf68c730919e619872b2382d5eac2
+  data.tar.gz: 3d834d669fab4f4986e46a9e439afe983b53264dd7555db1c48c3565ac4a3cbf82b3d5ebb5c90d0259edde0463b0cfadffde2b93f8a9020e877fe9160e3cc52b
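For reference, the new SHA512 values can be checked against a downloaded 0.2.1 gem with Ruby's standard `digest` library. This is a minimal sketch; the local file names assume you have extracted the gem payload into the current directory:

```ruby
require 'digest'

# Expected SHA512 digests, copied from the checksums.yaml diff above.
EXPECTED = {
  'metadata.gz' => 'd8f2660f0c17f7956fba570deea3ed4a3301e8ea54f9b3f0e67232c5f24ba508949cfd259398eb22d4a20be1d4a83e1637ddf68c730919e619872b2382d5eac2',
  'data.tar.gz' => '3d834d669fab4f4986e46a9e439afe983b53264dd7555db1c48c3565ac4a3cbf82b3d5ebb5c90d0259edde0463b0cfadffde2b93f8a9020e877fe9160e3cc52b',
}

EXPECTED.each do |file, sha512|
  # Digest::SHA512.file streams the file, so large payloads are fine.
  actual = Digest::SHA512.file(file).hexdigest
  puts "#{file}: #{actual == sha512 ? 'OK' : 'MISMATCH'}"
end
```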
data/.travis.yml ADDED
@@ -0,0 +1,13 @@
+language: ruby
+
+rvm:
+  - 1.9.2
+  - 1.9.3
+  - 2.0.0
+  - rbx-19mode
+
+branches:
+  only:
+    - master
+
+script: bundle exec rake test
data/README.md CHANGED
@@ -153,6 +153,21 @@ If you are using unstable NameNodes and want to ignore NameNode errors
       ignore_start_check_error true
     </match>
 
+### For unstable Datanodes
+
+With unstable datanodes that go down frequently, appending over WebHDFS may produce broken files. In such cases, specify `append no` and include the `${chunk_id}` placeholder in `path`:
+
+    <match access.**>
+      type webhdfs
+      host namenode.your.cluster.local
+      port 50070
+
+      append no
+      path /log/access/%Y%m%d/${hostname}.${chunk_id}.log
+    </match>
+
+With `append no`, `out_webhdfs` creates a new file on HDFS per flush of fluentd, named with the chunk id, so you don't need to worry about files broken by append operations.
+
 ## TODO
 
 * configuration example for Hadoop Namenode HA
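To make the per-flush naming in the new README section concrete: with the `path` above, each buffer flush writes a distinct file whose `${chunk_id}` part is the chunk's unique id rendered as hex. The date, hostname, and hex ids below are hypothetical:

```
/log/access/20130723/web01.b10f65b4a2c07e3d9f184c26a05b77d1.log
/log/access/20130723/web01.4e9d21c07aa83f02c55e10ffb2d93a68.log
```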
data/fluent-plugin-webhdfs.gemspec CHANGED
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |gem|
   gem.name = "fluent-plugin-webhdfs"
-  gem.version = "0.2.0"
+  gem.version = "0.2.1"
   gem.authors = ["TAGOMORI Satoshi"]
   gem.email = ["tagomoris@gmail.com"]
   gem.summary = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
data/lib/fluent/plugin/out_webhdfs.rb CHANGED
@@ -39,6 +39,10 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
 
   config_param :default_tag, :string, :default => 'tag_missing'
 
+  config_param :append, :bool, :default => true
+
+  CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
+
   def initialize
     super
     require 'net/http'
@@ -91,6 +95,12 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
     else
       @client_standby = nil
     end
+
+    if not @append
+      if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
+        raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
+      end
+    end
   end
 
   def prepare_client(host, port, username)
@@ -160,18 +170,31 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput
     end
   end
 
+  def chunk_unique_id_to_str(unique_id)
+    unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2, '0')}.join('')
+  end
+
   # TODO: check conflicts
 
   def send_data(path, data)
-    begin
-      @client.append(path, data)
-    rescue WebHDFS::FileNotFoundError
-      @client.create(path, data)
+    if @append
+      begin
+        @client.append(path, data)
+      rescue WebHDFS::FileNotFoundError
+        @client.create(path, data)
+      end
+    else
+      @client.create(path, data, {'overwrite' => 'true'})
     end
   end
 
   def write(chunk)
-    hdfs_path = path_format(chunk.key)
+    hdfs_path = if @append
+                  path_format(chunk.key)
+                else
+                  path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
+                end
+
     failovered = false
     begin
       send_data(hdfs_path, chunk.read)
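A standalone sketch (not part of the gem's code) of how the new non-append naming works: the chunk's binary unique_id is rendered as lowercase hex and substituted for the `${chunk_id}` placeholder in the formatted path. The id bytes and the expanded path here are hypothetical:

```ruby
# Same logic as chunk_unique_id_to_str in the diff above:
# each byte becomes two lowercase hex digits, zero-padded.
def chunk_unique_id_to_str(unique_id)
  unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2, '0')}.join('')
end

unique_id = "\x0F\xA2\x00\xFF"  # hypothetical 4-byte id; real chunk ids are longer
hex = chunk_unique_id_to_str(unique_id)  # => "0fa200ff"

# After time/hostname expansion (values hypothetical):
formatted = '/log/access/20130723/web01.${chunk_id}.log'
puts formatted.gsub('${chunk_id}', hex)
# => /log/access/20130723/web01.0fa200ff.log
```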
data/test/plugin/test_out_webhdfs.rb CHANGED
@@ -64,5 +64,13 @@ path /hdfs/path/file.%Y%m%d.%H%M.log
     assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
     assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
     assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
+
+    assert_raise Fluent::ConfigError do
+      d = create_driver %[
+        namenode server.local:14000
+        path /hdfs/path/file.%Y%m%d.%H%M.log
+        append false
+      ]
+    end
   end
 end
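The new test covers only the failing case. For completeness, the positive case (`append false` with `${chunk_id}` present in `path`) should configure without raising; here is a hedged sketch in the same test-driver style, assuming `config_param` exposes an `append` reader as fluentd's `config_param` normally does:

```ruby
# Hypothetical complementary assertion, not part of this release's tests:
# append false is accepted when path contains the ${chunk_id} placeholder.
d = create_driver %[
  namenode server.local:14000
  path /hdfs/path/file.%Y%m%d.%H%M.${chunk_id}.log
  append false
]
assert_equal false, d.instance.append
```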
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-webhdfs
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - TAGOMORI Satoshi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-15 00:00:00.000000000 Z
+date: 2013-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -88,6 +88,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- .travis.yml
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -115,10 +116,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0
+rubygems_version: 2.0.2
 signing_key:
 specification_version: 4
 summary: Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting
 test_files:
 - test/helper.rb
 - test/plugin/test_out_webhdfs.rb
+has_rdoc: