logstash-output-webhdfs 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c13f13f2ad2d514bf729af26ee643ac543bcd0fd
4
- data.tar.gz: 9cb1db9b084c09fde21b77b96ff54dc38ae245e2
2
+ SHA256:
3
+ metadata.gz: dae08573b1282fafd1e124aaf6a206fe1820227ef0572d74184b6d21861bf8d2
4
+ data.tar.gz: 74d5265260ccdf7b329b7a4d8cef93c951d37005993328755fb95388e509a03c
5
5
  SHA512:
6
- metadata.gz: 22cce8a3b643ce79ca9f72b2b921d5c3e31c86236f6cefc34491958aadd8ce78d5284de80160864fba84a254e366e8ab00853d5c5fdf9e4777f1430100b2aea2
7
- data.tar.gz: 2d7b31dde2311f43720c5d8b96f141e29ce3dcfd412d907f472d81e848516e3ab59af65240590f251379d658dd6b65e4ae978b995a58d2312bc344c7ab86c912
6
+ metadata.gz: 58ec908b7dfc70f6a77475295d2a7e7cbc2d6c6c7bfa31ad9e179b21b68f723a0e4fca178f73588e4b79b58a896201eddd30dc24dcbafe69c72923ac06f0fc34
7
+ data.tar.gz: d6ebb6c0a8908e458a1dbc086f1c55eadba606d6c992e240f8775cda8bd1b135f1831eb4fc5a808521970a326dc82865370869cef5a9b31871cac3d9662c7f40
data/Gemfile CHANGED
@@ -1,2 +1,11 @@
1
1
  source 'https://rubygems.org'
2
- gemspec
2
+
3
+ gemspec
4
+
5
+ logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash"
6
+ use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1"
7
+
8
+ if Dir.exist?(logstash_path) && use_logstash_source
9
+ gem 'logstash-core', :path => "#{logstash_path}/logstash-core"
10
+ gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api"
11
+ end
@@ -0,0 +1,293 @@
1
+ :plugin: webhdfs
2
+ :type: output
3
+
4
+ ///////////////////////////////////////////
5
+ START - GENERATED VARIABLES, DO NOT EDIT!
6
+ ///////////////////////////////////////////
7
+ :version: %VERSION%
8
+ :release_date: %RELEASE_DATE%
9
+ :changelog_url: %CHANGELOG_URL%
10
+ :include_path: ../../../../logstash/docs/include
11
+ ///////////////////////////////////////////
12
+ END - GENERATED VARIABLES, DO NOT EDIT!
13
+ ///////////////////////////////////////////
14
+
15
+ [id="plugins-{type}-{plugin}"]
16
+
17
+ === Webhdfs output plugin
18
+
19
+ include::{include_path}/plugin_header.asciidoc[]
20
+
21
+ ==== Description
22
+
23
+ This plugin sends Logstash events into files in HDFS via
24
+ the https://hadoop.apache.org/docs/r1.0.4/webhdfs.html[webhdfs] REST API.
25
+
26
+ ==== Dependencies
27
+ This plugin has no dependency on jars from hadoop, thus reducing configuration and compatibility
28
+ problems. It uses the webhdfs gem from Kazuki Ohta and TAGOMORI Satoshi (@see: https://github.com/kzk/webhdfs).
29
+ Optional dependencies are zlib and snappy gem if you use the compression functionality.
30
+
31
+ ==== Operational Notes
32
+ If you get an error like:
33
+
34
+ Max write retries reached. Exception: initialize: name or service not known {:level=>:error}
35
+
36
+ make sure that the hostname of your namenode is resolvable on the host running Logstash. When creating/appending
37
+ to a file, webhdfs sometimes sends a `307 TEMPORARY_REDIRECT` with the `HOSTNAME` of the machine it's running on.
38
+
39
+ ==== Usage
40
+ This is an example of Logstash config:
41
+
42
+ [source,ruby]
43
+ ----------------------------------
44
+ input {
45
+ ...
46
+ }
47
+ filter {
48
+ ...
49
+ }
50
+ output {
51
+ webhdfs {
52
+ host => "127.0.0.1" # (required)
53
+ port => 50070 # (optional, default: 50070)
54
+ path => "/user/logstash/dt=%{+YYYY-MM-dd}/logstash-%{+HH}.log" # (required)
55
+ user => "hue" # (required)
56
+ }
57
+ }
58
+ ----------------------------------
59
+
60
+ [id="plugins-{type}s-{plugin}-options"]
61
+ ==== Webhdfs Output Configuration Options
62
+
63
+ This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
64
+
65
+ [cols="<,<,<",options="header",]
66
+ |=======================================================================
67
+ |Setting |Input type|Required
68
+ | <<plugins-{type}s-{plugin}-compression>> |<<string,string>>, one of `["none", "snappy", "gzip"]`|No
69
+ | <<plugins-{type}s-{plugin}-flush_size>> |<<number,number>>|No
70
+ | <<plugins-{type}s-{plugin}-host>> |<<string,string>>|Yes
71
+ | <<plugins-{type}s-{plugin}-idle_flush_time>> |<<number,number>>|No
72
+ | <<plugins-{type}s-{plugin}-kerberos_keytab>> |<<string,string>>|No
73
+ | <<plugins-{type}s-{plugin}-open_timeout>> |<<number,number>>|No
74
+ | <<plugins-{type}s-{plugin}-path>> |<<string,string>>|Yes
75
+ | <<plugins-{type}s-{plugin}-port>> |<<number,number>>|No
76
+ | <<plugins-{type}s-{plugin}-read_timeout>> |<<number,number>>|No
77
+ | <<plugins-{type}s-{plugin}-retry_interval>> |<<number,number>>|No
78
+ | <<plugins-{type}s-{plugin}-retry_known_errors>> |<<boolean,boolean>>|No
79
+ | <<plugins-{type}s-{plugin}-retry_times>> |<<number,number>>|No
80
+ | <<plugins-{type}s-{plugin}-single_file_per_thread>> |<<boolean,boolean>>|No
81
+ | <<plugins-{type}s-{plugin}-snappy_bufsize>> |<<number,number>>|No
82
+ | <<plugins-{type}s-{plugin}-snappy_format>> |<<string,string>>, one of `["stream", "file"]`|No
83
+ | <<plugins-{type}s-{plugin}-ssl_cert>> |<<string,string>>|No
84
+ | <<plugins-{type}s-{plugin}-ssl_key>> |<<string,string>>|No
85
+ | <<plugins-{type}s-{plugin}-standby_host>> |<<string,string>>|No
86
+ | <<plugins-{type}s-{plugin}-standby_port>> |<<number,number>>|No
87
+ | <<plugins-{type}s-{plugin}-use_httpfs>> |<<boolean,boolean>>|No
88
+ | <<plugins-{type}s-{plugin}-use_kerberos_auth>> |<<boolean,boolean>>|No
89
+ | <<plugins-{type}s-{plugin}-use_ssl_auth>> |<<boolean,boolean>>|No
90
+ | <<plugins-{type}s-{plugin}-user>> |<<string,string>>|Yes
91
+ |=======================================================================
92
+
93
+ Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
94
+ output plugins.
95
+
96
+ &nbsp;
97
+
98
+ [id="plugins-{type}s-{plugin}-compression"]
99
+ ===== `compression`
100
+
101
+ * Value can be any of: `none`, `snappy`, `gzip`
102
+ * Default value is `"none"`
103
+
104
+ Compress output. One of ['none', 'snappy', 'gzip']
105
+
106
+ [id="plugins-{type}s-{plugin}-flush_size"]
107
+ ===== `flush_size`
108
+
109
+ * Value type is <<number,number>>
110
+ * Default value is `500`
111
+
112
+ Sending data to webhdfs if event count is above, even if `store_interval_in_secs` is not reached.
113
+
114
+ [id="plugins-{type}s-{plugin}-host"]
115
+ ===== `host`
116
+
117
+ * This is a required setting.
118
+ * Value type is <<string,string>>
119
+ * There is no default value for this setting.
120
+
121
+ The server name for webhdfs/httpfs connections.
122
+
123
+ [id="plugins-{type}s-{plugin}-idle_flush_time"]
124
+ ===== `idle_flush_time`
125
+
126
+ * Value type is <<number,number>>
127
+ * Default value is `1`
128
+
129
+ Sending data to webhdfs in x seconds intervals.
130
+
131
+ [id="plugins-{type}s-{plugin}-kerberos_keytab"]
132
+ ===== `kerberos_keytab`
133
+
134
+ * Value type is <<string,string>>
135
+ * There is no default value for this setting.
136
+
137
+ Set kerberos keytab file. Note that the gssapi library needs to be available to use this.
138
+
139
+ [id="plugins-{type}s-{plugin}-open_timeout"]
140
+ ===== `open_timeout`
141
+
142
+ * Value type is <<number,number>>
143
+ * Default value is `30`
144
+
145
+ WebHdfs open timeout, default 30s.
146
+
147
+ [id="plugins-{type}s-{plugin}-path"]
148
+ ===== `path`
149
+
150
+ * This is a required setting.
151
+ * Value type is <<string,string>>
152
+ * There is no default value for this setting.
153
+
154
+ The path to the file to write to. Event fields can be used here,
155
+ as well as date fields in the joda time format, e.g.:
156
+ `/user/logstash/dt=%{+YYYY-MM-dd}/%{@source_host}-%{+HH}.log`
157
+
158
+ [id="plugins-{type}s-{plugin}-port"]
159
+ ===== `port`
160
+
161
+ * Value type is <<number,number>>
162
+ * Default value is `50070`
163
+
164
+ The server port for webhdfs/httpfs connections.
165
+
166
+ [id="plugins-{type}s-{plugin}-read_timeout"]
167
+ ===== `read_timeout`
168
+
169
+ * Value type is <<number,number>>
170
+ * Default value is `30`
171
+
172
+ The WebHdfs read timeout, default 30s.
173
+
174
+ [id="plugins-{type}s-{plugin}-retry_interval"]
175
+ ===== `retry_interval`
176
+
177
+ * Value type is <<number,number>>
178
+ * Default value is `0.5`
179
+
180
+ How long should we wait between retries.
181
+
182
+ [id="plugins-{type}s-{plugin}-retry_known_errors"]
183
+ ===== `retry_known_errors`
184
+
185
+ * Value type is <<boolean,boolean>>
186
+ * Default value is `true`
187
+
188
+ Retry some known webhdfs errors. These may be caused by race conditions when appending to same file, etc.
189
+
190
+ [id="plugins-{type}s-{plugin}-retry_times"]
191
+ ===== `retry_times`
192
+
193
+ * Value type is <<number,number>>
194
+ * Default value is `5`
195
+
196
+ How many times should we retry. If retry_times is exceeded, an error will be logged and the event will be discarded.
197
+
198
+ [id="plugins-{type}s-{plugin}-single_file_per_thread"]
199
+ ===== `single_file_per_thread`
200
+
201
+ * Value type is <<boolean,boolean>>
202
+ * Default value is `false`
203
+
204
+ Avoid appending to same file in multiple threads.
205
+ This solves some problems with multiple logstash output threads and locked file leases in webhdfs.
206
+ If this option is set to true, %{[@metadata][thread_id]} needs to be used in path config setting.
207
+
208
+ [id="plugins-{type}s-{plugin}-snappy_bufsize"]
209
+ ===== `snappy_bufsize`
210
+
211
+ * Value type is <<number,number>>
212
+ * Default value is `32768`
213
+
214
+ Set snappy chunksize. Only necessary for stream format. Defaults to 32k. Max is 65536
215
+ @see http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
216
+
217
+ [id="plugins-{type}s-{plugin}-snappy_format"]
218
+ ===== `snappy_format`
219
+
220
+ * Value can be any of: `stream`, `file`
221
+ * Default value is `"stream"`
222
+
223
+ Set snappy format. One of "stream", "file". Set to stream to be hive compatible.
224
+
225
+ [id="plugins-{type}s-{plugin}-ssl_cert"]
226
+ ===== `ssl_cert`
227
+
228
+ * Value type is <<string,string>>
229
+ * There is no default value for this setting.
230
+
231
+ Set ssl cert file.
232
+
233
+ [id="plugins-{type}s-{plugin}-ssl_key"]
234
+ ===== `ssl_key`
235
+
236
+ * Value type is <<string,string>>
237
+ * There is no default value for this setting.
238
+
239
+ Set ssl key file.
240
+
241
+ [id="plugins-{type}s-{plugin}-standby_host"]
242
+ ===== `standby_host`
243
+
244
+ * Value type is <<string,string>>
245
+ * Default value is `false`
246
+
247
+ Standby namenode for HA HDFS.
248
+
249
+ [id="plugins-{type}s-{plugin}-standby_port"]
250
+ ===== `standby_port`
251
+
252
+ * Value type is <<number,number>>
253
+ * Default value is `50070`
254
+
255
+ Standby namenode port for HA HDFS.
256
+
257
+ [id="plugins-{type}s-{plugin}-use_httpfs"]
258
+ ===== `use_httpfs`
259
+
260
+ * Value type is <<boolean,boolean>>
261
+ * Default value is `false`
262
+
263
+ Use httpfs mode if set to true, else webhdfs.
264
+
265
+ [id="plugins-{type}s-{plugin}-use_kerberos_auth"]
266
+ ===== `use_kerberos_auth`
267
+
268
+ * Value type is <<boolean,boolean>>
269
+ * Default value is `false`
270
+
271
+ Set kerberos authentication.
272
+
273
+ [id="plugins-{type}s-{plugin}-use_ssl_auth"]
274
+ ===== `use_ssl_auth`
275
+
276
+ * Value type is <<boolean,boolean>>
277
+ * Default value is `false`
278
+
279
+ Set ssl authentication. Note that the openssl library needs to be available to use this.
280
+
281
+ [id="plugins-{type}s-{plugin}-user"]
282
+ ===== `user`
283
+
284
+ * This is a required setting.
285
+ * Value type is <<string,string>>
286
+ * There is no default value for this setting.
287
+
288
+ The Username for webhdfs.
289
+
290
+
291
+
292
+ [id="plugins-{type}s-{plugin}-common-options"]
293
+ include::{include_path}/{type}.asciidoc[]
@@ -111,6 +111,21 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
111
111
  # Set snappy format. One of "stream", "file". Set to stream to be hive compatible.
112
112
  config :snappy_format, :validate => ["stream", "file"], :default => "stream"
113
113
 
114
+ # Set kerberos authentication.
115
+ config :use_kerberos_auth, :validate => :boolean, :default => false
116
+
117
+ # Set kerberos keytab file. Note that the gssapi library needs to be available to use this.
118
+ config :kerberos_keytab, :validate => :string
119
+
120
+ # Set ssl authentication. Note that the openssl library needs to be available to use this.
121
+ config :use_ssl_auth, :validate => :boolean, :default => false
122
+
123
+ # Set ssl key file.
124
+ config :ssl_key, :validate => :string
125
+
126
+ # Set ssl cert file.
127
+ config :ssl_cert, :validate => :string
128
+
114
129
  ## Set codec.
115
130
  default :codec, 'line'
116
131
 
@@ -132,7 +147,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
132
147
  begin
133
148
  test_client(@standby_client)
134
149
  rescue => e
135
- logger.warn("Could not connect to standby namenode #{@standby_host}. Error: #{e.message}. Trying main webhdfs namenode.")
150
+ logger.warn("Could not connect to standby namenode #{@standby_client.host}. Error: #{e.message}. Trying main webhdfs namenode.")
136
151
  end
137
152
  end
138
153
  @client = prepare_client(@host, @port, @user)
@@ -144,6 +159,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
144
159
  raise
145
160
  else
146
161
  # If a standby host is configured, try this before giving up.
162
+ logger.error("Could not connect to #{@client.host}:#{@client.port}. Error: #{e.message}")
147
163
  do_failover
148
164
  end
149
165
  end
@@ -235,7 +251,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
235
251
  if not @standby_client
236
252
  return
237
253
  end
238
- @logger.warn("Got exception from #{@host}. Switching to #{@standby_host}")
254
+ @logger.warn("Failing over from #{@client.host}:#{@client.port} to #{@standby_client.host}:#{@standby_client.port}.")
239
255
  @client, @standby_client = @standby_client, @client
240
256
  end
241
257
 
@@ -23,6 +23,17 @@ module LogStash
23
23
  # @return [WebHDFS] A configured client instance
24
24
  def prepare_client(host, port, username)
25
25
  client = WebHDFS::Client.new(host, port, username)
26
+ if @use_kerberos_auth
27
+ require 'gssapi'
28
+ client.kerberos = true
29
+ client.kerberos_keytab = @kerberos_keytab
30
+ end
31
+ if @use_ssl_auth
32
+ require 'openssl'
33
+ client.ssl = true
34
+ client.ssl_key = OpenSSL::PKey::RSA.new(open(@ssl_key))
35
+ client.ssl_cert = OpenSSL::X509::Certificate.new(open(@ssl_cert))
36
+ end
26
37
  client.httpfs_mode = @use_httpfs
27
38
  client.open_timeout = @open_timeout
28
39
  client.read_timeout = @read_timeout
@@ -2,7 +2,7 @@
2
2
  Gem::Specification.new do |s|
3
3
 
4
4
  s.name = 'logstash-output-webhdfs'
5
- s.version = '3.0.2'
5
+ s.version = '3.0.3'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Plugin to write events to hdfs via webhdfs."
8
8
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.require_paths = ["lib"]
13
13
 
14
14
  # Files
15
- s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
15
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
16
16
 
17
17
  # Tests
18
18
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Björn Puttmann, loshkovskyi, Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-14 00:00:00.000000000 Z
11
+ date: 2017-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -100,7 +100,9 @@ dependencies:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0'
103
- description: This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program
103
+ description: This gem is a Logstash plugin required to be installed on top of the
104
+ Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This
105
+ gem is not a stand-alone program
104
106
  email: b.puttmann@dbap.de
105
107
  executables: []
106
108
  extensions: []
@@ -112,6 +114,7 @@ files:
112
114
  - LICENSE
113
115
  - NOTICE.TXT
114
116
  - README.md
117
+ - docs/index.asciidoc
115
118
  - lib/logstash/outputs/webhdfs.rb
116
119
  - lib/logstash/outputs/webhdfs_helper.rb
117
120
  - logstash-output-webhdfs.gemspec
@@ -139,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
142
  version: '0'
140
143
  requirements: []
141
144
  rubyforge_project:
142
- rubygems_version: 2.6.3
145
+ rubygems_version: 2.6.11
143
146
  signing_key:
144
147
  specification_version: 4
145
148
  summary: Plugin to write events to hdfs via webhdfs.