logstash-output-webhdfs 3.0.2 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c13f13f2ad2d514bf729af26ee643ac543bcd0fd
4
- data.tar.gz: 9cb1db9b084c09fde21b77b96ff54dc38ae245e2
2
+ SHA256:
3
+ metadata.gz: dae08573b1282fafd1e124aaf6a206fe1820227ef0572d74184b6d21861bf8d2
4
+ data.tar.gz: 74d5265260ccdf7b329b7a4d8cef93c951d37005993328755fb95388e509a03c
5
5
  SHA512:
6
- metadata.gz: 22cce8a3b643ce79ca9f72b2b921d5c3e31c86236f6cefc34491958aadd8ce78d5284de80160864fba84a254e366e8ab00853d5c5fdf9e4777f1430100b2aea2
7
- data.tar.gz: 2d7b31dde2311f43720c5d8b96f141e29ce3dcfd412d907f472d81e848516e3ab59af65240590f251379d658dd6b65e4ae978b995a58d2312bc344c7ab86c912
6
+ metadata.gz: 58ec908b7dfc70f6a77475295d2a7e7cbc2d6c6c7bfa31ad9e179b21b68f723a0e4fca178f73588e4b79b58a896201eddd30dc24dcbafe69c72923ac06f0fc34
7
+ data.tar.gz: d6ebb6c0a8908e458a1dbc086f1c55eadba606d6c992e240f8775cda8bd1b135f1831eb4fc5a808521970a326dc82865370869cef5a9b31871cac3d9662c7f40
data/Gemfile CHANGED
@@ -1,2 +1,11 @@
1
1
  source 'https://rubygems.org'
2
- gemspec
2
+
3
+ gemspec
4
+
5
+ logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash"
6
+ use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1"
7
+
8
+ if Dir.exist?(logstash_path) && use_logstash_source
9
+ gem 'logstash-core', :path => "#{logstash_path}/logstash-core"
10
+ gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api"
11
+ end
@@ -0,0 +1,293 @@
1
+ :plugin: webhdfs
2
+ :type: output
3
+
4
+ ///////////////////////////////////////////
5
+ START - GENERATED VARIABLES, DO NOT EDIT!
6
+ ///////////////////////////////////////////
7
+ :version: %VERSION%
8
+ :release_date: %RELEASE_DATE%
9
+ :changelog_url: %CHANGELOG_URL%
10
+ :include_path: ../../../../logstash/docs/include
11
+ ///////////////////////////////////////////
12
+ END - GENERATED VARIABLES, DO NOT EDIT!
13
+ ///////////////////////////////////////////
14
+
15
+ [id="plugins-{type}-{plugin}"]
16
+
17
+ === Webhdfs output plugin
18
+
19
+ include::{include_path}/plugin_header.asciidoc[]
20
+
21
+ ==== Description
22
+
23
+ This plugin sends Logstash events into files in HDFS via
24
+ the https://hadoop.apache.org/docs/r1.0.4/webhdfs.html[webhdfs] REST API.
25
+
26
+ ==== Dependencies
27
+ This plugin has no dependency on jars from hadoop, thus reducing configuration and compatibility
28
+ problems. It uses the webhdfs gem from Kazuki Ohta and TAGOMORI Satoshi (@see: https://github.com/kzk/webhdfs).
29
+ Optional dependencies are zlib and snappy gem if you use the compression functionality.
30
+
31
+ ==== Operational Notes
32
+ If you get an error like:
33
+
34
+ Max write retries reached. Exception: initialize: name or service not known {:level=>:error}
35
+
36
+ make sure that the hostname of your namenode is resolvable on the host running Logstash. When creating/appending
37
+ to a file, webhdfs sometimes sends a `307 TEMPORARY_REDIRECT` with the `HOSTNAME` of the machine it's running on.
38
+
39
+ ==== Usage
40
+ This is an example of Logstash config:
41
+
42
+ [source,ruby]
43
+ ----------------------------------
44
+ input {
45
+ ...
46
+ }
47
+ filter {
48
+ ...
49
+ }
50
+ output {
51
+ webhdfs {
52
+ host => "127.0.0.1" # (required)
53
+ port => 50070 # (optional, default: 50070)
54
+ path => "/user/logstash/dt=%{+YYYY-MM-dd}/logstash-%{+HH}.log" # (required)
55
+ user => "hue" # (required)
56
+ }
57
+ }
58
+ ----------------------------------
59
+
60
+ [id="plugins-{type}s-{plugin}-options"]
61
+ ==== Webhdfs Output Configuration Options
62
+
63
+ This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
64
+
65
+ [cols="<,<,<",options="header",]
66
+ |=======================================================================
67
+ |Setting |Input type|Required
68
+ | <<plugins-{type}s-{plugin}-compression>> |<<string,string>>, one of `["none", "snappy", "gzip"]`|No
69
+ | <<plugins-{type}s-{plugin}-flush_size>> |<<number,number>>|No
70
+ | <<plugins-{type}s-{plugin}-host>> |<<string,string>>|Yes
71
+ | <<plugins-{type}s-{plugin}-idle_flush_time>> |<<number,number>>|No
72
+ | <<plugins-{type}s-{plugin}-kerberos_keytab>> |<<string,string>>|No
73
+ | <<plugins-{type}s-{plugin}-open_timeout>> |<<number,number>>|No
74
+ | <<plugins-{type}s-{plugin}-path>> |<<string,string>>|Yes
75
+ | <<plugins-{type}s-{plugin}-port>> |<<number,number>>|No
76
+ | <<plugins-{type}s-{plugin}-read_timeout>> |<<number,number>>|No
77
+ | <<plugins-{type}s-{plugin}-retry_interval>> |<<number,number>>|No
78
+ | <<plugins-{type}s-{plugin}-retry_known_errors>> |<<boolean,boolean>>|No
79
+ | <<plugins-{type}s-{plugin}-retry_times>> |<<number,number>>|No
80
+ | <<plugins-{type}s-{plugin}-single_file_per_thread>> |<<boolean,boolean>>|No
81
+ | <<plugins-{type}s-{plugin}-snappy_bufsize>> |<<number,number>>|No
82
+ | <<plugins-{type}s-{plugin}-snappy_format>> |<<string,string>>, one of `["stream", "file"]`|No
83
+ | <<plugins-{type}s-{plugin}-ssl_cert>> |<<string,string>>|No
84
+ | <<plugins-{type}s-{plugin}-ssl_key>> |<<string,string>>|No
85
+ | <<plugins-{type}s-{plugin}-standby_host>> |<<string,string>>|No
86
+ | <<plugins-{type}s-{plugin}-standby_port>> |<<number,number>>|No
87
+ | <<plugins-{type}s-{plugin}-use_httpfs>> |<<boolean,boolean>>|No
88
+ | <<plugins-{type}s-{plugin}-use_kerberos_auth>> |<<boolean,boolean>>|No
89
+ | <<plugins-{type}s-{plugin}-use_ssl_auth>> |<<boolean,boolean>>|No
90
+ | <<plugins-{type}s-{plugin}-user>> |<<string,string>>|Yes
91
+ |=======================================================================
92
+
93
+ Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
94
+ output plugins.
95
+
96
+ &nbsp;
97
+
98
+ [id="plugins-{type}s-{plugin}-compression"]
99
+ ===== `compression`
100
+
101
+ * Value can be any of: `none`, `snappy`, `gzip`
102
+ * Default value is `"none"`
103
+
104
+ Compress output. One of ['none', 'snappy', 'gzip']
105
+
106
+ [id="plugins-{type}s-{plugin}-flush_size"]
107
+ ===== `flush_size`
108
+
109
+ * Value type is <<number,number>>
110
+ * Default value is `500`
111
+
112
+ Sending data to webhdfs if event count is above this value, even if `store_interval_in_secs` is not reached.
113
+
114
+ [id="plugins-{type}s-{plugin}-host"]
115
+ ===== `host`
116
+
117
+ * This is a required setting.
118
+ * Value type is <<string,string>>
119
+ * There is no default value for this setting.
120
+
121
+ The server name for webhdfs/httpfs connections.
122
+
123
+ [id="plugins-{type}s-{plugin}-idle_flush_time"]
124
+ ===== `idle_flush_time`
125
+
126
+ * Value type is <<number,number>>
127
+ * Default value is `1`
128
+
129
+ Sending data to webhdfs in x seconds intervals.
130
+
131
+ [id="plugins-{type}s-{plugin}-kerberos_keytab"]
132
+ ===== `kerberos_keytab`
133
+
134
+ * Value type is <<string,string>>
135
+ * There is no default value for this setting.
136
+
137
+ Set kerberos keytab file. Note that the gssapi library needs to be available to use this.
138
+
139
+ [id="plugins-{type}s-{plugin}-open_timeout"]
140
+ ===== `open_timeout`
141
+
142
+ * Value type is <<number,number>>
143
+ * Default value is `30`
144
+
145
+ WebHdfs open timeout, default 30s.
146
+
147
+ [id="plugins-{type}s-{plugin}-path"]
148
+ ===== `path`
149
+
150
+ * This is a required setting.
151
+ * Value type is <<string,string>>
152
+ * There is no default value for this setting.
153
+
154
+ The path to the file to write to. Event fields can be used here,
155
+ as well as date fields in the joda time format, e.g.:
156
+ `/user/logstash/dt=%{+YYYY-MM-dd}/%{@source_host}-%{+HH}.log`
157
+
158
+ [id="plugins-{type}s-{plugin}-port"]
159
+ ===== `port`
160
+
161
+ * Value type is <<number,number>>
162
+ * Default value is `50070`
163
+
164
+ The server port for webhdfs/httpfs connections.
165
+
166
+ [id="plugins-{type}s-{plugin}-read_timeout"]
167
+ ===== `read_timeout`
168
+
169
+ * Value type is <<number,number>>
170
+ * Default value is `30`
171
+
172
+ The WebHdfs read timeout, default 30s.
173
+
174
+ [id="plugins-{type}s-{plugin}-retry_interval"]
175
+ ===== `retry_interval`
176
+
177
+ * Value type is <<number,number>>
178
+ * Default value is `0.5`
179
+
180
+ How long should we wait between retries.
181
+
182
+ [id="plugins-{type}s-{plugin}-retry_known_errors"]
183
+ ===== `retry_known_errors`
184
+
185
+ * Value type is <<boolean,boolean>>
186
+ * Default value is `true`
187
+
188
+ Retry some known webhdfs errors. These may be caused by race conditions when appending to same file, etc.
189
+
190
+ [id="plugins-{type}s-{plugin}-retry_times"]
191
+ ===== `retry_times`
192
+
193
+ * Value type is <<number,number>>
194
+ * Default value is `5`
195
+
196
+ How many times should we retry. If retry_times is exceeded, an error will be logged and the event will be discarded.
197
+
198
+ [id="plugins-{type}s-{plugin}-single_file_per_thread"]
199
+ ===== `single_file_per_thread`
200
+
201
+ * Value type is <<boolean,boolean>>
202
+ * Default value is `false`
203
+
204
+ Avoid appending to same file in multiple threads.
205
+ This solves some problems with multiple logstash output threads and locked file leases in webhdfs.
206
+ If this option is set to true, %{[@metadata][thread_id]} needs to be used in path config setting.
207
+
208
+ [id="plugins-{type}s-{plugin}-snappy_bufsize"]
209
+ ===== `snappy_bufsize`
210
+
211
+ * Value type is <<number,number>>
212
+ * Default value is `32768`
213
+
214
+ Set snappy chunksize. Only necessary for stream format. Defaults to 32k. Max is 65536
215
+ @see http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
216
+
217
+ [id="plugins-{type}s-{plugin}-snappy_format"]
218
+ ===== `snappy_format`
219
+
220
+ * Value can be any of: `stream`, `file`
221
+ * Default value is `"stream"`
222
+
223
+ Set snappy format. One of "stream", "file". Set to stream to be hive compatible.
224
+
225
+ [id="plugins-{type}s-{plugin}-ssl_cert"]
226
+ ===== `ssl_cert`
227
+
228
+ * Value type is <<string,string>>
229
+ * There is no default value for this setting.
230
+
231
+ Set ssl cert file.
232
+
233
+ [id="plugins-{type}s-{plugin}-ssl_key"]
234
+ ===== `ssl_key`
235
+
236
+ * Value type is <<string,string>>
237
+ * There is no default value for this setting.
238
+
239
+ Set ssl key file.
240
+
241
+ [id="plugins-{type}s-{plugin}-standby_host"]
242
+ ===== `standby_host`
243
+
244
+ * Value type is <<string,string>>
245
+ * Default value is `false`
246
+
247
+ Standby namenode for ha hdfs.
248
+
249
+ [id="plugins-{type}s-{plugin}-standby_port"]
250
+ ===== `standby_port`
251
+
252
+ * Value type is <<number,number>>
253
+ * Default value is `50070`
254
+
255
+ Standby namenode port for ha hdfs.
256
+
257
+ [id="plugins-{type}s-{plugin}-use_httpfs"]
258
+ ===== `use_httpfs`
259
+
260
+ * Value type is <<boolean,boolean>>
261
+ * Default value is `false`
262
+
263
+ Use httpfs mode if set to true, else webhdfs.
264
+
265
+ [id="plugins-{type}s-{plugin}-use_kerberos_auth"]
266
+ ===== `use_kerberos_auth`
267
+
268
+ * Value type is <<boolean,boolean>>
269
+ * Default value is `false`
270
+
271
+ Set kerberos authentication.
272
+
273
+ [id="plugins-{type}s-{plugin}-use_ssl_auth"]
274
+ ===== `use_ssl_auth`
275
+
276
+ * Value type is <<boolean,boolean>>
277
+ * Default value is `false`
278
+
279
+ Set ssl authentication. Note that the openssl library needs to be available to use this.
280
+
281
+ [id="plugins-{type}s-{plugin}-user"]
282
+ ===== `user`
283
+
284
+ * This is a required setting.
285
+ * Value type is <<string,string>>
286
+ * There is no default value for this setting.
287
+
288
+ The Username for webhdfs.
289
+
290
+
291
+
292
+ [id="plugins-{type}s-{plugin}-common-options"]
293
+ include::{include_path}/{type}.asciidoc[]
@@ -111,6 +111,21 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
111
111
  # Set snappy format. One of "stream", "file". Set to stream to be hive compatible.
112
112
  config :snappy_format, :validate => ["stream", "file"], :default => "stream"
113
113
 
114
+ # Set kerberos authentication.
115
+ config :use_kerberos_auth, :validate => :boolean, :default => false
116
+
117
+ # Set kerberos keytab file. Note that the gssapi library needs to be available to use this.
118
+ config :kerberos_keytab, :validate => :string
119
+
120
+ # Set ssl authentication. Note that the openssl library needs to be available to use this.
121
+ config :use_ssl_auth, :validate => :boolean, :default => false
122
+
123
+ # Set ssl key file.
124
+ config :ssl_key, :validate => :string
125
+
126
+ # Set ssl cert file.
127
+ config :ssl_cert, :validate => :string
128
+
114
129
  ## Set codec.
115
130
  default :codec, 'line'
116
131
 
@@ -132,7 +147,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
132
147
  begin
133
148
  test_client(@standby_client)
134
149
  rescue => e
135
- logger.warn("Could not connect to standby namenode #{@standby_host}. Error: #{e.message}. Trying main webhdfs namenode.")
150
+ logger.warn("Could not connect to standby namenode #{@standby_client.host}. Error: #{e.message}. Trying main webhdfs namenode.")
136
151
  end
137
152
  end
138
153
  @client = prepare_client(@host, @port, @user)
@@ -144,6 +159,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
144
159
  raise
145
160
  else
146
161
  # If a standby host is configured, try this before giving up.
162
+ logger.error("Could not connect to #{@client.host}:#{@client.port}. Error: #{e.message}")
147
163
  do_failover
148
164
  end
149
165
  end
@@ -235,7 +251,7 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
235
251
  if not @standby_client
236
252
  return
237
253
  end
238
- @logger.warn("Got exception from #{@host}. Switching to #{@standby_host}")
254
+ @logger.warn("Failing over from #{@client.host}:#{@client.port} to #{@standby_client.host}:#{@standby_client.port}.")
239
255
  @client, @standby_client = @standby_client, @client
240
256
  end
241
257
 
@@ -23,6 +23,17 @@ module LogStash
23
23
  # @return [WebHDFS] A configured client instance
24
24
  def prepare_client(host, port, username)
25
25
  client = WebHDFS::Client.new(host, port, username)
26
+ if @use_kerberos_auth
27
+ require 'gssapi'
28
+ client.kerberos = true
29
+ client.kerberos_keytab = @kerberos_keytab
30
+ end
31
+ if @use_ssl_auth
32
+ require 'openssl'
33
+ client.ssl = true
34
+ client.ssl_key = OpenSSL::PKey::RSA.new(open(@ssl_key))
35
+ client.ssl_cert = OpenSSL::X509::Certificate.new(open(@ssl_cert))
36
+ end
26
37
  client.httpfs_mode = @use_httpfs
27
38
  client.open_timeout = @open_timeout
28
39
  client.read_timeout = @read_timeout
@@ -2,7 +2,7 @@
2
2
  Gem::Specification.new do |s|
3
3
 
4
4
  s.name = 'logstash-output-webhdfs'
5
- s.version = '3.0.2'
5
+ s.version = '3.0.3'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Plugin to write events to hdfs via webhdfs."
8
8
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.require_paths = ["lib"]
13
13
 
14
14
  # Files
15
- s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
15
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
16
16
 
17
17
  # Tests
18
18
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Björn Puttmann, loshkovskyi, Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-14 00:00:00.000000000 Z
11
+ date: 2017-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -100,7 +100,9 @@ dependencies:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0'
103
- description: This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program
103
+ description: This gem is a Logstash plugin required to be installed on top of the
104
+ Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This
105
+ gem is not a stand-alone program
104
106
  email: b.puttmann@dbap.de
105
107
  executables: []
106
108
  extensions: []
@@ -112,6 +114,7 @@ files:
112
114
  - LICENSE
113
115
  - NOTICE.TXT
114
116
  - README.md
117
+ - docs/index.asciidoc
115
118
  - lib/logstash/outputs/webhdfs.rb
116
119
  - lib/logstash/outputs/webhdfs_helper.rb
117
120
  - logstash-output-webhdfs.gemspec
@@ -139,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
142
  version: '0'
140
143
  requirements: []
141
144
  rubyforge_project:
142
- rubygems_version: 2.6.3
145
+ rubygems_version: 2.6.11
143
146
  signing_key:
144
147
  specification_version: 4
145
148
  summary: Plugin to write events to hdfs via webhdfs.