logstash-output-webhdfs 2.0.2 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f10d1fc2a87cbe9cc0889cc3ebaab2c92edcdbf
4
- data.tar.gz: 5184e5107893ca20d0a7501d43cc3df57b687a9a
3
+ metadata.gz: 4901ba13f6b2c802b94500c02c3a89279be136c5
4
+ data.tar.gz: 1520941c4b49bd8c20f44c5242ac6fb4973371ac
5
5
  SHA512:
6
- metadata.gz: 7de713395fa8702505c3ee6a98813f0f2c86392f226873f9ed7e8439298e9be777da91f28a2ad1eff561f3517c4f83a31c6fa57ce2675bf3e4383eda2bc908f8
7
- data.tar.gz: 3ed6e21b6184b555f6027088269ddc4b39512954c797b03112a45fffa6abe292f24eda70633ea13b851585a949350f4622a5330bb80a14e55a3870e19d3ced72
6
+ metadata.gz: 0a981834776bbc973527e6b15b09dbedde35520bb53379b001be64905ba8ab659a55fab8d5972e96bb49f555c1eb9ba78fab87ee65168fb3f176a78e6ea57c1d
7
+ data.tar.gz: 2daf6350fdeabf38323a6254157a60ed87d960c613060ce6b87ab8adfc82dc06e906b610461f80ef6a4e90f8de7302778ba1b328961499efef3b8692854042c3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 2.0.4
2
+ - Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash
3
+ # 2.0.3
4
+ - New dependency requirements for logstash-core for the 5.0 release
1
5
  ## 2.0.0
2
6
  - Plugins were updated to follow the new shutdown semantics; this mainly allows Logstash to instruct input plugins to terminate gracefully,
3
7
  instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
data/CONTRIBUTORS CHANGED
@@ -1,7 +1,16 @@
1
1
  The following is a list of people who have contributed ideas, code, bug
2
2
  reports, or in general have helped logstash along its way.
3
3
 
4
+ Maintainers:
5
+ * Björn Puttmann, dbap GmbH (dstore-dbap)
6
+
4
7
  Contributors:
8
+ * Björn Puttmann, dbap GmbH (dstore-dbap)
9
+ * Pier-Hugues Pellerin (ph)
10
+ * Pere Urbón (purbon)
11
+ * Suyog Rao (suyograo)
12
+ * João Duarte (jsvd)
13
+ * Shaunak Kashyap (ycombinator)
5
14
 
6
15
  Note: If you've sent us patches, bug reports, or otherwise contributed to
7
16
  Logstash, and you aren't on the list above and want to be, please let us know
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Logstash Plugin
2
2
 
3
+ [![Build
4
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Outputs/job/logstash-plugin-output-webhdfs-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Outputs/job/logstash-plugin-output-webhdfs-unit/)
5
+
3
6
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
7
 
5
8
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
@@ -4,13 +4,13 @@ require "logstash/outputs/base"
4
4
  require "stud/buffer"
5
5
  require "logstash/outputs/webhdfs_helper"
6
6
 
7
- # This plugin sends Logstash events into files in HDFS via
7
+ # This plugin sends Logstash events into files in HDFS via
8
8
  # the https://hadoop.apache.org/docs/r1.0.4/webhdfs.html[webhdfs] REST API.
9
9
  #
10
10
  # ==== Dependencies
11
11
  # This plugin has no dependency on jars from hadoop, thus reducing configuration and compatibility
12
12
  # problems. It uses the webhdfs gem from Kazuki Ohta and TAGOMORI Satoshi (@see: https://github.com/kzk/webhdfs).
13
- # Optional dependencies are zlib and snappy gem if you use the compression functionality.
13
+ # Optional dependencies are zlib and snappy gem if you use the compression functionality.
14
14
  #
15
15
  # ==== Operational Notes
16
16
  # If you get an error like:
@@ -58,6 +58,12 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
58
58
  # The server port for webhdfs/httpfs connections.
59
59
  config :port, :validate => :number, :default => 50070
60
60
 
61
+ # Standby namenode for HA (high-availability) HDFS.
62
+ config :standby_host, :validate => :string, :default => false
63
+
64
+ # Standby namenode port for ha hdfs.
65
+ config :standby_port, :validate => :number, :default => 50070
66
+
61
67
  # The Username for webhdfs.
62
68
  config :user, :validate => :string, :required => true
63
69
 
@@ -66,14 +72,6 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
66
72
  # `/user/logstash/dt=%{+YYYY-MM-dd}/%{@source_host}-%{+HH}.log`
67
73
  config :path, :validate => :string, :required => true
68
74
 
69
- # The format to use when writing events to the file. This value
70
- # supports any string and can include `%{name}` and other dynamic
71
- # strings.
72
- #
73
- # If this setting is omitted, the full json representation of the
74
- # event will be written as a single line.
75
- config :message_format, :validate => :string
76
-
77
75
  # Sending data to webhdfs in x seconds intervals.
78
76
  config :idle_flush_time, :validate => :number, :default => 1
79
77
 
@@ -89,6 +87,11 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
89
87
  # Use httpfs mode if set to true, else webhdfs.
90
88
  config :use_httpfs, :validate => :boolean, :default => false
91
89
 
90
+ # Avoid appending to same file in multiple threads.
91
+ # This solves some problems with multiple logstash output threads and locked file leases in webhdfs.
92
+ # If this option is set to true, %{[@metadata][thread_id]} needs to be used in path config setting.
93
+ config :single_file_per_thread, :validate => :boolean, :default => false
94
+
92
95
  # Retry some known webhdfs errors. These may be caused by race conditions when appending to same file, etc.
93
96
  config :retry_known_errors, :validate => :boolean, :default => true
94
97
 
@@ -120,14 +123,34 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
120
123
  elsif @compression == "snappy"
121
124
  load_module('snappy')
122
125
  end
126
+ @main_namenode_failed = false
127
+ @standby_client = false
123
128
  @files = {}
129
+ # Create and test standby client if configured.
130
+ if @standby_host
131
+ @standby_client = prepare_client(@standby_host, @standby_port, @user)
132
+ begin
133
+ test_client(@standby_client)
134
+ rescue => e
135
+ logger.warn("Could not connect to standby namenode #{@standby_host}. Error: #{e.message}. Trying main webhdfs namenode.")
136
+ end
137
+ end
124
138
  @client = prepare_client(@host, @port, @user)
125
- # Test client connection.
126
139
  begin
127
- @client.list('/')
140
+ test_client(@client)
128
141
  rescue => e
129
- @logger.error("Webhdfs check request failed. (namenode: #{@client.host}:#{@client.port}, Exception: #{e.message})")
130
- raise
142
+ # If no standby host is configured, we need to exit here.
143
+ if not @standby_host
144
+ raise
145
+ else
146
+ # If a standby host is configured, try this before giving up.
147
+ do_failover
148
+ end
149
+ end
150
+ # Make sure @path contains %{[@metadata][thread_id]} format value if @single_file_per_thread is set to true.
151
+ if @single_file_per_thread and !@path.include? "%{[@metadata][thread_id]}"
152
+ @logger.error("Please set %{[@metadata][thread_id]} format value in @path if @single_file_per_thread is active.")
153
+ raise LogStash::ConfigurationError
131
154
  end
132
155
  buffer_initialize(
133
156
  :max_items => @flush_size,
@@ -140,7 +163,6 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
140
163
  end # def register
141
164
 
142
165
  def receive(event)
143
-
144
166
  buffer_receive(event)
145
167
  end # def receive
146
168
 
@@ -149,6 +171,10 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
149
171
  newline = "\n"
150
172
  output_files = Hash.new { |hash, key| hash[key] = "" }
151
173
  events.collect do |event|
174
+ # Add thread_id to event metadata to be used as format value in path configuration.
175
+ if @single_file_per_thread
176
+ event['@metadata']['thread_id'] = Thread.current.object_id.to_s
177
+ end
152
178
  path = event.sprintf(@path)
153
179
  event_as_string = @codec.encode(event)
154
180
  event_as_string += newline unless event_as_string.end_with? newline
@@ -185,8 +211,14 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
185
211
  elsif
186
212
  @client.create(path, data)
187
213
  end
188
- # Handle other write errors and retry to write max. @retry_times.
214
+ # Handle other write errors and retry to write max. @retry_times.
189
215
  rescue => e
216
+ # Handle StandbyException and do failover. Still we want to exit if write_tries >= @retry_times.
217
+ if @standby_client && (e.message.match(/Failed to connect to host/) || e.message.match(/StandbyException/))
218
+ do_failover
219
+ write_tries += 1
220
+ retry
221
+ end
190
222
  if write_tries < @retry_times
191
223
  @logger.warn("webhdfs write caused an exception: #{e.message}. Maybe you should increase retry_interval or reduce number of workers. Retrying...")
192
224
  sleep(@retry_interval * write_tries)
@@ -199,6 +231,14 @@ class LogStash::Outputs::WebHdfs < LogStash::Outputs::Base
199
231
  end
200
232
  end
201
233
 
234
+ def do_failover
235
+ if not @standby_client
236
+ return
237
+ end
238
+ @logger.warn("Got exception from #{@host}. Switching to #{@standby_host}")
239
+ @client, @standby_client = @standby_client, @client
240
+ end
241
+
202
242
  def close
203
243
  buffer_flush(:final => true)
204
244
  end # def close
@@ -31,7 +31,16 @@ module LogStash
31
31
  client.retry_times = @retry_times if @retry_times
32
32
  client
33
33
  end
34
-
34
+ # Test client connection.
35
+ #@param client [WebHDFS] webhdfs client object.
36
+ def test_client(client)
37
+ begin
38
+ client.list('/')
39
+ rescue => e
40
+ @logger.error("Webhdfs check request failed. (namenode: #{client.host}:#{client.port}, Exception: #{e.message})")
41
+ raise
42
+ end
43
+ end
35
44
 
36
45
  # Compress data using the gzip methods.
37
46
  # @param data [String] stream of data to be compressed
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-webhdfs'
4
- s.version = '2.0.2'
4
+ s.version = '2.0.4'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Plugin to write events to hdfs via webhdfs."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -20,9 +20,9 @@ Gem::Specification.new do |s|
20
20
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
21
21
 
22
22
  # Gem dependencies
23
- s.add_runtime_dependency "logstash-core", ">= 2.0.0.beta2", "< 3.0.0"
23
+ s.add_runtime_dependency "logstash-core-plugin-api", "~> 1.0"
24
24
  s.add_runtime_dependency 'webhdfs'
25
- s.add_runtime_dependency 'snappy'
25
+ s.add_runtime_dependency 'snappy', "= 0.0.12"
26
26
  s.add_development_dependency 'logstash-devutils'
27
27
 
28
28
  s.add_development_dependency 'logstash-codec-line'
@@ -5,22 +5,20 @@ require 'webhdfs'
5
5
  require 'json'
6
6
 
7
7
  describe LogStash::Outputs::WebHdfs, :integration => true do
8
-
9
8
  let(:host) { 'localhost' }
10
9
  let(:port) { 50070 }
11
- let(:user) { 'vagrant' }
10
+ let(:user) { 'test' }
11
+ let(:test_file) { '/user/' + user + '/%{host}.test' }
12
+ let(:hdfs_file_name) { 'user/' + user + '/localhost.test' }
12
13
 
13
- let(:test_file) { "/test.file" }
14
+ let(:config) { { 'host' => host, 'user' => user, 'path' => test_file, 'compression' => 'none' } }
14
15
 
15
- let(:event) { LogStash::Event.new('message' => 'Hello world!', 'source' => 'out of the blue',
16
- 'type' => 'generator', 'host' => 'localhost' ) }
16
+ subject(:plugin) { LogStash::Plugin.lookup("output", "webhdfs").new(config) }
17
17
 
18
- let(:config) { { 'host' => host, 'user' => user,
19
- 'path' => test_file, 'compression' => 'none' } }
18
+ let(:webhdfs_client) { WebHDFS::Client.new(host, port, user) }
20
19
 
21
- subject { LogStash::Plugin.lookup("output", "webhdfs").new(config) }
22
-
23
- let(:client) { WebHDFS::Client.new(host, port, user) }
20
+ let(:event) { LogStash::Event.new('message' => 'Hello world!', 'source' => 'out of the blue',
21
+ 'type' => 'generator', 'host' => 'localhost' ) }
24
22
 
25
23
  describe "register and close" do
26
24
 
@@ -33,10 +31,10 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
33
31
  describe '#write' do
34
32
 
35
33
  let(:config) { { 'host' => host, 'user' => user, 'flush_size' => 10,
36
- 'path' => "/%{host}_test.log", 'compression' => 'none' } }
34
+ 'path' => test_file, 'compression' => 'none' } }
37
35
 
38
36
  after(:each) do
39
- client.delete(test_file)
37
+ webhdfs_client.delete(hdfs_file_name)
40
38
  end
41
39
 
42
40
  describe "writing plain files" do
@@ -48,17 +46,18 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
48
46
  end
49
47
 
50
48
  it 'should use the correct filename pattern' do
51
- expect { client.read('localhost_test.log') }.to_not raise_error
49
+ expect { webhdfs_client.read(hdfs_file_name) }.to_not raise_error
52
50
  end
53
51
 
54
- context "using the line codec" do
52
+ context "using the line codec without format" do
55
53
 
56
54
  let(:config) { { 'host' => host, 'user' => user, 'flush_size' => 10,
57
55
  'path' => test_file, 'compression' => 'none', 'codec' => 'line' } }
58
56
 
59
57
  it 'should match the event data' do
60
- expect(client.read(test_file).strip()).to eq(event.to_s)
58
+ expect(webhdfs_client.read(hdfs_file_name).strip()).to eq(event.to_s)
61
59
  end
60
+
62
61
  end
63
62
 
64
63
  context "using the json codec" do
@@ -68,7 +67,7 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
68
67
 
69
68
 
70
69
  it 'should match the event data' do
71
- expect(client.read(test_file).strip()).to eq(event.to_json)
70
+ expect(webhdfs_client.read(hdfs_file_name).strip()).to eq(event.to_json)
72
71
  end
73
72
 
74
73
  end
@@ -79,17 +78,18 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
79
78
  'path' => test_file, 'compression' => 'none', 'codec' => 'json' } }
80
79
 
81
80
  before(:each) do
82
- client.delete(test_file)
81
+ webhdfs_client.delete(hdfs_file_name)
83
82
  end
84
83
 
85
84
  it 'should flush after configured idle time' do
86
85
  subject.register
87
86
  subject.receive(event)
88
- expect { client.read(test_file) }.to raise_error(error=WebHDFS::FileNotFoundError)
87
+ expect { webhdfs_client.read(hdfs_file_name) }.to raise_error(error=WebHDFS::FileNotFoundError)
89
88
  sleep 3
90
- expect { client.read(test_file) }.to_not raise_error
91
- expect(client.read(test_file).strip()).to eq(event.to_json)
89
+ expect { webhdfs_client.read(hdfs_file_name) }.to_not raise_error
90
+ expect(webhdfs_client.read(hdfs_file_name).strip()).to eq(event.to_json)
92
91
  end
92
+
93
93
  end
94
94
 
95
95
  end
@@ -110,7 +110,7 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
110
110
  'path' => test_file, 'compression' => 'none', 'codec' => 'line' } }
111
111
 
112
112
  it 'should write some messages uncompressed' do
113
- expect(client.read(test_file).lines.count).to eq(500)
113
+ expect(webhdfs_client.read(hdfs_file_name).lines.count).to eq(500)
114
114
  end
115
115
 
116
116
  end
@@ -121,11 +121,10 @@ describe LogStash::Outputs::WebHdfs, :integration => true do
121
121
  'path' => test_file, 'compression' => 'gzip', 'codec' => 'line' } }
122
122
 
123
123
  it 'should write some messages gzip compressed' do
124
- expect(Zlib::Inflate.new(window_bits=47).inflate(client.read("#{test_file}.gz")).lines.count ).to eq(500)
124
+ expect(Zlib::Inflate.new(window_bits=47).inflate(webhdfs_client.read("#{hdfs_file_name}.gz")).lines.count ).to eq(500)
125
+ webhdfs_client.delete("#{hdfs_file_name}.gz")
125
126
  end
126
127
  end
127
-
128
128
  end
129
-
130
129
  end
131
- end
130
+ end
metadata CHANGED
@@ -1,39 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-output-webhdfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Björn Puttmann, loshkovskyi, Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2016-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - '>='
16
+ - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: 2.0.0.beta2
19
- - - <
20
- - !ruby/object:Gem::Version
21
- version: 3.0.0
22
- name: logstash-core
18
+ version: '1.0'
19
+ name: logstash-core-plugin-api
23
20
  prerelease: false
24
21
  type: :runtime
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - '>='
28
- - !ruby/object:Gem::Version
29
- version: 2.0.0.beta2
30
- - - <
24
+ - - "~>"
31
25
  - !ruby/object:Gem::Version
32
- version: 3.0.0
26
+ version: '1.0'
33
27
  - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
- - - '>='
30
+ - - ">="
37
31
  - !ruby/object:Gem::Version
38
32
  version: '0'
39
33
  name: webhdfs
@@ -41,27 +35,27 @@ dependencies:
41
35
  type: :runtime
42
36
  version_requirements: !ruby/object:Gem::Requirement
43
37
  requirements:
44
- - - '>='
38
+ - - ">="
45
39
  - !ruby/object:Gem::Version
46
40
  version: '0'
47
41
  - !ruby/object:Gem::Dependency
48
42
  requirement: !ruby/object:Gem::Requirement
49
43
  requirements:
50
- - - '>='
44
+ - - '='
51
45
  - !ruby/object:Gem::Version
52
- version: '0'
46
+ version: 0.0.12
53
47
  name: snappy
54
48
  prerelease: false
55
49
  type: :runtime
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
- - - '>='
52
+ - - '='
59
53
  - !ruby/object:Gem::Version
60
- version: '0'
54
+ version: 0.0.12
61
55
  - !ruby/object:Gem::Dependency
62
56
  requirement: !ruby/object:Gem::Requirement
63
57
  requirements:
64
- - - '>='
58
+ - - ">="
65
59
  - !ruby/object:Gem::Version
66
60
  version: '0'
67
61
  name: logstash-devutils
@@ -69,13 +63,13 @@ dependencies:
69
63
  type: :development
70
64
  version_requirements: !ruby/object:Gem::Requirement
71
65
  requirements:
72
- - - '>='
66
+ - - ">="
73
67
  - !ruby/object:Gem::Version
74
68
  version: '0'
75
69
  - !ruby/object:Gem::Dependency
76
70
  requirement: !ruby/object:Gem::Requirement
77
71
  requirements:
78
- - - '>='
72
+ - - ">="
79
73
  - !ruby/object:Gem::Version
80
74
  version: '0'
81
75
  name: logstash-codec-line
@@ -83,13 +77,13 @@ dependencies:
83
77
  type: :development
84
78
  version_requirements: !ruby/object:Gem::Requirement
85
79
  requirements:
86
- - - '>='
80
+ - - ">="
87
81
  - !ruby/object:Gem::Version
88
82
  version: '0'
89
83
  - !ruby/object:Gem::Dependency
90
84
  requirement: !ruby/object:Gem::Requirement
91
85
  requirements:
92
- - - '>='
86
+ - - ">="
93
87
  - !ruby/object:Gem::Version
94
88
  version: '0'
95
89
  name: logstash-codec-json
@@ -97,7 +91,7 @@ dependencies:
97
91
  type: :development
98
92
  version_requirements: !ruby/object:Gem::Requirement
99
93
  requirements:
100
- - - '>='
94
+ - - ">="
101
95
  - !ruby/object:Gem::Version
102
96
  version: '0'
103
97
  description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
@@ -129,12 +123,12 @@ require_paths:
129
123
  - lib
130
124
  required_ruby_version: !ruby/object:Gem::Requirement
131
125
  requirements:
132
- - - '>='
126
+ - - ">="
133
127
  - !ruby/object:Gem::Version
134
128
  version: '0'
135
129
  required_rubygems_version: !ruby/object:Gem::Requirement
136
130
  requirements:
137
- - - '>='
131
+ - - ">="
138
132
  - !ruby/object:Gem::Version
139
133
  version: '0'
140
134
  requirements: []