logstash-output-elasticsearch 0.2.8-java → 0.2.9-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d77d5d461770a2ff1f5305fc51bfd2d26ee30765
4
- data.tar.gz: 5f7fc066a058c2858684080f7ff428019e44541d
3
+ metadata.gz: 721d83f58bd48e9c017b488d2ed84032ab5583c1
4
+ data.tar.gz: 8535821c0248a9417347ec0bf4fd16264ae39454
5
5
  SHA512:
6
- metadata.gz: bb51c75b6dabfe4dd33f96322ebe920cb64bfb580aaed2215ecc8bb36ae0fb3f58533ef501dbe93b9c56450b7a54333c56ffd72f31b8206574656b5cc5654b6a
7
- data.tar.gz: aaf694ad4c3113b7d25a2320d05c66bf4906e5714901f6ba4f6cb2343f302561375128a0002d5eda6c31bceb0b5ea0d984cc1a0c6bd46c43fd623d38e57e4ff1
6
+ metadata.gz: 6d5ebb15b21546220ea868bc12ba2db88de9566830b96812faa43bb675b50a0aa54fcd1a6cf0d431936ece9c1bb0b6d9ede309c64c9d390c17066d6e88062f0c
7
+ data.tar.gz: 2db6134af16a7bc92de8ca4198e980546b30ef5e613e5f4d9164b6309a694c9ad85e552900d8f5d90f0a361a8cf58ee32f9ddc46ae840e5dfc6f06d1efb48ce9
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
1
  *.gem
2
2
  Gemfile.lock
3
3
  .bundle
4
+ .idea
5
+ *~
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.2.9
2
+ - Add 'path' parameter for ES HTTP hosts behind a proxy on a subpath
3
+
1
4
  ## 0.2.8 (June 12, 2015)
2
5
  - Add option to enable and disable SSL certificate verification during handshake (#160)
3
6
  - Doc improvements for clarifying round robin behavior using hosts config
data/Gemfile CHANGED
@@ -1,2 +1,3 @@
1
1
  source 'https://rubygems.org'
2
+
2
3
  gemspec
data/NOTICE.TXT ADDED
@@ -0,0 +1,5 @@
1
+ Elasticsearch
2
+ Copyright 2012-2015 Elasticsearch
3
+
4
+ This product includes software developed by The Apache Software
5
+ Foundation (http://www.apache.org/).
data/README.md CHANGED
@@ -37,12 +37,24 @@ bundle install
37
37
  bundle install
38
38
  ```
39
39
 
40
- - Run tests
40
+ - Run unit tests
41
41
 
42
42
  ```sh
43
43
  bundle exec rspec
44
44
  ```
45
45
 
46
+ - Run integration tests
47
+
48
+ Dependencies: [Docker](http://docker.com)
49
+
50
+ Before the test suite is run, we will load and run an
51
+ Elasticsearch instance within a docker container. This container
52
+ will be cleaned up when the suite has finished.
53
+
54
+ ```sh
55
+ bundle exec rspec --tag integration
56
+ ```
57
+
46
58
  ### 2. Running your unpublished Plugin in Logstash
47
59
 
48
60
  #### 2.1 Run in a local Logstash clone
@@ -83,4 +95,4 @@ Programming is not a required skill. Whatever you've seen about open source and
83
95
 
84
96
  It is more important to the community that you are able to contribute.
85
97
 
86
- For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file.
98
+ For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file.
@@ -33,7 +33,37 @@ require 'logstash-output-elasticsearch_jars.rb'
33
33
  # If using the default `protocol` setting ("node"), your firewalls might need
34
34
  # to permit port 9300 in *both* directions (from Logstash to Elasticsearch, and
35
35
  # Elasticsearch to Logstash)
36
+ #
37
+ # ## Retry Policy
38
+ #
39
+ # By default all bulk requests to ES are synchronous. Not all events in the bulk requests
40
+ # always make it successfully. For example, there could be events which are not formatted
41
+ # correctly for the index they are targeting (type mismatch in mapping). So that we minimize loss of
42
+ # events, we have a specific retry policy in place. We retry all events which fail to be reached by
43
+ # Elasticsearch for network related issues. We retry specific events which exhibit errors under a separate
44
+ # policy described below. Events of this nature are ones which experience ES error codes described as
45
+ # retryable errors.
46
+ #
47
+ # Retryable Errors:
48
+ #
49
+ # - 429, Too Many Requests (RFC6585)
50
+ # - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
51
+ #
52
+ # Here are the rules of what is retried when:
53
+ #
54
+ # - Block and retry all events in bulk response that experiences transient network exceptions until
55
+ # a successful submission is received by Elasticsearch.
56
+ # - Retry subset of sent events which resulted in ES errors of a retryable nature which can be found
57
+ # in RETRYABLE_CODES
58
+ # - For events which returned retryable error codes, they will be pushed onto a separate queue for
59
+ # retrying events. Events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries). The size of
60
+ # this queue is capped by the value set in :retry_max_items.
61
+ # - Events from the retry queue are submitted again either when the queue reaches its max size or when
62
+ # the max interval time is reached, which is set in :retry_max_interval.
63
+ # - Events which are not retryable or have reached their max retry count are logged to stderr.
36
64
  class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
65
+ attr_reader :client
66
+
37
67
  include Stud::Buffer
38
68
  RETRYABLE_CODES = [429, 503]
39
69
  SUCCESS_CODES = [200, 201]
@@ -235,6 +265,10 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
235
265
  config :user, :validate => :string
236
266
  config :password, :validate => :password
237
267
 
268
+ # HTTP Path at which the Elasticsearch server lives. Use this if you must run ES behind a proxy that remaps
269
+ # the root path of the Elasticsearch HTTP API. This option is ignored for non-HTTP transports.
270
+ config :path, :validate => :string, :default => "/"
271
+
238
272
  # SSL Configurations (only valid when protocol is HTTP)
239
273
  #
240
274
  # Enable SSL
@@ -286,8 +320,13 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
286
320
  @protocol = LogStash::Environment.jruby? ? "node" : "http"
287
321
  end
288
322
 
289
- if @protocol == "http" && @action == "create_unless_exists"
290
- raise(LogStash::ConfigurationError, "action => 'create_unless_exists' is not supported under the HTTP protocol");
323
+ if @protocol == "http"
324
+ if @action == "create_unless_exists"
325
+ raise(LogStash::ConfigurationError, "action => 'create_unless_exists' is not supported under the HTTP protocol");
326
+ end
327
+
328
+ client_settings[:path] = "/#{@path}/".gsub(/\/+/, "/") # Normalize slashes
329
+ @logger.debug? && @logger.debug("Normalizing http path", :path => @path, :normalized => client_settings[:path])
291
330
  end
292
331
 
293
332
  if ["node", "transport"].include?(@protocol)
@@ -524,6 +563,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
524
563
  buffer_flush(:final => true)
525
564
  retry_flush
526
565
  end
566
+
527
567
  protected
528
568
  def start_local_elasticsearch
529
569
  @logger.info("Starting embedded Elasticsearch local node.")
@@ -61,7 +61,7 @@ module LogStash::Outputs::Elasticsearch
61
61
  end
62
62
 
63
63
  def build_client(options)
64
- uri = "#{options[:protocol]}://#{options[:host]}:#{options[:port]}"
64
+ uri = "#{options[:protocol]}://#{options[:host]}:#{options[:port]}#{options[:client_settings][:path]}"
65
65
 
66
66
  client_options = {
67
67
  :host => [uri],
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-elasticsearch'
4
- s.version = '0.2.8'
4
+ s.version = '0.2.9'
5
5
  s.licenses = ['apache-2.0']
6
6
  s.summary = "Logstash Output to Elasticsearch"
7
7
  s.description = "Output events to elasticsearch"
@@ -36,4 +36,5 @@ Gem::Specification.new do |s|
36
36
  end
37
37
 
38
38
  s.add_development_dependency 'logstash-devutils'
39
+ s.add_development_dependency 'longshoreman'
39
40
  end
@@ -0,0 +1,65 @@
1
+ require "logstash/devutils/rspec/spec_helper"
2
+ require "ftw"
3
+ require "logstash/plugin"
4
+ require "logstash/json"
5
+ require "stud/try"
6
+ require "longshoreman"
7
+
8
+ CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999).to_s}"
9
+ CONTAINER_IMAGE = "elasticsearch"
10
+ CONTAINER_TAG = "1.6"
11
+
12
+ module ESHelper
13
+
14
+ def get_host
15
+ Longshoreman.new.get_host_ip
16
+ end
17
+
18
+ def get_port(protocol)
19
+ container = Longshoreman::Container.new
20
+ container.get(CONTAINER_NAME)
21
+ case protocol
22
+ when "http"
23
+ container.rport(9200)
24
+ when "transport", "node"
25
+ container.rport(9300)
26
+ end
27
+ end
28
+
29
+ def get_client
30
+ Elasticsearch::Client.new(:host => "#{get_host}:#{get_port('http')}")
31
+ end
32
+ end
33
+
34
+ RSpec.configure do |config|
35
+ config.include ESHelper
36
+
37
+ # this :all hook gets run before every describe block that is tagged with :integration => true.
38
+ config.before(:all, :integration => true) do
39
+ # check if container exists already before creating new one.
40
+ begin
41
+ ls = Longshoreman::new
42
+ ls.container.get(CONTAINER_NAME)
43
+ rescue Docker::Error::NotFoundError
44
+ Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME)
45
+ # TODO(talevy): verify ES is running instead of static timeout
46
+ sleep 10
47
+ end
48
+ end
49
+
50
+ # we want to do a final cleanup after all :integration runs,
51
+ # but we don't want to clean up before the last block.
52
+ # This is a final blind check to see if the ES docker container is running and
53
+ # needs to be cleaned up. If no container can be found and/or docker is not
54
+ # running on the system, we do nothing.
55
+ config.after(:suite) do
56
+ # only cleanup docker container if system has docker and the container is running
57
+ begin
58
+ ls = Longshoreman::new
59
+ ls.container.get(CONTAINER_NAME)
60
+ ls.cleanup
61
+ rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
62
+ # do nothing
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,36 @@
1
+ require_relative "../../../../spec/es_spec_helper"
2
+ require "logstash/outputs/elasticsearch/protocol"
3
+
4
+ describe "elasticsearch node client", :integration => true do
5
+ # Test ElasticSearch Node Client
6
+ # Reference: http://www.elasticsearch.org/guide/reference/modules/discovery/zen/
7
+
8
+ subject { LogStash::Outputs::Elasticsearch::Protocols::NodeClient.new(:host => get_host()) }
9
+
10
+ it "should support hosts in both string and array" do
11
+ # Because we defined *hosts* method in NodeClient as private,
12
+ # we use *obj.send :method,[args...]* to call method *hosts*
13
+
14
+ # Node client should support host in string
15
+ # Case 1: default :host in string
16
+ insist { subject.send :hosts, :host => "host",:port => 9300 } == "host:9300"
17
+ # Case 2: :port =~ /^\d+_\d+$/
18
+ insist { subject.send :hosts, :host => "host",:port => "9300-9302"} == "host:9300,host:9301,host:9302"
19
+ # Case 3: :host =~ /^.+:.+$/
20
+ insist { subject.send :hosts, :host => "host:9303",:port => 9300 } == "host:9303"
21
+ # Case 4: :host =~ /^.+:.+$/ and :port =~ /^\d+_\d+$/
22
+ insist { subject.send :hosts, :host => "host:9303",:port => "9300-9302"} == "host:9303"
23
+
24
+ # Node client should support host in array
25
+ # Case 5: :host in array with single item
26
+ insist { subject.send :hosts, :host => ["host"],:port => 9300 } == ("host:9300")
27
+ # Case 6: :host in array with more than one items
28
+ insist { subject.send :hosts, :host => ["host1","host2"],:port => 9300 } == "host1:9300,host2:9300"
29
+ # Case 7: :host in array with more than one items and :port =~ /^\d+_\d+$/
30
+ insist { subject.send :hosts, :host => ["host1","host2"],:port => "9300-9302" } == "host1:9300,host1:9301,host1:9302,host2:9300,host2:9301,host2:9302"
31
+ # Case 8: :host in array with more than one items and some :host =~ /^.+:.+$/
32
+ insist { subject.send :hosts, :host => ["host1","host2:9303"],:port => 9300 } == "host1:9300,host2:9303"
33
+ # Case 9: :host in array with more than one items, :port =~ /^\d+_\d+$/ and some :host =~ /^.+:.+$/
34
+ insist { subject.send :hosts, :host => ["host1","host2:9303"],:port => "9300-9302" } == "host1:9300,host1:9301,host1:9302,host2:9303"
35
+ end
36
+ end
@@ -0,0 +1,90 @@
1
+ require_relative "../../../spec/es_spec_helper"
2
+
3
+ shared_examples "an indexer" do
4
+ let(:index) { 10.times.collect { rand(10).to_s }.join("") }
5
+ let(:type) { 10.times.collect { rand(10).to_s }.join("") }
6
+ let(:event_count) { 10000 + rand(500) }
7
+ let(:flush_size) { rand(200) + 1 }
8
+ let(:config) { "not implemented" }
9
+
10
+ it "ships events" do
11
+ insist { config } != "not implemented"
12
+
13
+ pipeline = LogStash::Pipeline.new(config)
14
+ pipeline.run
15
+
16
+ index_url = "http://#{get_host}:#{get_port('http')}/#{index}"
17
+
18
+ ftw = FTW::Agent.new
19
+ ftw.post!("#{index_url}/_refresh")
20
+
21
+ # Wait until all events are available.
22
+ Stud::try(10.times) do
23
+ data = ""
24
+ response = ftw.get!("#{index_url}/_count?q=*")
25
+ response.read_body { |chunk| data << chunk }
26
+ result = LogStash::Json.load(data)
27
+ cur_count = result["count"]
28
+ insist { cur_count } == event_count
29
+ end
30
+
31
+ response = ftw.get!("#{index_url}/_search?q=*&size=1000")
32
+ data = ""
33
+ response.read_body { |chunk| data << chunk }
34
+ result = LogStash::Json.load(data)
35
+ result["hits"]["hits"].each do |doc|
36
+ insist { doc["_type"] } == type
37
+ insist { doc["_index"] } == index
38
+ end
39
+ end
40
+ end
41
+
42
+ describe "an indexer with custom index_type", :integration => true do
43
+ it_behaves_like "an indexer" do
44
+ let(:config) {
45
+ <<-CONFIG
46
+ input {
47
+ generator {
48
+ message => "hello world"
49
+ count => #{event_count}
50
+ type => "#{type}"
51
+ }
52
+ }
53
+ output {
54
+ elasticsearch {
55
+ host => "#{get_host()}"
56
+ port => "#{get_port('http')}"
57
+ protocol => "http"
58
+ index => "#{index}"
59
+ flush_size => #{flush_size}
60
+ }
61
+ }
62
+ CONFIG
63
+ }
64
+ end
65
+ end
66
+
67
+ describe "an indexer with no type value set (default to logs)", :integration => true do
68
+ it_behaves_like "an indexer" do
69
+ let(:type) { "logs" }
70
+ let(:config) {
71
+ <<-CONFIG
72
+ input {
73
+ generator {
74
+ message => "hello world"
75
+ count => #{event_count}
76
+ }
77
+ }
78
+ output {
79
+ elasticsearch {
80
+ host => "#{get_host()}"
81
+ port => "#{get_port('http')}"
82
+ protocol => "http"
83
+ index => "#{index}"
84
+ flush_size => #{flush_size}
85
+ }
86
+ }
87
+ CONFIG
88
+ }
89
+ end
90
+ end
@@ -0,0 +1,156 @@
1
+ require "logstash/outputs/elasticsearch"
2
+ require_relative "../../../spec/es_spec_helper"
3
+
4
+ describe "failures in bulk class expected behavior", :integration => true do
5
+ let(:template) { '{"template" : "not important, will be updated by :index"}' }
6
+ let(:event1) { LogStash::Event.new("somevalue" => 100, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
7
+ let(:action1) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event1] }
8
+ let(:event2) { LogStash::Event.new("geoip" => { "location" => [ 0.0, 0.0] }, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
9
+ let(:action2) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event2] }
10
+ let(:invalid_event) { LogStash::Event.new("geoip" => { "location" => "notlatlon" }, "@timestamp" => "2014-11-17T20:37:17.223Z") }
11
+ let(:max_retries) { 3 }
12
+
13
+ def mock_actions_with_response(*resp)
14
+ LogStash::Outputs::Elasticsearch::Protocols::HTTPClient
15
+ .any_instance.stub(:bulk).and_return(*resp)
16
+ LogStash::Outputs::Elasticsearch::Protocols::NodeClient
17
+ .any_instance.stub(:bulk).and_return(*resp)
18
+ end
19
+
20
+ ["transport", "http"].each do |protocol|
21
+ context "with protocol => #{protocol}" do
22
+ subject! do
23
+ settings = {
24
+ "manage_template" => true,
25
+ "index" => "logstash-2014.11.17",
26
+ "template_overwrite" => true,
27
+ "protocol" => protocol,
28
+ "host" => get_host(),
29
+ "port" => get_port(protocol),
30
+ "retry_max_items" => 10,
31
+ "retry_max_interval" => 1,
32
+ "max_retries" => max_retries
33
+ }
34
+ next LogStash::Outputs::ElasticSearch.new(settings)
35
+ end
36
+
37
+ before :each do
38
+ # Delete all templates first.
39
+ require "elasticsearch"
40
+
41
+ # Clean ES of data before we start.
42
+ @es = get_client
43
+ @es.indices.delete_template(:name => "*")
44
+ @es.indices.delete(:index => "*")
45
+ @es.indices.refresh
46
+ end
47
+
48
+ it "should return no errors if all bulk actions are successful" do
49
+ mock_actions_with_response({"errors" => false})
50
+ expect(subject).to receive(:submit).with([action1, action2]).once.and_call_original
51
+ subject.register
52
+ subject.receive(event1)
53
+ subject.receive(event2)
54
+ subject.buffer_flush(:final => true)
55
+ sleep(2)
56
+ end
57
+
58
+ it "should raise exception and be retried by stud::buffer" do
59
+ call_count = 0
60
+ expect(subject).to receive(:submit).with([action1]).exactly(3).times do
61
+ if (call_count += 1) <= 2
62
+ raise "error first two times"
63
+ else
64
+ {"errors" => false}
65
+ end
66
+ end
67
+ subject.register
68
+ subject.receive(event1)
69
+ subject.teardown
70
+ end
71
+
72
+ it "should retry actions with response status of 503" do
73
+ mock_actions_with_response({"errors" => true, "statuses" => [200, 200, 503, 503]},
74
+ {"errors" => true, "statuses" => [200, 503]},
75
+ {"errors" => false})
76
+ expect(subject).to receive(:submit).with([action1, action1, action1, action2]).ordered.once.and_call_original
77
+ expect(subject).to receive(:submit).with([action1, action2]).ordered.once.and_call_original
78
+ expect(subject).to receive(:submit).with([action2]).ordered.once.and_call_original
79
+
80
+ subject.register
81
+ subject.receive(event1)
82
+ subject.receive(event1)
83
+ subject.receive(event1)
84
+ subject.receive(event2)
85
+ subject.buffer_flush(:final => true)
86
+ sleep(3)
87
+ end
88
+
89
+ it "should retry actions with response status of 429" do
90
+ mock_actions_with_response({"errors" => true, "statuses" => [429]},
91
+ {"errors" => false})
92
+ expect(subject).to receive(:submit).with([action1]).twice.and_call_original
93
+ subject.register
94
+ subject.receive(event1)
95
+ subject.buffer_flush(:final => true)
96
+ sleep(3)
97
+ end
98
+
99
+ it "should retry an event until max_retries reached" do
100
+ mock_actions_with_response({"errors" => true, "statuses" => [429]},
101
+ {"errors" => true, "statuses" => [429]},
102
+ {"errors" => true, "statuses" => [429]},
103
+ {"errors" => true, "statuses" => [429]},
104
+ {"errors" => true, "statuses" => [429]},
105
+ {"errors" => true, "statuses" => [429]})
106
+ expect(subject).to receive(:submit).with([action1]).exactly(max_retries).times.and_call_original
107
+ subject.register
108
+ subject.receive(event1)
109
+ subject.buffer_flush(:final => true)
110
+ sleep(3)
111
+ end
112
+
113
+ it "non-retryable errors like mapping errors (400) should be dropped and not be retried (unfortunately)" do
114
+ subject.register
115
+ subject.receive(invalid_event)
116
+ expect(subject).not_to receive(:retry_push)
117
+ subject.teardown
118
+
119
+ @es.indices.refresh
120
+ sleep(5)
121
+ Stud::try(10.times) do
122
+ r = @es.search
123
+ insist { r["hits"]["total"] } == 0
124
+ end
125
+ end
126
+
127
+ it "successful requests should not be appended to retry queue" do
128
+ subject.register
129
+ subject.receive(event1)
130
+ expect(subject).not_to receive(:retry_push)
131
+ subject.teardown
132
+
133
+ @es.indices.refresh
134
+ sleep(5)
135
+ Stud::try(10.times) do
136
+ r = @es.search
137
+ insist { r["hits"]["total"] } == 1
138
+ end
139
+ end
140
+
141
+ it "should only index proper events" do
142
+ subject.register
143
+ subject.receive(invalid_event)
144
+ subject.receive(event1)
145
+ subject.teardown
146
+
147
+ @es.indices.refresh
148
+ sleep(5)
149
+ Stud::try(10.times) do
150
+ r = @es.search
151
+ insist { r["hits"]["total"] } == 1
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end