logstash-output-elasticsearch 0.2.8-java → 0.2.9-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d77d5d461770a2ff1f5305fc51bfd2d26ee30765
4
- data.tar.gz: 5f7fc066a058c2858684080f7ff428019e44541d
3
+ metadata.gz: 721d83f58bd48e9c017b488d2ed84032ab5583c1
4
+ data.tar.gz: 8535821c0248a9417347ec0bf4fd16264ae39454
5
5
  SHA512:
6
- metadata.gz: bb51c75b6dabfe4dd33f96322ebe920cb64bfb580aaed2215ecc8bb36ae0fb3f58533ef501dbe93b9c56450b7a54333c56ffd72f31b8206574656b5cc5654b6a
7
- data.tar.gz: aaf694ad4c3113b7d25a2320d05c66bf4906e5714901f6ba4f6cb2343f302561375128a0002d5eda6c31bceb0b5ea0d984cc1a0c6bd46c43fd623d38e57e4ff1
6
+ metadata.gz: 6d5ebb15b21546220ea868bc12ba2db88de9566830b96812faa43bb675b50a0aa54fcd1a6cf0d431936ece9c1bb0b6d9ede309c64c9d390c17066d6e88062f0c
7
+ data.tar.gz: 2db6134af16a7bc92de8ca4198e980546b30ef5e613e5f4d9164b6309a694c9ad85e552900d8f5d90f0a361a8cf58ee32f9ddc46ae840e5dfc6f06d1efb48ce9
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
1
  *.gem
2
2
  Gemfile.lock
3
3
  .bundle
4
+ .idea
5
+ *~
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.2.9
2
+ - Add 'path' parameter for ES HTTP hosts behind a proxy on a subpath
3
+
1
4
  ## 0.2.8 (June 12, 2015)
2
5
  - Add option to enable and disable SSL certificate verification during handshake (#160)
3
6
  - Doc improvements for clarifying round robin behavior using hosts config
data/Gemfile CHANGED
@@ -1,2 +1,3 @@
1
1
  source 'https://rubygems.org'
2
+
2
3
  gemspec
data/NOTICE.TXT ADDED
@@ -0,0 +1,5 @@
1
+ Elasticsearch
2
+ Copyright 2012-2015 Elasticsearch
3
+
4
+ This product includes software developed by The Apache Software
5
+ Foundation (http://www.apache.org/).
data/README.md CHANGED
@@ -37,12 +37,24 @@ bundle install
37
37
  bundle install
38
38
  ```
39
39
 
40
- - Run tests
40
+ - Run unit tests
41
41
 
42
42
  ```sh
43
43
  bundle exec rspec
44
44
  ```
45
45
 
46
+ - Run integration tests
47
+
48
+ Dependencies: [Docker](http://docker.com)
49
+
50
+ Before the test suite is run, we will load and run an
51
+ Elasticsearch instance within a docker container. This container
52
+ will be cleaned up when the suite has finished.
53
+
54
+ ```sh
55
+ bundle exec rspec --tag integration
56
+ ```
57
+
46
58
  ### 2. Running your unpublished Plugin in Logstash
47
59
 
48
60
  #### 2.1 Run in a local Logstash clone
@@ -83,4 +95,4 @@ Programming is not a required skill. Whatever you've seen about open source and
83
95
 
84
96
  It is more important to the community that you are able to contribute.
85
97
 
86
- For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file.
98
+ For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file.
@@ -33,7 +33,37 @@ require 'logstash-output-elasticsearch_jars.rb'
33
33
  # If using the default `protocol` setting ("node"), your firewalls might need
34
34
  # to permit port 9300 in *both* directions (from Logstash to Elasticsearch, and
35
35
  # Elasticsearch to Logstash)
36
+ #
37
+ # ## Retry Policy
38
+ #
39
+ # By default all bulk requests to ES are synchronous. Not all events in the bulk requests
40
+ # always make it successfully. For example, there could be events which are not formatted
41
+ # correctly for the index they are targeting (type mismatch in mapping). So that we minimize loss of
42
+ # events, we have a specific retry policy in place. We retry all events which fail to reach
43
+ # Elasticsearch due to network-related issues. We retry specific events which exhibit errors under a separate
44
+ # policy described below. Events of this nature are ones which experience ES error codes described as
45
+ # retryable errors.
46
+ #
47
+ # Retryable Errors:
48
+ #
49
+ # - 429, Too Many Requests (RFC6585)
50
+ # - 503, The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.
51
+ #
52
+ # Here are the rules of what is retried when:
53
+ #
54
+ # - Block and retry all events in bulk response that experiences transient network exceptions until
55
+ # a successful submission is received by Elasticsearch.
56
+ # - Retry subset of sent events which resulted in ES errors of a retryable nature which can be found
57
+ # in RETRYABLE_CODES
58
+ # - For events which returned retryable error codes, they will be pushed onto a separate queue for
59
+ # retrying events. Events in this queue will be retried a maximum of 5 times by default (configurable through :max_retries). The size of
60
+ # this queue is capped by the value set in :retry_max_items.
61
+ # - Events from the retry queue are submitted again either when the queue reaches its max size or when
62
+ # the max interval time is reached, which is set in :retry_max_interval.
63
+ # - Events which are not retryable or have reached their max retry count are logged to stderr.
36
64
  class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
65
+ attr_reader :client
66
+
37
67
  include Stud::Buffer
38
68
  RETRYABLE_CODES = [429, 503]
39
69
  SUCCESS_CODES = [200, 201]
@@ -235,6 +265,10 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
235
265
  config :user, :validate => :string
236
266
  config :password, :validate => :password
237
267
 
268
+ # HTTP Path at which the Elasticsearch server lives. Use this if you must run ES behind a proxy that remaps
269
+ the root path at which the Elasticsearch HTTP API lives. This option is ignored for non-HTTP transports.
270
+ config :path, :validate => :string, :default => "/"
271
+
238
272
  # SSL Configurations (only valid when protocol is HTTP)
239
273
  #
240
274
  # Enable SSL
@@ -286,8 +320,13 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
286
320
  @protocol = LogStash::Environment.jruby? ? "node" : "http"
287
321
  end
288
322
 
289
- if @protocol == "http" && @action == "create_unless_exists"
290
- raise(LogStash::ConfigurationError, "action => 'create_unless_exists' is not supported under the HTTP protocol");
323
+ if @protocol == "http"
324
+ if @action == "create_unless_exists"
325
+ raise(LogStash::ConfigurationError, "action => 'create_unless_exists' is not supported under the HTTP protocol");
326
+ end
327
+
328
+ client_settings[:path] = "/#{@path}/".gsub(/\/+/, "/") # Normalize slashes
329
+ @logger.debug? && @logger.debug("Normalizing http path", :path => @path, :normalized => client_settings[:path])
291
330
  end
292
331
 
293
332
  if ["node", "transport"].include?(@protocol)
@@ -524,6 +563,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
524
563
  buffer_flush(:final => true)
525
564
  retry_flush
526
565
  end
566
+
527
567
  protected
528
568
  def start_local_elasticsearch
529
569
  @logger.info("Starting embedded Elasticsearch local node.")
@@ -61,7 +61,7 @@ module LogStash::Outputs::Elasticsearch
61
61
  end
62
62
 
63
63
  def build_client(options)
64
- uri = "#{options[:protocol]}://#{options[:host]}:#{options[:port]}"
64
+ uri = "#{options[:protocol]}://#{options[:host]}:#{options[:port]}#{options[:client_settings][:path]}"
65
65
 
66
66
  client_options = {
67
67
  :host => [uri],
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-elasticsearch'
4
- s.version = '0.2.8'
4
+ s.version = '0.2.9'
5
5
  s.licenses = ['apache-2.0']
6
6
  s.summary = "Logstash Output to Elasticsearch"
7
7
  s.description = "Output events to elasticsearch"
@@ -36,4 +36,5 @@ Gem::Specification.new do |s|
36
36
  end
37
37
 
38
38
  s.add_development_dependency 'logstash-devutils'
39
+ s.add_development_dependency 'longshoreman'
39
40
  end
@@ -0,0 +1,65 @@
1
+ require "logstash/devutils/rspec/spec_helper"
2
+ require "ftw"
3
+ require "logstash/plugin"
4
+ require "logstash/json"
5
+ require "stud/try"
6
+ require "longshoreman"
7
+
8
+ CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999).to_s}"
9
+ CONTAINER_IMAGE = "elasticsearch"
10
+ CONTAINER_TAG = "1.6"
11
+
12
+ module ESHelper
13
+
14
+ def get_host
15
+ Longshoreman.new.get_host_ip
16
+ end
17
+
18
+ def get_port(protocol)
19
+ container = Longshoreman::Container.new
20
+ container.get(CONTAINER_NAME)
21
+ case protocol
22
+ when "http"
23
+ container.rport(9200)
24
+ when "transport", "node"
25
+ container.rport(9300)
26
+ end
27
+ end
28
+
29
+ def get_client
30
+ Elasticsearch::Client.new(:host => "#{get_host}:#{get_port('http')}")
31
+ end
32
+ end
33
+
34
+ RSpec.configure do |config|
35
+ config.include ESHelper
36
+
37
+ # this :all hook gets run before every describe block that is tagged with :integration => true.
38
+ config.before(:all, :integration => true) do
39
+ # check if container exists already before creating new one.
40
+ begin
41
+ ls = Longshoreman::new
42
+ ls.container.get(CONTAINER_NAME)
43
+ rescue Docker::Error::NotFoundError
44
+ Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME)
45
+ # TODO(talevy): verify ES is running instead of static timeout
46
+ sleep 10
47
+ end
48
+ end
49
+
50
+ # we want to do a final cleanup after all :integration runs,
51
+ # but we don't want to clean up before the last block.
52
+ # This is a final blind check to see if the ES docker container is running and
53
+ # needs to be cleaned up. If no container can be found and/or docker is not
54
+ # running on the system, we do nothing.
55
+ config.after(:suite) do
56
+ # only cleanup docker container if system has docker and the container is running
57
+ begin
58
+ ls = Longshoreman::new
59
+ ls.container.get(CONTAINER_NAME)
60
+ ls.cleanup
61
+ rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
62
+ # do nothing
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,36 @@
1
+ require_relative "../../../../spec/es_spec_helper"
2
+ require "logstash/outputs/elasticsearch/protocol"
3
+
4
+ describe "elasticsearch node client", :integration => true do
5
+ # Test ElasticSearch Node Client
6
+ # Reference: http://www.elasticsearch.org/guide/reference/modules/discovery/zen/
7
+
8
+ subject { LogStash::Outputs::Elasticsearch::Protocols::NodeClient.new(:host => get_host()) }
9
+
10
+ it "should support hosts in both string and array" do
11
+ # Because we defined *hosts* method in NodeClient as private,
12
+ # we use *obj.send :method,[args...]* to call method *hosts*
13
+
14
+ # Node client should support host in string
15
+ # Case 1: default :host in string
16
+ insist { subject.send :hosts, :host => "host",:port => 9300 } == "host:9300"
17
+ # Case 2: :port =~ /^\d+_\d+$/
18
+ insist { subject.send :hosts, :host => "host",:port => "9300-9302"} == "host:9300,host:9301,host:9302"
19
+ # Case 3: :host =~ /^.+:.+$/
20
+ insist { subject.send :hosts, :host => "host:9303",:port => 9300 } == "host:9303"
21
+ # Case 4: :host =~ /^.+:.+$/ and :port =~ /^\d+_\d+$/
22
+ insist { subject.send :hosts, :host => "host:9303",:port => "9300-9302"} == "host:9303"
23
+
24
+ # Node client should support host in array
25
+ # Case 5: :host in array with single item
26
+ insist { subject.send :hosts, :host => ["host"],:port => 9300 } == ("host:9300")
27
+ # Case 6: :host in array with more than one items
28
+ insist { subject.send :hosts, :host => ["host1","host2"],:port => 9300 } == "host1:9300,host2:9300"
29
+ # Case 7: :host in array with more than one items and :port =~ /^\d+_\d+$/
30
+ insist { subject.send :hosts, :host => ["host1","host2"],:port => "9300-9302" } == "host1:9300,host1:9301,host1:9302,host2:9300,host2:9301,host2:9302"
31
+ # Case 8: :host in array with more than one items and some :host =~ /^.+:.+$/
32
+ insist { subject.send :hosts, :host => ["host1","host2:9303"],:port => 9300 } == "host1:9300,host2:9303"
33
+ # Case 9: :host in array with more than one items, :port =~ /^\d+_\d+$/ and some :host =~ /^.+:.+$/
34
+ insist { subject.send :hosts, :host => ["host1","host2:9303"],:port => "9300-9302" } == "host1:9300,host1:9301,host1:9302,host2:9303"
35
+ end
36
+ end
@@ -0,0 +1,90 @@
1
+ require_relative "../../../spec/es_spec_helper"
2
+
3
+ shared_examples "an indexer" do
4
+ let(:index) { 10.times.collect { rand(10).to_s }.join("") }
5
+ let(:type) { 10.times.collect { rand(10).to_s }.join("") }
6
+ let(:event_count) { 10000 + rand(500) }
7
+ let(:flush_size) { rand(200) + 1 }
8
+ let(:config) { "not implemented" }
9
+
10
+ it "ships events" do
11
+ insist { config } != "not implemented"
12
+
13
+ pipeline = LogStash::Pipeline.new(config)
14
+ pipeline.run
15
+
16
+ index_url = "http://#{get_host}:#{get_port('http')}/#{index}"
17
+
18
+ ftw = FTW::Agent.new
19
+ ftw.post!("#{index_url}/_refresh")
20
+
21
+ # Wait until all events are available.
22
+ Stud::try(10.times) do
23
+ data = ""
24
+ response = ftw.get!("#{index_url}/_count?q=*")
25
+ response.read_body { |chunk| data << chunk }
26
+ result = LogStash::Json.load(data)
27
+ cur_count = result["count"]
28
+ insist { cur_count } == event_count
29
+ end
30
+
31
+ response = ftw.get!("#{index_url}/_search?q=*&size=1000")
32
+ data = ""
33
+ response.read_body { |chunk| data << chunk }
34
+ result = LogStash::Json.load(data)
35
+ result["hits"]["hits"].each do |doc|
36
+ insist { doc["_type"] } == type
37
+ insist { doc["_index"] } == index
38
+ end
39
+ end
40
+ end
41
+
42
+ describe "an indexer with custom index_type", :integration => true do
43
+ it_behaves_like "an indexer" do
44
+ let(:config) {
45
+ <<-CONFIG
46
+ input {
47
+ generator {
48
+ message => "hello world"
49
+ count => #{event_count}
50
+ type => "#{type}"
51
+ }
52
+ }
53
+ output {
54
+ elasticsearch {
55
+ host => "#{get_host()}"
56
+ port => "#{get_port('http')}"
57
+ protocol => "http"
58
+ index => "#{index}"
59
+ flush_size => #{flush_size}
60
+ }
61
+ }
62
+ CONFIG
63
+ }
64
+ end
65
+ end
66
+
67
+ describe "an indexer with no type value set (default to logs)", :integration => true do
68
+ it_behaves_like "an indexer" do
69
+ let(:type) { "logs" }
70
+ let(:config) {
71
+ <<-CONFIG
72
+ input {
73
+ generator {
74
+ message => "hello world"
75
+ count => #{event_count}
76
+ }
77
+ }
78
+ output {
79
+ elasticsearch {
80
+ host => "#{get_host()}"
81
+ port => "#{get_port('http')}"
82
+ protocol => "http"
83
+ index => "#{index}"
84
+ flush_size => #{flush_size}
85
+ }
86
+ }
87
+ CONFIG
88
+ }
89
+ end
90
+ end
@@ -0,0 +1,156 @@
1
+ require "logstash/outputs/elasticsearch"
2
+ require_relative "../../../spec/es_spec_helper"
3
+
4
+ describe "failures in bulk class expected behavior", :integration => true do
5
+ let(:template) { '{"template" : "not important, will be updated by :index"}' }
6
+ let(:event1) { LogStash::Event.new("somevalue" => 100, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
7
+ let(:action1) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event1] }
8
+ let(:event2) { LogStash::Event.new("geoip" => { "location" => [ 0.0, 0.0] }, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
9
+ let(:action2) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event2] }
10
+ let(:invalid_event) { LogStash::Event.new("geoip" => { "location" => "notlatlon" }, "@timestamp" => "2014-11-17T20:37:17.223Z") }
11
+ let(:max_retries) { 3 }
12
+
13
+ def mock_actions_with_response(*resp)
14
+ LogStash::Outputs::Elasticsearch::Protocols::HTTPClient
15
+ .any_instance.stub(:bulk).and_return(*resp)
16
+ LogStash::Outputs::Elasticsearch::Protocols::NodeClient
17
+ .any_instance.stub(:bulk).and_return(*resp)
18
+ end
19
+
20
+ ["transport", "http"].each do |protocol|
21
+ context "with protocol => #{protocol}" do
22
+ subject! do
23
+ settings = {
24
+ "manage_template" => true,
25
+ "index" => "logstash-2014.11.17",
26
+ "template_overwrite" => true,
27
+ "protocol" => protocol,
28
+ "host" => get_host(),
29
+ "port" => get_port(protocol),
30
+ "retry_max_items" => 10,
31
+ "retry_max_interval" => 1,
32
+ "max_retries" => max_retries
33
+ }
34
+ next LogStash::Outputs::ElasticSearch.new(settings)
35
+ end
36
+
37
+ before :each do
38
+ # Delete all templates first.
39
+ require "elasticsearch"
40
+
41
+ # Clean ES of data before we start.
42
+ @es = get_client
43
+ @es.indices.delete_template(:name => "*")
44
+ @es.indices.delete(:index => "*")
45
+ @es.indices.refresh
46
+ end
47
+
48
+ it "should return no errors if all bulk actions are successful" do
49
+ mock_actions_with_response({"errors" => false})
50
+ expect(subject).to receive(:submit).with([action1, action2]).once.and_call_original
51
+ subject.register
52
+ subject.receive(event1)
53
+ subject.receive(event2)
54
+ subject.buffer_flush(:final => true)
55
+ sleep(2)
56
+ end
57
+
58
+ it "should raise exception and be retried by stud::buffer" do
59
+ call_count = 0
60
+ expect(subject).to receive(:submit).with([action1]).exactly(3).times do
61
+ if (call_count += 1) <= 2
62
+ raise "error first two times"
63
+ else
64
+ {"errors" => false}
65
+ end
66
+ end
67
+ subject.register
68
+ subject.receive(event1)
69
+ subject.teardown
70
+ end
71
+
72
+ it "should retry actions with response status of 503" do
73
+ mock_actions_with_response({"errors" => true, "statuses" => [200, 200, 503, 503]},
74
+ {"errors" => true, "statuses" => [200, 503]},
75
+ {"errors" => false})
76
+ expect(subject).to receive(:submit).with([action1, action1, action1, action2]).ordered.once.and_call_original
77
+ expect(subject).to receive(:submit).with([action1, action2]).ordered.once.and_call_original
78
+ expect(subject).to receive(:submit).with([action2]).ordered.once.and_call_original
79
+
80
+ subject.register
81
+ subject.receive(event1)
82
+ subject.receive(event1)
83
+ subject.receive(event1)
84
+ subject.receive(event2)
85
+ subject.buffer_flush(:final => true)
86
+ sleep(3)
87
+ end
88
+
89
+ it "should retry actions with response status of 429" do
90
+ mock_actions_with_response({"errors" => true, "statuses" => [429]},
91
+ {"errors" => false})
92
+ expect(subject).to receive(:submit).with([action1]).twice.and_call_original
93
+ subject.register
94
+ subject.receive(event1)
95
+ subject.buffer_flush(:final => true)
96
+ sleep(3)
97
+ end
98
+
99
+ it "should retry an event until max_retries reached" do
100
+ mock_actions_with_response({"errors" => true, "statuses" => [429]},
101
+ {"errors" => true, "statuses" => [429]},
102
+ {"errors" => true, "statuses" => [429]},
103
+ {"errors" => true, "statuses" => [429]},
104
+ {"errors" => true, "statuses" => [429]},
105
+ {"errors" => true, "statuses" => [429]})
106
+ expect(subject).to receive(:submit).with([action1]).exactly(max_retries).times.and_call_original
107
+ subject.register
108
+ subject.receive(event1)
109
+ subject.buffer_flush(:final => true)
110
+ sleep(3)
111
+ end
112
+
113
+ it "non-retryable errors like mapping errors (400) should be dropped and not be retried (unfortunately)" do
114
+ subject.register
115
+ subject.receive(invalid_event)
116
+ expect(subject).not_to receive(:retry_push)
117
+ subject.teardown
118
+
119
+ @es.indices.refresh
120
+ sleep(5)
121
+ Stud::try(10.times) do
122
+ r = @es.search
123
+ insist { r["hits"]["total"] } == 0
124
+ end
125
+ end
126
+
127
+ it "successful requests should not be appended to retry queue" do
128
+ subject.register
129
+ subject.receive(event1)
130
+ expect(subject).not_to receive(:retry_push)
131
+ subject.teardown
132
+
133
+ @es.indices.refresh
134
+ sleep(5)
135
+ Stud::try(10.times) do
136
+ r = @es.search
137
+ insist { r["hits"]["total"] } == 1
138
+ end
139
+ end
140
+
141
+ it "should only index proper events" do
142
+ subject.register
143
+ subject.receive(invalid_event)
144
+ subject.receive(event1)
145
+ subject.teardown
146
+
147
+ @es.indices.refresh
148
+ sleep(5)
149
+ Stud::try(10.times) do
150
+ r = @es.search
151
+ insist { r["hits"]["total"] } == 1
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end