logstash-output-elasticsearch 0.1.6 → 3.0.0

Files changed (42)
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
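The shape of the file list already tells the story: lib/logstash/outputs/elasticsearch.rb sheds 425 lines, the old protocol.rb and the node/transport vendoring (rakelib/vendor.rake) disappear entirely, and a dedicated HTTP client (http_client.rb, http_client_builder.rb), a buffer, and a template manager take their place. The most visible API consequence in the specs below is the pluralized "hosts" option. As a hedged sketch (option names taken from the specs in this diff, values illustrative only), instantiating the rewritten output looks like:

    require "logstash/outputs/elasticsearch"
    require "logstash/event"

    # Hedged sketch: "hosts", "index", and "flush_size" are the option names
    # used throughout the specs below; the values here are illustrative.
    output = LogStash::Outputs::ElasticSearch.new(
      "hosts"      => ["127.0.0.1:9200"],        # list of HTTP endpoints
      "index"      => "logstash-%{+YYYY.MM.dd}",
      "flush_size" => 500
    )
    output.register
    output.receive(LogStash::Event.new("message" => "hello"))
    output.flush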
data/spec/es_spec_helper.rb
@@ -0,0 +1,77 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require "ftw"
+ require "logstash/plugin"
+ require "logstash/json"
+ require "stud/try"
+ require "longshoreman"
+ require "logstash/outputs/elasticsearch"
+
+ CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999).to_s}"
+ CONTAINER_IMAGE = "elasticsearch"
+ CONTAINER_TAG = "2.0"
+
+ DOCKER_INTEGRATION = ENV["DOCKER_INTEGRATION"]
+
+ module ESHelper
+   def get_host_port
+     addr = DOCKER_INTEGRATION ? Longshoreman.new.get_host_ip : "127.0.0.1"
+     "#{addr}:#{get_port}"
+   end
+
+   def get_port
+     return 9200 unless DOCKER_INTEGRATION
+
+     container = Longshoreman::Container.new
+     container.get(CONTAINER_NAME)
+     container.rport(9200)
+   end
+
+   def get_client
+     Elasticsearch::Client.new(:hosts => [get_host_port])
+   end
+ end
+
+
+ RSpec.configure do |config|
+   config.include ESHelper
+
+   if DOCKER_INTEGRATION
+     # This :all hook runs before every describe block tagged with :integration => true.
+     config.before(:all, :integration => true) do
+
+
+       # Check whether the container already exists before creating a new one.
+       begin
+         ls = Longshoreman::new
+         ls.container.get(CONTAINER_NAME)
+       rescue Docker::Error::NotFoundError
+         scriptDir = File.expand_path File.dirname(__FILE__) + '/fixtures/scripts'
+         Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME, {
+           'Cmd' => [ "-Des.script.inline=on", "-Des.script.indexed=on" ],
+           'HostConfig' => {
+             'Binds' => ["#{scriptDir}:/usr/share/elasticsearch/config/scripts"],
+             'PublishAllPorts' => true
+           }
+         })
+         # TODO(talevy): verify ES is running instead of static timeout
+         sleep 10
+       end
+     end
+
+     # We want to do a final cleanup after all :integration runs,
+     # but we don't want to clean up before the last block.
+     # This is a final blind check to see if the ES docker container is running and
+     # needs to be cleaned up. If no container can be found and/or docker is not
+     # running on the system, we do nothing.
+     config.after(:suite) do
+       # Only clean up the docker container if the system has docker and the container is running.
+       begin
+         ls = Longshoreman::new
+         ls.container.get(CONTAINER_NAME)
+         ls.cleanup
+       rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
+         # do nothing
+       end
+     end
+   end
+ end
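A sketch of how this helper is consumed, for orientation: specs tag themselves with :integration => true, which both triggers the Docker :all hook above and, thanks to config.include, makes get_client and get_host_port available inside examples. The example below is hypothetical:

    # Hypothetical consumer of es_spec_helper.rb. Run with DOCKER_INTEGRATION=1
    # to target the Longshoreman-managed container instead of 127.0.0.1:9200.
    require_relative "es_spec_helper"

    describe "cluster reachability", :integration => true do
      it "responds to ping" do
        # Assumes the elasticsearch-ruby client's ping method.
        insist { get_client.ping } == true
      end
    end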
data/spec/fixtures/scripts/scripted_update.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter += event["count"]
+
data/spec/fixtures/scripts/scripted_update_nested.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter += event["data"]["count"]
+
data/spec/fixtures/scripts/scripted_upsert.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter = event["counter"]
+
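These three Groovy fixtures are bind-mounted into the container's config/scripts directory by the helper above (with scripting switched on via the -Des.script.* flags), so update actions can call them by file name. A hedged sketch of output settings that would exercise scripted_update.groovy; the script_* option names follow the 2.x-era plugin and are an assumption here, since update_spec.rb's hunk is not shown in this diff:

    # Hedged sketch only; data/spec/integration/outputs/update_spec.rb in this
    # release holds the authoritative usage.
    LogStash::Outputs::ElasticSearch.new(
      "hosts"           => ["127.0.0.1:9200"],
      "action"          => "update",
      "document_id"     => "123",
      "script"          => "scripted_update",   # resolves to scripted_update.groovy
      "script_type"     => "file",              # assumed: one of "inline"/"indexed"/"file"
      "script_lang"     => "groovy",
      "script_var_name" => "event"              # the scripts read event["count"] etc.
    )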
data/spec/integration/outputs/create_spec.rb
@@ -0,0 +1,55 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "client create actions", :integration => true do
+   require "logstash/outputs/elasticsearch"
+   require "elasticsearch"
+
+   def get_es_output(action, id = nil)
+     settings = {
+       "manage_template" => true,
+       "index" => "logstash-create",
+       "template_overwrite" => true,
+       "hosts" => get_host_port(),
+       "action" => action
+     }
+     settings['document_id'] = id unless id.nil?
+     LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   before :each do
+     @es = get_client
+     # Delete all templates first.
+     # Clean ES of data before we start.
+     @es.indices.delete_template(:name => "*")
+     # This can fail if there are no indexes; ignore the failure.
+     @es.indices.delete(:index => "*") rescue nil
+   end
+
+   context "when action => create" do
+     it "should create new documents with a given id" do
+       subject = get_es_output("create", "id123")
+       subject.register
+       subject.receive(LogStash::Event.new("message" => "sample message here"))
+       subject.flush
+       @es.indices.refresh
+       # Wait or fail until everything's indexed.
+       Stud::try(3.times) do
+         r = @es.search
+         insist { r["hits"]["total"] } == 1
+       end
+     end
+
+     it "should create new documents without an id" do
+       subject = get_es_output("create")
+       subject.register
+       subject.receive(LogStash::Event.new("message" => "sample message here"))
+       subject.flush
+       @es.indices.refresh
+       # Wait or fail until everything's indexed.
+       Stud::try(3.times) do
+         r = @es.search
+         insist { r["hits"]["total"] } == 1
+       end
+     end
+   end
+ end
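What these green-path examples don't show is how create differs from the default index action: create maps to the bulk API's create op, so a second event reusing an existing document_id is rejected by Elasticsearch with a version conflict (HTTP 409) rather than overwriting the document. A hedged continuation of the spec above:

    # Hedged sketch reusing get_es_output from the spec above: after the
    # second flush the hit count should still be 1, because ES rejects the
    # duplicate create with a document-already-exists error.
    subject = get_es_output("create", "id123")
    subject.register
    subject.receive(LogStash::Event.new("message" => "first"))
    subject.flush
    subject.receive(LogStash::Event.new("message" => "second, same id"))
    subject.flush
    @es.indices.refresh
    insist { @es.search["hits"]["total"] } == 1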
data/spec/integration/outputs/index_spec.rb
@@ -0,0 +1,68 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ shared_examples "an indexer" do
+   let(:index) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:type) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:event_count) { 10000 + rand(500) }
+   let(:flush_size) { rand(200) + 1 }
+   let(:config) { "not implemented" }
+   subject { LogStash::Outputs::ElasticSearch.new(config) }
+
+   before do
+     subject.register
+     event_count.times do
+       subject.receive(LogStash::Event.new("message" => "Hello World!", "type" => type))
+     end
+   end
+
+   it "ships events" do
+     index_url = "http://#{get_host_port}/#{index}"
+
+     ftw = FTW::Agent.new
+     ftw.post!("#{index_url}/_refresh")
+
+     # Wait until all events are available.
+     Stud::try(10.times) do
+       data = ""
+       response = ftw.get!("#{index_url}/_count?q=*")
+       response.read_body { |chunk| data << chunk }
+       result = LogStash::Json.load(data)
+       cur_count = result["count"]
+       insist { cur_count } == event_count
+     end
+
+     response = ftw.get!("#{index_url}/_search?q=*&size=1000")
+     data = ""
+     response.read_body { |chunk| data << chunk }
+     result = LogStash::Json.load(data)
+     result["hits"]["hits"].each do |doc|
+       insist { doc["_type"] } == type
+       insist { doc["_index"] } == index
+     end
+   end
+ end
+
+ describe "an indexer with custom index_type", :integration => true do
+   it_behaves_like "an indexer" do
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size
+       }
+     }
+   end
+ end
+
+ describe "an indexer with no type value set (default to logs)", :integration => true do
+   it_behaves_like "an indexer" do
+     let(:type) { "logs" }
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size
+       }
+     }
+   end
+ end
data/spec/integration/outputs/parent_spec.rb
@@ -0,0 +1,73 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ shared_examples "a parent indexer" do
+   let(:index) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:type) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:event_count) { 10000 + rand(500) }
+   let(:flush_size) { rand(200) + 1 }
+   let(:parent) { "not_implemented" }
+   let(:config) { "not_implemented" }
+   subject { LogStash::Outputs::ElasticSearch.new(config) }
+
+   before do
+     # Add a mapping and a parent document.
+     index_url = "http://#{get_host_port()}/#{index}"
+     ftw = FTW::Agent.new
+     mapping = { "mappings" => { "#{type}" => { "_parent" => { "type" => "#{type}_parent" } } } }
+     ftw.put!("#{index_url}", :body => mapping.to_json)
+     pdoc = { "foo" => "bar" }
+     ftw.put!("#{index_url}/#{type}_parent/test", :body => pdoc.to_json)
+
+     subject.register
+     event_count.times do
+       subject.receive(LogStash::Event.new("link_to" => "test", "message" => "Hello World!", "type" => type))
+     end
+   end
+
+
+   it "ships events" do
+     index_url = "http://#{get_host_port()}/#{index}"
+
+     ftw = FTW::Agent.new
+     ftw.post!("#{index_url}/_refresh")
+
+     # Wait until all events are available.
+     Stud::try(10.times) do
+       query = { "query" => { "has_parent" => { "type" => "#{type}_parent", "query" => { "match" => { "foo" => "bar" } } } } }
+       data = ""
+       response = ftw.post!("#{index_url}/_count?q=*", :body => query.to_json)
+       response.read_body { |chunk| data << chunk }
+       result = LogStash::Json.load(data)
+       cur_count = result["count"]
+       insist { cur_count } == event_count
+     end
+   end
+ end
+
+ describe "(http protocol) index events with static parent", :integration => true do
+   it_behaves_like "a parent indexer" do
+     let(:parent) { "test" }
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size,
+         "parent" => parent
+       }
+     }
+   end
+ end
+
+ describe "(http protocol) index events with fieldref in parent value", :integration => true do
+   it_behaves_like "a parent indexer" do
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size,
+         "parent" => "%{link_to}"
+       }
+     }
+   end
+ end
+
data/spec/integration/outputs/pipeline_spec.rb
@@ -0,0 +1,75 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "Ingest pipeline execution behavior", :integration => true, :version_5x => true do
+   subject! do
+     require "logstash/outputs/elasticsearch"
+     settings = {
+       "hosts" => "#{get_host_port()}",
+       "pipeline" => "apache-logs"
+     }
+     next LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   let(:ftw_client) { FTW::Agent.new }
+   let(:ingest_url) { "http://#{get_host_port()}/_ingest/pipeline/apache-logs" }
+   let(:apache_logs_pipeline) { '
+     {
+       "description" : "Pipeline to parse Apache logs",
+       "processors" : [
+         {
+           "grok": {
+             "field": "message",
+             "pattern": "%{COMBINEDAPACHELOG}"
+           }
+         }
+       ]
+     }'
+   }
+
+   before :each do
+     # Delete all templates first.
+     require "elasticsearch"
+
+     # Clean ES of data before we start.
+     @es = get_client
+     @es.indices.delete_template(:name => "*")
+
+     # This can fail if there are no indexes; ignore the failure.
+     @es.indices.delete(:index => "*") rescue nil
+
+     # Delete any existing ingest pipeline.
+     req = ftw_client.delete(ingest_url)
+     ftw_client.execute(req)
+
+     # Register the pipeline.
+     req = ftw_client.put(ingest_url, :body => apache_logs_pipeline)
+     ftw_client.execute(req)
+
+     # TODO: use the ES client instead:
+     # @es.ingest.put_pipeline :id => 'apache_pipeline', :body => pipeline_definition
+
+     subject.register
+     subject.receive(LogStash::Event.new("message" => '183.60.215.50 - - [01/Jun/2015:18:00:00 +0000] "GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'))
+     subject.flush
+     @es.indices.refresh
+
+     # Wait or fail until everything's indexed.
+     Stud::try(20.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+
+   it "indexes using the proper pipeline" do
+     results = @es.search(:index => 'logstash-*', :q => "message:\"netcat\"")
+     insist { results["hits"]["total"] } == 1
+     insist { results["hits"]["hits"][0]["_source"]["response"] } == "200"
+     insist { results["hits"]["hits"][0]["_source"]["bytes"] } == "182"
+     insist { results["hits"]["hits"][0]["_source"]["verb"] } == "GET"
+     insist { results["hits"]["hits"][0]["_source"]["request"] } == "/scripts/netcat-webserver"
+     insist { results["hits"]["hits"][0]["_source"]["auth"] } == "-"
+     insist { results["hits"]["hits"][0]["_source"]["ident"] } == "-"
+     insist { results["hits"]["hits"][0]["_source"]["clientip"] } == "183.60.215.50"
+     insist { results["hits"]["hits"][0]["_source"]["junkfieldaaaa"] } == nil
+   end
+ end
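The TODO in the before block could plausibly be addressed with the elasticsearch-ruby client's ingest namespace, assuming the installed client version is new enough to expose the 5.x APIs; a hedged sketch mirroring the delete-then-register sequence above:

    # Assumption: @es is an elasticsearch-ruby client that ships the ingest
    # APIs introduced alongside ES 5.x support.
    @es.ingest.delete_pipeline(:id => "apache-logs") rescue nil
    @es.ingest.put_pipeline(:id => "apache-logs", :body => apache_logs_pipeline)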
data/spec/integration/outputs/retry_spec.rb
@@ -0,0 +1,163 @@
+ require "logstash/outputs/elasticsearch"
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "failures in bulk class expected behavior", :integration => true do
+   let(:template) { '{"template" : "not important, will be updated by :index"}' }
+   let(:event1) { LogStash::Event.new("somevalue" => 100, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
+   let(:action1) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event1] }
+   let(:event2) { LogStash::Event.new("geoip" => { "location" => [ 0.0, 0.0] }, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
+   let(:action2) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event2] }
+   let(:invalid_event) { LogStash::Event.new("geoip" => { "location" => "notlatlon" }, "@timestamp" => "2014-11-17T20:37:17.223Z") }
+
+   def mock_actions_with_response(*responses)
+     expanded_responses = responses.map do |resp|
+       items = resp["statuses"] && resp["statuses"].map do |status|
+         {"create" => {"status" => status, "error" => "Error for #{status}"}}
+       end
+
+       {
+         "errors" => resp["errors"],
+         "items" => items
+       }
+     end
+
+     allow_any_instance_of(LogStash::Outputs::ElasticSearch::HttpClient).to receive(:bulk).and_return(*expanded_responses)
+   end
+
+   subject! do
+     settings = {
+       "manage_template" => true,
+       "index" => "logstash-2014.11.17",
+       "template_overwrite" => true,
+       "hosts" => get_host_port(),
+       "retry_max_items" => 10,
+       "retry_max_interval" => 1,
+     }
+     next LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   before :each do
+     # Delete all templates first.
+     require "elasticsearch"
+
+     # Clean ES of data before we start.
+     @es = get_client
+     @es.indices.delete_template(:name => "*")
+     @es.indices.delete(:index => "*")
+     @es.indices.refresh
+   end
+
+   after :each do
+     subject.close
+   end
+
+   it "should return no errors if all bulk actions are successful" do
+     mock_actions_with_response({"errors" => false})
+     expect(subject).to receive(:submit).with([action1, action2]).once.and_call_original
+     subject.register
+     subject.receive(event1)
+     subject.receive(event2)
+     subject.flush
+     sleep(2)
+   end
+
+   it "retries exceptions raised within the submit body" do
+     call_count = 0
+     subject.register
+
+     expect(subject.client).to receive(:bulk).with(anything).exactly(3).times do
+       if (call_count += 1) <= 2
+         raise "error first two times"
+       else
+         {"errors" => false}
+       end
+     end
+
+     subject.receive(event1)
+     subject.flush
+   end
+
+   it "should retry actions with response status of 503" do
+     mock_actions_with_response({"errors" => true, "statuses" => [200, 200, 503, 503]},
+                                {"errors" => true, "statuses" => [200, 503]},
+                                {"errors" => false})
+     expect(subject).to receive(:submit).with([action1, action1, action1, action2]).ordered.once.and_call_original
+     expect(subject).to receive(:submit).with([action1, action2]).ordered.once.and_call_original
+     expect(subject).to receive(:submit).with([action2]).ordered.once.and_call_original
+
+     subject.register
+     subject.receive(event1)
+     subject.receive(event1)
+     subject.receive(event1)
+     subject.receive(event2)
+     subject.flush
+     sleep(3)
+   end
+
+   it "should retry actions with response status of 429" do
+     subject.register
+
+     mock_actions_with_response({"errors" => true, "statuses" => [429]},
+                                {"errors" => false})
+     expect(subject).to receive(:submit).with([action1]).twice.and_call_original
+
+     subject.receive(event1)
+     subject.flush
+     sleep(3)
+   end
+
+   it "should retry an event indefinitely until a non-retryable status occurs" do
+     mock_actions_with_response({"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [500]})
+     expect(subject).to receive(:submit).with([action1]).exactly(6).times.and_call_original
+     subject.register
+     subject.receive(event1)
+     subject.flush
+     sleep(5)
+   end
+
+   it "non-retryable errors like mapping errors (400) should be dropped and not retried (unfortunately)" do
+     subject.register
+     subject.receive(invalid_event)
+     expect(subject).to receive(:submit).once.and_call_original
+     subject.close
+
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 0
+     end
+   end
+
+   it "should not append successful requests to the retry queue" do
+     subject.register
+     subject.receive(event1)
+     expect(subject).to receive(:submit).once.and_call_original
+     subject.close
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+
+   it "should only index proper events" do
+     subject.register
+     subject.receive(invalid_event)
+     subject.receive(event1)
+     subject.close
+
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+ end
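Read together, these examples pin down the retry contract rather than the implementation: per-action statuses 429 and 503 are re-enqueued and re-submitted (indefinitely, on a retry_max_interval cadence), a whole-request exception is retried at the bulk-call level, and client errors such as 400 mapping failures are dropped. A hedged distillation of that contract, not the plugin's actual source:

    # Hedged summary of the behavior asserted above; names are illustrative.
    RETRYABLE_STATUSES = [429, 503].freeze

    def retryable?(status)
      RETRYABLE_STATUSES.include?(status)   # 400s and 500s end the retry loop
    end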