logstash-output-elasticsearch 0.1.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
data/spec/es_spec_helper.rb
@@ -0,0 +1,77 @@
+ require "logstash/devutils/rspec/spec_helper"
+ require "ftw"
+ require "logstash/plugin"
+ require "logstash/json"
+ require "stud/try"
+ require "longshoreman"
+ require "logstash/outputs/elasticsearch"
+
+ CONTAINER_NAME = "logstash-output-elasticsearch-#{rand(999).to_s}"
+ CONTAINER_IMAGE = "elasticsearch"
+ CONTAINER_TAG = "2.0"
+
+ DOCKER_INTEGRATION = ENV["DOCKER_INTEGRATION"]
+
+ module ESHelper
+   def get_host_port
+     addr = DOCKER_INTEGRATION ? Longshoreman.new.get_host_ip : "127.0.0.1"
+     "#{addr}:#{get_port}"
+   end
+
+   def get_port
+     return 9200 unless DOCKER_INTEGRATION
+
+     container = Longshoreman::Container.new
+     container.get(CONTAINER_NAME)
+     container.rport(9200)
+   end
+
+   def get_client
+     Elasticsearch::Client.new(:hosts => [get_host_port])
+   end
+ end
+
+
+ RSpec.configure do |config|
+   config.include ESHelper
+
+   if DOCKER_INTEGRATION
+     # this :all hook gets run before every describe block that is tagged with :integration => true.
+     config.before(:all, :integration => true) do
+
+
+       # check if container exists already before creating new one.
+       begin
+         ls = Longshoreman::new
+         ls.container.get(CONTAINER_NAME)
+       rescue Docker::Error::NotFoundError
+         scriptDir = File.expand_path File.dirname(__FILE__) + '/fixtures/scripts'
+         Longshoreman.new("#{CONTAINER_IMAGE}:#{CONTAINER_TAG}", CONTAINER_NAME, {
+           'Cmd' => [ "-Des.script.inline=on", "-Des.script.indexed=on" ],
+           'HostConfig' => {
+             'Binds' => ["#{scriptDir}:/usr/share/elasticsearch/config/scripts"],
+             'PublishAllPorts' => true
+           }
+         })
+         # TODO(talevy): verify ES is running instead of static timeout
+         sleep 10
+       end
+     end
+
+     # we want to do a final cleanup after all :integration runs,
+     # but we don't want to clean up before the last block.
+     # This is a final blind check to see if the ES docker container is running and
+     # needs to be cleaned up. If no container can be found and/or docker is not
+     # running on the system, we do nothing.
+     config.after(:suite) do
+       # only cleanup docker container if system has docker and the container is running
+       begin
+         ls = Longshoreman::new
+         ls.container.get(CONTAINER_NAME)
+         ls.cleanup
+       rescue Docker::Error::NotFoundError, Excon::Errors::SocketError
+         # do nothing
+       end
+     end
+   end
+ end
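The helper above mixes ESHelper into every example group via config.include, so specs can call get_host_port and get_client directly; with DOCKER_INTEGRATION set it provisions an elasticsearch:2.0 container, otherwise it assumes a local node on 127.0.0.1:9200. A minimal sketch of a consuming spec (hypothetical, not part of this diff):

# smoke_spec.rb -- illustrative only; assumes es_spec_helper.rb is on the load path.
require_relative "es_spec_helper"

describe "elasticsearch connectivity", :integration => true do
  it "responds to a ping" do
    # get_client comes from ESHelper; Elasticsearch::Client#ping returns true when the node answers.
    insist { get_client.ping } == true
  end
end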
data/spec/fixtures/scripts/scripted_update.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter += event["count"]
+
data/spec/fixtures/scripts/scripted_update_nested.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter += event["data"]["count"]
+
data/spec/fixtures/scripts/scripted_upsert.groovy
@@ -0,0 +1,2 @@
+ ctx._source.counter = event["counter"]
+
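These three Groovy fixtures are bind-mounted into the container at /usr/share/elasticsearch/config/scripts by the helper above, which makes them usable as file scripts; each one reads the incoming document through the plugin's script variable (event by default). A hedged sketch of how an update action might point at one of them (settings shown are illustrative; see update_spec.rb in this diff for the canonical usage):

# Illustrative settings only -- the exact option values are exercised in update_spec.rb.
require "logstash/outputs/elasticsearch"

output = LogStash::Outputs::ElasticSearch.new(
  "hosts"       => "127.0.0.1:9200",
  "action"      => "update",
  "document_id" => "123",
  "script"      => "scripted_update",   # resolves to scripted_update.groovy under config/scripts
  "script_type" => "file"
)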
data/spec/integration/outputs/create_spec.rb
@@ -0,0 +1,55 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "client create actions", :integration => true do
+   require "logstash/outputs/elasticsearch"
+   require "elasticsearch"
+
+   def get_es_output(action, id = nil)
+     settings = {
+       "manage_template" => true,
+       "index" => "logstash-create",
+       "template_overwrite" => true,
+       "hosts" => get_host_port(),
+       "action" => action
+     }
+     settings['document_id'] = id unless id.nil?
+     LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   before :each do
+     @es = get_client
+     # Delete all templates first.
+     # Clean ES of data before we start.
+     @es.indices.delete_template(:name => "*")
+     # This can fail if there are no indexes, ignore failure.
+     @es.indices.delete(:index => "*") rescue nil
+   end
+
+   context "when action => create" do
+     it "should create new documents with or without id" do
+       subject = get_es_output("create", "id123")
+       subject.register
+       subject.receive(LogStash::Event.new("message" => "sample message here"))
+       subject.flush
+       @es.indices.refresh
+       # Wait or fail until everything's indexed.
+       Stud::try(3.times) do
+         r = @es.search
+         insist { r["hits"]["total"] } == 1
+       end
+     end
+
+     it "should create new documents without id" do
+       subject = get_es_output("create")
+       subject.register
+       subject.receive(LogStash::Event.new("message" => "sample message here"))
+       subject.flush
+       @es.indices.refresh
+       # Wait or fail until everything's indexed.
+       Stud::try(3.times) do
+         r = @es.search
+         insist { r["hits"]["total"] } == 1
+       end
+     end
+   end
+ end
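Both examples above pass because create behaves like index on a first write; the distinguishing property of create is that it refuses to overwrite: a second write with the same document id fails with an HTTP 409 conflict. A hedged illustration with the elasticsearch-ruby client (not part of the diff):

es = Elasticsearch::Client.new(:hosts => ["127.0.0.1:9200"])
es.create(:index => "logstash-create", :type => "logs", :id => "id123", :body => { "message" => "first" })
# Re-creating the same id conflicts instead of overwriting:
es.create(:index => "logstash-create", :type => "logs", :id => "id123", :body => { "message" => "second" })
# => raises Elasticsearch::Transport::Transport::Errors::Conflict (HTTP 409)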
data/spec/integration/outputs/index_spec.rb
@@ -0,0 +1,68 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ shared_examples "an indexer" do
+   let(:index) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:type) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:event_count) { 10000 + rand(500) }
+   let(:flush_size) { rand(200) + 1 }
+   let(:config) { "not implemented" }
+   subject { LogStash::Outputs::ElasticSearch.new(config) }
+
+   before do
+     subject.register
+     event_count.times do
+       subject.receive(LogStash::Event.new("message" => "Hello World!", "type" => type))
+     end
+   end
+
+   it "ships events" do
+     index_url = "http://#{get_host_port}/#{index}"
+
+     ftw = FTW::Agent.new
+     ftw.post!("#{index_url}/_refresh")
+
+     # Wait until all events are available.
+     Stud::try(10.times) do
+       data = ""
+       response = ftw.get!("#{index_url}/_count?q=*")
+       response.read_body { |chunk| data << chunk }
+       result = LogStash::Json.load(data)
+       cur_count = result["count"]
+       insist { cur_count } == event_count
+     end
+
+     response = ftw.get!("#{index_url}/_search?q=*&size=1000")
+     data = ""
+     response.read_body { |chunk| data << chunk }
+     result = LogStash::Json.load(data)
+     result["hits"]["hits"].each do |doc|
+       insist { doc["_type"] } == type
+       insist { doc["_index"] } == index
+     end
+   end
+ end
+
+ describe "an indexer with custom index_type", :integration => true do
+   it_behaves_like "an indexer" do
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size
+       }
+     }
+   end
+ end
+
+ describe "an indexer with no type value set (default to logs)", :integration => true do
+   it_behaves_like "an indexer" do
+     let(:type) { "logs" }
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size
+       }
+     }
+   end
+ end
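Both indexer variants randomize flush_size, which bounds how many events the output buffers before issuing a _bulk request; the _count polling above is what synchronizes the assertion with the final flush. The same check written against the elasticsearch-ruby client rather than FTW (a sketch, illustrative only):

es = Elasticsearch::Client.new(:hosts => ["127.0.0.1:9200"])
es.indices.refresh(:index => index)
es.count(:index => index, :q => "*")["count"]  # expected to equal event_count once flushed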
data/spec/integration/outputs/parent_spec.rb
@@ -0,0 +1,73 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ shared_examples "a parent indexer" do
+   let(:index) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:type) { 10.times.collect { rand(10).to_s }.join("") }
+   let(:event_count) { 10000 + rand(500) }
+   let(:flush_size) { rand(200) + 1 }
+   let(:parent) { "not_implemented" }
+   let(:config) { "not_implemented" }
+   subject { LogStash::Outputs::ElasticSearch.new(config) }
+
+   before do
+     # Add mapping and a parent document
+     index_url = "http://#{get_host_port()}/#{index}"
+     ftw = FTW::Agent.new
+     mapping = { "mappings" => { "#{type}" => { "_parent" => { "type" => "#{type}_parent" } } } }
+     ftw.put!("#{index_url}", :body => mapping.to_json)
+     pdoc = { "foo" => "bar" }
+     ftw.put!("#{index_url}/#{type}_parent/test", :body => pdoc.to_json)
+
+     subject.register
+     event_count.times do
+       subject.receive(LogStash::Event.new("link_to" => "test", "message" => "Hello World!", "type" => type))
+     end
+   end
+
+
+   it "ships events" do
+     index_url = "http://#{get_host_port()}/#{index}"
+
+     ftw = FTW::Agent.new
+     ftw.post!("#{index_url}/_refresh")
+
+     # Wait until all events are available.
+     Stud::try(10.times) do
+       query = { "query" => { "has_parent" => { "type" => "#{type}_parent", "query" => { "match" => { "foo" => "bar" } } } } }
+       data = ""
+       response = ftw.post!("#{index_url}/_count?q=*", :body => query.to_json)
+       response.read_body { |chunk| data << chunk }
+       result = LogStash::Json.load(data)
+       cur_count = result["count"]
+       insist { cur_count } == event_count
+     end
+   end
+ end
+
+ describe "(http protocol) index events with static parent", :integration => true do
+   it_behaves_like 'a parent indexer' do
+     let(:parent) { "test" }
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size,
+         "parent" => parent
+       }
+     }
+   end
+ end
+
+ describe "(http_protocol) index events with fieldref in parent value", :integration => true do
+   it_behaves_like 'a parent indexer' do
+     let(:config) {
+       {
+         "hosts" => get_host_port,
+         "index" => index,
+         "flush_size" => flush_size,
+         "parent" => "%{link_to}"
+       }
+     }
+   end
+ end
+
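The shared example relies on Elasticsearch parent/child routing: the mapping declares a _parent type, so every child document must carry a parent parameter at index time, which the output derives from its parent setting either as a literal or as a %{field} reference expanded per event. The spec's has_parent count, restated against the elasticsearch-ruby client (a sketch):

es = Elasticsearch::Client.new(:hosts => ["127.0.0.1:9200"])
es.count(:index => index, :body => {
  "query" => {
    "has_parent" => {
      "type"  => "#{type}_parent",
      "query" => { "match" => { "foo" => "bar" } }
    }
  }
})["count"]  # counts children whose parent matches foo:bar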
data/spec/integration/outputs/pipeline_spec.rb
@@ -0,0 +1,75 @@
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "Ingest pipeline execution behavior", :integration => true, :version_5x => true do
+   subject! do
+     require "logstash/outputs/elasticsearch"
+     settings = {
+       "hosts" => "#{get_host_port()}",
+       "pipeline" => "apache-logs"
+     }
+     next LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   let(:ftw_client) { FTW::Agent.new }
+   let(:ingest_url) { "http://#{get_host_port()}/_ingest/pipeline/apache-logs" }
+   let(:apache_logs_pipeline) { '
+     {
+       "description" : "Pipeline to parse Apache logs",
+       "processors" : [
+         {
+           "grok": {
+             "field": "message",
+             "pattern": "%{COMBINEDAPACHELOG}"
+           }
+         }
+       ]
+     }'
+   }
+
+   before :each do
+     # Delete all templates first.
+     require "elasticsearch"
+
+     # Clean ES of data before we start.
+     @es = get_client
+     @es.indices.delete_template(:name => "*")
+
+     # This can fail if there are no indexes, ignore failure.
+     @es.indices.delete(:index => "*") rescue nil
+
+     # delete existing ingest pipeline
+     req = ftw_client.delete(ingest_url)
+     ftw_client.execute(req)
+
+     # register pipeline
+     req = ftw_client.put(ingest_url, :body => apache_logs_pipeline)
+     ftw_client.execute(req)
+
+     # TODO: Use esclient
+     # @es.ingest.put_pipeline :id => 'apache_pipeline', :body => pipeline_definition
+
+     subject.register
+     subject.receive(LogStash::Event.new("message" => '183.60.215.50 - - [01/Jun/2015:18:00:00 +0000] "GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'))
+     subject.flush
+     @es.indices.refresh
+
+     # Wait or fail until everything's indexed.
+     Stud::try(20.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+
+   it "indexes using the proper pipeline" do
+     results = @es.search(:index => 'logstash-*', :q => "message:\"netcat\"")
+     insist { results["hits"]["total"] } == 1
+     insist { results["hits"]["hits"][0]["_source"]["response"] } == "200"
+     insist { results["hits"]["hits"][0]["_source"]["bytes"] } == "182"
+     insist { results["hits"]["hits"][0]["_source"]["verb"] } == "GET"
+     insist { results["hits"]["hits"][0]["_source"]["request"] } == "/scripts/netcat-webserver"
+     insist { results["hits"]["hits"][0]["_source"]["auth"] } == "-"
+     insist { results["hits"]["hits"][0]["_source"]["ident"] } == "-"
+     insist { results["hits"]["hits"][0]["_source"]["clientip"] } == "183.60.215.50"
+     insist { results["hits"]["hits"][0]["_source"]["junkfieldaaaa"] } == nil
+   end
+ end
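The pipeline setting, introduced between 0.1.6 and 3.0.0 and targeting Elasticsearch 5.x ingest nodes, makes the output tag its bulk requests with the named ingest pipeline, so the grok processor above parses each raw Apache line server-side before indexing. The spec's own TODO points at the client-based alternative to the raw FTW calls; a sketch of that form (ingest.put_pipeline is available in elasticsearch-api 5.x):

@es.ingest.put_pipeline(:id => "apache-logs", :body => apache_logs_pipeline)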
data/spec/integration/outputs/retry_spec.rb
@@ -0,0 +1,163 @@
+ require "logstash/outputs/elasticsearch"
+ require_relative "../../../spec/es_spec_helper"
+
+ describe "failures in bulk class expected behavior", :integration => true do
+   let(:template) { '{"template" : "not important, will be updated by :index"}' }
+   let(:event1) { LogStash::Event.new("somevalue" => 100, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
+   let(:action1) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event1] }
+   let(:event2) { LogStash::Event.new("geoip" => { "location" => [ 0.0, 0.0] }, "@timestamp" => "2014-11-17T20:37:17.223Z", "@metadata" => {"retry_count" => 0}) }
+   let(:action2) { ["index", {:_id=>nil, :_routing=>nil, :_index=>"logstash-2014.11.17", :_type=>"logs"}, event2] }
+   let(:invalid_event) { LogStash::Event.new("geoip" => { "location" => "notlatlon" }, "@timestamp" => "2014-11-17T20:37:17.223Z") }
+
+   def mock_actions_with_response(*resp)
+     expanded_responses = resp.map do |resp|
+       items = resp["statuses"] && resp["statuses"].map do |status|
+         {"create" => {"status" => status, "error" => "Error for #{status}"}}
+       end
+
+       {
+         "errors" => resp["errors"],
+         "items" => items
+       }
+     end
+
+     allow_any_instance_of(LogStash::Outputs::ElasticSearch::HttpClient).to receive(:bulk).and_return(*expanded_responses)
+   end
+
+   subject! do
+     settings = {
+       "manage_template" => true,
+       "index" => "logstash-2014.11.17",
+       "template_overwrite" => true,
+       "hosts" => get_host_port(),
+       "retry_max_items" => 10,
+       "retry_max_interval" => 1,
+     }
+     next LogStash::Outputs::ElasticSearch.new(settings)
+   end
+
+   before :each do
+     # Delete all templates first.
+     require "elasticsearch"
+
+     # Clean ES of data before we start.
+     @es = get_client
+     @es.indices.delete_template(:name => "*")
+     @es.indices.delete(:index => "*")
+     @es.indices.refresh
+   end
+
+   after :each do
+     subject.close
+   end
+
+   it "should return no errors if all bulk actions are successful" do
+     mock_actions_with_response({"errors" => false})
+     expect(subject).to receive(:submit).with([action1, action2]).once.and_call_original
+     subject.register
+     subject.receive(event1)
+     subject.receive(event2)
+     subject.flush
+     sleep(2)
+   end
+
+   it "retry exceptions within the submit body" do
+     call_count = 0
+     subject.register
+
+     expect(subject.client).to receive(:bulk).with(anything).exactly(3).times do
+       if (call_count += 1) <= 2
+         raise "error first two times"
+       else
+         {"errors" => false}
+       end
+     end
+
+     subject.receive(event1)
+     subject.flush
+   end
+
+   it "should retry actions with response status of 503" do
+     mock_actions_with_response({"errors" => true, "statuses" => [200, 200, 503, 503]},
+                                {"errors" => true, "statuses" => [200, 503]},
+                                {"errors" => false})
+     expect(subject).to receive(:submit).with([action1, action1, action1, action2]).ordered.once.and_call_original
+     expect(subject).to receive(:submit).with([action1, action2]).ordered.once.and_call_original
+     expect(subject).to receive(:submit).with([action2]).ordered.once.and_call_original
+
+     subject.register
+     subject.receive(event1)
+     subject.receive(event1)
+     subject.receive(event1)
+     subject.receive(event2)
+     subject.flush
+     sleep(3)
+   end
+
+   it "should retry actions with response status of 429" do
+     subject.register
+
+     mock_actions_with_response({"errors" => true, "statuses" => [429]},
+                                {"errors" => false})
+     expect(subject).to receive(:submit).with([action1]).twice.and_call_original
+
+     subject.receive(event1)
+     subject.flush
+     sleep(3)
+   end
+
+   it "should retry an event infinitely until a non retryable status occurs" do
+     mock_actions_with_response({"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [429]},
+                                {"errors" => true, "statuses" => [500]})
+     expect(subject).to receive(:submit).with([action1]).exactly(6).times.and_call_original
+     subject.register
+     subject.receive(event1)
+     subject.flush
+     sleep(5)
+   end
+
+   it "non-retryable errors like mapping errors (400) should be dropped and not be retried (unfortunately)" do
+     subject.register
+     subject.receive(invalid_event)
+     expect(subject).to receive(:submit).once.and_call_original
+     subject.close
+
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 0
+     end
+   end
+
+   it "successful requests should not be appended to retry queue" do
+     subject.register
+     subject.receive(event1)
+     expect(subject).to receive(:submit).once.and_call_original
+     subject.close
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+
+   it "should only index proper events" do
+     subject.register
+     subject.receive(invalid_event)
+     subject.receive(event1)
+     subject.close
+
+     @es.indices.refresh
+     sleep(5)
+     Stud::try(10.times) do
+       r = @es.search
+       insist { r["hits"]["total"] } == 1
+     end
+   end
+ end
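Taken together these examples pin down the retry contract the rest of the diff implements: bulk items that return 429 or 503 are re-queued and resubmitted (paced by retry_max_items and retry_max_interval), while anything else, including 400 mapping errors, is submitted once and then dropped. A compact restatement of that policy (illustrative, not the plugin's literal source):

# Statuses the specs above treat as retryable versus dropped.
RETRYABLE_STATUSES = [429, 503]

def retryable?(status)
  RETRYABLE_STATUSES.include?(status)
end

retryable?(503) # => true  -- the action is re-queued and submitted again
retryable?(400) # => false -- the event is dropped (the "unfortunately" case above)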