logstash-output-elasticsearch 0.1.6 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
@@ -5,30 +5,42 @@
5
5
  },
6
6
  "mappings" : {
7
7
  "_default_" : {
8
- "_all" : {"enabled" : true},
9
- "dynamic_templates" : [ {
10
- "string_fields" : {
11
- "match" : "*",
12
- "match_mapping_type" : "string",
13
- "mapping" : {
14
- "type" : "string", "index" : "analyzed", "omit_norms" : true,
15
- "fields" : {
16
- "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
17
- }
18
- }
19
- }
20
- } ],
21
- "properties" : {
22
- "@version": { "type": "string", "index": "not_analyzed" },
23
- "geoip" : {
24
- "type" : "object",
25
- "dynamic": true,
26
- "path": "full",
27
- "properties" : {
28
- "location" : { "type" : "geo_point" }
29
- }
30
- }
31
- }
8
+ "_all" : {"enabled" : true, "omit_norms" : true},
9
+ "dynamic_templates" : [ {
10
+ "message_field" : {
11
+ "match" : "message",
12
+ "match_mapping_type" : "string",
13
+ "mapping" : {
14
+ "type" : "string", "index" : "analyzed", "omit_norms" : true,
15
+ "fielddata" : { "format" : "disabled" }
16
+ }
17
+ }
18
+ }, {
19
+ "string_fields" : {
20
+ "match" : "*",
21
+ "match_mapping_type" : "string",
22
+ "mapping" : {
23
+ "type" : "string", "index" : "analyzed", "omit_norms" : true,
24
+ "fielddata" : { "format" : "disabled" },
25
+ "fields" : {
26
+ "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
27
+ }
28
+ }
29
+ }
30
+ } ],
31
+ "properties" : {
32
+ "@timestamp": { "type": "date" },
33
+ "@version": { "type": "string", "index": "not_analyzed" },
34
+ "geoip" : {
35
+ "dynamic": true,
36
+ "properties" : {
37
+ "ip": { "type": "ip" },
38
+ "location" : { "type" : "geo_point" },
39
+ "latitude" : { "type" : "float" },
40
+ "longitude" : { "type" : "float" }
41
+ }
42
+ }
43
+ }
32
44
  }
33
45
  }
34
46
  }
@@ -0,0 +1,236 @@
1
+ require "logstash/outputs/elasticsearch"
2
+ require "cabin"
3
+ require "base64"
4
+ require "elasticsearch"
5
+ require "elasticsearch/transport/transport/http/manticore"
6
+
7
module LogStash; module Outputs; class ElasticSearch;
  # HTTP transport wrapper around the `elasticsearch` gem client.
  #
  # All operations that touch the underlying connection pool (bulk requests,
  # template management, sniffing) are serialized through a single mutex so
  # that sniffing can never reload connections while a request is in flight.
  class HttpClient
    attr_reader :client, :options, :client_options, :sniffer_thread

    # Delay (seconds) between sniffing rounds when none is configured.
    DEFAULT_SNIFFING_DELAY = 30

    # This is here in case we use DEFAULT_OPTIONS in the future
    # DEFAULT_OPTIONS = {
    #   :setting => value
    # }

    # @param options [Hash] see #build_client for the connection-related keys;
    #   additionally honors :logger (required), :sniffing, :sniffing_delay,
    #   and the update-API options consumed by #update_action_builder.
    def initialize(options={})
      @logger = options[:logger]
      # Again, in case we use DEFAULT_OPTIONS in the future, uncomment this.
      # @options = DEFAULT_OPTIONS.merge(options)
      @options = options
      @client = build_client(@options)
      # mutex to prevent requests and sniffing to access the
      # connection pool at the same time
      @request_mutex = Mutex.new
      start_sniffing!
    end

    # Install `template` under `name` unless a template with that name already
    # exists. Pass `force=true` to overwrite an existing template.
    def template_install(name, template, force=false)
      @request_mutex.synchronize do
        if template_exists?(name) && !force
          @logger.debug("Found existing Elasticsearch template. Skipping template management", :name => name)
          return
        end
        template_put(name, template)
      end
    end

    # Thread-safe bulk entry point; serializes against sniffing.
    def bulk(actions)
      @request_mutex.synchronize { non_threadsafe_bulk(actions) }
    end

    # Translate [action, args, source] triples into the bulk API body and
    # submit them. Callers must already hold @request_mutex (see #bulk).
    def non_threadsafe_bulk(actions)
      return if actions.empty?
      bulk_body = actions.collect do |action, args, source|
        args, source = update_action_builder(args, source) if action == 'update'

        # 'delete' carries no document body; everything else embeds the source.
        if source && action != 'delete'
          next { action => args.merge({ :data => source }) }
        else
          next { action => args }
        end
      end

      @client.bulk(:body => bulk_body)
    end

    # Spawn the background thread that periodically refreshes the host list
    # from the cluster. No-op unless :sniffing is enabled.
    def start_sniffing!
      if options[:sniffing]
        # BUGFIX: the previous `options[:sniffing_delay].to_f || 30` could
        # never fall back to 30 — `nil.to_f` is 0 and 0 is truthy in Ruby —
        # so an unconfigured delay produced `sleep 0` (a busy loop). Apply
        # the default *before* coercing.
        delay = (options[:sniffing_delay] || DEFAULT_SNIFFING_DELAY).to_f
        @sniffer_thread = Thread.new do
          loop do
            @request_mutex.synchronize { sniff! }
            sleep(delay)
          end
        end
      end
    end

    def stop_sniffing!
      @sniffer_thread.kill() if @sniffer_thread
    end

    # Reload the client's connection list from the cluster. Errors are logged
    # and swallowed deliberately: a failed sniff must not kill the sniffer loop.
    def sniff!
      client.transport.reload_connections! if options[:sniffing]
      hosts_by_name = client.transport.hosts.map {|h| h["name"]}.sort
      @logger.debug({"count" => hosts_by_name.count, "hosts" => hosts_by_name})
    rescue StandardError => e
      @logger.error("Error while sniffing connection",
                    :message => e.message,
                    :class => e.class.name,
                    :backtrace => e.backtrace)
    end

    private

    # Builds a client and returns an Elasticsearch::Client
    #
    # The `options` is a hash where the following symbol keys have meaning:
    #
    # * `:hosts` - array of String. Set a list of hosts to use for communication.
    # * `:port` - number. set the port to use to communicate with Elasticsearch
    # * `:user` - String. The user to use for authentication.
    # * `:password` - String. The password to use for authentication.
    # * `:timeout` - Float. A duration value, in seconds, after which a socket
    #    operation or request will be aborted if not yet successful
    # * `:client_settings` - a hash; see below for keys.
    #
    # The `client_settings` key is a hash that can contain other settings:
    #
    # * `:ssl` - Boolean. Enable or disable SSL/TLS.
    # * `:proxy` - String. Choose a HTTP HTTProxy to use.
    # * `:path` - String. The leading path for prefixing Elasticsearch
    #   requests. This is sometimes used if you are proxying Elasticsearch access
    #   through a special http path, such as using mod_rewrite.
    def build_client(options)
      hosts = options[:hosts] || ["127.0.0.1"]
      client_settings = options[:client_settings] || {}
      timeout = options[:timeout] || 0

      host_ssl_opt = client_settings[:ssl].nil? ? nil : client_settings[:ssl][:enabled]
      urls = hosts.map {|host| host_to_url(host, host_ssl_opt, client_settings[:path])}

      @client_options = {
        :hosts => urls,
        :ssl => client_settings[:ssl],
        :transport_options => {
          :socket_timeout => timeout,
          :request_timeout => timeout,
          :proxy => client_settings[:proxy]
        },
        :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore
      }

      if options[:user] && options[:password] then
        token = Base64.strict_encode64(options[:user] + ":" + options[:password])
        @client_options[:headers] = { "Authorization" => "Basic #{token}" }
      end

      @logger.debug? && @logger.debug("Elasticsearch HTTP client options", client_options)

      Elasticsearch::Client.new(client_options)
    end

    # Bare hostname (or bracketed IPv6 literal) with an optional :port suffix.
    HOSTNAME_PORT_REGEX=/\A(?<hostname>([A-Za-z0-9\.\-]+)|\[[0-9A-Fa-f\:]+\])(:(?<port>\d+))?\Z/
    URL_REGEX=/\A#{URI::regexp(['http', 'https'])}\z/
    # Parse a configuration host to a normalized URL string.
    # Accepts either a full http(s) URL or a "hostname[:port]" string
    # (default port 9200). `ssl` forces the scheme; `path` is prefixed.
    # @raise [LogStash::ConfigurationError] on scheme/ssl conflicts, duplicate
    #   paths, inline credentials, or unparseable hosts.
    def host_to_url(host, ssl=nil, path=nil)
      explicit_scheme = case ssl
                        when true
                          "https"
                        when false
                          "http"
                        when nil
                          nil
                        else
                          raise ArgumentError, "Unexpected SSL value!"
                        end

      # Ensure path starts with a /
      if path && path[0] != '/'
        path = "/#{path}"
      end

      url = nil
      if host =~ URL_REGEX
        url = URI.parse(host)

        # Please note that the ssl == nil case is different! If you didn't make an explicit
        # choice we don't complain!
        if url.scheme == "http" && ssl == true
          raise LogStash::ConfigurationError, "You specified a plain 'http' URL '#{host}' but set 'ssl' to true! Aborting!"
        elsif url.scheme == "https" && ssl == false
          raise LogStash::ConfigurationError, "You have explicitly disabled SSL but passed in an https URL '#{host}'! Aborting!"
        end

        url.scheme = explicit_scheme if explicit_scheme
      elsif (match_results = HOSTNAME_PORT_REGEX.match(host))
        hostname = match_results["hostname"]
        port = match_results["port"] || 9200
        url = URI.parse("#{explicit_scheme || 'http'}://#{hostname}:#{port}")
      else
        raise LogStash::ConfigurationError, "Host '#{host}' was specified, but is not valid! Use either a full URL or a hostname:port string!"
      end

      if path && url.path && url.path != "/" && url.path != ''
        raise LogStash::ConfigurationError, "A path '#{url.path}' has been explicitly specified in the url '#{url}', but you also specified a path of '#{path}'. This is probably a mistake, please remove one setting."
      end

      if path
        url.path = path # The URI library cannot stringify if it holds a nil
      end

      if url.password || url.user
        raise LogStash::ConfigurationError, "We do not support setting the user password in the URL directly as " +
          "this may be logged to disk thus leaking credentials. Use the 'user' and 'password' options respectively"
      end

      url.to_s
    end

    # True if a template named `name` exists; the GET raising NotFound is the
    # "does not exist" signal.
    def template_exists?(name)
      @client.indices.get_template(:name => name)
      return true
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      return false
    end

    def template_put(name, template)
      @client.indices.put_template(:name => name, :body => template)
    end

    # Build a bulk item for an elasticsearch update action
    # Returns the possibly-mutated [args, source] pair, expanding scripted
    # updates and the doc_as_upsert / scripted_upsert / _upsert variants.
    def update_action_builder(args, source)
      if args[:_script]
        # Use the event as a hash from your script with variable name defined
        # by script_var_name (default: "event")
        # Ex: event["@timestamp"]
        source_orig = source
        source = { 'script' => {'params' => { @options[:script_var_name] => source_orig }} }
        if @options[:scripted_upsert]
          source['scripted_upsert'] = true
          source['upsert'] = {}
        elsif @options[:doc_as_upsert]
          source['upsert'] = source_orig
        else
          source['upsert'] = args.delete(:_upsert) if args[:_upsert]
        end
        # Where the script lives determines which key the bulk API expects.
        case @options[:script_type]
        when 'indexed'
          source['script']['id'] = args.delete(:_script)
        when 'file'
          source['script']['file'] = args.delete(:_script)
        when 'inline'
          source['script']['inline'] = args.delete(:_script)
        end
        source['script']['lang'] = @options[:script_lang] if @options[:script_lang] != ''
      else
        source = { 'doc' => source }
        if @options[:doc_as_upsert]
          source['doc_as_upsert'] = true
        else
          source['upsert'] = args.delete(:_upsert) if args[:_upsert]
        end
      end
      [args, source]
    end
  end
end end end
@@ -0,0 +1,106 @@
1
module LogStash; module Outputs; class ElasticSearch;
  # Translates the plugin's config `params` hash into the option hash
  # understood by HttpClient, then constructs and returns the client.
  module HttpClientBuilder
    # @param logger  the plugin logger
    # @param hosts   [Array<String>] configured hosts
    # @param params  [Hash] raw plugin configuration
    # @return [LogStash::Outputs::ElasticSearch::HttpClient]
    # @raise [LogStash::ConfigurationError] on invalid update-API combinations
    def self.build(logger, hosts, params)
      client_settings = {}

      common_options = {
        :client_settings => client_settings,
        :sniffing => params["sniffing"],
        :sniffing_delay => params["sniffing_delay"]
      }

      common_options[:timeout] = params["timeout"] if params["timeout"]
      client_settings[:path] = "/#{params["path"]}/".gsub(/\/+/, "/") # Normalize slashes
      logger.debug? && logger.debug("Normalizing http path", :path => params["path"], :normalized => client_settings[:path])

      client_settings.merge! setup_ssl(logger, params)
      client_settings.merge! setup_proxy(logger, params)
      common_options.merge! setup_basic_auth(logger, params)

      # Update API setup
      # BUGFIX: this previously raised `Logstash::ConfigurationError` (note the
      # lowercase 's') — an undefined constant, so hitting this path produced a
      # NameError instead of the intended configuration error.
      raise( LogStash::ConfigurationError,
        "doc_as_upsert and scripted_upsert are mutually exclusive."
      ) if params["doc_as_upsert"] and params["scripted_upsert"]

      raise(
        LogStash::ConfigurationError,
        "Specifying action => 'update' needs a document_id."
      ) if params['action'] == 'update' and params.fetch('document_id', '') == ''

      # Update API setup: these knobs only matter for action => 'update'.
      update_options = {
        :doc_as_upsert => params["doc_as_upsert"],
        :script_var_name => params["script_var_name"],
        :script_type => params["script_type"],
        :script_lang => params["script_lang"],
        :scripted_upsert => params["scripted_upsert"]
      }
      common_options.merge! update_options if params["action"] == 'update'

      LogStash::Outputs::ElasticSearch::HttpClient.new(
        common_options.merge(:hosts => hosts, :logger => logger)
      )
    end

    # Returns {} when no proxy is configured; otherwise {:proxy => ...} where
    # the value is the proxy String or a symbol-keyed Hash.
    def self.setup_proxy(logger, params)
      proxy = params["proxy"]
      return {} unless proxy

      # Symbolize keys
      proxy = if proxy.is_a?(Hash)
        Hash[proxy.map {|k,v| [k.to_sym, v]}]
      elsif proxy.is_a?(String)
        proxy
      else
        raise LogStash::ConfigurationError, "Expected 'proxy' to be a string or hash, not '#{proxy}''!"
      end

      return {:proxy => proxy}
    end

    # Builds the :ssl sub-hash for the client settings.
    # Returns {} when "ssl" is unset, {:ssl => {:enabled => false}} when
    # explicitly disabled, otherwise the full TLS option set.
    def self.setup_ssl(logger, params)
      return {} if params["ssl"].nil?
      return {:ssl => {:enabled => false}} if params["ssl"] == false

      cacert, truststore, truststore_password, keystore, keystore_password =
        params.values_at('cacert', 'truststore', 'truststore_password', 'keystore', 'keystore_password')

      # Simplified from a redundant nested `if truststore` inside this guard.
      if cacert && truststore
        raise(LogStash::ConfigurationError, "Use either \"cacert\" or \"truststore\" when configuring the CA certificate")
      end

      ssl_options = {:enabled => true}

      # cacert and truststore are mutually exclusive (checked above).
      ssl_options[:ca_file] = cacert if cacert
      if truststore
        ssl_options[:truststore] = truststore
        ssl_options[:truststore_password] = truststore_password.value if truststore_password
      end
      if keystore
        ssl_options[:keystore] = keystore
        ssl_options[:keystore_password] = keystore_password.value if keystore_password
      end
      if !params["ssl_certificate_verification"]
        logger.warn [
          "** WARNING ** Detected UNSAFE options in elasticsearch output configuration!",
          "** WARNING ** You have enabled encryption but DISABLED certificate verification.",
          "** WARNING ** To make sure your data is secure change :ssl_certificate_verification to true"
        ].join("\n")
        ssl_options[:verify] = false
      end
      { ssl: ssl_options }
    end

    # Returns {:user, :password} when both are configured, else {}.
    # `password` is a LogStash password object; `.value` unwraps the plaintext.
    def self.setup_basic_auth(logger, params)
      user, password = params["user"], params["password"]
      return {} unless user && password

      {
        :user => user,
        :password => password.value
      }
    end
  end
end; end; end
@@ -0,0 +1,35 @@
1
module LogStash; module Outputs; class ElasticSearch
  # Loads the index mapping template (bundled JSON by default, or a
  # user-supplied path) and installs it through the plugin's client.
  class TemplateManager
    # To be mixed into the elasticsearch plugin base
    # Entry point: install the template unless template management is disabled.
    # Failures are logged and swallowed — template installation is best-effort
    # and must not prevent the output from starting.
    def self.install_template(plugin)
      return unless plugin.manage_template
      plugin.logger.info("Using mapping template from", :path => plugin.template)
      template = get_template(plugin.template)
      plugin.logger.info("Attempting to install template", :manage_template => template)
      install(plugin.client, plugin.template_name, template, plugin.template_overwrite)
    rescue => e
      plugin.logger.error("Failed to install template.", :message => e.message, :class => e.class.name)
    end

    # NOTE(review): `private` has no effect on `def self.` methods, so the
    # helpers below are technically public. Left as-is to avoid changing the
    # effective interface; use `private_class_method` if hiding is desired.
    private

    # Resolve the template path (user-supplied or bundled default) and parse it.
    def self.get_template(path)
      template_path = path || default_template_path
      read_template_file(template_path)
    end

    def self.install(client, template_name, template, template_overwrite)
      client.template_install(template_name, template, template_overwrite)
    end

    # Path of the JSON template shipped alongside this source file.
    def self.default_template_path
      ::File.expand_path('elasticsearch-template.json', ::File.dirname(__FILE__))
    end

    # Read and JSON-parse the template at `template_path`.
    # BUGFIX: the error message interpolated the undefined instance variable
    # `@template_path` (always nil in a class method), so the missing-file
    # error printed an empty path. Also replaced the deprecated
    # `File.exists?` alias with `File.exist?`.
    # @raise [ArgumentError] when the file does not exist.
    def self.read_template_file(template_path)
      raise ArgumentError, "Template file '#{template_path}' could not be found!" unless ::File.exist?(template_path)
      template_data = ::IO.read(template_path)
      LogStash::Json.load(template_data)
    end
  end
end end end
@@ -1,17 +1,17 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-elasticsearch'
4
- s.version = '0.1.6'
5
- s.licenses = ['Apache License (2.0)']
4
+ s.version = '3.0.0'
5
+ s.licenses = ['apache-2.0']
6
6
  s.summary = "Logstash Output to Elasticsearch"
7
- s.description = "Output events to elasticsearch"
8
- s.authors = ["Elasticsearch"]
9
- s.email = 'richard.pijnenburg@elasticsearch.com'
7
+ s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
8
+ s.authors = ["Elastic"]
9
+ s.email = 'info@elastic.co'
10
10
  s.homepage = "http://logstash.net/"
11
11
  s.require_paths = ["lib"]
12
12
 
13
13
  # Files
14
- s.files = `git ls-files`.split($\)
14
+ s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
15
15
 
16
16
  # Tests
17
17
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -19,20 +19,22 @@ Gem::Specification.new do |s|
19
19
  # Special flag to let us know this is actually a logstash plugin
20
20
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
21
21
 
22
- # Jar dependencies
23
- s.requirements << "jar 'org.elasticsearch:elasticsearch', '1.4.0'"
24
-
25
22
  # Gem dependencies
26
- s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0']
23
+ s.add_runtime_dependency 'concurrent-ruby'
24
+ s.add_runtime_dependency 'elasticsearch', ['>= 1.0.13', '~> 1.0']
27
25
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
28
26
  s.add_runtime_dependency 'cabin', ['~> 0.6']
29
- s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
30
- s.add_runtime_dependency 'jar-dependencies'
27
+ s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0"
31
28
 
32
- s.add_development_dependency 'ftw', ['>= 0.0.40', '~> 0']
29
+ s.add_development_dependency 'ftw', '~> 0.0.42'
30
+ s.add_development_dependency 'logstash-codec-plain'
33
31
 
34
32
  if RUBY_PLATFORM == 'java'
35
- s.add_runtime_dependency "manticore", '~> 0.3'
33
+ s.platform = RUBY_PLATFORM
34
+ s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0'
36
35
  end
37
- end
38
36
 
37
+ s.add_development_dependency 'logstash-devutils'
38
+ s.add_development_dependency 'longshoreman'
39
+ s.add_development_dependency 'flores'
40
+ end