logstash-output-elasticsearch 0.1.6 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +5 -13
  2. data/CHANGELOG.md +117 -0
  3. data/CONTRIBUTORS +32 -0
  4. data/Gemfile +4 -4
  5. data/LICENSE +1 -1
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +110 -0
  8. data/lib/logstash/outputs/elasticsearch.rb +97 -425
  9. data/lib/logstash/outputs/elasticsearch/buffer.rb +124 -0
  10. data/lib/logstash/outputs/elasticsearch/common.rb +205 -0
  11. data/lib/logstash/outputs/elasticsearch/common_configs.rb +164 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +36 -24
  13. data/lib/logstash/outputs/elasticsearch/http_client.rb +236 -0
  14. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +106 -0
  15. data/lib/logstash/outputs/elasticsearch/template_manager.rb +35 -0
  16. data/logstash-output-elasticsearch.gemspec +17 -15
  17. data/spec/es_spec_helper.rb +77 -0
  18. data/spec/fixtures/scripts/scripted_update.groovy +2 -0
  19. data/spec/fixtures/scripts/scripted_update_nested.groovy +2 -0
  20. data/spec/fixtures/scripts/scripted_upsert.groovy +2 -0
  21. data/spec/integration/outputs/create_spec.rb +55 -0
  22. data/spec/integration/outputs/index_spec.rb +68 -0
  23. data/spec/integration/outputs/parent_spec.rb +73 -0
  24. data/spec/integration/outputs/pipeline_spec.rb +75 -0
  25. data/spec/integration/outputs/retry_spec.rb +163 -0
  26. data/spec/integration/outputs/routing_spec.rb +65 -0
  27. data/spec/integration/outputs/secure_spec.rb +108 -0
  28. data/spec/integration/outputs/templates_spec.rb +90 -0
  29. data/spec/integration/outputs/update_spec.rb +188 -0
  30. data/spec/unit/buffer_spec.rb +118 -0
  31. data/spec/unit/http_client_builder_spec.rb +27 -0
  32. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +133 -0
  33. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +58 -0
  34. data/spec/unit/outputs/elasticsearch_spec.rb +227 -0
  35. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +55 -0
  36. metadata +137 -51
  37. data/.gitignore +0 -4
  38. data/Rakefile +0 -6
  39. data/lib/logstash/outputs/elasticsearch/protocol.rb +0 -253
  40. data/rakelib/publish.rake +0 -9
  41. data/rakelib/vendor.rake +0 -169
  42. data/spec/outputs/elasticsearch.rb +0 -518
@@ -5,30 +5,42 @@
5
5
  },
6
6
  "mappings" : {
7
7
  "_default_" : {
8
- "_all" : {"enabled" : true},
9
- "dynamic_templates" : [ {
10
- "string_fields" : {
11
- "match" : "*",
12
- "match_mapping_type" : "string",
13
- "mapping" : {
14
- "type" : "string", "index" : "analyzed", "omit_norms" : true,
15
- "fields" : {
16
- "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
17
- }
18
- }
19
- }
20
- } ],
21
- "properties" : {
22
- "@version": { "type": "string", "index": "not_analyzed" },
23
- "geoip" : {
24
- "type" : "object",
25
- "dynamic": true,
26
- "path": "full",
27
- "properties" : {
28
- "location" : { "type" : "geo_point" }
29
- }
30
- }
31
- }
8
+ "_all" : {"enabled" : true, "omit_norms" : true},
9
+ "dynamic_templates" : [ {
10
+ "message_field" : {
11
+ "match" : "message",
12
+ "match_mapping_type" : "string",
13
+ "mapping" : {
14
+ "type" : "string", "index" : "analyzed", "omit_norms" : true,
15
+ "fielddata" : { "format" : "disabled" }
16
+ }
17
+ }
18
+ }, {
19
+ "string_fields" : {
20
+ "match" : "*",
21
+ "match_mapping_type" : "string",
22
+ "mapping" : {
23
+ "type" : "string", "index" : "analyzed", "omit_norms" : true,
24
+ "fielddata" : { "format" : "disabled" },
25
+ "fields" : {
26
+ "raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
27
+ }
28
+ }
29
+ }
30
+ } ],
31
+ "properties" : {
32
+ "@timestamp": { "type": "date" },
33
+ "@version": { "type": "string", "index": "not_analyzed" },
34
+ "geoip" : {
35
+ "dynamic": true,
36
+ "properties" : {
37
+ "ip": { "type": "ip" },
38
+ "location" : { "type" : "geo_point" },
39
+ "latitude" : { "type" : "float" },
40
+ "longitude" : { "type" : "float" }
41
+ }
42
+ }
43
+ }
32
44
  }
33
45
  }
34
46
  }
@@ -0,0 +1,236 @@
1
+ require "logstash/outputs/elasticsearch"
2
+ require "cabin"
3
+ require "base64"
4
+ require "elasticsearch"
5
+ require "elasticsearch/transport/transport/http/manticore"
6
+
7
+ module LogStash; module Outputs; class ElasticSearch;
8
+ class HttpClient
9
+ attr_reader :client, :options, :client_options, :sniffer_thread
10
+ # This is here in case we use DEFAULT_OPTIONS in the future
11
+ # DEFAULT_OPTIONS = {
12
+ # :setting => value
13
+ # }
14
+
15
+ def initialize(options={})
16
+ @logger = options[:logger]
17
+ # Again, in case we use DEFAULT_OPTIONS in the future, uncomment this.
18
+ # @options = DEFAULT_OPTIONS.merge(options)
19
+ @options = options
20
+ @client = build_client(@options)
21
+ # mutex to prevent requests and sniffing to access the
22
+ # connection pool at the same time
23
+ @request_mutex = Mutex.new
24
+ start_sniffing!
25
+ end
26
+
27
+ def template_install(name, template, force=false)
28
+ @request_mutex.synchronize do
29
+ if template_exists?(name) && !force
30
+ @logger.debug("Found existing Elasticsearch template. Skipping template management", :name => name)
31
+ return
32
+ end
33
+ template_put(name, template)
34
+ end
35
+ end
36
+
37
+ def bulk(actions)
38
+ @request_mutex.synchronize { non_threadsafe_bulk(actions) }
39
+ end
40
+
41
+ def non_threadsafe_bulk(actions)
42
+ return if actions.empty?
43
+ bulk_body = actions.collect do |action, args, source|
44
+ args, source = update_action_builder(args, source) if action == 'update'
45
+
46
+ if source && action != 'delete'
47
+ next [ { action => args.merge({ :data => source }) } ]
48
+ else
49
+ next { action => args }
50
+ end
51
+ end.flatten
52
+
53
+ @client.bulk(:body => bulk_body)
54
+ end
55
+
56
+ def start_sniffing!
57
+ if options[:sniffing]
58
+ @sniffer_thread = Thread.new do
59
+ loop do
60
+ @request_mutex.synchronize { sniff! }
61
+ sleep (options[:sniffing_delay].to_f || 30)
62
+ end
63
+ end
64
+ end
65
+ end
66
+
67
+ def stop_sniffing!
68
+ @sniffer_thread.kill() if @sniffer_thread
69
+ end
70
+
71
+ def sniff!
72
+ client.transport.reload_connections! if options[:sniffing]
73
+ hosts_by_name = client.transport.hosts.map {|h| h["name"]}.sort
74
+ @logger.debug({"count" => hosts_by_name.count, "hosts" => hosts_by_name})
75
+ rescue StandardError => e
76
+ @logger.error("Error while sniffing connection",
77
+ :message => e.message,
78
+ :class => e.class.name,
79
+ :backtrace => e.backtrace)
80
+ end
81
+
82
+ private
83
+
84
+ # Builds a client and returns an Elasticsearch::Client
85
+ #
86
+ # The `options` is a hash where the following symbol keys have meaning:
87
+ #
88
+ # * `:hosts` - array of String. Set a list of hosts to use for communication.
89
+ # * `:port` - number. set the port to use to communicate with Elasticsearch
90
+ # * `:user` - String. The user to use for authentication.
91
+ # * `:password` - String. The password to use for authentication.
92
+ # * `:timeout` - Float. A duration value, in seconds, after which a socket
93
+ # operation or request will be aborted if not yet successful
94
+ # * `:client_settings` - a hash; see below for keys.
95
+ #
96
+ # The `client_settings` key is a hash that can contain other settings:
97
+ #
98
+ # * `:ssl` - Boolean. Enable or disable SSL/TLS.
99
+ # * `:proxy` - String. Choose an HTTP proxy to use.
100
+ # * `:path` - String. The leading path for prefixing Elasticsearch
101
+ # requests. This is sometimes used if you are proxying Elasticsearch access
102
+ # through a special http path, such as using mod_rewrite.
103
+ def build_client(options)
104
+ hosts = options[:hosts] || ["127.0.0.1"]
105
+ client_settings = options[:client_settings] || {}
106
+ timeout = options[:timeout] || 0
107
+
108
+ host_ssl_opt = client_settings[:ssl].nil? ? nil : client_settings[:ssl][:enabled]
109
+ urls = hosts.map {|host| host_to_url(host, host_ssl_opt, client_settings[:path])}
110
+
111
+ @client_options = {
112
+ :hosts => urls,
113
+ :ssl => client_settings[:ssl],
114
+ :transport_options => {
115
+ :socket_timeout => timeout,
116
+ :request_timeout => timeout,
117
+ :proxy => client_settings[:proxy]
118
+ },
119
+ :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore
120
+ }
121
+
122
+ if options[:user] && options[:password] then
123
+ token = Base64.strict_encode64(options[:user] + ":" + options[:password])
124
+ @client_options[:headers] = { "Authorization" => "Basic #{token}" }
125
+ end
126
+
127
+ @logger.debug? && @logger.debug("Elasticsearch HTTP client options", client_options)
128
+
129
+ Elasticsearch::Client.new(client_options)
130
+ end
131
+
132
+ HOSTNAME_PORT_REGEX=/\A(?<hostname>([A-Za-z0-9\.\-]+)|\[[0-9A-Fa-f\:]+\])(:(?<port>\d+))?\Z/
133
+ URL_REGEX=/\A#{URI::regexp(['http', 'https'])}\z/
134
+ # Parse a configuration host to a normalized URL
135
+ def host_to_url(host, ssl=nil, path=nil)
136
+ explicit_scheme = case ssl
137
+ when true
138
+ "https"
139
+ when false
140
+ "http"
141
+ when nil
142
+ nil
143
+ else
144
+ raise ArgumentError, "Unexpected SSL value!"
145
+ end
146
+
147
+ # Ensure path starts with a /
148
+ if path && path[0] != '/'
149
+ path = "/#{path}"
150
+ end
151
+
152
+ url = nil
153
+ if host =~ URL_REGEX
154
+ url = URI.parse(host)
155
+
156
+ # Please note that the ssl == nil case is different! If you didn't make an explicit
157
+ # choice we don't complain!
158
+ if url.scheme == "http" && ssl == true
159
+ raise LogStash::ConfigurationError, "You specified a plain 'http' URL '#{host}' but set 'ssl' to true! Aborting!"
160
+ elsif url.scheme == "https" && ssl == false
161
+ raise LogStash::ConfigurationError, "You have explicitly disabled SSL but passed in an https URL '#{host}'! Aborting!"
162
+ end
163
+
164
+ url.scheme = explicit_scheme if explicit_scheme
165
+ elsif (match_results = HOSTNAME_PORT_REGEX.match(host))
166
+ hostname = match_results["hostname"]
167
+ port = match_results["port"] || 9200
168
+ url = URI.parse("#{explicit_scheme || 'http'}://#{hostname}:#{port}")
169
+ else
170
+ raise LogStash::ConfigurationError, "Host '#{host}' was specified, but is not valid! Use either a full URL or a hostname:port string!"
171
+ end
172
+
173
+ if path && url.path && url.path != "/" && url.path != ''
174
+ raise LogStash::ConfigurationError, "A path '#{url.path}' has been explicitly specified in the url '#{url}', but you also specified a path of '#{path}'. This is probably a mistake, please remove one setting."
175
+ end
176
+
177
+ if path
178
+ url.path = path # The URI library cannot stringify if it holds a nil
179
+ end
180
+
181
+ if url.password || url.user
182
+ raise LogStash::ConfigurationError, "We do not support setting the user password in the URL directly as " +
183
+ "this may be logged to disk thus leaking credentials. Use the 'user' and 'password' options respectively"
184
+ end
185
+
186
+ url.to_s
187
+ end
188
+
189
+ def template_exists?(name)
190
+ @client.indices.get_template(:name => name)
191
+ return true
192
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
193
+ return false
194
+ end
195
+
196
+ def template_put(name, template)
197
+ @client.indices.put_template(:name => name, :body => template)
198
+ end
199
+
200
+ # Build a bulk item for an elasticsearch update action
201
+ def update_action_builder(args, source)
202
+ if args[:_script]
203
+ # Use the event as a hash from your script with variable name defined
204
+ # by script_var_name (default: "event")
205
+ # Ex: event["@timestamp"]
206
+ source_orig = source
207
+ source = { 'script' => {'params' => { @options[:script_var_name] => source_orig }} }
208
+ if @options[:scripted_upsert]
209
+ source['scripted_upsert'] = true
210
+ source['upsert'] = {}
211
+ elsif @options[:doc_as_upsert]
212
+ source['upsert'] = source_orig
213
+ else
214
+ source['upsert'] = args.delete(:_upsert) if args[:_upsert]
215
+ end
216
+ case @options[:script_type]
217
+ when 'indexed'
218
+ source['script']['id'] = args.delete(:_script)
219
+ when 'file'
220
+ source['script']['file'] = args.delete(:_script)
221
+ when 'inline'
222
+ source['script']['inline'] = args.delete(:_script)
223
+ end
224
+ source['script']['lang'] = @options[:script_lang] if @options[:script_lang] != ''
225
+ else
226
+ source = { 'doc' => source }
227
+ if @options[:doc_as_upsert]
228
+ source['doc_as_upsert'] = true
229
+ else
230
+ source['upsert'] = args.delete(:_upsert) if args[:_upsert]
231
+ end
232
+ end
233
+ [args, source]
234
+ end
235
+ end
236
+ end end end
@@ -0,0 +1,106 @@
1
+ module LogStash; module Outputs; class ElasticSearch;
2
+ module HttpClientBuilder
3
+ def self.build(logger, hosts, params)
4
+ client_settings = {}
5
+
6
+ common_options = {
7
+ :client_settings => client_settings,
8
+ :sniffing => params["sniffing"],
9
+ :sniffing_delay => params["sniffing_delay"]
10
+ }
11
+
12
+ common_options[:timeout] = params["timeout"] if params["timeout"]
13
+ client_settings[:path] = "/#{params["path"]}/".gsub(/\/+/, "/") # Normalize slashes
14
+ logger.debug? && logger.debug("Normalizing http path", :path => params["path"], :normalized => client_settings[:path])
15
+
16
+ client_settings.merge! setup_ssl(logger, params)
17
+ client_settings.merge! setup_proxy(logger, params)
18
+ common_options.merge! setup_basic_auth(logger, params)
19
+
20
+ # Update API setup
21
+ raise( Logstash::ConfigurationError,
22
+ "doc_as_upsert and scripted_upsert are mutually exclusive."
23
+ ) if params["doc_as_upsert"] and params["scripted_upsert"]
24
+
25
+ raise(
26
+ LogStash::ConfigurationError,
27
+ "Specifying action => 'update' needs a document_id."
28
+ ) if params['action'] == 'update' and params.fetch('document_id', '') == ''
29
+
30
+ # Update API setup
31
+ update_options = {
32
+ :doc_as_upsert => params["doc_as_upsert"],
33
+ :script_var_name => params["script_var_name"],
34
+ :script_type => params["script_type"],
35
+ :script_lang => params["script_lang"],
36
+ :scripted_upsert => params["scripted_upsert"]
37
+ }
38
+ common_options.merge! update_options if params["action"] == 'update'
39
+
40
+ LogStash::Outputs::ElasticSearch::HttpClient.new(
41
+ common_options.merge(:hosts => hosts, :logger => logger)
42
+ )
43
+ end
44
+
45
+ def self.setup_proxy(logger, params)
46
+ proxy = params["proxy"]
47
+ return {} unless proxy
48
+
49
+ # Symbolize keys
50
+ proxy = if proxy.is_a?(Hash)
51
+ Hash[proxy.map {|k,v| [k.to_sym, v]}]
52
+ elsif proxy.is_a?(String)
53
+ proxy
54
+ else
55
+ raise LogStash::ConfigurationError, "Expected 'proxy' to be a string or hash, not '#{proxy}''!"
56
+ end
57
+
58
+ return {:proxy => proxy}
59
+ end
60
+
61
+ def self.setup_ssl(logger, params)
62
+ return {} if params["ssl"].nil?
63
+ return {:ssl => {:enabled => false}} if params["ssl"] == false
64
+
65
+ cacert, truststore, truststore_password, keystore, keystore_password =
66
+ params.values_at('cacert', 'truststore', 'truststore_password', 'keystore', 'keystore_password')
67
+
68
+ if cacert && truststore
69
+ raise(LogStash::ConfigurationError, "Use either \"cacert\" or \"truststore\" when configuring the CA certificate") if truststore
70
+ end
71
+
72
+ ssl_options = {:enabled => true}
73
+
74
+ if cacert
75
+ ssl_options[:ca_file] = cacert
76
+ elsif truststore
77
+ ssl_options[:truststore_password] = truststore_password.value if truststore_password
78
+ end
79
+
80
+ ssl_options[:truststore] = truststore if truststore
81
+ if keystore
82
+ ssl_options[:keystore] = keystore
83
+ ssl_options[:keystore_password] = keystore_password.value if keystore_password
84
+ end
85
+ if !params["ssl_certificate_verification"]
86
+ logger.warn [
87
+ "** WARNING ** Detected UNSAFE options in elasticsearch output configuration!",
88
+ "** WARNING ** You have enabled encryption but DISABLED certificate verification.",
89
+ "** WARNING ** To make sure your data is secure change :ssl_certificate_verification to true"
90
+ ].join("\n")
91
+ ssl_options[:verify] = false
92
+ end
93
+ { ssl: ssl_options }
94
+ end
95
+
96
+ def self.setup_basic_auth(logger, params)
97
+ user, password = params["user"], params["password"]
98
+ return {} unless user && password
99
+
100
+ {
101
+ :user => user,
102
+ :password => password.value
103
+ }
104
+ end
105
+ end
106
+ end; end; end
@@ -0,0 +1,35 @@
1
+ module LogStash; module Outputs; class ElasticSearch
2
+ class TemplateManager
3
+ # To be mixed into the elasticsearch plugin base
4
+ def self.install_template(plugin)
5
+ return unless plugin.manage_template
6
+ plugin.logger.info("Using mapping template from", :path => plugin.template)
7
+ template = get_template(plugin.template)
8
+ plugin.logger.info("Attempting to install template", :manage_template => template)
9
+ install(plugin.client, plugin.template_name, template, plugin.template_overwrite)
10
+ rescue => e
11
+ plugin.logger.error("Failed to install template.", :message => e.message, :class => e.class.name)
12
+ end
13
+
14
+ private
15
+
16
+ def self.get_template(path)
17
+ template_path = path || default_template_path
18
+ read_template_file(template_path)
19
+ end
20
+
21
+ def self.install(client, template_name, template, template_overwrite)
22
+ client.template_install(template_name, template, template_overwrite)
23
+ end
24
+
25
+ def self.default_template_path
26
+ ::File.expand_path('elasticsearch-template.json', ::File.dirname(__FILE__))
27
+ end
28
+
29
+ def self.read_template_file(template_path)
30
+ raise ArgumentError, "Template file '#{@template_path}' could not be found!" unless ::File.exists?(template_path)
31
+ template_data = ::IO.read(template_path)
32
+ LogStash::Json.load(template_data)
33
+ end
34
+ end
35
+ end end end
@@ -1,17 +1,17 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-output-elasticsearch'
4
- s.version = '0.1.6'
5
- s.licenses = ['Apache License (2.0)']
4
+ s.version = '3.0.0'
5
+ s.licenses = ['apache-2.0']
6
6
  s.summary = "Logstash Output to Elasticsearch"
7
- s.description = "Output events to elasticsearch"
8
- s.authors = ["Elasticsearch"]
9
- s.email = 'richard.pijnenburg@elasticsearch.com'
7
+ s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
8
+ s.authors = ["Elastic"]
9
+ s.email = 'info@elastic.co'
10
10
  s.homepage = "http://logstash.net/"
11
11
  s.require_paths = ["lib"]
12
12
 
13
13
  # Files
14
- s.files = `git ls-files`.split($\)
14
+ s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
15
15
 
16
16
  # Tests
17
17
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -19,20 +19,22 @@ Gem::Specification.new do |s|
19
19
  # Special flag to let us know this is actually a logstash plugin
20
20
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" }
21
21
 
22
- # Jar dependencies
23
- s.requirements << "jar 'org.elasticsearch:elasticsearch', '1.4.0'"
24
-
25
22
  # Gem dependencies
26
- s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0']
23
+ s.add_runtime_dependency 'concurrent-ruby'
24
+ s.add_runtime_dependency 'elasticsearch', ['>= 1.0.13', '~> 1.0']
27
25
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
28
26
  s.add_runtime_dependency 'cabin', ['~> 0.6']
29
- s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
30
- s.add_runtime_dependency 'jar-dependencies'
27
+ s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0"
31
28
 
32
- s.add_development_dependency 'ftw', ['>= 0.0.40', '~> 0']
29
+ s.add_development_dependency 'ftw', '~> 0.0.42'
30
+ s.add_development_dependency 'logstash-codec-plain'
33
31
 
34
32
  if RUBY_PLATFORM == 'java'
35
- s.add_runtime_dependency "manticore", '~> 0.3'
33
+ s.platform = RUBY_PLATFORM
34
+ s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0'
36
35
  end
37
- end
38
36
 
37
+ s.add_development_dependency 'logstash-devutils'
38
+ s.add_development_dependency 'longshoreman'
39
+ s.add_development_dependency 'flores'
40
+ end