logstash-output-elasticsearch 10.8.6-java → 11.0.0-java

Files changed (31)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +7 -0
  3. data/docs/index.asciidoc +132 -22
  4. data/lib/logstash/outputs/elasticsearch.rb +122 -64
  5. data/lib/logstash/outputs/elasticsearch/data_stream_support.rb +233 -0
  6. data/lib/logstash/outputs/elasticsearch/http_client.rb +9 -7
  7. data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +47 -34
  8. data/lib/logstash/outputs/elasticsearch/ilm.rb +11 -12
  9. data/lib/logstash/outputs/elasticsearch/license_checker.rb +19 -22
  10. data/lib/logstash/outputs/elasticsearch/template_manager.rb +3 -5
  11. data/lib/logstash/plugin_mixins/elasticsearch/api_configs.rb +157 -153
  12. data/lib/logstash/plugin_mixins/elasticsearch/common.rb +70 -58
  13. data/logstash-output-elasticsearch.gemspec +2 -2
  14. data/spec/es_spec_helper.rb +3 -6
  15. data/spec/integration/outputs/data_stream_spec.rb +61 -0
  16. data/spec/integration/outputs/ilm_spec.rb +6 -2
  17. data/spec/integration/outputs/ingest_pipeline_spec.rb +4 -2
  18. data/spec/integration/outputs/retry_spec.rb +4 -4
  19. data/spec/integration/outputs/sniffer_spec.rb +0 -1
  20. data/spec/spec_helper.rb +14 -0
  21. data/spec/unit/outputs/elasticsearch/data_stream_support_spec.rb +542 -0
  22. data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +1 -0
  23. data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +24 -10
  24. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +2 -3
  25. data/spec/unit/outputs/elasticsearch/template_manager_spec.rb +1 -3
  26. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +1 -2
  27. data/spec/unit/outputs/elasticsearch_spec.rb +122 -23
  28. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +1 -2
  29. data/spec/unit/outputs/error_whitelist_spec.rb +3 -2
  30. data/spec/unit/outputs/license_check_spec.rb +0 -16
  31. metadata +23 -16
data/lib/logstash/outputs/elasticsearch/license_checker.rb
@@ -7,41 +7,38 @@ module LogStash; module Outputs; class ElasticSearch
 
     # Figure out if the provided license is appropriate or not
     # The appropriate_license? methods is the method called from LogStash::Outputs::ElasticSearch::HttpClient::Pool#healthcheck!
+    # @param pool
     # @param url [LogStash::Util::SafeURI] ES node URL
-    # @param license [Hash] ES node deserialized licence document
     # @return [Boolean] true if provided license is deemed appropriate
     def appropriate_license?(pool, url)
-      return true if oss?
-
       license = pool.get_license(url)
-      if valid_es_license?(license)
+      case license_status(license)
+      when 'active'
         true
-      else
-        # As this version is to be shipped with Logstash 7.x we won't mark the connection as unlicensed
-        #
-        # @logger.error("Cannot connect to the Elasticsearch cluster configured in the Elasticsearch output. Logstash requires the default distribution of Elasticsearch. Please update to the default distribution of Elasticsearch for full access to all free features, or switch to the OSS distribution of Logstash.", :url => url.sanitized.to_s)
-        # meta[:state] = :unlicensed
-        #
-        # Instead we'll log a deprecation warning and mark it as alive:
-        #
-        log_license_deprecation_warn(url)
+      when nil
+        warn_no_license(url)
+        false
+      else # 'invalid', 'expired'
+        warn_invalid_license(url, license)
         true
       end
     end
 
-    # Note that oss? could be private but is used by the Pool specs
-    def oss?
-      LogStash::OSS
+    def license_status(license)
+      license.fetch("license", {}).fetch("status", nil)
     end
 
-    # Note that valid_es_license? could be private but is used by the Pool specs
-    def valid_es_license?(license)
-      license.fetch("license", {}).fetch("status", nil) == "active"
+    private
+
+    def warn_no_license(url)
+      @logger.error("Connecting to an OSS distribution of Elasticsearch is no longer supported, " +
+          "please upgrade to the default distribution of Elasticsearch", url: url.sanitized.to_s)
     end
 
-    # Note that log_license_deprecation_warn could be private but is used by the Pool specs
-    def log_license_deprecation_warn(url)
-      @logger.warn("DEPRECATION WARNING: Connecting to an OSS distribution of Elasticsearch using the default distribution of Logstash will stop working in Logstash 8.0.0. Please upgrade to the default distribution of Elasticsearch, or use the OSS distribution of Logstash", :url => url.sanitized.to_s)
+    def warn_invalid_license(url, license)
+      @logger.warn("WARNING: Current Elasticsearch license is not active, " +
+          "please check Elasticsearch's licensing information", url: url.sanitized.to_s, license: license)
     end
+
   end
 end; end; end
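
For context on the new flow: `license_status` digs the `status` field out of the license document that the connection pool fetches for each node, and the `case` above distinguishes three shapes. A minimal sketch (the hashes below are illustrative, not captured API responses):

    # Illustrative license documents, as returned by pool.get_license(url)
    active  = { "license" => { "status" => "active" } }
    expired = { "license" => { "status" => "expired" } }
    missing = {}  # e.g. an OSS node, which exposes no license information

    active.fetch("license", {}).fetch("status", nil)   #=> "active"  -- node accepted
    expired.fetch("license", {}).fetch("status", nil)  #=> "expired" -- warn, node still accepted
    missing.fetch("license", {}).fetch("status", nil)  #=> nil       -- error, node rejected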
data/lib/logstash/outputs/elasticsearch/template_manager.rb
@@ -13,10 +13,8 @@ module LogStash; module Outputs; class ElasticSearch
       end
 
       add_ilm_settings_to_template(plugin, template) if plugin.ilm_in_use?
-      plugin.logger.info("Attempting to install template", :manage_template => template)
+      plugin.logger.debug("Attempting to install template", template: template)
       install(plugin.client, template_name(plugin), template, plugin.template_overwrite)
-    rescue => e
-      plugin.logger.error("Failed to install template.", :message => e.message, :class => e.class.name, :backtrace => e.backtrace)
     end
 
     private
@@ -38,7 +36,7 @@ module LogStash; module Outputs; class ElasticSearch
       template['index_patterns'] = "#{plugin.ilm_rollover_alias}-*"
       settings = template_settings(plugin, template)
       if settings && (settings['index.lifecycle.name'] || settings['index.lifecycle.rollover_alias'])
-        plugin.logger.info("Overwriting index lifecycle name and rollover alias as ILM is enabled.")
+        plugin.logger.info("Overwriting index lifecycle name and rollover alias as ILM is enabled")
       end
       settings.update({ 'index.lifecycle.name' => plugin.ilm_policy, 'index.lifecycle.rollover_alias' => plugin.ilm_rollover_alias})
     end
@@ -61,7 +59,7 @@ module LogStash; module Outputs; class ElasticSearch
     end
 
     def self.read_template_file(template_path)
-      raise ArgumentError, "Template file '#{template_path}' could not be found!" unless ::File.exists?(template_path)
+      raise ArgumentError, "Template file '#{template_path}' could not be found" unless ::File.exists?(template_path)
       template_data = ::IO.read(template_path)
       LogStash::Json.load(template_data)
     end
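
Putting the ILM hunk together: when ILM is in use, the template's `index_patterns` is pinned to the rollover alias and any user-supplied lifecycle settings are overwritten (after the info log). A sketch with illustrative literals standing in for `plugin.ilm_policy` and `plugin.ilm_rollover_alias`, and assuming a template where `template_settings` resolves to `template['settings']`:

    template = { 'index_patterns' => 'logstash-*',
                 'settings' => { 'index.lifecycle.name' => 'my-policy' } }

    template['index_patterns'] = "logstash-alias-*"   # "#{plugin.ilm_rollover_alias}-*"
    # the info message fires here, since 'index.lifecycle.name' was already set
    template['settings'].update({ 'index.lifecycle.name' => 'logstash-policy',
                                  'index.lifecycle.rollover_alias' => 'logstash-alias' })
    # => the template now targets the rollover alias and uses the plugin's policy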
data/lib/logstash/plugin_mixins/elasticsearch/api_configs.rb
@@ -5,159 +5,163 @@ module LogStash; module PluginMixins; module ElasticSearch
 
     DEFAULT_HOST = ::LogStash::Util::SafeURI.new("//127.0.0.1")
 
-    def self.included(mod)
-      # Username to authenticate to a secure Elasticsearch cluster
-      mod.config :user, :validate => :string
-      # Password to authenticate to a secure Elasticsearch cluster
-      mod.config :password, :validate => :password
-
-      # Authenticate using Elasticsearch API key.
-      # format is id:api_key (as returned by https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key])
-      mod.config :api_key, :validate => :password
-
-      # Cloud authentication string ("<username>:<password>" format) is an alternative for the `user`/`password` configuration.
-      #
-      # For more details, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[cloud documentation]
-      mod.config :cloud_auth, :validate => :password
-
-      # The document ID for the index. Useful for overwriting existing entries in
-      # Elasticsearch with the same ID.
-      mod.config :document_id, :validate => :string
-
-      # HTTP Path at which the Elasticsearch server lives. Use this if you must run Elasticsearch behind a proxy that remaps
-      # the root path for the Elasticsearch HTTP API lives.
-      # Note that if you use paths as components of URLs in the 'hosts' field you may
-      # not also set this field. That will raise an error at startup
-      mod.config :path, :validate => :string
-
-      # HTTP Path to perform the _bulk requests to
-      # this defaults to a concatenation of the path parameter and "_bulk"
-      mod.config :bulk_path, :validate => :string
-
-      # Pass a set of key value pairs as the URL query string. This query string is added
-      # to every host listed in the 'hosts' configuration. If the 'hosts' list contains
-      # urls that already have query strings, the one specified here will be appended.
-      mod.config :parameters, :validate => :hash
-
-      # Enable SSL/TLS secured communication to Elasticsearch cluster. Leaving this unspecified will use whatever scheme
-      # is specified in the URLs listed in 'hosts'. If no explicit protocol is specified plain HTTP will be used.
-      # If SSL is explicitly disabled here the plugin will refuse to start if an HTTPS URL is given in 'hosts'
-      mod.config :ssl, :validate => :boolean
-
-      # Option to validate the server's certificate. Disabling this severely compromises security.
-      # For more information on disabling certificate verification please read
-      # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
-      mod.config :ssl_certificate_verification, :validate => :boolean, :default => true
-
-      # The .cer or .pem file to validate the server's certificate
-      mod.config :cacert, :validate => :path
-
-      # The JKS truststore to validate the server's certificate.
-      # Use either `:truststore` or `:cacert`
-      mod.config :truststore, :validate => :path
-
-      # Set the truststore password
-      mod.config :truststore_password, :validate => :password
-
-      # The keystore used to present a certificate to the server.
-      # It can be either .jks or .p12
-      mod.config :keystore, :validate => :path
-
-      # Set the keystore password
-      mod.config :keystore_password, :validate => :password
-
-      # This setting asks Elasticsearch for the list of all cluster nodes and adds them to the hosts list.
-      # Note: This will return ALL nodes with HTTP enabled (including master nodes!). If you use
-      # this with master nodes, you probably want to disable HTTP on them by setting
-      # `http.enabled` to false in their elasticsearch.yml. You can either use the `sniffing` option or
-      # manually enter multiple Elasticsearch hosts using the `hosts` parameter.
-      mod.config :sniffing, :validate => :boolean, :default => false
-
-      # How long to wait, in seconds, between sniffing attempts
-      mod.config :sniffing_delay, :validate => :number, :default => 5
-
-      # HTTP Path to be used for the sniffing requests
-      # the default value is computed by concatenating the path value and "_nodes/http"
-      # if sniffing_path is set it will be used as an absolute path
-      # do not use full URL here, only paths, e.g. "/sniff/_nodes/http"
-      mod.config :sniffing_path, :validate => :string
-
-      # Set the address of a forward HTTP proxy.
-      # This used to accept hashes as arguments but now only accepts
-      # arguments of the URI type to prevent leaking credentials.
-      mod.config :proxy, :validate => :uri # but empty string is allowed
-
-      # Set the timeout, in seconds, for network operations and requests sent Elasticsearch. If
-      # a timeout occurs, the request will be retried.
-      mod.config :timeout, :validate => :number, :default => 60
-
-      # Set the Elasticsearch errors in the whitelist that you don't want to log.
-      # A useful example is when you want to skip all 409 errors
-      # which are `document_already_exists_exception`.
-      mod.config :failure_type_logging_whitelist, :validate => :array, :default => []
-
-      # While the output tries to reuse connections efficiently we have a maximum.
-      # This sets the maximum number of open connections the output will create.
-      # Setting this too low may mean frequently closing / opening connections
-      # which is bad.
-      mod.config :pool_max, :validate => :number, :default => 1000
-
-      # While the output tries to reuse connections efficiently we have a maximum per endpoint.
-      # This sets the maximum number of open connections per endpoint the output will create.
-      # Setting this too low may mean frequently closing / opening connections
-      # which is bad.
-      mod.config :pool_max_per_route, :validate => :number, :default => 100
-
-      # HTTP Path where a HEAD request is sent when a backend is marked down
-      # the request is sent in the background to see if it has come back again
-      # before it is once again eligible to service requests.
-      # If you have custom firewall rules you may need to change this
-      mod.config :healthcheck_path, :validate => :string
-
-      # How frequently, in seconds, to wait between resurrection attempts.
-      # Resurrection is the process by which backend endpoints marked 'down' are checked
-      # to see if they have come back to life
-      mod.config :resurrect_delay, :validate => :number, :default => 5
-
-      # How long to wait before checking if the connection is stale before executing a request on a connection using keepalive.
-      # You may want to set this lower, if you get connection errors regularly
-      # Quoting the Apache commons docs (this client is based Apache Commmons):
-      # 'Defines period of inactivity in milliseconds after which persistent connections must
-      # be re-validated prior to being leased to the consumer. Non-positive value passed to
-      # this method disables connection validation. This check helps detect connections that
-      # have become stale (half-closed) while kept inactive in the pool.'
-      # See https://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/org/apache/http/impl/conn/PoolingHttpClientConnectionManager.html#setValidateAfterInactivity(int)[these docs for more info]
-      mod.config :validate_after_inactivity, :validate => :number, :default => 10000
-
-      # Enable gzip compression on requests. Note that response compression is on by default for Elasticsearch v5.0 and beyond
-      mod.config :http_compression, :validate => :boolean, :default => false
-
-      # Custom Headers to send on each request to elasticsearch nodes
-      mod.config :custom_headers, :validate => :hash, :default => {}
-
-      # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
-      # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
-      # `"127.0.0.1"`
-      # `["127.0.0.1:9200","127.0.0.2:9200"]`
-      # `["http://127.0.0.1"]`
-      # `["https://127.0.0.1:9200"]`
-      # `["https://127.0.0.1:9200/mypath"]` (If using a proxy on a subpath)
-      # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
-      # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
-      #
-      # Any special characters present in the URLs here MUST be URL escaped! This means `#` should be put in as `%23` for instance.
-      mod.config :hosts, :validate => :uri, :default => [ DEFAULT_HOST ], :list => true
-
-      # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used.
-      #
-      # For more details, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[cloud documentation]
-      mod.config :cloud_id, :validate => :string
-
-      # Set initial interval in seconds between bulk retries. Doubled on each retry up to `retry_max_interval`
-      mod.config :retry_initial_interval, :validate => :number, :default => 2
-
-      # Set max interval in seconds between bulk retries.
-      mod.config :retry_max_interval, :validate => :number, :default => 64
+    CONFIG_PARAMS = {
+      # Username to authenticate to a secure Elasticsearch cluster
+      :user => { :validate => :string },
+      # Password to authenticate to a secure Elasticsearch cluster
+      :password => { :validate => :password },
+
+      # Authenticate using Elasticsearch API key.
+      # format is id:api_key (as returned by https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key])
+      :api_key => { :validate => :password },
+
+      # Cloud authentication string ("<username>:<password>" format) is an alternative for the `user`/`password` configuration.
+      #
+      # For more details, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[cloud documentation]
+      :cloud_auth => { :validate => :password },
+
+      # The document ID for the index. Useful for overwriting existing entries in
+      # Elasticsearch with the same ID.
+      :document_id => { :validate => :string },
+
+      # HTTP Path at which the Elasticsearch server lives. Use this if you must run Elasticsearch behind a proxy that remaps
+      # the root path for the Elasticsearch HTTP API lives.
+      # Note that if you use paths as components of URLs in the 'hosts' field you may
+      # not also set this field. That will raise an error at startup
+      :path => { :validate => :string },
+
+      # HTTP Path to perform the _bulk requests to
+      # this defaults to a concatenation of the path parameter and "_bulk"
+      :bulk_path => { :validate => :string },
+
+      # Pass a set of key value pairs as the URL query string. This query string is added
+      # to every host listed in the 'hosts' configuration. If the 'hosts' list contains
+      # urls that already have query strings, the one specified here will be appended.
+      :parameters => { :validate => :hash },
+
+      # Enable SSL/TLS secured communication to Elasticsearch cluster. Leaving this unspecified will use whatever scheme
+      # is specified in the URLs listed in 'hosts'. If no explicit protocol is specified plain HTTP will be used.
+      # If SSL is explicitly disabled here the plugin will refuse to start if an HTTPS URL is given in 'hosts'
+      :ssl => { :validate => :boolean },
+
+      # Option to validate the server's certificate. Disabling this severely compromises security.
+      # For more information on disabling certificate verification please read
+      # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf
+      :ssl_certificate_verification => { :validate => :boolean, :default => true },
+
+      # The .cer or .pem file to validate the server's certificate
+      :cacert => { :validate => :path },
+
+      # The JKS truststore to validate the server's certificate.
+      # Use either `:truststore` or `:cacert`
+      :truststore => { :validate => :path },
+
+      # Set the truststore password
+      :truststore_password => { :validate => :password },
+
+      # The keystore used to present a certificate to the server.
+      # It can be either .jks or .p12
+      :keystore => { :validate => :path },
+
+      # Set the keystore password
+      :keystore_password => { :validate => :password },
+
+      # This setting asks Elasticsearch for the list of all cluster nodes and adds them to the hosts list.
+      # Note: This will return ALL nodes with HTTP enabled (including master nodes!). If you use
+      # this with master nodes, you probably want to disable HTTP on them by setting
+      # `http.enabled` to false in their elasticsearch.yml. You can either use the `sniffing` option or
+      # manually enter multiple Elasticsearch hosts using the `hosts` parameter.
+      :sniffing => { :validate => :boolean, :default => false },
+
+      # How long to wait, in seconds, between sniffing attempts
+      :sniffing_delay => { :validate => :number, :default => 5 },
+
+      # HTTP Path to be used for the sniffing requests
+      # the default value is computed by concatenating the path value and "_nodes/http"
+      # if sniffing_path is set it will be used as an absolute path
+      # do not use full URL here, only paths, e.g. "/sniff/_nodes/http"
+      :sniffing_path => { :validate => :string },
+
+      # Set the address of a forward HTTP proxy.
+      # This used to accept hashes as arguments but now only accepts
+      # arguments of the URI type to prevent leaking credentials.
+      :proxy => { :validate => :uri }, # but empty string is allowed
+
+      # Set the timeout, in seconds, for network operations and requests sent Elasticsearch. If
+      # a timeout occurs, the request will be retried.
+      :timeout => { :validate => :number, :default => 60 },
+
+      # Set the Elasticsearch errors in the whitelist that you don't want to log.
+      # A useful example is when you want to skip all 409 errors
+      # which are `document_already_exists_exception`.
+      :failure_type_logging_whitelist => { :validate => :array, :default => [] },
+
+      # While the output tries to reuse connections efficiently we have a maximum.
+      # This sets the maximum number of open connections the output will create.
+      # Setting this too low may mean frequently closing / opening connections
+      # which is bad.
+      :pool_max => { :validate => :number, :default => 1000 },
+
+      # While the output tries to reuse connections efficiently we have a maximum per endpoint.
+      # This sets the maximum number of open connections per endpoint the output will create.
+      # Setting this too low may mean frequently closing / opening connections
+      # which is bad.
+      :pool_max_per_route => { :validate => :number, :default => 100 },
+
+      # HTTP Path where a HEAD request is sent when a backend is marked down
+      # the request is sent in the background to see if it has come back again
+      # before it is once again eligible to service requests.
+      # If you have custom firewall rules you may need to change this
+      :healthcheck_path => { :validate => :string },
+
+      # How frequently, in seconds, to wait between resurrection attempts.
+      # Resurrection is the process by which backend endpoints marked 'down' are checked
+      # to see if they have come back to life
+      :resurrect_delay => { :validate => :number, :default => 5 },
+
+      # How long to wait before checking if the connection is stale before executing a request on a connection using keepalive.
+      # You may want to set this lower, if you get connection errors regularly
+      # Quoting the Apache commons docs (this client is based Apache Commmons):
+      # 'Defines period of inactivity in milliseconds after which persistent connections must
+      # be re-validated prior to being leased to the consumer. Non-positive value passed to
+      # this method disables connection validation. This check helps detect connections that
+      # have become stale (half-closed) while kept inactive in the pool.'
+      # See https://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/org/apache/http/impl/conn/PoolingHttpClientConnectionManager.html#setValidateAfterInactivity(int)[these docs for more info]
+      :validate_after_inactivity => { :validate => :number, :default => 10000 },
+
+      # Enable gzip compression on requests. Note that response compression is on by default for Elasticsearch v5.0 and beyond
+      :http_compression => { :validate => :boolean, :default => false },
+
+      # Custom Headers to send on each request to elasticsearch nodes
+      :custom_headers => { :validate => :hash, :default => {} },
+
+      # Sets the host(s) of the remote instance. If given an array it will load balance requests across the hosts specified in the `hosts` parameter.
+      # Remember the `http` protocol uses the http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#modules-http[http] address (eg. 9200, not 9300).
+      # `"127.0.0.1"`
+      # `["127.0.0.1:9200","127.0.0.2:9200"]`
+      # `["http://127.0.0.1"]`
+      # `["https://127.0.0.1:9200"]`
+      # `["https://127.0.0.1:9200/mypath"]` (If using a proxy on a subpath)
+      # It is important to exclude http://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html[dedicated master nodes] from the `hosts` list
+      # to prevent LS from sending bulk requests to the master nodes. So this parameter should only reference either data or client nodes in Elasticsearch.
+      #
+      # Any special characters present in the URLs here MUST be URL escaped! This means `#` should be put in as `%23` for instance.
+      :hosts => { :validate => :uri, :default => [ DEFAULT_HOST ], :list => true },
+
+      # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used.
+      #
+      # For more details, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[cloud documentation]
+      :cloud_id => { :validate => :string },
+
+      # Set initial interval in seconds between bulk retries. Doubled on each retry up to `retry_max_interval`
+      :retry_initial_interval => { :validate => :number, :default => 2 },
+
+      # Set max interval in seconds between bulk retries.
+      :retry_max_interval => { :validate => :number, :default => 64 }
+    }.freeze
+
+    def self.included(base)
+      CONFIG_PARAMS.each { |name, opts| base.config(name, opts) }
     end
   end
 end; end; end
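
The api_configs.rb rewrite is behavior-preserving: the same options, validations, and defaults now live in a frozen `CONFIG_PARAMS` hash, and `self.included` simply replays them through the config DSL. Condensed to two options, the before/after equivalence from the hunk above looks like this (a sketch, not the full table):

    # Before: options declared imperatively on the including module
    def self.included(mod)
      mod.config :user, :validate => :string
      mod.config :password, :validate => :password
      # ... roughly forty more declarations
    end

    # After: a data-driven table, replayed on inclusion
    CONFIG_PARAMS = {
      :user     => { :validate => :string },
      :password => { :validate => :password }
      # ...
    }.freeze

    def self.included(base)
      CONFIG_PARAMS.each { |name, opts| base.config(name, opts) }
    end

A frozen hash also makes the option table introspectable without including the mixin, which fits the module's stated goal of reuse by alternate outputs such as the data-stream plugin.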
data/lib/logstash/plugin_mixins/elasticsearch/common.rb
@@ -5,7 +5,7 @@ module LogStash; module PluginMixins; module ElasticSearch
 
     # This module defines common methods that can be reused by alternate elasticsearch output plugins such as the elasticsearch_data_streams output.
 
-    attr_reader :client, :hosts
+    attr_reader :hosts
 
     # These codes apply to documents, not at the request level
     DOC_DLQ_CODES = [400, 404]
@@ -31,7 +31,7 @@ module LogStash; module PluginMixins; module ElasticSearch
       if @proxy.eql?('')
         @logger.warn "Supplied proxy setting (proxy => '') has no effect"
       end
-      @client ||= ::LogStash::Outputs::ElasticSearch::HttpClientBuilder.build(@logger, @hosts, params)
+      ::LogStash::Outputs::ElasticSearch::HttpClientBuilder.build(@logger, @hosts, params)
     end
 
     def validate_authentication
@@ -115,6 +115,15 @@ module LogStash; module PluginMixins; module ElasticSearch
     end
     private :parse_user_password_from_cloud_auth
 
+    # Plugin initialization extension point (after a successful ES connection).
+    def finish_register
+    end
+    protected :finish_register
+
+    def last_es_version
+      client.last_es_version
+    end
+
     def maximum_seen_major_version
       client.maximum_seen_major_version
     end
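
`finish_register` is a deliberately empty, protected hook: the base registration flow can call it once a connection is up, and plugins layered on this mixin (for example, the new data-stream support) can override it without touching `register` itself. A hypothetical override (the class name and helper are illustrative only):

    class MyOutput < LogStash::Outputs::ElasticSearch
      protected

      # illustrative: deferred setup that needs a live cluster connection
      def finish_register
        @logger.info("Connected to Elasticsearch", version: last_es_version)
        install_template_if_needed   # hypothetical helper
      end
    end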
@@ -126,25 +135,24 @@ module LogStash; module PluginMixins; module ElasticSearch
     # launch a thread that waits for an initial successful connection to the ES cluster to call the given block
     # @param block [Proc] the block to execute upon initial successful connection
     # @return [Thread] the successful connection wait thread
-    def setup_after_successful_connection(&block)
+    def after_successful_connection(&block)
       Thread.new do
         sleep_interval = @retry_initial_interval
         until successful_connection? || @stopping.true?
-          @logger.debug("Waiting for connectivity to Elasticsearch cluster. Retrying in #{sleep_interval}s")
-          Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
-          sleep_interval = next_sleep_interval(sleep_interval)
+          @logger.debug("Waiting for connectivity to Elasticsearch cluster, retrying in #{sleep_interval}s")
+          sleep_interval = sleep_for_interval(sleep_interval)
         end
         block.call if successful_connection?
       end
     end
+    private :after_successful_connection
 
     def discover_cluster_uuid
       return unless defined?(plugin_metadata)
       cluster_info = client.get('/')
       plugin_metadata.set(:cluster_uuid, cluster_info['cluster_uuid'])
     rescue => e
-      # TODO introducing this logging message breaks many tests that need refactoring
-      # @logger.error("Unable to retrieve elasticsearch cluster uuid", error => e.message)
+      @logger.error("Unable to retrieve Elasticsearch cluster uuid", message: e.message, exception: e.class, backtrace: e.backtrace)
     end
 
     def retrying_submit(actions)
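
One plausible way the renamed hook gets wired up during `register` (a sketch of the call pattern, not the plugin's exact code):

    @after_successful_connection_thread = after_successful_connection do
      discover_cluster_uuid
      finish_register   # the protected extension point added above
    end

Since `after_successful_connection` is now private, outside callers go through `register` rather than invoking it directly, whereas the old `setup_after_successful_connection` name was part of the mixin's public surface.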
@@ -159,13 +167,11 @@ module LogStash; module PluginMixins; module ElasticSearch
       begin
         submit_actions = submit(submit_actions)
         if submit_actions && submit_actions.size > 0
-          @logger.info("Retrying individual bulk actions that failed or were rejected by the previous bulk request.", :count => submit_actions.size)
+          @logger.info("Retrying individual bulk actions that failed or were rejected by the previous bulk request", count: submit_actions.size)
         end
       rescue => e
-        @logger.error("Encountered an unexpected error submitting a bulk request! Will retry.",
-                      :error_message => e.message,
-                      :class => e.class.name,
-                      :backtrace => e.backtrace)
+        @logger.error("Encountered an unexpected error submitting a bulk request, will retry",
+                      message: e.message, exception: e.class, backtrace: e.backtrace)
       end
 
       # Everything was a success!
@@ -173,21 +179,41 @@ module LogStash; module PluginMixins; module ElasticSearch
 
         # If we're retrying the action sleep for the recommended interval
         # Double the interval for the next time through to achieve exponential backoff
-        Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
-        sleep_interval = next_sleep_interval(sleep_interval)
+        sleep_interval = sleep_for_interval(sleep_interval)
       end
     end
 
     def sleep_for_interval(sleep_interval)
-      Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
+      stoppable_sleep(sleep_interval)
       next_sleep_interval(sleep_interval)
     end
 
+    def stoppable_sleep(interval)
+      Stud.stoppable_sleep(interval) { @stopping.true? }
+    end
+
     def next_sleep_interval(current_interval)
       doubled = current_interval * 2
       doubled > @retry_max_interval ? @retry_max_interval : doubled
     end
 
+    def handle_dlq_status(message, action, status, response)
+      # To support bwc, we check if DLQ exists. otherwise we log and drop event (previous behavior)
+      if @dlq_writer
+        # TODO: Change this to send a map with { :status => status, :action => action } in the future
+        @dlq_writer.write(action[2], "#{message} status: #{status}, action: #{action}, response: #{response}")
+      else
+        if dig_value(response, 'index', 'error', 'type') == 'invalid_index_name_exception'
+          level = :error
+        else
+          level = :warn
+        end
+        @logger.send level, message, status: status, action: action, response: response
+      end
+    end
+
+    private
+
     def submit(actions)
       bulk_response = safe_bulk(actions)
 
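With the defaults declared in api_configs.rb (`retry_initial_interval => 2`, `retry_max_interval => 64`), the helpers above produce the usual capped exponential backoff. A standalone sketch of the arithmetic:

    def next_sleep_interval(current_interval, retry_max_interval = 64)
      doubled = current_interval * 2
      doubled > retry_max_interval ? retry_max_interval : doubled
    end

    interval = 2
    7.times { print interval, ' '; interval = next_sleep_interval(interval) }
    # prints: 2 4 8 16 32 64 64 -- doubles on each retry, then holds at the cap
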
@@ -217,7 +243,7 @@ module LogStash; module PluginMixins; module ElasticSearch
         action_type, action_props = response.first
 
         status = action_props["status"]
-        failure = action_props["error"]
+        error = action_props["error"]
         action = actions[idx]
         action_params = action[1]
 
@@ -230,7 +256,7 @@ module LogStash; module PluginMixins; module ElasticSearch
           next
         elsif DOC_CONFLICT_CODE == status
           @document_level_metrics.increment(:non_retryable_failures)
-          @logger.warn "Failed action.", status: status, action: action, response: response if !failure_type_logging_whitelist.include?(failure["type"])
+          @logger.warn "Failed action", status: status, action: action, response: response if log_failure_type?(error)
           next
         elsif DOC_DLQ_CODES.include?(status)
           handle_dlq_status("Could not index event to Elasticsearch.", action, status, response)
@@ -239,7 +265,7 @@ module LogStash; module PluginMixins; module ElasticSearch
         else
           # only log what the user whitelisted
           @document_level_metrics.increment(:retryable_failures)
-          @logger.info "retrying failed action with response code: #{status} (#{failure})" if !failure_type_logging_whitelist.include?(failure["type"])
+          @logger.info "Retrying failed action", status: status, action: action, error: error if log_failure_type?(error)
           actions_to_retry << action
         end
       end
@@ -247,40 +273,25 @@ module LogStash; module PluginMixins; module ElasticSearch
       actions_to_retry
     end
 
-    def handle_dlq_status(message, action, status, response)
-      # To support bwc, we check if DLQ exists. otherwise we log and drop event (previous behavior)
-      if @dlq_writer
-        # TODO: Change this to send a map with { :status => status, :action => action } in the future
-        @dlq_writer.write(action[2], "#{message} status: #{status}, action: #{action}, response: #{response}")
-      else
-        error_type = response.fetch('index', {}).fetch('error', {})['type']
-        if 'invalid_index_name_exception' == error_type
-          level = :error
-        else
-          level = :warn
-        end
-        @logger.send level, message, status: status, action: action, response: response
-      end
+    def log_failure_type?(failure)
+      !failure_type_logging_whitelist.include?(failure["type"])
     end
 
     # Rescue retryable errors during bulk submission
+    # @param actions a [action, params, event.to_hash] tuple
+    # @return response [Hash] which contains 'errors' and processed 'items' entries
     def safe_bulk(actions)
       sleep_interval = @retry_initial_interval
       begin
-        es_actions = actions.map {|action_type, params, event| [action_type, params, event.to_hash]}
-        response = @client.bulk(es_actions)
-        response
+        @client.bulk(actions) # returns { 'errors': ..., 'items': ... }
       rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::HostUnreachableError => e
         # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
         # and let the user sort it out from there
         @logger.error(
-          "Attempted to send a bulk request to elasticsearch'"+
-          " but Elasticsearch appears to be unreachable or down!",
-          :error_message => e.message,
-          :class => e.class.name,
-          :will_retry_in_seconds => sleep_interval
+          "Attempted to send a bulk request but Elasticsearch appears to be unreachable or down",
+          message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
         )
-        @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+        @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)
 
         # We retry until there are no errors! Errors should all go to the retry queue
        sleep_interval = sleep_for_interval(sleep_interval)
@@ -288,20 +299,19 @@ module LogStash; module PluginMixins; module ElasticSearch
         retry unless @stopping.true?
       rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::NoConnectionAvailableError => e
         @logger.error(
-          "Attempted to send a bulk request to elasticsearch, but no there are no living connections in the connection pool. Perhaps Elasticsearch is unreachable or down?",
-          :error_message => e.message,
-          :class => e.class.name,
-          :will_retry_in_seconds => sleep_interval
+          "Attempted to send a bulk request but there are no living connections in the pool " +
+          "(perhaps Elasticsearch is unreachable or down?)",
+          message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
         )
-        Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
-        sleep_interval = next_sleep_interval(sleep_interval)
+
+        sleep_interval = sleep_for_interval(sleep_interval)
         @bulk_request_metrics.increment(:failures)
         retry unless @stopping.true?
       rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError => e
         @bulk_request_metrics.increment(:failures)
         log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s, :content_length => e.request_body.bytesize}
         log_hash[:body] = e.response_body if @logger.debug? # Generally this is too verbose
-        message = "Encountered a retryable error. Will Retry with exponential backoff "
+        message = "Encountered a retryable error (will retry with exponential backoff)"
 
         # We treat 429s as a special case because these really aren't errors, but
         # rather just ES telling us to back off a bit, which we do.
@@ -315,17 +325,12 @@ module LogStash; module PluginMixins; module ElasticSearch
 
         sleep_interval = sleep_for_interval(sleep_interval)
         retry
-      rescue => e
-        # Stuff that should never happen
-        # For all other errors print out full connection issues
+      rescue => e # Stuff that should never happen - print out full connection issues
        @logger.error(
-          "An unknown error occurred sending a bulk request to Elasticsearch. We will retry indefinitely",
-          :error_message => e.message,
-          :error_class => e.class.name,
-          :backtrace => e.backtrace
+          "An unknown error occurred sending a bulk request to Elasticsearch (will retry indefinitely)",
+          message: e.message, exception: e.class, backtrace: e.backtrace
         )
-
-        @logger.debug("Failed actions for last bad bulk request!", :actions => actions)
+        @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)
 
         sleep_interval = sleep_for_interval(sleep_interval)
         @bulk_request_metrics.increment(:failures)
@@ -339,5 +344,12 @@ module LogStash; module PluginMixins; module ElasticSearch
       respond_to?(:execution_context) && execution_context.respond_to?(:dlq_writer) &&
         !execution_context.dlq_writer.inner_writer.is_a?(::LogStash::Util::DummyDeadLetterQueueWriter)
     end
+
+    def dig_value(val, first_key, *rest_keys)
+      fail(TypeError, "cannot dig value from #{val.class}") unless val.kind_of?(Hash)
+      val = val[first_key]
+      return val if rest_keys.empty? || val == nil
+      dig_value(val, *rest_keys)
+    end
   end
 end; end; end
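
`dig_value` is a small nil-tolerant replacement for the chained `fetch(..., {})` calls it displaces in `handle_dlq_status`: missing keys yield nil, but digging into a non-Hash fails loudly. A self-contained usage sketch against an illustrative bulk-item response (the method body is copied from the hunk above; the response hash is made up):

    def dig_value(val, first_key, *rest_keys)
      fail(TypeError, "cannot dig value from #{val.class}") unless val.kind_of?(Hash)
      val = val[first_key]
      return val if rest_keys.empty? || val == nil
      dig_value(val, *rest_keys)
    end

    response = { 'index' => { 'status' => 400,
                              'error'  => { 'type' => 'invalid_index_name_exception' } } }

    dig_value(response, 'index', 'error', 'type')  #=> "invalid_index_name_exception"
    dig_value(response, 'index', 'error', 'oops')  #=> nil -- missing keys return nil
    dig_value(response, 'index', 'status', 'type') # raises TypeError (cannot dig value from Integer)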