logstash-output-elasticsearch 10.8.2-java → 11.0.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/docs/index.asciidoc +134 -23
- data/lib/logstash/outputs/elasticsearch.rb +137 -63
- data/lib/logstash/outputs/elasticsearch/data_stream_support.rb +233 -0
- data/lib/logstash/outputs/elasticsearch/http_client.rb +59 -21
- data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +47 -34
- data/lib/logstash/outputs/elasticsearch/ilm.rb +11 -12
- data/lib/logstash/outputs/elasticsearch/license_checker.rb +19 -22
- data/lib/logstash/outputs/elasticsearch/template_manager.rb +3 -5
- data/lib/logstash/plugin_mixins/elasticsearch/api_configs.rb +157 -153
- data/lib/logstash/plugin_mixins/elasticsearch/common.rb +81 -60
- data/logstash-output-elasticsearch.gemspec +2 -2
- data/spec/es_spec_helper.rb +3 -6
- data/spec/integration/outputs/data_stream_spec.rb +61 -0
- data/spec/integration/outputs/ilm_spec.rb +22 -18
- data/spec/integration/outputs/ingest_pipeline_spec.rb +4 -2
- data/spec/integration/outputs/retry_spec.rb +14 -2
- data/spec/integration/outputs/sniffer_spec.rb +0 -1
- data/spec/spec_helper.rb +14 -0
- data/spec/unit/http_client_builder_spec.rb +9 -9
- data/spec/unit/outputs/elasticsearch/data_stream_support_spec.rb +542 -0
- data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +1 -0
- data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +27 -13
- data/spec/unit/outputs/elasticsearch/http_client_spec.rb +59 -41
- data/spec/unit/outputs/elasticsearch/template_manager_spec.rb +1 -3
- data/spec/unit/outputs/elasticsearch_proxy_spec.rb +4 -5
- data/spec/unit/outputs/elasticsearch_spec.rb +280 -47
- data/spec/unit/outputs/elasticsearch_ssl_spec.rb +1 -2
- data/spec/unit/outputs/error_whitelist_spec.rb +4 -3
- data/spec/unit/outputs/license_check_spec.rb +0 -16
- metadata +23 -16
data/lib/logstash/outputs/elasticsearch/data_stream_support.rb (new file):

@@ -0,0 +1,233 @@
+module LogStash module Outputs class ElasticSearch
+  # DS specific behavior/configuration.
+  module DataStreamSupport
+
+    def self.included(base)
+      # Defines whether data will be indexed into an Elasticsearch data stream,
+      # `data_stream_*` settings will only be used if this setting is enabled!
+      # This setting supports values `true`, `false`, and `auto`.
+      # Defaults to `false` in Logstash 7.x and `auto` starting in Logstash 8.0.
+      base.config :data_stream, :validate => ['true', 'false', 'auto']
+
+      base.config :data_stream_type, :validate => ['logs', 'metrics', 'synthetics'], :default => 'logs'
+      base.config :data_stream_dataset, :validate => :dataset_identifier, :default => 'generic'
+      base.config :data_stream_namespace, :validate => :namespace_identifier, :default => 'default'
+
+      base.config :data_stream_sync_fields, :validate => :boolean, :default => true
+      base.config :data_stream_auto_routing, :validate => :boolean, :default => true
+
+      base.extend(Validator)
+    end
+
+    # @note assumes to be running AFTER {after_successful_connection} completed, due ES version checks
+    def data_stream_config?
+      @data_stream_config.nil? ? @data_stream_config = check_data_stream_config! : @data_stream_config
+    end
+
+    private
+
+    def data_stream_name(event)
+      data_stream = event.get('data_stream')
+      return @index if !data_stream_auto_routing || !data_stream.is_a?(Hash)
+
+      type = data_stream['type'] || data_stream_type
+      dataset = data_stream['dataset'] || data_stream_dataset
+      namespace = data_stream['namespace'] || data_stream_namespace
+      "#{type}-#{dataset}-#{namespace}"
+    end
+
+    # @param params the user configuration for the ES output
+    # @note LS initialized configuration (with filled defaults) won't detect as data-stream
+    # compatible, only explicit (`original_params`) config should be tested.
+    # @return [TrueClass|FalseClass] whether given configuration is data-stream compatible
+    def check_data_stream_config!(params = original_params)
+      data_stream_params = params.select { |name, _| name.start_with?('data_stream_') } # exclude data_stream =>
+      invalid_data_stream_params = invalid_data_stream_params(params)
+
+      case data_stream_explicit_value
+      when false
+        if data_stream_params.any?
+          @logger.error "Ambiguous configuration; data stream settings must not be present when data streams is disabled (caused by: `data_stream => false`)", data_stream_params
+          raise LogStash::ConfigurationError, "Ambiguous configuration, please remove data stream specific settings: #{data_stream_params.keys}"
+        end
+        return false
+      when true
+        if invalid_data_stream_params.any?
+          @logger.error "Invalid data stream configuration, following parameters are not supported:", invalid_data_stream_params
+          raise LogStash::ConfigurationError, "Invalid data stream configuration: #{invalid_data_stream_params.keys}"
+        end
+        return true
+      else
+        use_data_stream = data_stream_default(data_stream_params, invalid_data_stream_params.empty?)
+        if !use_data_stream && data_stream_params.any?
+          # DS (auto) disabled but there's still some data-stream parameters (and no `data_stream => false`)
+          @logger.error "Ambiguous configuration; data stream settings are present, but data streams are not enabled", data_stream_params
+          raise LogStash::ConfigurationError, "Ambiguous configuration, please set data_stream => true " +
+              "or remove data stream specific settings: #{data_stream_params.keys}"
+        end
+        use_data_stream
+      end
+    end
+
+    def data_stream_explicit_value
+      case @data_stream
+      when 'true'
+        return true
+      when 'false'
+        return false
+      else
+        return nil # 'auto' or not set by user
+      end
+    end
+
+    def invalid_data_stream_params(params)
+      shared_params = LogStash::PluginMixins::ElasticSearch::APIConfigs::CONFIG_PARAMS.keys.map(&:to_s)
+      params.reject do |name, value|
+        # NOTE: intentionally do not support explicit DS configuration like:
+        # - `index => ...` identifier provided by data_stream_xxx settings
+        # - `manage_template => false` implied by not setting the parameter
+        case name
+        when 'action'
+          value == 'create'
+        when 'routing', 'pipeline'
+          true
+        when 'data_stream'
+          value.to_s == 'true'
+        else
+          name.start_with?('data_stream_') ||
+              shared_params.include?(name) ||
+                inherited_internal_config_param?(name) # 'id', 'enabled_metric' etc
+        end
+      end
+    end
+
+    def inherited_internal_config_param?(name)
+      self.class.superclass.get_config.key?(name.to_s) # superclass -> LogStash::Outputs::Base
+    end
+
+    DATA_STREAMS_ORIGIN_ES_VERSION = '7.9.0'
+
+    # @return [Gem::Version] if ES supports DS nil (or raise) otherwise
+    def assert_es_version_supports_data_streams
+      fail 'no last_es_version' unless last_es_version # assert - should not happen
+      es_version = Gem::Version.create(last_es_version)
+      if es_version < Gem::Version.create(DATA_STREAMS_ORIGIN_ES_VERSION)
+        @logger.error "Elasticsearch version does not support data streams, Logstash might end up writing to an index", es_version: es_version.version
+        # NOTE: when switching to synchronous check from register, this should be a ConfigurationError
+        raise LogStash::Error, "A data_stream configuration is only supported since Elasticsearch #{DATA_STREAMS_ORIGIN_ES_VERSION} " +
+                               "(detected version #{es_version.version}), please upgrade your cluster"
+      end
+      es_version # return truthy
+    end
+
+    DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION = '8.0.0'
+
+    # when data_stream => is either 'auto' or not set
+    def data_stream_default(data_stream_params, valid_data_stream_config)
+      ds_default = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(DATA_STREAMS_ENABLED_BY_DEFAULT_LS_VERSION)
+
+      if ds_default # LS 8.0
+        return false unless valid_data_stream_config
+
+        @logger.debug 'Configuration is data stream compliant'
+        return true
+      end
+
+      # LS 7.x
+      if valid_data_stream_config && !data_stream_params.any?
+        @logger.warn "Configuration is data stream compliant but due backwards compatibility Logstash 7.x will not assume " +
+                     "writing to a data-stream, default behavior will change on Logstash 8.0 " +
+                     "(set `data_stream => true/false` to disable this warning)"
+      end
+      false
+    end
+
+    # an {event_action_tuple} replacement when a data-stream configuration is detected
+    def data_stream_event_action_tuple(event)
+      event_data = event.to_hash
+      data_stream_event_sync(event_data) if data_stream_sync_fields
+      EventActionTuple.new('create', common_event_params(event), event, event_data)
+    end
+
+    DATA_STREAM_SYNC_FIELDS = [ 'type', 'dataset', 'namespace' ].freeze
+
+    def data_stream_event_sync(event_data)
+      data_stream = event_data['data_stream']
+      if data_stream.is_a?(Hash)
+        unless data_stream_auto_routing
+          sync_fields = DATA_STREAM_SYNC_FIELDS.select { |name| data_stream.key?(name) && data_stream[name] != send(:"data_stream_#{name}") }
+          if sync_fields.any? # these fields will need to be overwritten
+            info = sync_fields.inject({}) { |info, name| info[name] = data_stream[name]; info }
+            info[:event] = event_data
+            @logger.warn "Some data_stream fields are out of sync, these will be updated to reflect data-stream name", info
+
+            # NOTE: we work directly with event.to_hash data thus fine to mutate the 'data_stream' hash
+            sync_fields.each { |name| data_stream[name] = nil } # fallback to ||= bellow
+          end
+        end
+      else
+        unless data_stream.nil?
+          @logger.warn "Invalid 'data_stream' field type, due fields sync will overwrite", value: data_stream, event: event_data
+        end
+        event_data['data_stream'] = data_stream = Hash.new
+      end
+
+      data_stream['type'] ||= data_stream_type
+      data_stream['dataset'] ||= data_stream_dataset
+      data_stream['namespace'] ||= data_stream_namespace
+
+      event_data
+    end
+
+    module Validator
+
+      # @override {LogStash::Config::Mixin::validate_value} to handle custom validators
+      # @param value [Array<Object>]
+      # @param validator [nil,Array,Symbol]
+      # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value
+      # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason.
+      def validate_value(value, validator)
+        case validator
+        when :dataset_identifier   then validate_dataset_identifier(value)
+        when :namespace_identifier then validate_namespace_identifier(value)
+        else super
+        end
+      end
+
+      private
+
+      def validate_dataset_identifier(value)
+        valid, value = validate_value(value, :string)
+        return false, value unless valid
+
+        validate_identifier(value)
+      end
+
+      def validate_namespace_identifier(value)
+        valid, value = validate_value(value, :string)
+        return false, value unless valid
+
+        validate_identifier(value)
+      end
+
+      def validate_identifier(value, max_size = 100)
+        if value.empty?
+          return false, "Invalid identifier - empty string"
+        end
+        if value.bytesize > max_size
+          return false, "Invalid identifier - too long (#{value.bytesize} bytes)"
+        end
+        # cannot include \, /, *, ?, ", <, >, |, ' ' (space char), ',', #, :
+        if value.match? Regexp.union(INVALID_IDENTIFIER_CHARS)
+          return false, "Invalid characters detected #{INVALID_IDENTIFIER_CHARS.inspect} are not allowed"
+        end
+        return true, value
+      end
+
+      INVALID_IDENTIFIER_CHARS = [ '\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#', ':' ]
+      private_constant :INVALID_IDENTIFIER_CHARS
+
+    end
+
+  end
+end end end
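The module above composes the target name as `{type}-{dataset}-{namespace}`, letting event-supplied `data_stream` fields override the configured defaults when auto-routing is enabled. A minimal standalone sketch of that naming logic (not part of the diff; the plain-hash event and `DEFAULTS` constant are illustrative stand-ins for `LogStash::Event` and the plugin's settings):

    # Sketch: event-provided fields win over configured defaults,
    # and the data stream name is "type-dataset-namespace".
    DEFAULTS = { 'type' => 'logs', 'dataset' => 'generic', 'namespace' => 'default' }

    def data_stream_name(event, auto_routing: true)
      ds = event['data_stream']
      return 'logs-generic-default' unless auto_routing && ds.is_a?(Hash)
      "#{ds['type'] || DEFAULTS['type']}-#{ds['dataset'] || DEFAULTS['dataset']}-#{ds['namespace'] || DEFAULTS['namespace']}"
    end

    puts data_stream_name({ 'data_stream' => { 'dataset' => 'nginx', 'namespace' => 'prod' } })
    # => logs-nginx-prod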
data/lib/logstash/outputs/elasticsearch/http_client.rb:

@@ -1,6 +1,4 @@
 require "logstash/outputs/elasticsearch"
-require "cabin"
-require "base64"
 require 'logstash/outputs/elasticsearch/http_client/pool'
 require 'logstash/outputs/elasticsearch/http_client/manticore_adapter'
 require 'cgi'
@@ -80,12 +78,16 @@ module LogStash; module Outputs; class ElasticSearch;

     def template_install(name, template, force=false)
       if template_exists?(name) && !force
-        @logger.debug("Found existing Elasticsearch template. Skipping template management", :name => name)
+        @logger.debug("Found existing Elasticsearch template, skipping template management", name: name)
         return
       end
       template_put(name, template)
     end

+    def last_es_version
+      @pool.last_es_version
+    end
+
     def maximum_seen_major_version
       @pool.maximum_seen_major_version
     end
@@ -109,27 +111,50 @@ module LogStash; module Outputs; class ElasticSearch;
       body_stream = StringIO.new
       if http_compression
         body_stream.set_encoding "BINARY"
-        stream_writer = Zlib::GzipWriter.new(body_stream, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
-      else
+        stream_writer = gzip_writer(body_stream)
+      else
         stream_writer = body_stream
       end
       bulk_responses = []
-      bulk_actions.each do |action|
+      batch_actions = []
+      bulk_actions.each_with_index do |action, index|
         as_json = action.is_a?(Array) ?
                     action.map {|line| LogStash::Json.dump(line)}.join("\n") :
                     LogStash::Json.dump(action)
         as_json << "\n"
-        if (body_stream.size + as_json.bytesize) > TARGET_BULK_BYTES
-          bulk_responses << bulk_send(body_stream) unless body_stream.size == 0
+        if (stream_writer.pos + as_json.bytesize) > TARGET_BULK_BYTES && stream_writer.pos > 0
+          stream_writer.flush # ensure writer has sync'd buffers before reporting sizes
+          logger.debug("Sending partial bulk request for batch with one or more actions remaining.",
+                       :action_count => batch_actions.size,
+                       :payload_size => stream_writer.pos,
+                       :content_length => body_stream.size,
+                       :batch_offset => (index + 1 - batch_actions.size))
+          bulk_responses << bulk_send(body_stream, batch_actions)
+          body_stream.truncate(0) && body_stream.seek(0)
+          stream_writer = gzip_writer(body_stream) if http_compression
+          batch_actions.clear
         end
         stream_writer.write(as_json)
+        batch_actions << action
       end
       stream_writer.close if http_compression
-      bulk_responses << bulk_send(body_stream) if body_stream.size > 0
+      logger.debug("Sending final bulk request for batch.",
+                   :action_count => batch_actions.size,
+                   :payload_size => stream_writer.pos,
+                   :content_length => body_stream.size,
+                   :batch_offset => (actions.size - batch_actions.size))
+      bulk_responses << bulk_send(body_stream, batch_actions) if body_stream.size > 0
       body_stream.close if !http_compression
       join_bulk_responses(bulk_responses)
     end

+    def gzip_writer(io)
+      fail(ArgumentError, "Cannot create gzip writer on IO with unread bytes") unless io.eof?
+      fail(ArgumentError, "Cannot create gzip writer on non-empty IO") unless io.pos == 0
+
+      Zlib::GzipWriter.new(io, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
+    end
+
     def join_bulk_responses(bulk_responses)
       {
         "errors" => bulk_responses.any? {|r| r["errors"] == true},
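The reworked `bulk` above now flushes a partial request whenever the next serialized action would push the payload past `TARGET_BULK_BYTES`, instead of checking sizes only after writing. A self-contained sketch of that batching pattern (the tiny byte budget and string "batches" stand in for the plugin's constant and its `bulk_send`):

    require 'stringio'

    TARGET_BULK_BYTES = 20 # illustrative budget; the plugin's constant is far larger

    def send_in_batches(lines)
      body = StringIO.new
      batches = []
      lines.each do |line|
        # flush the accumulated payload before it would exceed the budget
        if (body.pos + line.bytesize) > TARGET_BULK_BYTES && body.pos > 0
          batches << body.string.dup # stand-in for bulk_send
          body.truncate(0); body.seek(0)
        end
        body.write(line)
      end
      batches << body.string.dup if body.size > 0 # final partial batch
      batches
    end

    p send_in_batches(["{\"a\":1}\n", "{\"b\":2}\n", "{\"c\":3}\n"]) # => two batches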
@@ -137,25 +162,37 @@ module LogStash; module Outputs; class ElasticSearch;
       }
     end

-    def bulk_send(body_stream)
+    def bulk_send(body_stream, batch_actions)
       params = http_compression ? {:headers => {"Content-Encoding" => "gzip"}} : {}
-      # Discard the URL
       response = @pool.post(@bulk_path, params, body_stream.string)
-      if !body_stream.closed?
-        body_stream.truncate(0)
-        body_stream.seek(0)
-      end

       @bulk_response_metrics.increment(response.code.to_s)

-      if response.code != 200
+      case response.code
+      when 200 # OK
+        LogStash::Json.load(response.body)
+      when 413 # Payload Too Large
+        logger.warn("Bulk request rejected: `413 Payload Too Large`", :action_count => batch_actions.size, :content_length => body_stream.size)
+        emulate_batch_error_response(batch_actions, response.code, 'payload_too_large')
+      else
         url = ::LogStash::Util::SafeURI.new(response.final_url)
         raise ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError.new(
           response.code, url, body_stream.to_s, response.body
         )
       end
+    end

-      LogStash::Json.load(response.body)
+    def emulate_batch_error_response(actions, http_code, reason)
+      {
+          "errors" => true,
+          "items" => actions.map do |action|
+            action = action.first if action.is_a?(Array)
+            request_action, request_parameters = action.first
+            {
+                request_action => {"status" => http_code, "error" => { "type" => reason }}
+            }
+          end
+      }
     end

     def get(path)
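When Elasticsearch rejects an oversized request with `413 Payload Too Large`, `bulk_send` no longer raises; it fabricates a response body shaped like a real `_bulk` reply so the existing per-action retry logic can process it. A sketch of the shape it produces (the sample action is hypothetical):

    # One bulk action as [action-and-params, source-document]
    actions = [[{ "index" => { "_index" => "logs" } }, { "message" => "hi" }]]

    items = actions.map do |action|
      action = action.first if action.is_a?(Array) # drop the source document
      request_action, _params = action.first       # e.g. "index"
      { request_action => { "status" => 413, "error" => { "type" => "payload_too_large" } } }
    end

    p({ "errors" => true, "items" => items })
    # => {"errors"=>true, "items"=>[{"index"=>{"status"=>413, "error"=>{"type"=>"payload_too_large"}}}]}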
@@ -349,7 +386,7 @@ module LogStash; module Outputs; class ElasticSearch;

     def template_put(name, template)
       path = "#{template_endpoint}/#{name}"
-      logger.info("Installing elasticsearch template to #{path}")
+      logger.info("Installing Elasticsearch template", name: name)
       @pool.put(path, nil, LogStash::Json.dump(template))
     end
@@ -366,13 +403,13 @@ module LogStash; module Outputs; class ElasticSearch;

     # Create a new rollover alias
     def rollover_alias_put(alias_name, alias_definition)
-      logger.info("Creating rollover alias #{alias_name}")
       begin
         @pool.put(CGI::escape(alias_name), nil, LogStash::Json.dump(alias_definition))
+        logger.info("Created rollover alias", name: alias_name)
       # If the rollover alias already exists, ignore the error that comes back from Elasticsearch
       rescue ::LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError => e
         if e.response_code == 400
-          logger.info("Rollover Alias already exists. Skipping")
+          logger.info("Rollover alias already exists, skipping", name: alias_name)
           return
         end
         raise e
| 393 430 |  | 
| 394 431 | 
             
                def ilm_policy_put(name, policy)
         | 
| 395 432 | 
             
                  path = "_ilm/policy/#{name}"
         | 
| 396 | 
            -
                  logger.info("Installing ILM policy #{policy}  | 
| 433 | 
            +
                  logger.info("Installing ILM policy #{policy}", name: name)
         | 
| 397 434 | 
             
                  @pool.put(path, nil, LogStash::Json.dump(policy))
         | 
| 398 435 | 
             
                end
         | 
| 399 436 |  | 
| 400 437 |  | 
| 401 438 | 
             
                # Build a bulk item for an elasticsearch update action
         | 
| 402 439 | 
             
                def update_action_builder(args, source)
         | 
| 440 | 
            +
                  args = args.clone()
         | 
| 403 441 | 
             
                  if args[:_script]
         | 
| 404 442 | 
             
                    # Use the event as a hash from your script with variable name defined
         | 
| 405 443 | 
             
                    # by script_var_name (default: "event")
         | 
data/lib/logstash/outputs/elasticsearch/http_client/pool.rb:

@@ -1,3 +1,4 @@
+require "concurrent/atomic/atomic_reference"
 require "logstash/plugin_mixins/elasticsearch/noop_license_checker"

 module LogStash; module Outputs; class ElasticSearch; class HttpClient;
@@ -71,6 +72,8 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       @stopping = false

       @license_checker = options[:license_checker] || LogStash::PluginMixins::ElasticSearch::NoopLicenseChecker::INSTANCE
+
+      @last_es_version = Concurrent::AtomicReference.new
     end

     def start
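`Concurrent::AtomicReference` (from the concurrent-ruby gem that Logstash bundles) lets the pool publish the last seen Elasticsearch version from its healthcheck thread without readers having to take the state mutex. A minimal sketch of the get/set pattern used here:

    require 'concurrent/atomic/atomic_reference'

    last_es_version = Concurrent::AtomicReference.new
    last_es_version.set('7.10.0') # e.g. written by a healthcheck thread
    puts last_es_version.get      # safe to read from any other thread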
@@ -118,12 +121,6 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       @state_mutex.synchronize { @url_info }
     end

-    def maximum_seen_major_version
-      @state_mutex.synchronize do
-        @maximum_seen_major_version
-      end
-    end
-
     def urls
       url_info.keys
     end
@@ -252,11 +249,12 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       response = perform_request_to_url(url, :get, LICENSE_PATH)
       LogStash::Json.load(response.body)
     rescue => e
-      logger.error("Unable to get license information", url: url.sanitized.to_s, error: e.message)
+      logger.error("Unable to get license information", url: url.sanitized.to_s, exception: e.class, message: e.message)
       {}
     end

     def health_check_request(url)
+      logger.debug("Running health check to see if an ES connection is working", url: url.sanitized.to_s, path: @healthcheck_path)
       perform_request_to_url(url, :head, @healthcheck_path)
     end
@@ -264,29 +262,20 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       # Try to keep locking granularity low such that we don't affect IO...
       @state_mutex.synchronize { @url_info.select {|url,meta| meta[:state] != :alive } }.each do |url,meta|
         begin
-          logger.debug("Running health check to see if an Elasticsearch connection is working",
-                        :healthcheck_url => url, :path => @healthcheck_path)
           health_check_request(url)
           # If no exception was raised it must have succeeded!
-          logger.warn("Restored connection to ES instance", :url => url.sanitized.to_s)
+          logger.warn("Restored connection to ES instance", url: url.sanitized.to_s)
           # We reconnected to this node, check its ES version
           es_version = get_es_version(url)
           @state_mutex.synchronize do
             meta[:version] = es_version
-            major = major_version(es_version)
-            if !@maximum_seen_major_version
-              @logger.info("ES Output version determined", :es_version => major)
-              set_new_major_version(major)
-            elsif major > @maximum_seen_major_version
-              @logger.warn("Detected a node with a higher major version than previously observed. This could be the result of an elasticsearch cluster upgrade.", :previous_major => @maximum_seen_major_version, :new_major => major, :node_url => url.sanitized.to_s)
-              set_new_major_version(major)
-            end
+            set_last_es_version(es_version, url)

             alive = @license_checker.appropriate_license?(self, url)
             meta[:state] = alive ? :alive : :dead
           end
         rescue HostUnreachableError, BadResponseCodeError => e
-          logger.warn("Attempted to resurrect connection to dead ES instance, but got an error.", url: url.sanitized.to_s, error_type: e.class, error: e.message)
+          logger.warn("Attempted to resurrect connection to dead ES instance, but got an error", url: url.sanitized.to_s, exception: e.class, message: e.message)
         end
       end
     end
@@ -355,9 +344,7 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       end

       if state_changes[:removed].size > 0 || state_changes[:added].size > 0
-        if logger.info?
-          logger.info("Elasticsearch pool URLs updated", :changes => state_changes)
-        end
+        logger.info? && logger.info("Elasticsearch pool URLs updated", :changes => state_changes)
       end

       # Run an inline healthcheck anytime URLs are updated
@@ -371,10 +358,6 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;
       @state_mutex.synchronize { @url_info.size }
     end

-    def es_versions
-      @state_mutex.synchronize { @url_info.size }
-    end
-
     def add_url(url)
       @url_info[url] ||= empty_url_meta
     end
@@ -459,22 +442,52 @@ module LogStash; module Outputs; class ElasticSearch; class HttpClient;

     def return_connection(url)
       @state_mutex.synchronize do
-        if @url_info[url] # Guard against the condition where the connection has already been deleted
-          @url_info[url][:in_use] -= 1
-        end
+        info = @url_info[url]
+        info[:in_use] -= 1 if info # Guard against the condition where the connection has already been deleted
       end
     end

     def get_es_version(url)
       request = perform_request_to_url(url, :get, ROOT_URI_PATH)
-      LogStash::Json.load(request.body)["version"]["number"]
+      LogStash::Json.load(request.body)["version"]["number"] # e.g. "7.10.0"
+    end
+
+    def last_es_version
+      @last_es_version.get
+    end
+
+    def maximum_seen_major_version
+      @state_mutex.synchronize { @maximum_seen_major_version }
+    end
+
+    private
+
+    # @private executing within @state_mutex
+    def set_last_es_version(version, url)
+      @last_es_version.set(version)
+
+      major = major_version(version)
+      if @maximum_seen_major_version.nil?
+        @logger.info("Elasticsearch version determined (#{version})", es_version: major)
+        set_maximum_seen_major_version(major)
+      elsif major > @maximum_seen_major_version
+        warn_on_higher_major_version(major, url)
+        @maximum_seen_major_version = major
+      end
     end

-    def set_new_major_version(version)
-      @maximum_seen_major_version = version
-      if @maximum_seen_major_version >= 6
-        @logger.warn("Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type", :es_version => @maximum_seen_major_version)
+    def set_maximum_seen_major_version(major)
+      if major >= 6
+        @logger.warn("Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type", es_version: major)
       end
+      @maximum_seen_major_version = major
     end
+
+    def warn_on_higher_major_version(major, url)
+      @logger.warn("Detected a node with a higher major version than previously observed, " +
+                   "this could be the result of an Elasticsearch cluster upgrade",
+                   previous_major: @maximum_seen_major_version, new_major: major, node_url: url.sanitized.to_s)
+    end
+
   end
 end; end; end; end;
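`set_last_es_version` above relies on a `major_version` helper defined elsewhere in the pool; a hypothetical stand-in consistent with the version strings shown would simply take the first numeric segment:

    # Hypothetical stand-in for the pool's major_version helper.
    def major_version(version)
      version.split('.').first.to_i # "7.10.0" -> 7
    end

    p major_version('7.10.0') # => 7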