fluent-plugin-elasticsearch 5.0.0 → 5.2.3

Files changed (34)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +6 -0
  3. data/.github/workflows/linux.yml +5 -2
  4. data/.github/workflows/macos.yml +5 -2
  5. data/.github/workflows/windows.yml +5 -2
  6. data/Gemfile +1 -1
  7. data/History.md +65 -1
  8. data/README.Troubleshooting.md +91 -0
  9. data/README.md +129 -4
  10. data/fluent-plugin-elasticsearch.gemspec +2 -1
  11. data/lib/fluent/plugin/elasticsearch_compat.rb +30 -0
  12. data/lib/fluent/plugin/elasticsearch_error_handler.rb +19 -4
  13. data/lib/fluent/plugin/elasticsearch_fallback_selector.rb +2 -2
  14. data/lib/fluent/plugin/elasticsearch_index_lifecycle_management.rb +18 -4
  15. data/lib/fluent/plugin/elasticsearch_index_template.rb +20 -4
  16. data/lib/fluent/plugin/elasticsearch_simple_sniffer.rb +2 -1
  17. data/lib/fluent/plugin/filter_elasticsearch_genid.rb +1 -1
  18. data/lib/fluent/plugin/in_elasticsearch.rb +2 -1
  19. data/lib/fluent/plugin/oj_serializer.rb +2 -1
  20. data/lib/fluent/plugin/out_elasticsearch.rb +80 -19
  21. data/lib/fluent/plugin/out_elasticsearch_data_stream.rb +132 -62
  22. data/lib/fluent/plugin/out_elasticsearch_dynamic.rb +3 -1
  23. data/test/plugin/mock_chunk.dat +0 -0
  24. data/test/plugin/test_elasticsearch_error_handler.rb +130 -23
  25. data/test/plugin/test_elasticsearch_fallback_selector.rb +16 -8
  26. data/test/plugin/test_elasticsearch_index_lifecycle_management.rb +55 -15
  27. data/test/plugin/test_filter_elasticsearch_genid.rb +16 -16
  28. data/test/plugin/test_in_elasticsearch.rb +20 -0
  29. data/test/plugin/test_out_elasticsearch.rb +795 -134
  30. data/test/plugin/test_out_elasticsearch_data_stream.rb +717 -117
  31. data/test/plugin/test_out_elasticsearch_dynamic.rb +150 -18
  32. metadata +21 -5
  33. data/.travis.yml +0 -40
  34. data/appveyor.yml +0 -20
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 5f2f8268d9a8a5acf6d941a915044bd3781a5722cea8bc7ac205d6b7fd6fe580
-   data.tar.gz: 7d9373fb963040efac0ea42f11bf1841f2e0b2a39b5566f5e2794507e77e5f5c
+   metadata.gz: 7e2737dc1ced4c50a3db85d71c83351e77a606d581fbd768261ca6613b1700f8
+   data.tar.gz: 49a84ff1ea184c4afd43e69fb6cbb89559926bbd6ac063196bd777281be17d2b
  SHA512:
-   metadata.gz: 0e432748181717cedfa55239d2da7b0141c3280da7240f6f9643db411f1f3168f00f88c6933d65a5d1b944620dab0d87ce088f4dba76fdf17cec336ca55e83bf
-   data.tar.gz: f44b2a14c5a13e1a59bd7d6926af13a9a9ba7b990594bca45de20071756d9ca88f7ee53d6ec00e09f0ad2ed054510a7803ac11c7b695275d37dbacb1e5d51457
+   metadata.gz: 157dbe3ab067ec279f2051a8b6c6ac25538821903b48e96a09687b547270796d7f6972d0707225c99cd36694e8a8668d32dc9ebba3599d24f08c814712c0849a
+   data.tar.gz: 73c611531aa95d5d03d8bd5146ab5f6812c30b3be634f5d1ca139b2ffc6206ccebb8915c4fa1e78d2d53706f6c14b7203342d235bd217a89ac9bf0135e3fec21
data/.github/dependabot.yml ADDED
@@ -0,0 +1,6 @@
+ version: 2
+ updates:
+ - package-ecosystem: "github-actions"
+   directory: "/"
+   schedule:
+     interval: "weekly"
data/.github/workflows/linux.yml CHANGED
@@ -2,18 +2,21 @@ name: Testing on Ubuntu
  on:
    - push
    - pull_request
+ permissions:
+   contents: read
+
  jobs:
    build:
      runs-on: ${{ matrix.os }}
      strategy:
        fail-fast: false
        matrix:
-         ruby: [ '2.4', '2.5', '2.6', '2.7' ]
+         ruby: [ '2.6', '2.7', '3.0' ]
          os:
            - ubuntu-latest
      name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
      steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
        - uses: ruby/setup-ruby@v1
          with:
            ruby-version: ${{ matrix.ruby }}
data/.github/workflows/macos.yml CHANGED
@@ -2,18 +2,21 @@ name: Testing on macOS
  on:
    - push
    - pull_request
+ permissions:
+   contents: read
+
  jobs:
    build:
      runs-on: ${{ matrix.os }}
      strategy:
        fail-fast: false
        matrix:
-         ruby: [ '2.4', '2.5', '2.6', '2.7' ]
+         ruby: [ '2.6', '2.7', '3.0' ]
          os:
            - macOS-latest
      name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
      steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
        - uses: ruby/setup-ruby@v1
          with:
            ruby-version: ${{ matrix.ruby }}
data/.github/workflows/windows.yml CHANGED
@@ -2,18 +2,21 @@ name: Testing on Windows
  on:
    - push
    - pull_request
+ permissions:
+   contents: read
+
  jobs:
    build:
      runs-on: ${{ matrix.os }}
      strategy:
        fail-fast: false
        matrix:
-         ruby: [ '2.4', '2.5', '2.6', '2.7' ]
+         ruby: [ '2.6', '2.7', '3.0' ]
          os:
            - windows-latest
      name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
      steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
        - uses: ruby/setup-ruby@v1
          with:
            ruby-version: ${{ matrix.ruby }}
data/Gemfile CHANGED
@@ -7,5 +7,5 @@ gem 'simplecov', require: false
  gem 'coveralls', ">= 0.8.0", require: false
  gem 'strptime', require: false if RUBY_ENGINE == "ruby" && RUBY_VERSION =~ /^2/
  gem "irb" if RUBY_ENGINE == "ruby" && RUBY_VERSION >= "2.6"
- gem "elasticsearch-xpack"
+ gem "elasticsearch-xpack" if ENV["USE_XPACK"]
  gem "oj"
data/History.md CHANGED
@@ -2,8 +2,72 @@

  ### [Unreleased]

+ ### 5.2.3
+ - Bump actions/checkout from 2 to 3 (#978)
+ - chore: Included githubactions in the dependabot config (#977)
+ - chore: Set permissions for GitHub actions (#972)
+ - Remove nested msgpack\_each in handle\_error (#970)
+ - do not overwrite @timestamp in data stream if it already exists in the record (#968)
+
+ ### 5.2.2
+ - Add missing top level class markers (#961)
+ - Ensure use_record_as_seed for same records (#960)
+
+ ### 5.2.1
+ - respect include\_tag\_key and tag\_key setting when using data streams (#936)
+ - Handle unsupported version error (#956)
+ - Display deprecated warning on ES dynamic plugin (#955)
+
+ ### 5.2.0
+ - Migrate to handle Elasticsearch 8 (#949)
+
+ ### 5.1.5
+ - Make retryable DataStreams creation at configure phase (#943)
+ - Handle @hosts parameter on data_stream plugin (#942)
+ - allow specifying custom ILM policies for data streams (#933)
+
+ ### 5.1.4
+ - Handle ES8 or above more strictly (#931)
+ - fixing double "\_policy" in index lifecycle management policy for elasticsearch\_data\_stream output (#930)
+
+ ### 5.1.3
+ - fixing execution order for dynamic data stream creation (#928)
+
+ ### 5.1.2
+ - Fix default values of datastream parameters (#926)
+
+ ### 5.1.1
+ - Report appropriate error for data_stream parameters (#922)
+ - Add ILM and template parameters for data streams (#920)
+ - Support Buffer in Data Stream Output (#917)
+
+ ### 5.1.0
+ - Correct default target bytes value (#914)
+ - Handle elasticsearch-ruby 7.14 properly (#913)
+
+ ### 5.0.5
+ - Drop json_parse_exception messages for bulk failures (#900)
+ - GitHub Actions: Drop Ruby 2.5 due to EOL (#894)
+
+ ### 5.0.4
+ - test: out_elasticsearch: Remove a needless headers from affinity stub (#888)
+ - Target Index Affinity (#883)
+
+ ### 5.0.3
+ - Fix use_legacy_template documentation (#880)
+ - Add FAQ for dynamic index/template (#878)
+ - Handle IPv6 address string on host and hosts parameters (#877)
+
+ ### 5.0.2
+ - GitHub Actions: Tweak Ruby versions on test (#875)
+ - test: datastreams: Set nonexistent datastream as default (#874)
+ - Fix overwriting of index template and index lifecycle policy on existing data streams (#872)
+
+ ### 5.0.1
+ - Use elasticsearch/api instead of elasticsearch/xpack (#870)
+
  ### 5.0.0
- - Support #retry_operate on data stream (#863)
+ - Support #retry_operate on data stream (#863)
  - Support placeholder in @data\_stream\_name for @type elasticsearch\_data\_stream (#862)
  - Extract troubleshooting section (#861)
  - Fix unmatched `<source>` close tag (#860)
data/README.Troubleshooting.md CHANGED
@@ -10,6 +10,7 @@
  + [Random 400 - Rejected by Elasticsearch is occured, why?](#random-400---rejected-by-elasticsearch-is-occured-why)
  + [Fluentd seems to hang if it unable to connect Elasticsearch, why?](#fluentd-seems-to-hang-if-it-unable-to-connect-elasticsearch-why)
  + [Enable Index Lifecycle Management](#enable-index-lifecycle-management)
+ + [Configuring for dynamic index or template](#configuring-for-dynamic-index-or-template)
  + [How to specify index codec](#how-to-specify-index-codec)
  + [Cannot push logs to Elasticsearch with connect_write timeout reached, why?](#cannot-push-logs-to-elasticsearch-with-connect_write-timeout-reached-why)

@@ -524,6 +525,96 @@ template_name your-fluentd-template
  template_file /path/to/fluentd-template.json
  ```

+ #### Configuring for dynamic index or template
+
+ Some users want to set up ILM for a dynamic index/template.
+ `index_patterns` and `template.settings.index.lifecycle.name` in the Elasticsearch template will be overwritten by the Elasticsearch plugin:
+
+ ```json
+ {
+   "index_patterns": ["mock"],
+   "template": {
+     "settings": {
+       "index": {
+         "lifecycle": {
+           "name": "mock",
+           "rollover_alias": "mock"
+         },
+         "number_of_shards": "<<shard>>",
+         "number_of_replicas": "<<replica>>"
+       }
+     }
+   }
+ }
+ ```
+
+ This template will be handled with:
+
+ ```aconf
+ <source>
+   @type http
+   port 5004
+   bind 0.0.0.0
+   body_size_limit 32m
+   keepalive_timeout 10s
+   <parse>
+     @type json
+   </parse>
+ </source>
+
+ <match kubernetes.var.log.containers.**etl-webserver**.log>
+   @type elasticsearch
+   @id out_es_etl_webserver
+   @log_level info
+   include_tag_key true
+   host $HOST
+   port $PORT
+   path "#{ENV['FLUENT_ELASTICSEARCH_PATH']}"
+   request_timeout "#{ENV['FLUENT_ELASTICSEARCH_REQUEST_TIMEOUT'] || '30s'}"
+   scheme "#{ENV['FLUENT_ELASTICSEARCH_SCHEME'] || 'http'}"
+   ssl_verify "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERIFY'] || 'true'}"
+   ssl_version "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERSION'] || 'TLSv1'}"
+   reload_connections "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_CONNECTIONS'] || 'false'}"
+   reconnect_on_error "#{ENV['FLUENT_ELASTICSEARCH_RECONNECT_ON_ERROR'] || 'true'}"
+   reload_on_failure "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_ON_FAILURE'] || 'true'}"
+   log_es_400_reason "#{ENV['FLUENT_ELASTICSEARCH_LOG_ES_400_REASON'] || 'false'}"
+   logstash_prefix "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_PREFIX'] || 'etl-webserver'}"
+   logstash_format "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_FORMAT'] || 'false'}"
+   index_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_INDEX_NAME'] || 'etl-webserver'}"
+   type_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_TYPE_NAME'] || 'fluentd'}"
+   time_key "#{ENV['FLUENT_ELASTICSEARCH_TIME_KEY'] || '@timestamp'}"
+   include_timestamp "#{ENV['FLUENT_ELASTICSEARCH_INCLUDE_TIMESTAMP'] || 'true'}"
+
+   # ILM Settings - WITH ROLLOVER support
+   # https://github.com/uken/fluent-plugin-elasticsearch#enable-index-lifecycle-management
+   application_name "etl-webserver"
+   index_date_pattern ""
+   # Policy configurations
+   enable_ilm true
+   ilm_policy_id etl-webserver
+   ilm_policy_overwrite true
+   ilm_policy {"policy": {"phases": {"hot": {"min_age": "0ms","actions": {"rollover": {"max_age": "5m","max_size": "3gb"},"set_priority": {"priority": 100}}},"delete": {"min_age": "30d","actions": {"delete": {"delete_searchable_snapshot": true}}}}}}
+   use_legacy_template false
+   template_name etl-webserver
+   template_file /configs/index-template.json
+   template_overwrite true
+   customize_template {"<<shard>>": "3","<<replica>>": "0"}
+
+   <buffer>
+     flush_thread_count "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_THREAD_COUNT'] || '8'}"
+     flush_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_INTERVAL'] || '5s'}"
+     chunk_limit_size "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_CHUNK_LIMIT_SIZE'] || '8MB'}"
+     total_limit_size "#{ENV['FLUENT_ELASTICSEARCH_TOTAL_LIMIT_SIZE'] || '450MB'}"
+     queue_limit_length "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_QUEUE_LIMIT_LENGTH'] || '32'}"
+     retry_max_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_RETRY_MAX_INTERVAL'] || '60s'}"
+     retry_forever false
+   </buffer>
+ </match>
+ ```
+
+ For more details, please refer to the discussion:
+ https://github.com/uken/fluent-plugin-elasticsearch/issues/867
+
  ### How to specify index codec

  Elasticsearch can handle compression methods for stored data such as LZ4 and best_compression.
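
For reference, the `customize_template` mechanism used in the config above amounts to plain string substitution of the placeholder keys into the template JSON before it is sent to Elasticsearch. A minimal Ruby sketch of the idea (illustrative only, not the plugin's exact code; `render_template` is a hypothetical helper):

```ruby
require 'json'

# Substitute customize_template placeholders into an index template body,
# mirroring: customize_template {"<<shard>>": "3","<<replica>>": "0"}
def render_template(template_path, customize_hash)
  body = File.read(template_path)
  customize_hash.each do |placeholder, value|
    body = body.gsub(placeholder, value)
  end
  JSON.parse(body) # fails fast if the substituted result is not valid JSON
end

# render_template('/configs/index-template.json',
#                 '<<shard>>' => '3', '<<replica>>' => '0')
```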
data/README.md CHANGED
@@ -11,7 +11,7 @@ Send your logs to Elasticsearch (and search them with Kibana maybe?)

  Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-elasticsearch-service](https://github.com/atomita/fluent-plugin-aws-elasticsearch-service)

- Current maintainers: @cosmo0920
+ Current maintainers: [Hiroshi Hatake | @cosmo0920](https://github.com/cosmo0920), [Kentaro Hayashi | @kenhys](https://github.com/kenhys)

  * [Installation](#installation)
  * [Usage](#usage)

@@ -38,6 +38,7 @@ Current maintainers: @cosmo0920
  + [suppress_type_name](#suppress_type_name)
  + [target_index_key](#target_index_key)
  + [target_type_key](#target_type_key)
+ + [target_index_affinity](#target_index_affinity)
  + [template_name](#template_name)
  + [template_file](#template_file)
  + [template_overwrite](#template_overwrite)

@@ -171,6 +172,24 @@ You can specify Elasticsearch host by this parameter.

  **Note:** Since v3.3.2, `host` parameter supports builtin placeholders. If you want to send events dynamically into different hosts at runtime with `elasticsearch_dynamic` output plugin, please consider to switch to use plain `elasticsearch` output plugin. In more detail for builtin placeholders, please refer to [Placeholders](#placeholders) section.

+ To use an IPv6 address for the `host` parameter, you can use one of the following styles:
+
+ #### string style
+
+ To use the string style, you must quote the IPv6 address to prevent it from being interpreted as JSON:
+
+ ```
+ host "[2404:7a80:d440:3000:192a:a292:bd7f:ca10]"
+ ```
+
+ #### raw style
+
+ You can also specify a raw IPv6 address. It will be handled as `[specified IPv6 address]`:
+
+ ```
+ host 2404:7a80:d440:3000:192a:a292:bd7f:ca10
+ ```
+
  ### port

  ```

@@ -237,6 +256,16 @@ hosts host1:port1,host2:port2,host3 # port3 is 9200

  **Note:** Up until v2.8.5, it was allowed to embed the username/password in the URL. However, this syntax is deprecated as of v2.8.6 because it was found to cause serious connection problems (See #394). Please migrate your settings to use the `user` and `password` field (described below) instead.

+ #### IPv6 addresses
+
+ When you want to specify IPv6 addresses, you must also specify the scheme:
+
+ ```
+ hosts http://[2404:7a80:d440:3000:de:7311:6329:2e6c]:port1,http://[2404:7a80:d440:3000:de:7311:6329:1e6c]:port2,http://[2404:7a80:d440:3000:de:6311:6329:2e6c]:port3
+ ```
+
+ If you don't specify the scheme together with the hosts, the Elasticsearch plugin complains with an Invalid URI error for them.
+
  ### user, password, path, scheme, ssl_verify

  ```
@@ -426,6 +455,75 @@ and this record will be written to the specified index (`logstash-2014.12.19`) rather than the default.

  Similar to `target_index_key` config, find the type name to write to in the record under this key (or nested record). If key not found in record - fallback to `type_name` (default "fluentd").

+ ### target_index_affinity
+
+ Enables the plugin to dynamically select a logstash time-based target index in update/upsert operations, based on already-indexed data rather than the current time of indexing.
+
+ ```
+ target_index_affinity true # defaults to false
+ ```
+
+ By default, the plugin writes data to a logstash-format index based on the current time. For example, with a daily index, data arriving after midnight is written to the newly created index. This is normally fine when data comes from a single source and is not updated after indexing.
+
+ But consider a use case where data is also updated after indexing, `id_key` is used to identify the document uniquely for updating, and the logstash format is still wanted for easy data management and retention. Updates are done right after indexing to complete the data (not all data is available from a single source), and no further updates happen at a later point in time. In this case, a problem occurs at index rotation time, when the same `id_key` value may be written to two indices.
+
+ This setting searches existing data with Elasticsearch's [ids query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html) using the `id_key` value (with the `logstash_prefix` and `logstash_prefix_separator` index pattern, e.g. `logstash-*`). The index of the found data is used for the update/upsert. When no data is found, data is written to the current logstash index as usual.
+
+ This setting requires the following other settings:
+ ```
+ logstash_format true
+ id_key myId # Some field on your data to identify the data uniquely
+ write_operation upsert # upsert or update
+ ```
+
+ Suppose you have the following situation, where two different matches consume data from two different Kafka topics independently but close in time to each other (order not known).
+
+ ```
+ <match data1>
+   @type elasticsearch
+   ...
+   id_key myId
+   write_operation upsert
+   logstash_format true
+   logstash_dateformat %Y.%m.%d
+   logstash_prefix myindexprefix
+   target_index_affinity true
+   ...
+
+ <match data2>
+   @type elasticsearch
+   ...
+   id_key myId
+   write_operation upsert
+   logstash_format true
+   logstash_dateformat %Y.%m.%d
+   logstash_prefix myindexprefix
+   target_index_affinity true
+   ...
+ ```
+
+ If your first (data1) input is:
+ ```
+ {
+   "myId": "myuniqueId1",
+   "datafield1": "some value"
+ }
+ ```
+
+ and your second (data2) input is:
+ ```
+ {
+   "myId": "myuniqueId1",
+   "datafield99": "some important data from other source tightly related to id myuniqueId1 and wanted to be in same document."
+ }
+ ```
+
+ Today's date is 10.05.2021, so data is written to index `myindexprefix-2021.05.10` when both data1 and data2 are consumed during the day.
+ But suppose we are close to index rotation: data1 is consumed and indexed at `2021-05-10T23:59:55.59707672Z`, and data2
+ is consumed a bit later at `2021-05-11T00:00:58.222079Z`, i.e. after the logstash index has been rotated; normally data2 would have been written
+ to index `myindexprefix-2021.05.11`. But with `target_index_affinity` set to `true`, data2 is now written to index `myindexprefix-2021.05.10`,
+ into the same document as data1, as wanted, and a duplicated document is avoided.
+
  ### template_name

  The name of the template to define. If a template by the name given is already present, it will be left unchanged, unless [template_overwrite](#template_overwrite) is set, in which case the template will be updated.
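
Conceptually, `target_index_affinity` resolves the write index with an ids query before the bulk request is built. A rough Ruby sketch of that lookup (illustrative only; `affinity_index` is a hypothetical helper, not the plugin's actual implementation):

```ruby
require 'elasticsearch'

# Return the logstash index that already holds this document id, if any;
# otherwise fall back to the index for the current date.
def affinity_index(client, prefix, id)
  hits = client.search(
    index: "#{prefix}-*",
    body: { query: { ids: { values: [id] } }, _source: false }
  )['hits']['hits']
  hits.empty? ? "#{prefix}-#{Time.now.utc.strftime('%Y.%m.%d')}" : hits.first['_index']
end

# client = Elasticsearch::Client.new(url: 'http://localhost:9200')
# affinity_index(client, 'myindexprefix', 'myuniqueId1')
```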
@@ -1325,9 +1423,9 @@ Default value is `nil`.

  Use legacy template or not.

- Elasticsearch 7.8 or later supports the brand new composable templates.
+ For Elasticsearch 7.8 or later, users can specify this parameter as `false` if their [template_file](#template_file) contains a composable index template.

- For Elasticsearch 7.7 or older, users should specify this parameter as `false`.
+ For Elasticsearch 7.7 or older, users should specify this parameter as `true`.

  Composable template documentation is [Put Index Template API | Elasticsearch Reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html) and legacy template documentation is [Index Templates | Elasticsearch Reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates-v1.html).

@@ -1423,7 +1521,7 @@ You can enable this feature by specifying `@type elasticsearch_data_stream`.
  data_stream_name test
  ```

- When `@type elasticsearch_data_stream` is used, ILM default policy is set to the specified data stream.
+ When `@type elasticsearch_data_stream` is used, unless specified with `data_stream_ilm_name` and `data_stream_template_name` or `data_stream_ilm_policy`, the ILM default policy is set for the specified data stream.
  Then, the matching index template is also created automatically.

  ### data_stream_name

@@ -1431,10 +1529,37 @@ Then, the matching index template is also created automatically.
  You can specify Elasticsearch data stream name by this parameter.
  This parameter is mandatory for `elasticsearch_data_stream`.

+ ### data_stream_template_name
+
+ You can specify an existing matching index template for the data stream. If not present, it creates a new matching index template.
+
+ Default value is `data_stream_name`.
+
+ ### data_stream_ilm_name
+
+ You can specify the name of an existing ILM policy, which will be applied to the data stream. If not present, it creates a new ILM default policy (unless `data_stream_template_name` is defined, in which case the ILM policy will be set to the one specified in the matching index template).
+
+ Default value is `data_stream_name`.
+
  There are some limitations about naming rule.

  In more detail, please refer to the [Path parameters](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html#indices-create-data-stream-api-path-params).

+
+ ### data_stream_ilm_policy
+
+ You can specify the ILM policy contents as a hash. If not present, the ILM default policy will be applied.
+
+ **NOTE:** This parameter requires the elasticsearch-xpack gem to be installed.
+
+ ### data_stream_ilm_policy_overwrite
+
+ Specify whether the data stream ILM policy should be overwritten.
+
+ Default value is `false`.
+
+ **NOTE:** This parameter requires the elasticsearch-xpack gem to be installed.
+
  ## Troubleshooting

  See [Troubleshooting document](README.Troubleshooting.md)
data/fluent-plugin-elasticsearch.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)

  Gem::Specification.new do |s|
    s.name = 'fluent-plugin-elasticsearch'
-   s.version = '5.0.0'
+   s.version = '5.2.3'
    s.authors = ['diogo', 'pitr', 'Hiroshi Hatake']
    s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com', 'cosmo0920.wp@gmail.com']
    s.description = %q{Elasticsearch output plugin for Fluent event collector}

@@ -28,6 +28,7 @@ Gem::Specification.new do |s|


    s.add_development_dependency 'rake', '>= 0'
+   s.add_development_dependency 'webrick', '~> 1.7.0'
    s.add_development_dependency 'webmock', '~> 3'
    s.add_development_dependency 'test-unit', '~> 3.3.0'
    s.add_development_dependency 'minitest', '~> 5.8'
data/lib/fluent/plugin/elasticsearch_compat.rb ADDED
@@ -0,0 +1,30 @@
+ begin
+   require 'elastic/transport'
+   ::TRANSPORT_CLASS = Elastic::Transport
+ rescue LoadError
+ end
+ begin
+   require 'elasticsearch/transport'
+   ::TRANSPORT_CLASS = Elasticsearch::Transport
+ rescue LoadError
+ end
+ if Gem::Version.new(Elasticsearch::VERSION) < Gem::Version.new("8.0.0")
+   begin
+     require 'elasticsearch/xpack'
+   rescue LoadError
+   end
+ end
+
+ begin
+   require 'elastic/transport/transport/connections/selector'
+   ::SELECTOR_CLASS = Elastic::Transport::Transport::Connections::Selector
+ rescue LoadError
+ end
+ begin
+   require 'elasticsearch/transport/transport/connections/selector'
+   ::SELECTOR_CLASS = Elasticsearch::Transport::Transport::Connections::Selector
+ rescue LoadError
+ end
+ unless defined?(::Elasticsearch::UnsupportedProductError)
+   class ::Elasticsearch::UnsupportedProductError < StandardError; end
+ end
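
The shim tries both gem layouts and records whichever loads, so the rest of the plugin can reference a single pair of constants regardless of client generation. A minimal sketch of how dependent code can stay client-agnostic (hypothetical usage, not taken from the diff):

```ruby
require 'fluent/plugin/elasticsearch_compat'

# TRANSPORT_CLASS is Elastic::Transport on the 8.x gems and
# Elasticsearch::Transport on the 7.x gems; the rescue works for both.
def ping_cluster(client, logger)
  client.ping
rescue ::TRANSPORT_CLASS::Transport::Error => e
  logger.warn("Elasticsearch connection check failed: #{e.message}")
  false
end
```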
data/lib/fluent/plugin/elasticsearch_error_handler.rb CHANGED
@@ -23,6 +23,10 @@ class Fluent::Plugin::ElasticsearchErrorHandler
      unrecoverable_error_types.include?(type)
    end

+   def unrecoverable_record_error?(type)
+     ['json_parse_exception'].include?(type)
+   end
+
    def log_es_400_reason(&block)
      if @plugin.log_es_400_reason
        block.call

@@ -31,7 +35,7 @@ class Fluent::Plugin::ElasticsearchErrorHandler
      end
    end

-   def handle_error(response, tag, chunk, bulk_message_count, extracted_values)
+   def handle_error(response, tag, chunk, bulk_message_count, extracted_values, unpacked_msg_arr)
      items = response['items']
      if items.nil? || !items.is_a?(Array)
        raise ElasticsearchVersionMismatch, "The response format was unrecognized: #{response}"

@@ -43,15 +47,21 @@ class Fluent::Plugin::ElasticsearchErrorHandler
      stats = Hash.new(0)
      meta = {}
      header = {}
-     chunk.msgpack_each do |time, rawrecord|
+     affinity_target_indices = @plugin.get_affinity_target_indices(chunk)
+
+     unpacked_msg_arr.each do |msg|
+       time = msg[:time]
+       rawrecord = msg[:record]
+
        bulk_message = ''
        next unless rawrecord.is_a? Hash
        begin
          # we need a deep copy for process_message to alter
          processrecord = Marshal.load(Marshal.dump(rawrecord))
-         meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, extracted_values)
+         meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, affinity_target_indices, extracted_values)
          next unless @plugin.append_record_to_messages(@plugin.write_operation, meta, header, record, bulk_message)
        rescue => e
+         @plugin.log.debug("Exception in error handler during deep copy: #{e}")
          stats[:bad_chunk_record] += 1
          next
        end

@@ -105,10 +115,15 @@ class Fluent::Plugin::ElasticsearchErrorHandler
        elsif item[write_operation].has_key?('error') && item[write_operation]['error'].has_key?('type')
          type = item[write_operation]['error']['type']
          stats[type] += 1
-         retry_stream.add(time, rawrecord)
          if unrecoverable_error?(type)
            raise ElasticsearchRequestAbortError, "Rejected Elasticsearch due to #{type}"
          end
+         if unrecoverable_record_error?(type)
+           @plugin.router.emit_error_event(tag, time, rawrecord, ElasticsearchError.new("#{status} - #{type}: #{reason}"))
+           next
+         else
+           retry_stream.add(time, rawrecord) unless unrecoverable_record_error?(type)
+         end
        else
          # When we don't have a type field, something changed in the API
          # expected return values (ES 2.x)
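
With this change the chunk is unpacked once by the caller and passed in, instead of `handle_error` calling `chunk.msgpack_each` a second time (the nested-`msgpack_each` removal listed under 5.2.3). A sketch of the caller-side shape, inferred from the `msg[:time]` / `msg[:record]` accesses above (an assumption, not the plugin's exact code):

```ruby
# Unpack the buffer chunk a single time so the same array can drive both the
# bulk request and, on failure, the error handler above.
def unpack_chunk(chunk)
  unpacked_msg_arr = []
  chunk.msgpack_each do |time, rawrecord|
    unpacked_msg_arr << { time: time, record: rawrecord }
  end
  unpacked_msg_arr
end

# error_handler.handle_error(response, tag, chunk, bulk_message_count,
#                            extracted_values, unpack_chunk(chunk))
```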
data/lib/fluent/plugin/elasticsearch_fallback_selector.rb CHANGED
@@ -1,7 +1,7 @@
- require 'elasticsearch/transport/transport/connections/selector'
+ require_relative 'elasticsearch_compat'

  class Fluent::Plugin::ElasticseatchFallbackSelector
-   include Elasticsearch::Transport::Transport::Connections::Selector::Base
+   include SELECTOR_CLASS::Base

    def select(options={})
      connections.first
data/lib/fluent/plugin/elasticsearch_index_lifecycle_management.rb CHANGED
@@ -1,3 +1,5 @@
+ require_relative 'elasticsearch_compat'
+
  module Fluent::Plugin::ElasticsearchIndexLifecycleManagement
    ILM_DEFAULT_POLICY_PATH = "default-ilm-policy.json"

@@ -21,7 +23,7 @@ module Fluent::Plugin::ElasticsearchIndexLifecycleManagement
      raise Fluent::ConfigError, "Index Lifecycle management is enabled in Fluentd, but not available in your Elasticsearch" unless ilm['available']
      raise Fluent::ConfigError, "Index Lifecycle management is enabled in Fluentd, but not enabled in your Elasticsearch" unless ilm['enabled']

-   rescue Elasticsearch::Transport::Transport::Error => e
+   rescue ::TRANSPORT_CLASS::Transport::Error => e
      raise Fluent::ConfigError, "Index Lifecycle management is enabled in Fluentd, but not installed on your Elasticsearch", error: e
    end
  end

@@ -43,12 +45,20 @@ module Fluent::Plugin::ElasticsearchIndexLifecycleManagement
  end

  def get_ilm_policy
-   client.ilm.get_policy
+   if Gem::Version.new(TRANSPORT_CLASS::VERSION) < Gem::Version.new("8.0.0")
+     client.ilm.get_policy
+   else
+     client.enrich.get_policy
+   end
  end

  def ilm_policy_exists?(policy_id)
    begin
-     client.ilm.get_policy(policy_id: policy_id)
+     if Gem::Version.new(TRANSPORT_CLASS::VERSION) < Gem::Version.new("8.0.0")
+       client.ilm.get_policy(policy_id: policy_id)
+     else
+       client.enrich.get_policy(name: policy_id)
+     end
      true
    rescue
      false

@@ -57,7 +67,11 @@ module Fluent::Plugin::ElasticsearchIndexLifecycleManagement

  def ilm_policy_put(policy_id, policy)
    log.info("Installing ILM policy: #{policy}")
-   client.ilm.put_policy(policy_id: policy_id, body: policy)
+   if Gem::Version.new(TRANSPORT_CLASS::VERSION) < Gem::Version.new("8.0.0")
+     client.ilm.put_policy(policy_id: policy_id, body: policy)
+   else
+     client.enrich.put_policy(name: policy_id, body: policy)
+   end
  end

  def default_policy_payload
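
The three methods above share one version gate. Condensed for clarity (a sketch, assuming `TRANSPORT_CLASS` comes from the compat shim added earlier in this diff): pre-8 transport gems expose the `client.ilm` APIs directly, while the 8.x code path here routes the same calls through `client.enrich`.

```ruby
# Version gate shared by get_ilm_policy, ilm_policy_exists? and ilm_policy_put.
def legacy_ilm_client?
  Gem::Version.new(TRANSPORT_CLASS::VERSION) < Gem::Version.new("8.0.0")
end
```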