fluent-plugin-elasticsearch 1.10.2 → 1.10.3.rc.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7756405c1e59422b26624fbd6e3a0fded30c6d46
4
- data.tar.gz: c12bd57fb39982e51e9a5a4dfaa73f3ef5da3e2c
3
+ metadata.gz: 91687e424c6bff1383a85347f0cf9d6aecd76631
4
+ data.tar.gz: f0a943997d76aa6a63234915ac44868d0f2285a2
5
5
  SHA512:
6
- metadata.gz: 7fb789e36c7a2ad8ae823fa4ed0f7578293bd70426629fccdb09681cc82bbc4e7183439dee046a117bb08ea7de4bcc69eed8052822e5c454e1934218db8d23f2
7
- data.tar.gz: ea0983374a67d7f23ad2e5c2a043de495131784b6b583abbd12d483946dc13aa9fc01e9601fabded84ef11404efd23643f280ff9623aafdb4ce5d1a02f8db43a
6
+ metadata.gz: a9ceabd8ac3832d51174b955c08f24ce683f95411b1b97afc3565018dca8ddde55d679ae84099815a71bb86691dafa03cbcc3405879975fa6684badc31de6b0b
7
+ data.tar.gz: d2663e6accd44050cc16eb8a85a5f521f399ca503a8fb3e866715b918f14db4dbf0df175db6ae1c7dc3f3695f19972f27a6f1f79ed2944c83464d4c522136e08
data/History.md CHANGED
@@ -4,6 +4,9 @@
4
4
  - Log ES response errors (#230)
5
5
  - Use latest elasticsearch-ruby (#240)
6
6
 
7
+ ### 1.10.3.rc.1
8
+ - backport Add generating hash id mechanism to avoid records duplication (#323)
9
+
7
10
  ### 1.10.2
8
11
  - backport adding `include_timestamp` option (#311)
9
12
 
data/README.md CHANGED
@@ -51,6 +51,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
51
51
  + [Proxy Support](#proxy-support)
52
52
  + [Buffered output options](#buffered-output-options)
53
53
  + [Hash flattening](#hash-flattening)
54
+ + [Generate Hash ID](#generate-hash-id)
54
55
  + [Not seeing a config you need?](#not-seeing-a-config-you-need)
55
56
  + [Dynamic configuration](#dynamic-configuration)
56
57
  * [Contact](#contact)
@@ -332,7 +333,7 @@ reload_on_failure true # defaults to false
332
333
  You can set in the elasticsearch-transport how often dead connections from the elasticsearch-transport's pool will be resurrected.
333
334
 
334
335
  ```
335
- resurrect_after 5 # defaults to 60s
336
+ resurrect_after 5s # defaults to 60s
336
337
  ```
337
338
 
338
339
  ### include_tag_key, tag_key
@@ -485,7 +486,7 @@ Starting with version 0.8.0, this gem uses excon, which supports proxy with envi
485
486
 
486
487
  ```
487
488
  buffer_type memory
488
- flush_interval 60
489
+ flush_interval 60s
489
490
  retry_limit 17
490
491
  retry_wait 1.0
491
492
  num_threads 1
@@ -512,6 +513,19 @@ This will produce elasticsearch output that looks like this:
512
513
 
513
514
  Note that the flattener does not deal with arrays at this time.
514
515
 
516
+ ### Generate Hash ID
517
+
518
+ By default, the fluentd elasticsearch plugin does not emit records with a _id field, leaving it to Elasticsearch to generate a unique _id as the record is indexed. When an Elasticsearch cluster is congested and begins to take longer to respond than the configured request_timeout, the fluentd elasticsearch plugin will re-send the same bulk request. Since Elasticsearch can't tell it's actually the same request, all documents in the request are indexed again, resulting in duplicate data. In certain scenarios, this can result in essentially an infinite loop generating multiple copies of the same data.
519
+
520
+ Using an _id generated by the fluentd elasticsearch plugin will communicate to Elasticsearch the uniqueness of the requests so that duplicates will be rejected or simply replace the existing records.
521
+ Here is a sample config:
522
+
523
+ ```
524
+ <hash>
525
+ hash_id_key _id # storing generated hash id key
526
+ </hash>
527
+ ```
528
+
515
529
  ### Not seeing a config you need?
516
530
 
517
531
  We try to keep the scope of this plugin small and not add too many configuration options. If you think an option would be useful to others, feel free to open an issue or contribute a Pull Request.
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '1.10.2'
6
+ s.version = '1.10.3.rc.1'
7
7
  s.authors = ['diogo', 'pitr']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
9
9
  s.description = %q{ElasticSearch output plugin for Fluent event collector}
@@ -0,0 +1,21 @@
1
+ require 'securerandom'
2
+ require 'base64'
3
+
4
+ module Fluent
5
+ module GenerateHashIdSupport
6
+ def self.included(klass)
7
+ klass.instance_eval {
8
+ config_section :hash, param_name: :hash_config, required: false, multi: false do
9
+ config_param :hash_id_key, :string, default: '_id'
10
+ end
11
+ }
12
+ end
13
+
14
+ def generate_hash_id_key(record)
15
+ s = ""
16
+ s += Base64.strict_encode64(SecureRandom.uuid)
17
+ record[@hash_config.hash_id_key] = s
18
+ record
19
+ end
20
+ end
21
+ end
@@ -11,6 +11,7 @@ end
11
11
 
12
12
  require 'fluent/output'
13
13
  require_relative 'elasticsearch_index_template'
14
+ require_relative 'generate_hash_id_support'
14
15
 
15
16
  class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
16
17
  class ConnectionFailure < StandardError; end
@@ -67,6 +68,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
67
68
  config_param :pipeline, :string, :default => nil
68
69
 
69
70
  include Fluent::ElasticsearchIndexTemplate
71
+ include Fluent::GenerateHashIdSupport
70
72
 
71
73
  def initialize
72
74
  super
@@ -311,6 +313,10 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
311
313
  record = flatten_record(record)
312
314
  end
313
315
 
316
+ if @hash_config
317
+ record = generate_hash_id_key(record)
318
+ end
319
+
314
320
  dt = nil
315
321
  if @logstash_format || @include_timestamp
316
322
  if record.has_key?(TIMESTAMP_FIELD)
@@ -10,6 +10,8 @@ class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput
10
10
  DYNAMIC_PARAM_NAMES = %W[hosts host port include_timestamp logstash_format logstash_prefix logstash_dateformat time_key utc_index index_name tag_key type_name id_key parent_key routing_key write_operation]
11
11
  DYNAMIC_PARAM_SYMBOLS = DYNAMIC_PARAM_NAMES.map { |n| "@#{n}".to_sym }
12
12
 
13
+ include Fluent::GenerateHashIdSupport
14
+
13
15
  attr_reader :dynamic_config
14
16
 
15
17
  def configure(conf)
@@ -121,6 +123,10 @@ class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput
121
123
  chunk.msgpack_each do |time, record|
122
124
  next unless record.is_a? Hash
123
125
 
126
+ if @hash_config
127
+ record = generate_hash_id_key(record)
128
+ end
129
+
124
130
  begin
125
131
  # evaluate all configurations here
126
132
  DYNAMIC_PARAM_SYMBOLS.each_with_index { |var, i|
@@ -1,7 +1,10 @@
1
1
  require 'helper'
2
2
  require 'date'
3
+ require 'flexmock/test_unit'
3
4
 
4
5
  class ElasticsearchOutput < Test::Unit::TestCase
6
+ include FlexMock::TestCase
7
+
5
8
  attr_accessor :index_cmds, :index_command_counts
6
9
 
7
10
  def setup
@@ -410,6 +413,33 @@ class ElasticsearchOutput < Test::Unit::TestCase
410
413
  assert_equal('myindex', index_cmds.first['index']['_index'])
411
414
  end
412
415
 
416
+ class AdditionalHashIdMechanismTest < self
417
+ data("default" => {"hash_id_key" => '_id'},
418
+ "custom hash_id_key" => {"hash_id_key" => '_hash_id'},
419
+ )
420
+ def test_writes_with_genrate_hash(data)
421
+ driver.configure(Fluent::Config::Element.new(
422
+ 'ROOT', '', {
423
+ '@type' => 'elasticsearch',
424
+ 'id_key' => data["hash_id_key"],
425
+ }, [
426
+ Fluent::Config::Element.new('hash', '', {
427
+ 'keys' => ['request_id'],
428
+ 'hash_id_key' => data["hash_id_key"],
429
+ }, [])
430
+ ]
431
+ ))
432
+ stub_elastic_ping
433
+ stub_elastic
434
+ flexmock(SecureRandom).should_receive(:uuid)
435
+ .and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
436
+ time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
437
+ driver.emit(sample_record.merge('request_id' => 'elastic'), time)
438
+ driver.run
439
+ assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
440
+ end
441
+ end
442
+
413
443
  def test_writes_to_speficied_index_uppercase
414
444
  driver.configure("index_name MyIndex\n")
415
445
  stub_elastic_ping
@@ -308,6 +308,34 @@ class ElasticsearchOutputDynamic < Test::Unit::TestCase
308
308
  assert_equal(2000, total)
309
309
  end
310
310
 
311
+ class AdditionalHashIdMechanismTest < self
312
+ data("default" => {"hash_id_key" => '_id'},
313
+ "custom hash_id_key" => {"hash_id_key" => '_hash_id'},
314
+ )
315
+ def test_writes_with_genrate_hash(data)
316
+ driver.configure(Fluent::Config::Element.new(
317
+ 'ROOT', '', {
318
+ '@type' => 'elasticsearch',
319
+ 'id_key' => data["hash_id_key"],
320
+ }, [
321
+ Fluent::Config::Element.new('hash', '', {
322
+ 'keys' => ['request_id'],
323
+ 'hash_id_key' => data["hash_id_key"],
324
+ }, [])
325
+ ]
326
+ ))
327
+ stub_elastic_ping
328
+ stub_elastic
329
+ stub_elastic
330
+ flexmock(SecureRandom).should_receive(:uuid)
331
+ .and_return("82120f33-897a-4d9d-b3d5-14afd18fb412")
332
+ time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
333
+ driver.emit(sample_record.merge('request_id' => 'elastic'), time)
334
+ driver.run
335
+ assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
336
+ end
337
+ end
338
+
311
339
  def test_makes_bulk_request
312
340
  stub_elastic_ping
313
341
  stub_elastic
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.2
4
+ version: 1.10.3.rc.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - diogo
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-10-25 00:00:00.000000000 Z
12
+ date: 2017-11-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd
@@ -145,6 +145,7 @@ files:
145
145
  - Rakefile
146
146
  - fluent-plugin-elasticsearch.gemspec
147
147
  - lib/fluent/plugin/elasticsearch_index_template.rb
148
+ - lib/fluent/plugin/generate_hash_id_support.rb
148
149
  - lib/fluent/plugin/out_elasticsearch.rb
149
150
  - lib/fluent/plugin/out_elasticsearch_dynamic.rb
150
151
  - test/helper.rb
@@ -166,9 +167,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
166
167
  version: '2.0'
167
168
  required_rubygems_version: !ruby/object:Gem::Requirement
168
169
  requirements:
169
- - - ">="
170
+ - - ">"
170
171
  - !ruby/object:Gem::Version
171
- version: '0'
172
+ version: 1.3.1
172
173
  requirements: []
173
174
  rubyforge_project:
174
175
  rubygems_version: 2.6.13