fluent-plugin-elasticsearch 1.10.2 → 1.10.3.rc.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +3 -0
- data/README.md +16 -2
- data/fluent-plugin-elasticsearch.gemspec +1 -1
- data/lib/fluent/plugin/generate_hash_id_support.rb +21 -0
- data/lib/fluent/plugin/out_elasticsearch.rb +6 -0
- data/lib/fluent/plugin/out_elasticsearch_dynamic.rb +6 -0
- data/test/plugin/test_out_elasticsearch.rb +30 -0
- data/test/plugin/test_out_elasticsearch_dynamic.rb +28 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91687e424c6bff1383a85347f0cf9d6aecd76631
|
4
|
+
data.tar.gz: f0a943997d76aa6a63234915ac44868d0f2285a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9ceabd8ac3832d51174b955c08f24ce683f95411b1b97afc3565018dca8ddde55d679ae84099815a71bb86691dafa03cbcc3405879975fa6684badc31de6b0b
|
7
|
+
data.tar.gz: d2663e6accd44050cc16eb8a85a5f521f399ca503a8fb3e866715b918f14db4dbf0df175db6ae1c7dc3f3695f19972f27a6f1f79ed2944c83464d4c522136e08
|
data/History.md
CHANGED
data/README.md
CHANGED
@@ -51,6 +51,7 @@ Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-
|
|
51
51
|
+ [Proxy Support](#proxy-support)
|
52
52
|
+ [Buffered output options](#buffered-output-options)
|
53
53
|
+ [Hash flattening](#hash-flattening)
|
54
|
+
+ [Generate Hash ID](#generate-hash-id)
|
54
55
|
+ [Not seeing a config you need?](#not-seeing-a-config-you-need)
|
55
56
|
+ [Dynamic configuration](#dynamic-configuration)
|
56
57
|
* [Contact](#contact)
|
@@ -332,7 +333,7 @@ reload_on_failure true # defaults to false
|
|
332
333
|
You can set in the elasticsearch-transport how often dead connections from the elasticsearch-transport's pool will be resurrected.
|
333
334
|
|
334
335
|
```
|
335
|
-
resurrect_after
|
336
|
+
resurrect_after 5s # defaults to 60s
|
336
337
|
```
|
337
338
|
|
338
339
|
### include_tag_key, tag_key
|
@@ -485,7 +486,7 @@ Starting with version 0.8.0, this gem uses excon, which supports proxy with envi
|
|
485
486
|
|
486
487
|
```
|
487
488
|
buffer_type memory
|
488
|
-
flush_interval
|
489
|
+
flush_interval 60s
|
489
490
|
retry_limit 17
|
490
491
|
retry_wait 1.0
|
491
492
|
num_threads 1
|
@@ -512,6 +513,19 @@ This will produce elasticsearch output that looks like this:
|
|
512
513
|
|
513
514
|
Note that the flattener does not deal with arrays at this time.
|
514
515
|
|
516
|
+
### Generate Hash ID
|
517
|
+
|
518
|
+
By default, the fluentd elasticsearch plugin does not emit records with a _id field, leaving it to Elasticsearch to generate a unique _id as the record is indexed. When an Elasticsearch cluster is congested and begins to take longer to respond than the configured request_timeout, the fluentd elasticsearch plugin will re-send the same bulk request. Since Elasticsearch can't tell it's actually the same request, all documents in the request are indexed again, resulting in duplicate data. In certain scenarios, this can result in essentially an infinite loop generating multiple copies of the same data.
|
519
|
+
|
520
|
+
Using an _id generated by the fluentd elasticsearch plugin will communicate to Elasticsearch the uniqueness of the requests so that duplicates will be rejected or simply replace the existing records.
|
521
|
+
Here is a sample config:
|
522
|
+
|
523
|
+
```
|
524
|
+
<hash>
|
525
|
+
hash_id_key _id # storing generated hash id key
|
526
|
+
</hash>
|
527
|
+
```
|
528
|
+
|
515
529
|
### Not seeing a config you need?
|
516
530
|
|
517
531
|
We try to keep the scope of this plugin small and not add too many configuration options. If you think an option would be useful to others, feel free to open an issue or contribute a Pull Request.
|
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'fluent-plugin-elasticsearch'
|
6
|
-
s.version = '1.10.2'
|
6
|
+
s.version = '1.10.3.rc.1'
|
7
7
|
s.authors = ['diogo', 'pitr']
|
8
8
|
s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
|
9
9
|
s.description = %q{ElasticSearch output plugin for Fluent event collector}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
require 'base64'
|
3
|
+
|
4
|
+
module Fluent
|
5
|
+
module GenerateHashIdSupport
|
6
|
+
def self.included(klass)
|
7
|
+
klass.instance_eval {
|
8
|
+
config_section :hash, param_name: :hash_config, required: false, multi: false do
|
9
|
+
config_param :hash_id_key, :string, default: '_id'
|
10
|
+
end
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
14
|
+
def generate_hash_id_key(record)
|
15
|
+
s = ""
|
16
|
+
s += Base64.strict_encode64(SecureRandom.uuid)
|
17
|
+
record[@hash_config.hash_id_key] = s
|
18
|
+
record
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -11,6 +11,7 @@ end
|
|
11
11
|
|
12
12
|
require 'fluent/output'
|
13
13
|
require_relative 'elasticsearch_index_template'
|
14
|
+
require_relative 'generate_hash_id_support'
|
14
15
|
|
15
16
|
class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
|
16
17
|
class ConnectionFailure < StandardError; end
|
@@ -67,6 +68,7 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
|
|
67
68
|
config_param :pipeline, :string, :default => nil
|
68
69
|
|
69
70
|
include Fluent::ElasticsearchIndexTemplate
|
71
|
+
include Fluent::GenerateHashIdSupport
|
70
72
|
|
71
73
|
def initialize
|
72
74
|
super
|
@@ -311,6 +313,10 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
|
|
311
313
|
record = flatten_record(record)
|
312
314
|
end
|
313
315
|
|
316
|
+
if @hash_config
|
317
|
+
record = generate_hash_id_key(record)
|
318
|
+
end
|
319
|
+
|
314
320
|
dt = nil
|
315
321
|
if @logstash_format || @include_timestamp
|
316
322
|
if record.has_key?(TIMESTAMP_FIELD)
|
@@ -10,6 +10,8 @@ class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput
|
|
10
10
|
DYNAMIC_PARAM_NAMES = %W[hosts host port include_timestamp logstash_format logstash_prefix logstash_dateformat time_key utc_index index_name tag_key type_name id_key parent_key routing_key write_operation]
|
11
11
|
DYNAMIC_PARAM_SYMBOLS = DYNAMIC_PARAM_NAMES.map { |n| "@#{n}".to_sym }
|
12
12
|
|
13
|
+
include Fluent::GenerateHashIdSupport
|
14
|
+
|
13
15
|
attr_reader :dynamic_config
|
14
16
|
|
15
17
|
def configure(conf)
|
@@ -121,6 +123,10 @@ class Fluent::ElasticsearchOutputDynamic < Fluent::ElasticsearchOutput
|
|
121
123
|
chunk.msgpack_each do |time, record|
|
122
124
|
next unless record.is_a? Hash
|
123
125
|
|
126
|
+
if @hash_config
|
127
|
+
record = generate_hash_id_key(record)
|
128
|
+
end
|
129
|
+
|
124
130
|
begin
|
125
131
|
# evaluate all configurations here
|
126
132
|
DYNAMIC_PARAM_SYMBOLS.each_with_index { |var, i|
|
@@ -1,7 +1,10 @@
|
|
1
1
|
require 'helper'
|
2
2
|
require 'date'
|
3
|
+
require 'flexmock/test_unit'
|
3
4
|
|
4
5
|
class ElasticsearchOutput < Test::Unit::TestCase
|
6
|
+
include FlexMock::TestCase
|
7
|
+
|
5
8
|
attr_accessor :index_cmds, :index_command_counts
|
6
9
|
|
7
10
|
def setup
|
@@ -410,6 +413,33 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
410
413
|
assert_equal('myindex', index_cmds.first['index']['_index'])
|
411
414
|
end
|
412
415
|
|
416
|
+
class AdditionalHashIdMechanismTest < self
|
417
|
+
data("default" => {"hash_id_key" => '_id'},
|
418
|
+
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
419
|
+
)
|
420
|
+
def test_writes_with_genrate_hash(data)
|
421
|
+
driver.configure(Fluent::Config::Element.new(
|
422
|
+
'ROOT', '', {
|
423
|
+
'@type' => 'elasticsearch',
|
424
|
+
'id_key' => data["hash_id_key"],
|
425
|
+
}, [
|
426
|
+
Fluent::Config::Element.new('hash', '', {
|
427
|
+
'keys' => ['request_id'],
|
428
|
+
'hash_id_key' => data["hash_id_key"],
|
429
|
+
}, [])
|
430
|
+
]
|
431
|
+
))
|
432
|
+
stub_elastic_ping
|
433
|
+
stub_elastic
|
434
|
+
flexmock(SecureRandom).should_receive(:uuid)
|
435
|
+
.and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
|
436
|
+
time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
|
437
|
+
driver.emit(sample_record.merge('request_id' => 'elastic'), time)
|
438
|
+
driver.run
|
439
|
+
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
440
|
+
end
|
441
|
+
end
|
442
|
+
|
413
443
|
def test_writes_to_speficied_index_uppercase
|
414
444
|
driver.configure("index_name MyIndex\n")
|
415
445
|
stub_elastic_ping
|
@@ -308,6 +308,34 @@ class ElasticsearchOutputDynamic < Test::Unit::TestCase
|
|
308
308
|
assert_equal(2000, total)
|
309
309
|
end
|
310
310
|
|
311
|
+
class AdditionalHashIdMechanismTest < self
|
312
|
+
data("default" => {"hash_id_key" => '_id'},
|
313
|
+
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
314
|
+
)
|
315
|
+
def test_writes_with_genrate_hash(data)
|
316
|
+
driver.configure(Fluent::Config::Element.new(
|
317
|
+
'ROOT', '', {
|
318
|
+
'@type' => 'elasticsearch',
|
319
|
+
'id_key' => data["hash_id_key"],
|
320
|
+
}, [
|
321
|
+
Fluent::Config::Element.new('hash', '', {
|
322
|
+
'keys' => ['request_id'],
|
323
|
+
'hash_id_key' => data["hash_id_key"],
|
324
|
+
}, [])
|
325
|
+
]
|
326
|
+
))
|
327
|
+
stub_elastic_ping
|
328
|
+
stub_elastic
|
329
|
+
stub_elastic
|
330
|
+
flexmock(SecureRandom).should_receive(:uuid)
|
331
|
+
.and_return("82120f33-897a-4d9d-b3d5-14afd18fb412")
|
332
|
+
time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
|
333
|
+
driver.emit(sample_record.merge('request_id' => 'elastic'), time)
|
334
|
+
driver.run
|
335
|
+
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
311
339
|
def test_makes_bulk_request
|
312
340
|
stub_elastic_ping
|
313
341
|
stub_elastic
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.2
|
4
|
+
version: 1.10.3.rc.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- diogo
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-11-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- Rakefile
|
146
146
|
- fluent-plugin-elasticsearch.gemspec
|
147
147
|
- lib/fluent/plugin/elasticsearch_index_template.rb
|
148
|
+
- lib/fluent/plugin/generate_hash_id_support.rb
|
148
149
|
- lib/fluent/plugin/out_elasticsearch.rb
|
149
150
|
- lib/fluent/plugin/out_elasticsearch_dynamic.rb
|
150
151
|
- test/helper.rb
|
@@ -166,9 +167,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
167
|
version: '2.0'
|
167
168
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
169
|
requirements:
|
169
|
-
- - "
|
170
|
+
- - ">"
|
170
171
|
- !ruby/object:Gem::Version
|
171
|
-
version:
|
172
|
+
version: 1.3.1
|
172
173
|
requirements: []
|
173
174
|
rubyforge_project:
|
174
175
|
rubygems_version: 2.6.13
|