fluent-plugin-elasticsearch 2.1.1 → 2.2.0.rc.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +3 -0
- data/README.md +11 -6
- data/fluent-plugin-elasticsearch.gemspec +1 -1
- data/lib/fluent/plugin/filter_elasticsearch_genid.rb +25 -0
- data/lib/fluent/plugin/generate_hash_id_support.rb +2 -1
- data/test/plugin/test_filter_elasticsearch_genid.rb +44 -0
- data/test/plugin/test_out_elasticsearch.rb +14 -21
- data/test/plugin/test_out_elasticsearch_dynamic.rb +12 -20
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90e7f44770dc48c5061c88be01ef839bed6cb5bb
|
4
|
+
data.tar.gz: bbf38e0fb4f108affcb5d0f868deacf540d432fc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 671b7654c928aa0d8f919266c6cbc9861ecf080e5da9e9529852bcc86322d62904d32bcc952658fa83d842bbb904eab1a31e14f0ef0191c4ad0a056307a305e5
|
7
|
+
data.tar.gz: e82257e8d545eec34e862e2198fabd936bb0622e64b58f78d6d8f1d3d6bba98ec7309e4444a50f574f8382189984b771fed9918af9ff2238d6324d97845f8e9c
|
data/History.md
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
- Log ES response errors (#230)
|
5
5
|
- Use latest elasticsearch-ruby (#240)
|
6
6
|
|
7
|
+
### 2.2.0.rc.1
|
8
|
+
- Separate the hash id generation module and bundle a new plugin for generating unique hash ids (#331)
|
9
|
+
|
7
10
|
### 2.1.1
|
8
11
|
- Raise ConfigError when specifying different @hash_config.hash_id_key and id_key configuration (#327)
|
9
12
|
- Small typo fix in README.md (#325)
|
data/README.md
CHANGED
@@ -557,15 +557,20 @@ Note that the flattener does not deal with arrays at this time.
|
|
557
557
|
|
558
558
|
By default, the fluentd elasticsearch plugin does not emit records with a _id field, leaving it to Elasticsearch to generate a unique _id as the record is indexed. When an Elasticsearch cluster is congested and begins to take longer to respond than the configured request_timeout, the fluentd elasticsearch plugin will re-send the same bulk request. Since Elasticsearch can't tell it's actually the same request, all documents in the request are indexed again, resulting in duplicate data. In certain scenarios, this can result in essentially an infinite loop generating multiple copies of the same data.
|
559
559
|
|
560
|
-
|
560
|
+
The bundled elasticsearch_genid filter can generate a unique _hash key for each record. This key may be passed to the id_key parameter in the elasticsearch plugin to communicate to Elasticsearch the uniqueness of the requests, so that duplicates will be rejected or will simply replace the existing records.
|
561
561
|
Here is a sample config:
|
562
562
|
|
563
563
|
```
|
564
|
-
<
|
565
|
-
|
566
|
-
hash_id_key _hash # storing generated hash id key
|
567
|
-
</
|
568
|
-
|
564
|
+
<filter>
|
565
|
+
@type elasticsearch_genid
|
566
|
+
hash_id_key _hash # storing generated hash id key (default is _hash)
|
567
|
+
</filter>
|
568
|
+
<match **>
|
569
|
+
@type elasticsearch
|
570
|
+
id_key _hash # specify same key name which is specified in hash_id_key
|
571
|
+
remove_keys _hash # Elasticsearch doesn't like keys that start with _
|
572
|
+
# other settings are omitted.
|
573
|
+
</match>
|
569
574
|
```
|
570
575
|
|
571
576
|
### Not seeing a config you need?
|
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'fluent-plugin-elasticsearch'
|
6
|
-
s.version = '2.
|
6
|
+
s.version = '2.2.0.rc.1'
|
7
7
|
s.authors = ['diogo', 'pitr']
|
8
8
|
s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
|
9
9
|
s.description = %q{ElasticSearch output plugin for Fluent event collector}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
require 'base64'
|
3
|
+
require 'fluent/plugin/filter'
|
4
|
+
|
5
|
+
module Fluent::Plugin
|
6
|
+
class ElasticsearchGenidFilter < Filter
|
7
|
+
Fluent::Plugin.register_filter('elasticsearch_genid', self)
|
8
|
+
|
9
|
+
config_param :hash_id_key, :string, :default => '_hash'
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
super
|
13
|
+
end
|
14
|
+
|
15
|
+
def configure(conf)
|
16
|
+
super
|
17
|
+
end
|
18
|
+
|
19
|
+
def filter(tag, time, record)
|
20
|
+
record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
|
21
|
+
record
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -7,7 +7,8 @@ module Fluent
|
|
7
7
|
def self.included(klass)
|
8
8
|
klass.instance_eval {
|
9
9
|
config_section :hash, param_name: :hash_config, required: false, multi: false do
|
10
|
-
config_param :hash_id_key, :string, default: '_hash'
|
10
|
+
config_param :hash_id_key, :string, default: '_hash',
|
11
|
+
obsoleted: "Use bundled filer-elasticsearch-genid instead."
|
11
12
|
end
|
12
13
|
}
|
13
14
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'date'
|
3
|
+
require 'fluent/test/helpers'
|
4
|
+
require 'json'
|
5
|
+
require 'fluent/test/driver/filter'
|
6
|
+
require 'flexmock/test_unit'
|
7
|
+
require 'fluent/plugin/filter_elasticsearch_genid'
|
8
|
+
|
9
|
+
class ElasticsearchGenidFilterTest < Test::Unit::TestCase
|
10
|
+
include FlexMock::TestCase
|
11
|
+
include Fluent::Test::Helpers
|
12
|
+
|
13
|
+
def setup
|
14
|
+
Fluent::Test.setup
|
15
|
+
end
|
16
|
+
|
17
|
+
def create_driver(conf='')
|
18
|
+
Fluent::Test::Driver::Filter.new(Fluent::Plugin::ElasticsearchGenidFilter).configure(conf)
|
19
|
+
end
|
20
|
+
|
21
|
+
def sample_record
|
22
|
+
{'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_configure
|
26
|
+
d = create_driver
|
27
|
+
assert_equal '_hash', d.instance.hash_id_key
|
28
|
+
end
|
29
|
+
|
30
|
+
data("default" => {"hash_id_key" => "_hash"},
|
31
|
+
"custom_key" => {"hash_id_key" => "_edited"},
|
32
|
+
)
|
33
|
+
def test_filter(data)
|
34
|
+
d = create_driver("hash_id_key #{data["hash_id_key"]}")
|
35
|
+
flexmock(SecureRandom).should_receive(:uuid)
|
36
|
+
.and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
|
37
|
+
time = event_time("2017-10-15 15:00:23.34567890 UTC")
|
38
|
+
d.run(default_tag: 'test') do
|
39
|
+
d.feed(time, sample_record)
|
40
|
+
end
|
41
|
+
assert_equal(Base64.strict_encode64(SecureRandom.uuid),
|
42
|
+
d.filtered.map {|e| e.last}.first[d.instance.hash_id_key])
|
43
|
+
end
|
44
|
+
end
|
@@ -228,7 +228,7 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
228
228
|
end
|
229
229
|
|
230
230
|
test 'with invaild generate id config' do
|
231
|
-
|
231
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
232
232
|
driver.configure(Fluent::Config::Element.new(
|
233
233
|
'ROOT', '', {
|
234
234
|
'@type' => 'elasticsearch',
|
@@ -239,7 +239,7 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
239
239
|
}, [])
|
240
240
|
]
|
241
241
|
))
|
242
|
-
|
242
|
+
end
|
243
243
|
end
|
244
244
|
|
245
245
|
def test_template_already_present
|
@@ -585,26 +585,19 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
585
585
|
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
586
586
|
)
|
587
587
|
def test_writes_with_genrate_hash(data)
|
588
|
-
|
589
|
-
|
590
|
-
'
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
stub_elastic
|
601
|
-
flexmock(SecureRandom).should_receive(:uuid)
|
602
|
-
.and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
|
603
|
-
time = event_time("2017-10-15 15:00:23.34567890 UTC")
|
604
|
-
driver.run(default_tag: 'test') do
|
605
|
-
driver.feed(time, sample_record.merge('request_id' => 'elastic'))
|
588
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
589
|
+
driver.configure(Fluent::Config::Element.new(
|
590
|
+
'ROOT', '', {
|
591
|
+
'@type' => 'elasticsearch',
|
592
|
+
'id_key' => data["hash_id_key"],
|
593
|
+
}, [
|
594
|
+
Fluent::Config::Element.new('hash', '', {
|
595
|
+
'keys' => ['request_id'],
|
596
|
+
'hash_id_key' => data["hash_id_key"],
|
597
|
+
}, [])
|
598
|
+
]
|
599
|
+
))
|
606
600
|
end
|
607
|
-
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
608
601
|
end
|
609
602
|
end
|
610
603
|
|
@@ -323,27 +323,19 @@ class ElasticsearchOutputDynamic < Test::Unit::TestCase
|
|
323
323
|
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
324
324
|
)
|
325
325
|
def test_writes_with_genrate_hash(data)
|
326
|
-
|
327
|
-
|
328
|
-
'
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
stub_elastic
|
339
|
-
stub_elastic
|
340
|
-
flexmock(SecureRandom).should_receive(:uuid)
|
341
|
-
.and_return("82120f33-897a-4d9d-b3d5-14afd18fb412")
|
342
|
-
time = event_time("2017-10-15 15:00:23.34567890 UTC")
|
343
|
-
driver.run(default_tag: 'test') do
|
344
|
-
driver.feed(time, sample_record.merge('request_id' => 'elastic'))
|
326
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
327
|
+
driver.configure(Fluent::Config::Element.new(
|
328
|
+
'ROOT', '', {
|
329
|
+
'@type' => 'elasticsearch',
|
330
|
+
'id_key' => data["hash_id_key"],
|
331
|
+
}, [
|
332
|
+
Fluent::Config::Element.new('hash', '', {
|
333
|
+
'keys' => ['request_id'],
|
334
|
+
'hash_id_key' => data["hash_id_key"],
|
335
|
+
}, [])
|
336
|
+
]
|
337
|
+
))
|
345
338
|
end
|
346
|
-
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
347
339
|
end
|
348
340
|
end
|
349
341
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0.rc.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- diogo
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-11-
|
12
|
+
date: 2017-11-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -146,10 +146,12 @@ files:
|
|
146
146
|
- lib/fluent/plugin/elasticsearch_constants.rb
|
147
147
|
- lib/fluent/plugin/elasticsearch_error_handler.rb
|
148
148
|
- lib/fluent/plugin/elasticsearch_index_template.rb
|
149
|
+
- lib/fluent/plugin/filter_elasticsearch_genid.rb
|
149
150
|
- lib/fluent/plugin/generate_hash_id_support.rb
|
150
151
|
- lib/fluent/plugin/out_elasticsearch.rb
|
151
152
|
- lib/fluent/plugin/out_elasticsearch_dynamic.rb
|
152
153
|
- test/helper.rb
|
154
|
+
- test/plugin/test_filter_elasticsearch_genid.rb
|
153
155
|
- test/plugin/test_out_elasticsearch.rb
|
154
156
|
- test/plugin/test_out_elasticsearch_dynamic.rb
|
155
157
|
- test/plugin/test_template.json
|
@@ -168,9 +170,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
168
170
|
version: '2.0'
|
169
171
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
172
|
requirements:
|
171
|
-
- - "
|
173
|
+
- - ">"
|
172
174
|
- !ruby/object:Gem::Version
|
173
|
-
version:
|
175
|
+
version: 1.3.1
|
174
176
|
requirements: []
|
175
177
|
rubyforge_project:
|
176
178
|
rubygems_version: 2.6.13
|
@@ -179,6 +181,7 @@ specification_version: 4
|
|
179
181
|
summary: ElasticSearch output plugin for Fluent event collector
|
180
182
|
test_files:
|
181
183
|
- test/helper.rb
|
184
|
+
- test/plugin/test_filter_elasticsearch_genid.rb
|
182
185
|
- test/plugin/test_out_elasticsearch.rb
|
183
186
|
- test/plugin/test_out_elasticsearch_dynamic.rb
|
184
187
|
- test/plugin/test_template.json
|