fluent-plugin-elasticsearch 1.11.1 → 1.12.0.rc.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.md +3 -0
- data/README.md +11 -5
- data/fluent-plugin-elasticsearch.gemspec +1 -1
- data/lib/fluent/plugin/filter_elasticsearch_genid.rb +25 -0
- data/lib/fluent/plugin/generate_hash_id_support.rb +3 -1
- data/lib/fluent/plugin/out_elasticsearch.rb +0 -8
- data/test/plugin/test_filter_elasticsearch_genid.rb +40 -0
- data/test/plugin/test_out_elasticsearch.rb +15 -21
- data/test/plugin/test_out_elasticsearch_dynamic.rb +13 -20
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7214256a17bb6711403a1eee956cb8c3a040b8d4
|
4
|
+
data.tar.gz: cd8677126edbf697885826f46fa92c0519821f18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcd8ef4cf0ddd8ab0eb6ca11020ee97ec823727c53fe5eb56a0460c9588e4b1ac13a99bc996bae44872a1cfa6a3bc88f68ad391efacdedad08d6092522d69857
|
7
|
+
data.tar.gz: b8aeeda2d04b325dca968b38dacf3f150d1a1239c431bcb6f0c1b3becbbf59b5922dd6289e35eb5e4885944ae3b940acad5f271c873c3ad9948e644f02a831de
|
data/History.md
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
- Log ES response errors (#230)
|
5
5
|
- Use latest elasticsearch-ruby (#240)
|
6
6
|
|
7
|
+
### 1.12.0.rc.1
|
8
|
+
- Backport separating generate hash id module and bundled new plugin for generating unique hash id (#331)
|
9
|
+
|
7
10
|
### 1.11.1
|
8
11
|
- Raise ConfigError when specifying different @hash_config.hash_id_key and id_key configuration (#326)
|
9
12
|
- backport small typo fix in README.md (#328)
|
data/README.md
CHANGED
@@ -517,14 +517,20 @@ Note that the flattener does not deal with arrays at this time.
|
|
517
517
|
|
518
518
|
By default, the fluentd elasticsearch plugin does not emit records with a _id field, leaving it to Elasticsearch to generate a unique _id as the record is indexed. When an Elasticsearch cluster is congested and begins to take longer to respond than the configured request_timeout, the fluentd elasticsearch plugin will re-send the same bulk request. Since Elasticsearch can't tell it's actually the same request, all documents in the request are indexed again, resulting in duplicate data. In certain scenarios, this can result in essentially an infinite loop generating multiple copies of the same data.
|
519
519
|
|
520
|
-
|
520
|
+
The bundled elasticsearch_genid filter can generate a unique _hash key for each record. This key may be passed to the id_key parameter in the elasticsearch plugin to communicate to Elasticsearch the uniqueness of the requests, so that duplicates will be rejected or simply replace the existing records.
|
521
521
|
Here is a sample config:
|
522
522
|
|
523
523
|
```
|
524
|
-
<
|
525
|
-
|
526
|
-
|
527
|
-
|
524
|
+
<filter>
|
525
|
+
@type elasticsearch_genid
|
526
|
+
hash_id_key _hash # storing generated hash id key (default is _hash)
|
527
|
+
</filter>
|
528
|
+
<match **>
|
529
|
+
@type elasticsearch
|
530
|
+
id_key _hash # specify same key name which is specified in hash_id_key
|
531
|
+
remove_keys _hash # Elasticsearch doesn't like keys that start with _
|
532
|
+
# other settings are omitted.
|
533
|
+
</match>
|
528
534
|
```
|
529
535
|
|
530
536
|
### Not seeing a config you need?
|
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'fluent-plugin-elasticsearch'
|
6
|
-
s.version = '1.
|
6
|
+
s.version = '1.12.0.rc.1'
|
7
7
|
s.authors = ['diogo', 'pitr']
|
8
8
|
s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com']
|
9
9
|
s.description = %q{ElasticSearch output plugin for Fluent event collector}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
require 'base64'
|
3
|
+
require 'fluent/filter'
|
4
|
+
|
5
|
+
module Fluent
|
6
|
+
class ElasticsearchGenidFilter < Filter
|
7
|
+
Fluent::Plugin.register_filter('elasticsearch_genid', self)
|
8
|
+
|
9
|
+
config_param :hash_id_key, :string, :default => '_hash'
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
super
|
13
|
+
end
|
14
|
+
|
15
|
+
def configure(conf)
|
16
|
+
super
|
17
|
+
end
|
18
|
+
|
19
|
+
def filter(tag, time, record)
|
20
|
+
record[@hash_id_key] = Base64.strict_encode64(SecureRandom.uuid)
|
21
|
+
record
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -6,7 +6,9 @@ module Fluent
|
|
6
6
|
def self.included(klass)
|
7
7
|
klass.instance_eval {
|
8
8
|
config_section :hash, param_name: :hash_config, required: false, multi: false do
|
9
|
-
config_param :hash_id_key, :string, default: '_hash'
|
9
|
+
config_param :hash_id_key, :string, default: '_hash',
|
10
|
+
obsoleted: "Use bundled filer-elasticsearch-genid instead."
|
11
|
+
|
10
12
|
end
|
11
13
|
}
|
12
14
|
end
|
@@ -242,14 +242,6 @@ class Fluent::ElasticsearchOutput < Fluent::ObjectBufferedOutput
|
|
242
242
|
end.join(', ')
|
243
243
|
end
|
244
244
|
|
245
|
-
BODY_DELIMITER = "\n".freeze
|
246
|
-
UPDATE_OP = "update".freeze
|
247
|
-
UPSERT_OP = "upsert".freeze
|
248
|
-
CREATE_OP = "create".freeze
|
249
|
-
INDEX_OP = "index".freeze
|
250
|
-
ID_FIELD = "_id".freeze
|
251
|
-
TIMESTAMP_FIELD = "@timestamp".freeze
|
252
|
-
|
253
245
|
def append_record_to_messages(op, meta, header, record, msgs)
|
254
246
|
case op
|
255
247
|
when UPDATE_OP, UPSERT_OP
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'date'
|
3
|
+
require 'json'
|
4
|
+
require 'flexmock/test_unit'
|
5
|
+
require 'fluent/plugin/filter_elasticsearch_genid'
|
6
|
+
|
7
|
+
class ElasticsearchGenidFilterTest < Test::Unit::TestCase
|
8
|
+
include FlexMock::TestCase
|
9
|
+
|
10
|
+
def setup
|
11
|
+
Fluent::Test.setup
|
12
|
+
end
|
13
|
+
|
14
|
+
def create_driver(conf='')
|
15
|
+
Fluent::Test::FilterTestDriver.new(Fluent::ElasticsearchGenidFilter).configure(conf)
|
16
|
+
end
|
17
|
+
|
18
|
+
def sample_record
|
19
|
+
{'age' => 26, 'request_id' => '42', 'parent_id' => 'parent', 'routing_id' => 'routing'}
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_configure
|
23
|
+
d = create_driver
|
24
|
+
assert_equal '_hash', d.instance.hash_id_key
|
25
|
+
end
|
26
|
+
|
27
|
+
data("default" => {"hash_id_key" => "_hash"},
|
28
|
+
"custom_key" => {"hash_id_key" => "_edited"},
|
29
|
+
)
|
30
|
+
def test_filter(data)
|
31
|
+
d = create_driver("hash_id_key #{data["hash_id_key"]}")
|
32
|
+
flexmock(SecureRandom).should_receive(:uuid)
|
33
|
+
.and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
|
34
|
+
d.run do
|
35
|
+
d.filter(sample_record)
|
36
|
+
end
|
37
|
+
assert_equal(Base64.strict_encode64(SecureRandom.uuid),
|
38
|
+
d.filtered_as_array.map {|e| e.last}.first[d.instance.hash_id_key])
|
39
|
+
end
|
40
|
+
end
|
@@ -205,7 +205,7 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
205
205
|
end
|
206
206
|
|
207
207
|
def test_configure_with_invaild_generate_id_config
|
208
|
-
|
208
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
209
209
|
driver.configure(Fluent::Config::Element.new(
|
210
210
|
'ROOT', '', {
|
211
211
|
'@type' => 'elasticsearch',
|
@@ -216,7 +216,7 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
216
216
|
}, [])
|
217
217
|
]
|
218
218
|
))
|
219
|
-
|
219
|
+
end
|
220
220
|
end
|
221
221
|
|
222
222
|
def test_template_already_present
|
@@ -558,25 +558,19 @@ class ElasticsearchOutput < Test::Unit::TestCase
|
|
558
558
|
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
559
559
|
)
|
560
560
|
def test_writes_with_genrate_hash(data)
|
561
|
-
|
562
|
-
|
563
|
-
'
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
flexmock(SecureRandom).should_receive(:uuid)
|
575
|
-
.and_return("13a0c028-bf7c-4ae2-ad03-ec09a40006df")
|
576
|
-
time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
|
577
|
-
driver.emit(sample_record.merge('request_id' => 'elastic'), time)
|
578
|
-
driver.run
|
579
|
-
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
561
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
562
|
+
driver.configure(Fluent::Config::Element.new(
|
563
|
+
'ROOT', '', {
|
564
|
+
'@type' => 'elasticsearch',
|
565
|
+
'id_key' => data["hash_id_key"],
|
566
|
+
}, [
|
567
|
+
Fluent::Config::Element.new('hash', '', {
|
568
|
+
'keys' => ['request_id'],
|
569
|
+
'hash_id_key' => data["hash_id_key"],
|
570
|
+
}, [])
|
571
|
+
]
|
572
|
+
))
|
573
|
+
end
|
580
574
|
end
|
581
575
|
end
|
582
576
|
|
@@ -313,26 +313,19 @@ class ElasticsearchOutputDynamic < Test::Unit::TestCase
|
|
313
313
|
"custom hash_id_key" => {"hash_id_key" => '_hash_id'},
|
314
314
|
)
|
315
315
|
def test_writes_with_genrate_hash(data)
|
316
|
-
|
317
|
-
|
318
|
-
'
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
stub_elastic
|
330
|
-
flexmock(SecureRandom).should_receive(:uuid)
|
331
|
-
.and_return("82120f33-897a-4d9d-b3d5-14afd18fb412")
|
332
|
-
time = Time.parse("2017-10-15 15:00:23.34567890 UTC").to_i
|
333
|
-
driver.emit(sample_record.merge('request_id' => 'elastic'), time)
|
334
|
-
driver.run
|
335
|
-
assert_equal(Base64.strict_encode64(SecureRandom.uuid), index_cmds[1]["#{data["hash_id_key"]}"])
|
316
|
+
assert_raise_message(/Use bundled filer-elasticsearch-genid instead./) do
|
317
|
+
driver.configure(Fluent::Config::Element.new(
|
318
|
+
'ROOT', '', {
|
319
|
+
'@type' => 'elasticsearch',
|
320
|
+
'id_key' => data["hash_id_key"],
|
321
|
+
}, [
|
322
|
+
Fluent::Config::Element.new('hash', '', {
|
323
|
+
'keys' => ['request_id'],
|
324
|
+
'hash_id_key' => data["hash_id_key"],
|
325
|
+
}, [])
|
326
|
+
]
|
327
|
+
))
|
328
|
+
end
|
336
329
|
end
|
337
330
|
end
|
338
331
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.0.rc.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- diogo
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-11-
|
12
|
+
date: 2017-11-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -147,10 +147,12 @@ files:
|
|
147
147
|
- lib/fluent/plugin/elasticsearch_constants.rb
|
148
148
|
- lib/fluent/plugin/elasticsearch_error_handler.rb
|
149
149
|
- lib/fluent/plugin/elasticsearch_index_template.rb
|
150
|
+
- lib/fluent/plugin/filter_elasticsearch_genid.rb
|
150
151
|
- lib/fluent/plugin/generate_hash_id_support.rb
|
151
152
|
- lib/fluent/plugin/out_elasticsearch.rb
|
152
153
|
- lib/fluent/plugin/out_elasticsearch_dynamic.rb
|
153
154
|
- test/helper.rb
|
155
|
+
- test/plugin/test_filter_elasticsearch_genid.rb
|
154
156
|
- test/plugin/test_out_elasticsearch.rb
|
155
157
|
- test/plugin/test_out_elasticsearch_dynamic.rb
|
156
158
|
- test/plugin/test_template.json
|
@@ -169,9 +171,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
169
171
|
version: '2.0'
|
170
172
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
173
|
requirements:
|
172
|
-
- - "
|
174
|
+
- - ">"
|
173
175
|
- !ruby/object:Gem::Version
|
174
|
-
version:
|
176
|
+
version: 1.3.1
|
175
177
|
requirements: []
|
176
178
|
rubyforge_project:
|
177
179
|
rubygems_version: 2.6.13
|
@@ -180,6 +182,7 @@ specification_version: 4
|
|
180
182
|
summary: ElasticSearch output plugin for Fluent event collector
|
181
183
|
test_files:
|
182
184
|
- test/helper.rb
|
185
|
+
- test/plugin/test_filter_elasticsearch_genid.rb
|
183
186
|
- test/plugin/test_out_elasticsearch.rb
|
184
187
|
- test/plugin/test_out_elasticsearch_dynamic.rb
|
185
188
|
- test/plugin/test_template.json
|