fluent-plugin-elasticsearch 5.0.2 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa89d08181943944c6bda419dcf34ef9f1bbe94b67bb270c293d8a0fa03dced0
4
- data.tar.gz: d276c5a35783d83608032740c69e2d33e9b6462f507b83e4ff393feb4ed1d151
3
+ metadata.gz: 89dfb07388fcdb941bd3dc13a9e481fcb2ecf8f4a0b3c828b6a539f13c90e8ee
4
+ data.tar.gz: ee8ac22c05d144076e62311e013fea483d72493d6066f87712b5c325f3c215e7
5
5
  SHA512:
6
- metadata.gz: 28c78fb744db4ae40a31f27a4d25fed52d278cfc8cf39c5d5bcbcf129302c1a2157e609179ecfa6693465454c131834bb04d6f40b42a46c6cdf45e71f3036291
7
- data.tar.gz: f19746088603311f3aa06787fcc219e810ef97762ae72563b557a00ad29518a06f4c641a951913be3865b86cae7cda2329856c4d2317a0a8a2360109aa8ffca5
6
+ metadata.gz: a69fbfa9c0fab591f55656d87cc99aa003aa4e3b7a7d1b2819bd385e70d0c67d7b91dfff0fa42cfeafe94b6fe8f467b069ed075a2776809d0615f6e243f8c355
7
+ data.tar.gz: 572585eeb9e939d2b08e5fe0590bd25f02b8390a730d9f3012c44604b33123d9372cf59ac50a108119b413f3d718e2a606a0b21dc62f676199329e3b12c6073c
@@ -8,7 +8,7 @@ jobs:
8
8
  strategy:
9
9
  fail-fast: false
10
10
  matrix:
11
- ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
+ ruby: [ '2.6', '2.7', '3.0' ]
12
12
  os:
13
13
  - ubuntu-latest
14
14
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -8,7 +8,7 @@ jobs:
8
8
  strategy:
9
9
  fail-fast: false
10
10
  matrix:
11
- ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
+ ruby: [ '2.6', '2.7', '3.0' ]
12
12
  os:
13
13
  - macOS-latest
14
14
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
@@ -8,7 +8,7 @@ jobs:
8
8
  strategy:
9
9
  fail-fast: false
10
10
  matrix:
11
- ruby: [ '2.5', '2.6', '2.7', '3.0' ]
11
+ ruby: [ '2.6', '2.7', '3.0' ]
12
12
  os:
13
13
  - windows-latest
14
14
  name: Ruby ${{ matrix.ruby }} unit testing on ${{ matrix.os }}
data/History.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ### [Unreleased]
4
4
 
5
+ ### 5.1.0
6
+ - Correct default target bytes value (#914)
7
+ - Handle elasticsearch-ruby 7.14 properly (#913)
8
+
9
+ ### 5.0.5
10
+ - Drop json_parse_exception messages for bulk failures (#900)
11
+ - GitHub Actions: Drop Ruby 2.5 due to EOL (#894)
12
+
13
+ ### 5.0.4
14
+ - test: out_elasticsearch: Remove a needless headers from affinity stub (#888)
15
+ - Target Index Affinity (#883)
16
+
17
+ ### 5.0.3
18
+ - Fix use_legacy_template documentation (#880)
19
+ - Add FAQ for dynamic index/template (#878)
20
+ - Handle IPv6 address string on host and hosts parameters (#877)
21
+
5
22
  ### 5.0.2
6
23
  - GitHub Actions: Tweak Ruby versions on test (#875)
7
24
  - test: datastreams: Set nonexistent datastream as default (#874)
@@ -10,6 +10,7 @@
10
10
  + [Random 400 - Rejected by Elasticsearch is occured, why?](#random-400---rejected-by-elasticsearch-is-occured-why)
11
11
  + [Fluentd seems to hang if it unable to connect Elasticsearch, why?](#fluentd-seems-to-hang-if-it-unable-to-connect-elasticsearch-why)
12
12
  + [Enable Index Lifecycle Management](#enable-index-lifecycle-management)
13
+ + [Configuring for dynamic index or template](#configuring-for-dynamic-index-or-template)
13
14
  + [How to specify index codec](#how-to-specify-index-codec)
14
15
  + [Cannot push logs to Elasticsearch with connect_write timeout reached, why?](#cannot-push-logs-to-elasticsearch-with-connect_write-timeout-reached-why)
15
16
 
@@ -524,6 +525,96 @@ template_name your-fluentd-template
524
525
  template_file /path/to/fluentd-template.json
525
526
  ```
526
527
 
528
+ #### Configuring for dynamic index or template
529
+
530
+ Some users want to set up ILM for a dynamic index/template.
531
+ `index_patterns` and `template.settings.index.lifecycle.name` in the Elasticsearch template will be overwritten by the Elasticsearch plugin:
532
+
533
+ ```json
534
+ {
535
+ "index_patterns": ["mock"],
536
+ "template": {
537
+ "settings": {
538
+ "index": {
539
+ "lifecycle": {
540
+ "name": "mock",
541
+ "rollover_alias": "mock"
542
+ },
543
+ "number_of_shards": "<<shard>>",
544
+ "number_of_replicas": "<<replica>>"
545
+ }
546
+ }
547
+ }
548
+ }
549
+ ```
550
+
551
+ This template will be handled with:
552
+
553
+ ```aconf
554
+ <source>
555
+ @type http
556
+ port 5004
557
+ bind 0.0.0.0
558
+ body_size_limit 32m
559
+ keepalive_timeout 10s
560
+ <parse>
561
+ @type json
562
+ </parse>
563
+ </source>
564
+
565
+ <match kubernetes.var.log.containers.**etl-webserver**.log>
566
+ @type elasticsearch
567
+ @id out_es_etl_webserver
568
+ @log_level info
569
+ include_tag_key true
570
+ host $HOST
571
+ port $PORT
572
+ path "#{ENV['FLUENT_ELASTICSEARCH_PATH']}"
573
+ request_timeout "#{ENV['FLUENT_ELASTICSEARCH_REQUEST_TIMEOUT'] || '30s'}"
574
+ scheme "#{ENV['FLUENT_ELASTICSEARCH_SCHEME'] || 'http'}"
575
+ ssl_verify "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERIFY'] || 'true'}"
576
+ ssl_version "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERSION'] || 'TLSv1'}"
577
+ reload_connections "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_CONNECTIONS'] || 'false'}"
578
+ reconnect_on_error "#{ENV['FLUENT_ELASTICSEARCH_RECONNECT_ON_ERROR'] || 'true'}"
579
+ reload_on_failure "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_ON_FAILURE'] || 'true'}"
580
+ log_es_400_reason "#{ENV['FLUENT_ELASTICSEARCH_LOG_ES_400_REASON'] || 'false'}"
581
+ logstash_prefix "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_PREFIX'] || 'etl-webserver'}"
582
+ logstash_format "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_FORMAT'] || 'false'}"
583
+ index_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_INDEX_NAME'] || 'etl-webserver'}"
584
+ type_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_TYPE_NAME'] || 'fluentd'}"
585
+ time_key "#{ENV['FLUENT_ELASTICSEARCH_TIME_KEY'] || '@timestamp'}"
586
+ include_timestamp "#{ENV['FLUENT_ELASTICSEARCH_INCLUDE_TIMESTAMP'] || 'true'}"
587
+
588
+ # ILM Settings - WITH ROLLOVER support
589
+ # https://github.com/uken/fluent-plugin-elasticsearch#enable-index-lifecycle-management
590
+ application_name "etl-webserver"
591
+ index_date_pattern ""
592
+ # Policy configurations
593
+ enable_ilm true
594
+ ilm_policy_id etl-webserver
595
+ ilm_policy_overwrite true
596
+ ilm_policy {"policy": {"phases": {"hot": {"min_age": "0ms","actions": {"rollover": {"max_age": "5m","max_size": "3gb"},"set_priority": {"priority": 100}}},"delete": {"min_age": "30d","actions": {"delete": {"delete_searchable_snapshot": true}}}}}}
597
+ use_legacy_template false
598
+ template_name etl-webserver
599
+ template_file /configs/index-template.json
600
+ template_overwrite true
601
+ customize_template {"<<shard>>": "3","<<replica>>": "0"}
602
+
603
+ <buffer>
604
+ flush_thread_count "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_THREAD_COUNT'] || '8'}"
605
+ flush_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_INTERVAL'] || '5s'}"
606
+ chunk_limit_size "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_CHUNK_LIMIT_SIZE'] || '8MB'}"
607
+ total_limit_size "#{ENV['FLUENT_ELASTICSEARCH_TOTAL_LIMIT_SIZE'] || '450MB'}"
608
+ queue_limit_length "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_QUEUE_LIMIT_LENGTH'] || '32'}"
609
+ retry_max_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_RETRY_MAX_INTERVAL'] || '60s'}"
610
+ retry_forever false
611
+ </buffer>
612
+ </match>
613
+ ```
614
+
615
+ For more details, please refer to the discussion:
616
+ https://github.com/uken/fluent-plugin-elasticsearch/issues/867
617
+
527
618
  ### How to specify index codec
528
619
 
529
620
  Elasticsearch can handle compression methods for stored data such as LZ4 and best_compression.
data/README.md CHANGED
@@ -11,7 +11,7 @@ Send your logs to Elasticsearch (and search them with Kibana maybe?)
11
11
 
12
12
  Note: For Amazon Elasticsearch Service please consider using [fluent-plugin-aws-elasticsearch-service](https://github.com/atomita/fluent-plugin-aws-elasticsearch-service)
13
13
 
14
- Current maintainers: @cosmo0920
14
+ Current maintainers: [Hiroshi Hatake | @cosmo0920](https://github.com/cosmo0920), [Kentaro Hayashi | @kenhys](https://github.com/kenhys)
15
15
 
16
16
  * [Installation](#installation)
17
17
  * [Usage](#usage)
@@ -38,6 +38,7 @@ Current maintainers: @cosmo0920
38
38
  + [suppress_type_name](#suppress_type_name)
39
39
  + [target_index_key](#target_index_key)
40
40
  + [target_type_key](#target_type_key)
41
+ + [target_index_affinity](#target_index_affinity)
41
42
  + [template_name](#template_name)
42
43
  + [template_file](#template_file)
43
44
  + [template_overwrite](#template_overwrite)
@@ -171,6 +172,24 @@ You can specify Elasticsearch host by this parameter.
171
172
 
172
173
  **Note:** Since v3.3.2, `host` parameter supports builtin placeholders. If you want to send events dynamically into different hosts at runtime with `elasticsearch_dynamic` output plugin, please consider to switch to use plain `elasticsearch` output plugin. In more detail for builtin placeholders, please refer to [Placeholders](#placeholders) section.
173
174
 
175
+ To use an IPv6 address in the `host` parameter, you can use either of the following styles:
176
+
177
+ #### string style
178
+
179
+ To use string style, you must quote the IPv6 address to prevent it from being interpreted as JSON:
180
+
181
+ ```
182
+ host "[2404:7a80:d440:3000:192a:a292:bd7f:ca10]"
183
+ ```
184
+
185
+ #### raw style
186
+
187
+ You can also specify a raw IPv6 address. This will be handled as `[specified IPv6 address]`:
188
+
189
+ ```
190
+ host 2404:7a80:d440:3000:192a:a292:bd7f:ca10
191
+ ```
192
+
174
193
  ### port
175
194
 
176
195
  ```
@@ -237,6 +256,16 @@ hosts host1:port1,host2:port2,host3 # port3 is 9200
237
256
 
238
257
  **Note:** Up until v2.8.5, it was allowed to embed the username/password in the URL. However, this syntax is deprecated as of v2.8.6 because it was found to cause serious connection problems (See #394). Please migrate your settings to use the `user` and `password` field (described below) instead.
239
258
 
259
+ #### IPv6 addresses
260
+
261
+ When you want to specify IPv6 addresses, you must specify the scheme together with each host:
262
+
263
+ ```
264
+ hosts http://[2404:7a80:d440:3000:de:7311:6329:2e6c]:port1,http://[2404:7a80:d440:3000:de:7311:6329:1e6c]:port2,http://[2404:7a80:d440:3000:de:6311:6329:2e6c]:port3
265
+ ```
266
+
267
+ If you specify IPv6 hosts without a scheme, the Elasticsearch plugin complains with an Invalid URI error for them.
268
+
240
269
  ### user, password, path, scheme, ssl_verify
241
270
 
242
271
  ```
@@ -426,6 +455,75 @@ and this record will be written to the specified index (`logstash-2014.12.19`) r
426
455
 
427
456
  Similar to `target_index_key` config, find the type name to write to in the record under this key (or nested record). If key not found in record - fallback to `type_name` (default "fluentd").
428
457
 
458
+ ### target_index_affinity
459
+
460
+ Enable the plugin to dynamically select the logstash time-based target index in update/upsert operations based on already indexed data rather than the current time of indexing.
461
+
462
+ ```
463
+ target_index_affinity true # defaults to false
464
+ ```
465
+
466
+ By default, the plugin writes data to a logstash format index based on the current time. For example, with a daily index, data arriving after midnight is written to a newly created index. This is normally OK when data comes from a single source and is not updated after indexing.
467
+
468
+ But suppose you have a use case where data is also updated after indexing, and `id_key` is used to identify the document uniquely for updating. Logstash format is wanted for easy data management and retention. Updates are done right after indexing to complete the data (not all data is available from a single source), and no updates are done at any later point in time. In this case, a problem happens at index rotation time, where writes to 2 indexes with the same `id_key` value may happen.
469
+
470
+ This setting searches existing data by using Elasticsearch's [ids query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html) with the `id_key` value (using an index pattern built from logstash_prefix and logstash_prefix_separator, e.g. `logstash-*`). The index of the found data is used for update/upsert. When no data is found, data is written to the current logstash index as usual.
471
+
472
+ This setting requires the following other settings:
473
+ ```
474
+ logstash_format true
475
+ id_key myId # Some field on your data to identify the data uniquely
476
+ write_operation upsert # upsert or update
477
+ ```
478
+
479
+ Suppose you have the following situation where you have 2 different `<match>` sections consuming data from 2 different Kafka topics independently but close in time to each other (order not known).
480
+
481
+ ```
482
+ <match data1>
483
+ @type elasticsearch
484
+ ...
485
+ id_key myId
486
+ write_operation upsert
487
+ logstash_format true
488
+ logstash_dateformat %Y.%m.%d
489
+ logstash_prefix myindexprefix
490
+ target_index_affinity true
491
+ ...
492
+
493
+ <match data2>
494
+ @type elasticsearch
495
+ ...
496
+ id_key myId
497
+ write_operation upsert
498
+ logstash_format true
499
+ logstash_dateformat %Y.%m.%d
500
+ logstash_prefix myindexprefix
501
+ target_index_affinity true
502
+ ...
503
+ ```
504
+
505
+ If your first (data1) input is:
506
+ ```
507
+ {
508
+ "myId": "myuniqueId1",
509
+ "datafield1": "some value",
510
+ }
511
+ ```
512
+
513
+ and your second (data2) input is:
514
+ ```
515
+ {
516
+ "myId": "myuniqueId1",
517
+ "datafield99": "some important data from other source tightly related to id myuniqueId1 and wanted to be in same document.",
518
+ }
519
+ ```
520
+
521
+ Today's date is 10.05.2021, so data is written to index `myindexprefix-2021.05.10` when both data1 and data2 are consumed today.
522
+ But when we are close to index rotation and data1 is consumed and indexed at `2021-05-10T23:59:55.59707672Z` and data2
523
+ is consumed a bit later at `2021-05-11T00:00:58.222079Z` i.e. logstash index has been rotated and normally data2 would have been written
524
+ to index `myindexprefix-2021.05.11`. But with `target_index_affinity` set to true, data2 is now written to index `myindexprefix-2021.05.10`
525
+ into the same document as data1, as wanted, and a duplicated document is avoided.
526
+
429
527
  ### template_name
430
528
 
431
529
  The name of the template to define. If a template by the name given is already present, it will be left unchanged, unless [template_overwrite](#template_overwrite) is set, in which case the template will be updated.
@@ -1325,9 +1423,9 @@ Default value is `nil`.
1325
1423
 
1326
1424
  Use legacy template or not.
1327
1425
 
1328
- Elasticsearch 7.8 or later supports the brand new composable templates.
1426
+ For Elasticsearch 7.8 or later, users can specify this parameter as `false` if their [template_file](#template_file) contains a composable index template.
1329
1427
 
1330
- For Elasticsearch 7.7 or older, users should specify this parameter as `false`.
1428
+ For Elasticsearch 7.7 or older, users should specify this parameter as `true`.
1331
1429
 
1332
1430
  Composable template documentation is [Put Index Template API | Elasticsearch Reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html) and legacy template documentation is [Index Templates | Elasticsearch Reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates-v1.html).
1333
1431
 
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'fluent-plugin-elasticsearch'
6
- s.version = '5.0.2'
6
+ s.version = '5.1.0'
7
7
  s.authors = ['diogo', 'pitr', 'Hiroshi Hatake']
8
8
  s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com', 'cosmo0920.wp@gmail.com']
9
9
  s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -23,6 +23,10 @@ class Fluent::Plugin::ElasticsearchErrorHandler
23
23
  unrecoverable_error_types.include?(type)
24
24
  end
25
25
 
26
+ def unrecoverable_record_error?(type)
27
+ ['json_parse_exception'].include?(type)
28
+ end
29
+
26
30
  def log_es_400_reason(&block)
27
31
  if @plugin.log_es_400_reason
28
32
  block.call
@@ -43,15 +47,17 @@ class Fluent::Plugin::ElasticsearchErrorHandler
43
47
  stats = Hash.new(0)
44
48
  meta = {}
45
49
  header = {}
50
+ affinity_target_indices = @plugin.get_affinity_target_indices(chunk)
46
51
  chunk.msgpack_each do |time, rawrecord|
47
52
  bulk_message = ''
48
53
  next unless rawrecord.is_a? Hash
49
54
  begin
50
55
  # we need a deep copy for process_message to alter
51
56
  processrecord = Marshal.load(Marshal.dump(rawrecord))
52
- meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, extracted_values)
57
+ meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, affinity_target_indices, extracted_values)
53
58
  next unless @plugin.append_record_to_messages(@plugin.write_operation, meta, header, record, bulk_message)
54
59
  rescue => e
60
+ @plugin.log.debug("Exception in error handler during deep copy: #{e}")
55
61
  stats[:bad_chunk_record] += 1
56
62
  next
57
63
  end
@@ -105,10 +111,15 @@ class Fluent::Plugin::ElasticsearchErrorHandler
105
111
  elsif item[write_operation].has_key?('error') && item[write_operation]['error'].has_key?('type')
106
112
  type = item[write_operation]['error']['type']
107
113
  stats[type] += 1
108
- retry_stream.add(time, rawrecord)
109
114
  if unrecoverable_error?(type)
110
115
  raise ElasticsearchRequestAbortError, "Rejected Elasticsearch due to #{type}"
111
116
  end
117
+ if unrecoverable_record_error?(type)
118
+ @plugin.router.emit_error_event(tag, time, rawrecord, ElasticsearchError.new("#{status} - #{type}: #{reason}"))
119
+ next
120
+ else
121
+ retry_stream.add(time, rawrecord) unless unrecoverable_record_error?(type)
122
+ end
112
123
  else
113
124
  # When we don't have a type field, something changed in the API
114
125
  # expected return values (ES 2.x)
@@ -32,13 +32,25 @@ module Fluent::ElasticsearchIndexTemplate
32
32
  return false
33
33
  end
34
34
 
35
+ def host_unreachable_exceptions
36
+ if Gem::Version.new(::Elasticsearch::Transport::VERSION) >= Gem::Version.new("7.14.0")
37
+ # elasticsearch-ruby 7.14.0's elasticsearch-transport does not extends
38
+ # Elasticsearch class on Transport.
39
+ # This is why #host_unreachable_exceptions is not callable directly
40
+ # via transport (not transport's transport instance accessor) any more.
41
+ client.transport.transport.host_unreachable_exceptions
42
+ else
43
+ client.transport.host_unreachable_exceptions
44
+ end
45
+ end
46
+
35
47
  def retry_operate(max_retries, fail_on_retry_exceed = true, catch_trasport_exceptions = true)
36
48
  return unless block_given?
37
49
  retries = 0
38
50
  transport_errors = Elasticsearch::Transport::Transport::Errors.constants.map{ |c| Elasticsearch::Transport::Transport::Errors.const_get c } if catch_trasport_exceptions
39
51
  begin
40
52
  yield
41
- rescue *client.transport.host_unreachable_exceptions, *transport_errors, Timeout::Error => e
53
+ rescue *host_unreachable_exceptions, *transport_errors, Timeout::Error => e
42
54
  @_es = nil
43
55
  @_es_info = nil
44
56
  if retries < max_retries
@@ -2,6 +2,7 @@
2
2
  require 'date'
3
3
  require 'excon'
4
4
  require 'elasticsearch'
5
+ require 'set'
5
6
  begin
6
7
  require 'elasticsearch/xpack'
7
8
  rescue LoadError
@@ -13,6 +14,7 @@ begin
13
14
  require 'strptime'
14
15
  rescue LoadError
15
16
  end
17
+ require 'resolv'
16
18
 
17
19
  require 'fluent/plugin/output'
18
20
  require 'fluent/event'
@@ -70,7 +72,7 @@ module Fluent::Plugin
70
72
  DEFAULT_TYPE_NAME_ES_7x = "_doc".freeze
71
73
  DEFAULT_TYPE_NAME = "fluentd".freeze
72
74
  DEFAULT_RELOAD_AFTER = -1
73
- TARGET_BULK_BYTES = 20 * 1024 * 1024
75
+ DEFAULT_TARGET_BULK_BYTES = -1
74
76
  DEFAULT_POLICY_ID = "logstash-policy"
75
77
 
76
78
  config_param :host, :string, :default => 'localhost'
@@ -164,7 +166,7 @@ EOC
164
166
  config_param :suppress_doc_wrap, :bool, :default => false
165
167
  config_param :ignore_exceptions, :array, :default => [], value_type: :string, :desc => "Ignorable exception list"
166
168
  config_param :exception_backup, :bool, :default => true, :desc => "Chunk backup flag when ignore exception occured"
167
- config_param :bulk_message_request_threshold, :size, :default => TARGET_BULK_BYTES
169
+ config_param :bulk_message_request_threshold, :size, :default => DEFAULT_TARGET_BULK_BYTES
168
170
  config_param :compression_level, :enum, list: [:no_compression, :best_speed, :best_compression, :default_compression], :default => :no_compression
169
171
  config_param :enable_ilm, :bool, :default => false
170
172
  config_param :ilm_policy_id, :string, :default => DEFAULT_POLICY_ID
@@ -174,6 +176,7 @@ EOC
174
176
  config_param :truncate_caches_interval, :time, :default => nil
175
177
  config_param :use_legacy_template, :bool, :default => true
176
178
  config_param :catch_transport_exception_on_retry, :bool, :default => true
179
+ config_param :target_index_affinity, :bool, :default => false
177
180
 
178
181
  config_section :metadata, param_name: :metainfo, multi: false do
179
182
  config_param :include_chunk_id, :bool, :default => false
@@ -668,7 +671,11 @@ EOC
668
671
  end
669
672
  end.compact
670
673
  else
671
- [{host: @host, port: @port, scheme: @scheme.to_s}]
674
+ if Resolv::IPv6::Regex.match(@host)
675
+ [{host: "[#{@host}]", scheme: @scheme.to_s, port: @port}]
676
+ else
677
+ [{host: @host, port: @port, scheme: @scheme.to_s}]
678
+ end
672
679
  end.each do |host|
673
680
  host.merge!(user: @user, password: @password) if !host[:user] && @user
674
681
  host.merge!(path: @path) if !host[:path] && @path
@@ -829,13 +836,14 @@ EOC
829
836
  extract_placeholders(@host, chunk)
830
837
  end
831
838
 
839
+ affinity_target_indices = get_affinity_target_indices(chunk)
832
840
  chunk.msgpack_each do |time, record|
833
841
  next unless record.is_a? Hash
834
842
 
835
843
  record = inject_chunk_id_to_record_if_needed(record, chunk_id)
836
844
 
837
845
  begin
838
- meta, header, record = process_message(tag, meta, header, time, record, extracted_values)
846
+ meta, header, record = process_message(tag, meta, header, time, record, affinity_target_indices, extracted_values)
839
847
  info = if @include_index_in_url
840
848
  RequestInfo.new(host, meta.delete("_index".freeze), meta["_index".freeze], meta.delete("_alias".freeze))
841
849
  else
@@ -872,6 +880,42 @@ EOC
872
880
  end
873
881
  end
874
882
 
883
+ def target_index_affinity_enabled?()
884
+ @target_index_affinity && @logstash_format && @id_key && (@write_operation == UPDATE_OP || @write_operation == UPSERT_OP)
885
+ end
886
+
887
+ def get_affinity_target_indices(chunk)
888
+ indices = Hash.new
889
+ if target_index_affinity_enabled?()
890
+ id_key_accessor = record_accessor_create(@id_key)
891
+ ids = Set.new
892
+ chunk.msgpack_each do |time, record|
893
+ next unless record.is_a? Hash
894
+ begin
895
+ ids << id_key_accessor.call(record)
896
+ end
897
+ end
898
+ log.debug("Find affinity target_indices by quering on ES (write_operation #{@write_operation}) for ids: #{ids.to_a}")
899
+ options = {
900
+ :index => "#{logstash_prefix}#{@logstash_prefix_separator}*",
901
+ }
902
+ query = {
903
+ 'query' => { 'ids' => { 'values' => ids.to_a } },
904
+ '_source' => false,
905
+ 'sort' => [
906
+ {"_index" => {"order" => "desc"}}
907
+ ]
908
+ }
909
+ result = client.search(options.merge(:body => Yajl.dump(query)))
910
+ # There should be just one hit per _id, but in case there still is multiple, just the oldest index is stored to map
911
+ result['hits']['hits'].each do |hit|
912
+ indices[hit["_id"]] = hit["_index"]
913
+ log.debug("target_index for id: #{hit["_id"]} from es: #{hit["_index"]}")
914
+ end
915
+ end
916
+ indices
917
+ end
918
+
875
919
  def split_request?(bulk_message, info)
876
920
  # For safety.
877
921
  end
@@ -884,7 +928,7 @@ EOC
884
928
  false
885
929
  end
886
930
 
887
- def process_message(tag, meta, header, time, record, extracted_values)
931
+ def process_message(tag, meta, header, time, record, affinity_target_indices, extracted_values)
888
932
  logstash_prefix, logstash_dateformat, index_name, type_name, _template_name, _customize_template, _deflector_alias, application_name, pipeline, _ilm_policy_id = extracted_values
889
933
 
890
934
  if @flatten_hashes
@@ -925,6 +969,15 @@ EOC
925
969
  record[@tag_key] = tag
926
970
  end
927
971
 
972
+ # If affinity target indices map has value for this particular id, use it as target_index
973
+ if !affinity_target_indices.empty?
974
+ id_accessor = record_accessor_create(@id_key)
975
+ id_value = id_accessor.call(record)
976
+ if affinity_target_indices.key?(id_value)
977
+ target_index = affinity_target_indices[id_value]
978
+ end
979
+ end
980
+
928
981
  target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
929
982
  if target_type_parent && target_type_parent[target_type_child_key]
930
983
  target_type = target_type_parent.delete(target_type_child_key)