fluent-plugin-elasticsearch 4.3.3 → 5.0.4

This diff compares the publicly released contents of the two package versions as published to their respective public registries. It is provided for informational purposes only.
--- a/fluent-plugin-elasticsearch.gemspec
+++ b/fluent-plugin-elasticsearch.gemspec
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
 
 Gem::Specification.new do |s|
   s.name = 'fluent-plugin-elasticsearch'
-  s.version = '4.3.3'
+  s.version = '5.0.4'
   s.authors = ['diogo', 'pitr', 'Hiroshi Hatake']
   s.email = ['pitr.vern@gmail.com', 'me@diogoterror.com', 'cosmo0920.wp@gmail.com']
   s.description = %q{Elasticsearch output plugin for Fluent event collector}
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
 
 
   s.add_development_dependency 'rake', '>= 0'
+  s.add_development_dependency 'webrick', '~> 1.7.0'
   s.add_development_dependency 'webmock', '~> 3'
   s.add_development_dependency 'test-unit', '~> 3.3.0'
   s.add_development_dependency 'minitest', '~> 5.8'
--- a/lib/fluent/plugin/elasticsearch_error_handler.rb
+++ b/lib/fluent/plugin/elasticsearch_error_handler.rb
@@ -43,13 +43,14 @@ class Fluent::Plugin::ElasticsearchErrorHandler
     stats = Hash.new(0)
     meta = {}
     header = {}
+    affinity_target_indices = @plugin.get_affinity_target_indices(chunk)
     chunk.msgpack_each do |time, rawrecord|
       bulk_message = ''
       next unless rawrecord.is_a? Hash
       begin
         # we need a deep copy for process_message to alter
         processrecord = Marshal.load(Marshal.dump(rawrecord))
-        meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, extracted_values)
+        meta, header, record = @plugin.process_message(tag, meta, header, time, processrecord, affinity_target_indices, extracted_values)
         next unless @plugin.append_record_to_messages(@plugin.write_operation, meta, header, record, bulk_message)
       rescue => e
         stats[:bad_chunk_record] += 1
--- a/lib/fluent/plugin/out_elasticsearch.rb
+++ b/lib/fluent/plugin/out_elasticsearch.rb
@@ -2,6 +2,7 @@
 require 'date'
 require 'excon'
 require 'elasticsearch'
+require 'set'
 begin
   require 'elasticsearch/xpack'
 rescue LoadError
@@ -13,6 +14,7 @@ begin
   require 'strptime'
 rescue LoadError
 end
+require 'resolv'
 
 require 'fluent/plugin/output'
 require 'fluent/event'
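
The new `require 'resolv'` is pulled in for `Resolv::IPv6::Regex`, which the connection-building code further down uses to recognize bare IPv6 addresses. A quick illustrative check (the addresses are examples, not from the package):

    require 'resolv'

    Resolv::IPv6::Regex.match("2404:7a80:d440:3000:192a:a292:bd7f:ca19") # => MatchData (bare IPv6 address)
    Resolv::IPv6::Regex.match("logs.google.com")                         # => nil (ordinary hostname)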
@@ -174,6 +176,7 @@ EOC
   config_param :truncate_caches_interval, :time, :default => nil
   config_param :use_legacy_template, :bool, :default => true
   config_param :catch_transport_exception_on_retry, :bool, :default => true
+  config_param :target_index_affinity, :bool, :default => false
 
   config_section :metadata, param_name: :metainfo, multi: false do
     config_param :include_chunk_id, :bool, :default => false
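
`target_index_affinity` defaults to false and, per `target_index_affinity_enabled?` below, only takes effect together with `logstash_format`, an `id_key`, and an update or upsert `write_operation`. A minimal sketch of a configuration that enables it, mirroring the tests at the end of this diff (the `my_id` field name is illustrative):

    <match logs.**>
      @type elasticsearch
      target_index_affinity true
      logstash_format true
      id_key my_id
      write_operation update
    </match>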
@@ -668,7 +671,11 @@ EOC
         end
       end.compact
     else
-      [{host: @host, port: @port, scheme: @scheme.to_s}]
+      if Resolv::IPv6::Regex.match(@host)
+        [{host: "[#{@host}]", scheme: @scheme.to_s, port: @port}]
+      else
+        [{host: @host, port: @port, scheme: @scheme.to_s}]
+      end
     end.each do |host|
       host.merge!(user: @user, password: @password) if !host[:user] && @user
       host.merge!(path: @path) if !host[:path] && @path
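
With this branch, a bare IPv6 address given as `host` is wrapped in brackets before being handed to the Elasticsearch transport. A sketch of a single-host configuration that now resolves correctly (address taken from the tests below):

    <match logs.**>
      @type elasticsearch
      host 2404:7a80:d440:3000:192a:a292:bd7f:ca19
      port 9200
      scheme http
    </match>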
@@ -829,13 +836,14 @@ EOC
       extract_placeholders(@host, chunk)
     end
 
+    affinity_target_indices = get_affinity_target_indices(chunk)
     chunk.msgpack_each do |time, record|
       next unless record.is_a? Hash
 
       record = inject_chunk_id_to_record_if_needed(record, chunk_id)
 
       begin
-        meta, header, record = process_message(tag, meta, header, time, record, extracted_values)
+        meta, header, record = process_message(tag, meta, header, time, record, affinity_target_indices, extracted_values)
         info = if @include_index_in_url
                  RequestInfo.new(host, meta.delete("_index".freeze), meta["_index".freeze], meta.delete("_alias".freeze))
                else
@@ -872,6 +880,42 @@ EOC
       end
     end
 
+    def target_index_affinity_enabled?()
+      @target_index_affinity && @logstash_format && @id_key && (@write_operation == UPDATE_OP || @write_operation == UPSERT_OP)
+    end
+
+    def get_affinity_target_indices(chunk)
+      indices = Hash.new
+      if target_index_affinity_enabled?()
+        id_key_accessor = record_accessor_create(@id_key)
+        ids = Set.new
+        chunk.msgpack_each do |time, record|
+          next unless record.is_a? Hash
+          begin
+            ids << id_key_accessor.call(record)
+          end
+        end
+        log.debug("Find affinity target_indices by quering on ES (write_operation #{@write_operation}) for ids: #{ids.to_a}")
+        options = {
+          :index => "#{logstash_prefix}#{@logstash_prefix_separator}*",
+        }
+        query = {
+          'query' => { 'ids' => { 'values' => ids.to_a } },
+          '_source' => false,
+          'sort' => [
+            {"_index" => {"order" => "desc"}}
+          ]
+        }
+        result = client.search(options.merge(:body => Yajl.dump(query)))
+        # There should be just one hit per _id, but in case there still is multiple, just the oldest index is stored to map
+        result['hits']['hits'].each do |hit|
+          indices[hit["_id"]] = hit["_index"]
+          log.debug("target_index for id: #{hit["_id"]} from es: #{hit["_index"]}")
+        end
+      end
+      indices
+    end
+
     def split_request?(bulk_message, info)
       # For safety.
     end
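
Concretely, `get_affinity_target_indices` collects the unique ids in the chunk and issues one `ids` query per chunk against `#{logstash_prefix}#{@logstash_prefix_separator}*` with `_source` disabled, sorted on `_index` descending so that the oldest index wins when an id appears in several indices. The request it sends looks roughly like this (the id value is illustrative, taken from the tests below):

    POST /logstash-*/_search
    {"query":{"ids":{"values":["3408a2c8eecd4fbfb82e45012b54fa82"]}},"_source":false,"sort":[{"_index":{"order":"desc"}}]}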
@@ -884,7 +928,7 @@ EOC
       false
     end
 
-    def process_message(tag, meta, header, time, record, extracted_values)
+    def process_message(tag, meta, header, time, record, affinity_target_indices, extracted_values)
       logstash_prefix, logstash_dateformat, index_name, type_name, _template_name, _customize_template, _deflector_alias, application_name, pipeline, _ilm_policy_id = extracted_values
 
       if @flatten_hashes
@@ -925,6 +969,15 @@ EOC
         record[@tag_key] = tag
       end
 
+      # If affinity target indices map has value for this particular id, use it as target_index
+      if !affinity_target_indices.empty?
+        id_accessor = record_accessor_create(@id_key)
+        id_value = id_accessor.call(record)
+        if affinity_target_indices.key?(id_value)
+          target_index = affinity_target_indices[id_value]
+        end
+      end
+
       target_type_parent, target_type_child_key = @target_type_key ? get_parent_of(record, @target_type_key) : nil
       if target_type_parent && target_type_parent[target_type_child_key]
         target_type = target_type_parent.delete(target_type_child_key)
--- /dev/null
+++ b/lib/fluent/plugin/out_elasticsearch_data_stream.rb
@@ -0,0 +1,218 @@
+require_relative 'out_elasticsearch'
+
+module Fluent::Plugin
+  class ElasticsearchOutputDataStream < ElasticsearchOutput
+
+    Fluent::Plugin.register_output('elasticsearch_data_stream', self)
+
+    helpers :event_emitter
+
+    config_param :data_stream_name, :string
+    # Elasticsearch 7.9 or later always support new style of index template.
+    config_set_default :use_legacy_template, false
+
+    INVALID_START_CHRACTERS = ["-", "_", "+", "."]
+    INVALID_CHARACTERS = ["\\", "/", "*", "?", "\"", "<", ">", "|", " ", ",", "#", ":"]
+
+    def configure(conf)
+      super
+
+      begin
+        require 'elasticsearch/api'
+        require 'elasticsearch/xpack'
+      rescue LoadError
+        raise Fluent::ConfigError, "'elasticsearch/api', 'elasticsearch/xpack' are required for <@elasticsearch_data_stream>."
+      end
+
+      # ref. https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-create-data-stream.html
+      unless placeholder?(:data_stream_name_placeholder, @data_stream_name)
+        validate_data_stream_name
+      else
+        @use_placeholder = true
+        @data_stream_names = []
+      end
+
+      @client = client
+      unless @use_placeholder
+        begin
+          @data_stream_names = [@data_stream_name]
+          create_ilm_policy(@data_stream_name)
+          create_index_template(@data_stream_name)
+          create_data_stream(@data_stream_name)
+        rescue => e
+          raise Fluent::ConfigError, "Failed to create data stream: <#{@data_stream_name}> #{e.message}"
+        end
+      end
+    end
+
+    def validate_data_stream_name
+      unless valid_data_stream_name?
+        unless start_with_valid_characters?
+          if not_dots?
+            raise Fluent::ConfigError, "'data_stream_name' must not start with #{INVALID_START_CHRACTERS.join(",")}: <#{@data_stream_name}>"
+          else
+            raise Fluent::ConfigError, "'data_stream_name' must not be . or ..: <#{@data_stream_name}>"
+          end
+        end
+        unless valid_characters?
+          raise Fluent::ConfigError, "'data_stream_name' must not contain invalid characters #{INVALID_CHARACTERS.join(",")}: <#{@data_stream_name}>"
+        end
+        unless lowercase_only?
+          raise Fluent::ConfigError, "'data_stream_name' must be lowercase only: <#{@data_stream_name}>"
+        end
+        if @data_stream_name.bytes.size > 255
+          raise Fluent::ConfigError, "'data_stream_name' must not be longer than 255 bytes: <#{@data_stream_name}>"
+        end
+      end
+    end
+
+    def create_ilm_policy(name)
+      return if data_stream_exist?(name)
+      params = {
+        policy_id: "#{name}_policy",
+        body: File.read(File.join(File.dirname(__FILE__), "default-ilm-policy.json"))
+      }
+      retry_operate(@max_retry_putting_template,
+                    @fail_on_putting_template_retry_exceed,
+                    @catch_transport_exception_on_retry) do
+        @client.xpack.ilm.put_policy(params)
+      end
+    end
+
+    def create_index_template(name)
+      return if data_stream_exist?(name)
+      body = {
+        "index_patterns" => ["#{name}*"],
+        "data_stream" => {},
+        "template" => {
+          "settings" => {
+            "index.lifecycle.name" => "#{name}_policy"
+          }
+        }
+      }
+      params = {
+        name: name,
+        body: body
+      }
+      retry_operate(@max_retry_putting_template,
+                    @fail_on_putting_template_retry_exceed,
+                    @catch_transport_exception_on_retry) do
+        @client.indices.put_index_template(params)
+      end
+    end
+
+    def data_stream_exist?(name)
+      params = {
+        "name": name
+      }
+      begin
+        response = @client.indices.get_data_stream(params)
+        return (not response.is_a?(Elasticsearch::Transport::Transport::Errors::NotFound))
+      rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
+        log.info "Specified data stream does not exist. Will be created: <#{e}>"
+        return false
+      end
+    end
+
+    def create_data_stream(name)
+      return if data_stream_exist?(name)
+      params = {
+        "name": name
+      }
+      retry_operate(@max_retry_putting_template,
+                    @fail_on_putting_template_retry_exceed,
+                    @catch_transport_exception_on_retry) do
+        @client.indices.create_data_stream(params)
+      end
+    end
+
+    def valid_data_stream_name?
+      lowercase_only? and
+        valid_characters? and
+        start_with_valid_characters? and
+        not_dots? and
+        @data_stream_name.bytes.size <= 255
+    end
+
+    def lowercase_only?
+      @data_stream_name.downcase == @data_stream_name
+    end
+
+    def valid_characters?
+      not (INVALID_CHARACTERS.each.any? do |v| @data_stream_name.include?(v) end)
+    end
+
+    def start_with_valid_characters?
+      not (INVALID_START_CHRACTERS.each.any? do |v| @data_stream_name.start_with?(v) end)
+    end
+
+    def not_dots?
+      not (@data_stream_name == "." or @data_stream_name == "..")
+    end
+
+    def client_library_version
+      Elasticsearch::VERSION
+    end
+
+    def multi_workers_ready?
+      true
+    end
+
+    def write(chunk)
+      data_stream_name = @data_stream_name
+      if @use_placeholder
+        data_stream_name = extract_placeholders(@data_stream_name, chunk)
+        unless @data_stream_names.include?(data_stream_name)
+          begin
+            create_ilm_policy(data_stream_name)
+            create_index_template(data_stream_name)
+            create_data_stream(data_stream_name)
+            @data_stream_names << data_stream_name
+          rescue => e
+            raise Fluent::ConfigError, "Failed to create data stream: <#{data_stream_name}> #{e.message}"
+          end
+        end
+      end
+
+      bulk_message = ""
+      headers = {
+        CREATE_OP => {}
+      }
+      tag = chunk.metadata.tag
+      chunk.msgpack_each do |time, record|
+        next unless record.is_a? Hash
+
+        begin
+          record.merge!({"@timestamp" => Time.at(time).iso8601(@time_precision)})
+          bulk_message = append_record_to_messages(CREATE_OP, {}, headers, record, bulk_message)
+        rescue => e
+          router.emit_error_event(tag, time, record, e)
+        end
+      end
+
+      params = {
+        index: data_stream_name,
+        body: bulk_message
+      }
+      begin
+        response = @client.bulk(params)
+        if response['errors']
+          log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{response}"
+        end
+      rescue => e
+        log.error "Could not bulk insert to Data Stream: #{data_stream_name} #{e.message}"
+      end
+    end
+
+    def append_record_to_messages(op, meta, header, record, msgs)
+      header[CREATE_OP] = meta
+      msgs << @dump_proc.call(header) << BODY_DELIMITER
+      msgs << @dump_proc.call(record) << BODY_DELIMITER
+      msgs
+    end
+
+    def retry_stream_retryable?
+      @buffer.storable?
+    end
+  end
+end
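
The new output registers as `elasticsearch_data_stream` and bootstraps an ILM policy, an index template, and the data stream itself before the first write. A minimal, hypothetical configuration (the stream name is illustrative and must pass the validation rules above):

    <match logs.**>
      @type elasticsearch_data_stream
      data_stream_name fluentd-logs
      host localhost
      port 9200
    </match>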
--- a/test/plugin/test_elasticsearch_error_handler.rb
+++ b/test/plugin/test_elasticsearch_error_handler.rb
@@ -27,10 +27,15 @@ class TestElasticsearchErrorHandler < Test::Unit::TestCase
     @error_events << {:tag => tag, :time=>time, :record=>record, :error=>e}
   end
 
-  def process_message(tag, meta, header, time, record, extracted_values)
+  def process_message(tag, meta, header, time, record, affinity_target_indices, extracted_values)
     return [meta, header, record]
   end
 
+  def get_affinity_target_indices(chunk)
+    indices = Hash.new
+    indices
+  end
+
   def append_record_to_messages(op, meta, header, record, msgs)
     if record.has_key?('raise') && record['raise']
       raise Exception('process_message')
--- a/test/plugin/test_out_elasticsearch.rb
+++ b/test/plugin/test_out_elasticsearch.rb
@@ -10,7 +10,7 @@ class ElasticsearchOutputTest < Test::Unit::TestCase
   include FlexMock::TestCase
   include Fluent::Test::Helpers
 
-  attr_accessor :index_cmds, :index_command_counts
+  attr_accessor :index_cmds, :index_command_counts, :index_cmds_all_requests
 
   def setup
     Fluent::Test.setup
@@ -70,6 +70,14 @@ class ElasticsearchOutputTest < Test::Unit::TestCase
     end
   end
 
+  def stub_elastic_all_requests(url="http://localhost:9200/_bulk")
+    @index_cmds_all_requests = Array.new
+    stub_request(:post, url).with do |req|
+      @index_cmds = req.body.split("\n").map {|r| JSON.parse(r) }
+      @index_cmds_all_requests << @index_cmds
+    end
+  end
+
   def stub_elastic_unavailable(url="http://localhost:9200/_bulk")
     stub_request(:post, url).to_return(:status => [503, "Service Unavailable"])
   end
@@ -3612,6 +3620,74 @@ class ElasticsearchOutputTest < Test::Unit::TestCase
     assert_equal '/default_path', host2[:path]
   end
 
+  class IPv6AdressStringHostsTest < self
+    def test_legacy_hosts_list
+      config = %{
+        hosts "[2404:7a80:d440:3000:192a:a292:bd7f:ca19]:50,host2:100,host3"
+        scheme https
+        path /es/
+        port 123
+      }
+      instance = driver(config).instance
+
+      assert_raise(URI::InvalidURIError) do
+        instance.get_connection_options[:hosts].length
+      end
+    end
+
+    def test_hosts_list
+      config = %{
+        hosts https://john:password@[2404:7a80:d440:3000:192a:a292:bd7f:ca19]:443/elastic/,http://host2
+        path /default_path
+        user default_user
+        password default_password
+      }
+      instance = driver(config).instance
+
+      assert_equal 2, instance.get_connection_options[:hosts].length
+      host1, host2 = instance.get_connection_options[:hosts]
+
+      assert_equal '[2404:7a80:d440:3000:192a:a292:bd7f:ca19]', host1[:host]
+      assert_equal 443, host1[:port]
+      assert_equal 'https', host1[:scheme]
+      assert_equal 'john', host1[:user]
+      assert_equal 'password', host1[:password]
+      assert_equal '/elastic/', host1[:path]
+
+      assert_equal 'host2', host2[:host]
+      assert_equal 'http', host2[:scheme]
+      assert_equal 'default_user', host2[:user]
+      assert_equal 'default_password', host2[:password]
+      assert_equal '/default_path', host2[:path]
+    end
+
+    def test_hosts_list_with_escape_placeholders
+      config = %{
+        hosts https://%{j+hn}:%{passw@rd}@[2404:7a80:d440:3000:192a:a292:bd7f:ca19]:443/elastic/,http://host2
+        path /default_path
+        user default_user
+        password default_password
+      }
+      instance = driver(config).instance
+
+      assert_equal 2, instance.get_connection_options[:hosts].length
+      host1, host2 = instance.get_connection_options[:hosts]
+
+      assert_equal '[2404:7a80:d440:3000:192a:a292:bd7f:ca19]', host1[:host]
+      assert_equal 443, host1[:port]
+      assert_equal 'https', host1[:scheme]
+      assert_equal 'j%2Bhn', host1[:user]
+      assert_equal 'passw%40rd', host1[:password]
+      assert_equal '/elastic/', host1[:path]
+
+      assert_equal 'host2', host2[:host]
+      assert_equal 'http', host2[:scheme]
+      assert_equal 'default_user', host2[:user]
+      assert_equal 'default_password', host2[:password]
+      assert_equal '/default_path', host2[:path]
+    end
+  end
+
   def test_single_host_params_and_defaults
     config = %{
       host logs.google.com
@@ -3665,6 +3741,46 @@ class ElasticsearchOutputTest < Test::Unit::TestCase
     assert(ports.none? { |p| p == 9200 })
   end
 
+  class IPv6AdressStringHostTest < self
+    def test_single_host_params_and_defaults
+      config = %{
+        host 2404:7a80:d440:3000:192a:a292:bd7f:ca19
+        user john
+        password doe
+      }
+      instance = driver(config).instance
+
+      assert_equal 1, instance.get_connection_options[:hosts].length
+      host1 = instance.get_connection_options[:hosts][0]
+
+      assert_equal '[2404:7a80:d440:3000:192a:a292:bd7f:ca19]', host1[:host]
+      assert_equal 9200, host1[:port]
+      assert_equal 'http', host1[:scheme]
+      assert_equal 'john', host1[:user]
+      assert_equal 'doe', host1[:password]
+      assert_equal nil, host1[:path]
+    end
+
+    def test_single_host_params_and_defaults_with_escape_placeholders
+      config = %{
+        host 2404:7a80:d440:3000:192a:a292:bd7f:ca19
+        user %{j+hn}
+        password %{d@e}
+      }
+      instance = driver(config).instance
+
+      assert_equal 1, instance.get_connection_options[:hosts].length
+      host1 = instance.get_connection_options[:hosts][0]
+
+      assert_equal '[2404:7a80:d440:3000:192a:a292:bd7f:ca19]', host1[:host]
+      assert_equal 9200, host1[:port]
+      assert_equal 'http', host1[:scheme]
+      assert_equal 'j%2Bhn', host1[:user]
+      assert_equal 'd%40e', host1[:password]
+      assert_equal nil, host1[:path]
+    end
+  end
+
   def test_password_is_required_if_specify_user
     config = %{
       user john
@@ -3986,6 +4102,185 @@ class ElasticsearchOutputTest < Test::Unit::TestCase
     assert_equal(pipeline, index_cmds.first['index']['pipeline'])
   end
 
+  def stub_elastic_affinity_target_index_search_with_body(url="http://localhost:9200/logstash-*/_search", ids, return_body_str)
+    # Note: ids used in query is unique list of ids
+    stub_request(:post, url)
+      .with(
+        body: "{\"query\":{\"ids\":{\"values\":#{ids.uniq.to_json}}},\"_source\":false,\"sort\":[{\"_index\":{\"order\":\"desc\"}}]}",
+      )
+      .to_return(lambda do |req|
+        { :status => 200,
+          :headers => { 'Content-Type' => 'json' },
+          :body => return_body_str
+        }
+      end)
+  end
+
+  def stub_elastic_affinity_target_index_search(url="http://localhost:9200/logstash-*/_search", ids, indices)
+    # Example ids and indices arrays.
+    #  [ "3408a2c8eecd4fbfb82e45012b54fa82", "2816fc6ef4524b3f8f7e869002005433"]
+    #  [ "logstash-2021.04.28", "logstash-2021.04.29"]
+    body = %({
+      "took" : 31,
+      "timed_out" : false,
+      "_shards" : {
+        "total" : 52,
+        "successful" : 52,
+        "skipped" : 48,
+        "failed" : 0
+      },
+      "hits" : {
+        "total" : {
+          "value" : 356,
+          "relation" : "eq"
+        },
+        "max_score" : null,
+        "hits" : [
+          {
+            "_index" : "#{indices[0]}",
+            "_type" : "_doc",
+            "_id" : "#{ids[0]}",
+            "_score" : null,
+            "sort" : [
+              "#{indices[0]}"
+            ]
+          },
+          {
+            "_index" : "#{indices[1]}",
+            "_type" : "_doc",
+            "_id" : "#{ids[1]}",
+            "_score" : null,
+            "sort" : [
+              "#{indices[1]}"
+            ]
+          }
+        ]
+      }
+    })
+    stub_elastic_affinity_target_index_search_with_body(ids, body)
+  end
+
+  def stub_elastic_affinity_target_index_search_return_empty(url="http://localhost:9200/logstash-*/_search", ids)
+    empty_body = %({
+      "took" : 5,
+      "timed_out" : false,
+      "_shards" : {
+        "total" : 54,
+        "successful" : 54,
+        "skipped" : 53,
+        "failed" : 0
+      },
+      "hits" : {
+        "total" : {
+          "value" : 0,
+          "relation" : "eq"
+        },
+        "max_score" : null,
+        "hits" : [ ]
+      }
+    })
+    stub_elastic_affinity_target_index_search_with_body(ids, empty_body)
+  end
+
+  def test_writes_to_affinity_target_index
+    driver.configure("target_index_affinity true
+                      logstash_format true
+                      id_key my_id
+                      write_operation update")
+
+    my_id_value = "3408a2c8eecd4fbfb82e45012b54fa82"
+    ids = [my_id_value]
+    indices = ["logstash-2021.04.28"]
+    stub_elastic
+    stub_elastic_affinity_target_index_search(ids, indices)
+    driver.run(default_tag: 'test') do
+      driver.feed(sample_record('my_id' => my_id_value))
+    end
+    assert_equal('logstash-2021.04.28', index_cmds.first['update']['_index'])
+  end
+
+  def test_writes_to_affinity_target_index_write_operation_upsert
+    driver.configure("target_index_affinity true
+                      logstash_format true
+                      id_key my_id
+                      write_operation upsert")
+
+    my_id_value = "3408a2c8eecd4fbfb82e45012b54fa82"
+    ids = [my_id_value]
+    indices = ["logstash-2021.04.28"]
+    stub_elastic
+    stub_elastic_affinity_target_index_search(ids, indices)
+    driver.run(default_tag: 'test') do
+      driver.feed(sample_record('my_id' => my_id_value))
+    end
+    assert_equal('logstash-2021.04.28', index_cmds.first['update']['_index'])
+  end
+
+  def test_writes_to_affinity_target_index_index_not_exists_yet
+    driver.configure("target_index_affinity true
+                      logstash_format true
+                      id_key my_id
+                      write_operation update")
+
+    my_id_value = "3408a2c8eecd4fbfb82e45012b54fa82"
+    ids = [my_id_value]
+    stub_elastic
+    stub_elastic_affinity_target_index_search_return_empty(ids)
+    time = Time.parse Date.today.iso8601
+    driver.run(default_tag: 'test') do
+      driver.feed(time.to_i, sample_record('my_id' => my_id_value))
+    end
+    assert_equal("logstash-#{time.utc.strftime("%Y.%m.%d")}", index_cmds.first['update']['_index'])
+  end
+
+  def test_writes_to_affinity_target_index_multiple_indices
+    driver.configure("target_index_affinity true
+                      logstash_format true
+                      id_key my_id
+                      write_operation update")
+
+    my_id_value = "2816fc6ef4524b3f8f7e869002005433"
+    my_id_value2 = "3408a2c8eecd4fbfb82e45012b54fa82"
+    ids = [my_id_value, my_id_value2]
+    indices = ["logstash-2021.04.29", "logstash-2021.04.28"]
+    stub_elastic_all_requests
+    stub_elastic_affinity_target_index_search(ids, indices)
+    driver.run(default_tag: 'test') do
+      driver.feed(sample_record('my_id' => my_id_value))
+      driver.feed(sample_record('my_id' => my_id_value2))
+    end
+    assert_equal(2, index_cmds_all_requests.count)
+    assert_equal('logstash-2021.04.29', index_cmds_all_requests[0].first['update']['_index'])
+    assert_equal(my_id_value, index_cmds_all_requests[0].first['update']['_id'])
+    assert_equal('logstash-2021.04.28', index_cmds_all_requests[1].first['update']['_index'])
+    assert_equal(my_id_value2, index_cmds_all_requests[1].first['update']['_id'])
+  end
+
+  def test_writes_to_affinity_target_index_same_id_dublicated_write_to_oldest_index
+    driver.configure("target_index_affinity true
+                      logstash_format true
+                      id_key my_id
+                      write_operation update")
+
+    my_id_value = "2816fc6ef4524b3f8f7e869002005433"
+    # It may happen than same id has inserted to two index while data inserted during rollover period
+    ids = [my_id_value, my_id_value]
+    # Simulate the used sorting here, as search sorts indices in DESC order to pick only oldest index per single _id
+    indices = ["logstash-2021.04.29", "logstash-2021.04.28"]
+
+    stub_elastic_all_requests
+    stub_elastic_affinity_target_index_search(ids, indices)
+    driver.run(default_tag: 'test') do
+      driver.feed(sample_record('my_id' => my_id_value))
+      driver.feed(sample_record('my_id' => my_id_value))
+    end
+    assert_equal('logstash-2021.04.28', index_cmds.first['update']['_index'])
+
+    assert_equal(1, index_cmds_all_requests.count)
+    assert_equal('logstash-2021.04.28', index_cmds_all_requests[0].first['update']['_index'])
+    assert_equal(my_id_value, index_cmds_all_requests[0].first['update']['_id'])
+  end
+
   class PipelinePlaceholdersTest < self
     def test_writes_to_default_index_with_pipeline_tag_placeholder
       pipeline = "fluentd-${tag}"