logstash-output-elasticsearch-test 10.3.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +397 -0
  3. data/CONTRIBUTORS +33 -0
  4. data/Gemfile +15 -0
  5. data/LICENSE +13 -0
  6. data/NOTICE.TXT +5 -0
  7. data/README.md +106 -0
  8. data/docs/index.asciidoc +899 -0
  9. data/lib/logstash/outputs/elasticsearch/common.rb +441 -0
  10. data/lib/logstash/outputs/elasticsearch/common_configs.rb +167 -0
  11. data/lib/logstash/outputs/elasticsearch/default-ilm-policy.json +14 -0
  12. data/lib/logstash/outputs/elasticsearch/elasticsearch-template-es2x.json +95 -0
  13. data/lib/logstash/outputs/elasticsearch/elasticsearch-template-es5x.json +46 -0
  14. data/lib/logstash/outputs/elasticsearch/elasticsearch-template-es6x.json +45 -0
  15. data/lib/logstash/outputs/elasticsearch/elasticsearch-template-es7x.json +44 -0
  16. data/lib/logstash/outputs/elasticsearch/elasticsearch-template-es8x.json +44 -0
  17. data/lib/logstash/outputs/elasticsearch/http_client/manticore_adapter.rb +131 -0
  18. data/lib/logstash/outputs/elasticsearch/http_client/pool.rb +495 -0
  19. data/lib/logstash/outputs/elasticsearch/http_client.rb +432 -0
  20. data/lib/logstash/outputs/elasticsearch/http_client_builder.rb +159 -0
  21. data/lib/logstash/outputs/elasticsearch/ilm.rb +113 -0
  22. data/lib/logstash/outputs/elasticsearch/template_manager.rb +61 -0
  23. data/lib/logstash/outputs/elasticsearch.rb +263 -0
  24. data/logstash-output-elasticsearch.gemspec +33 -0
  25. data/spec/es_spec_helper.rb +189 -0
  26. data/spec/fixtures/_nodes/2x_1x.json +27 -0
  27. data/spec/fixtures/_nodes/5x_6x.json +81 -0
  28. data/spec/fixtures/_nodes/7x.json +92 -0
  29. data/spec/fixtures/htpasswd +2 -0
  30. data/spec/fixtures/nginx_reverse_proxy.conf +22 -0
  31. data/spec/fixtures/scripts/groovy/scripted_update.groovy +2 -0
  32. data/spec/fixtures/scripts/groovy/scripted_update_nested.groovy +2 -0
  33. data/spec/fixtures/scripts/groovy/scripted_upsert.groovy +2 -0
  34. data/spec/fixtures/scripts/painless/scripted_update.painless +2 -0
  35. data/spec/fixtures/scripts/painless/scripted_update_nested.painless +1 -0
  36. data/spec/fixtures/scripts/painless/scripted_upsert.painless +1 -0
  37. data/spec/fixtures/template-with-policy-es6x.json +48 -0
  38. data/spec/fixtures/template-with-policy-es7x.json +45 -0
  39. data/spec/fixtures/test_certs/ca/ca.crt +32 -0
  40. data/spec/fixtures/test_certs/ca/ca.key +51 -0
  41. data/spec/fixtures/test_certs/test.crt +36 -0
  42. data/spec/fixtures/test_certs/test.key +51 -0
  43. data/spec/integration/outputs/compressed_indexing_spec.rb +69 -0
  44. data/spec/integration/outputs/create_spec.rb +67 -0
  45. data/spec/integration/outputs/delete_spec.rb +65 -0
  46. data/spec/integration/outputs/groovy_update_spec.rb +150 -0
  47. data/spec/integration/outputs/ilm_spec.rb +531 -0
  48. data/spec/integration/outputs/index_spec.rb +178 -0
  49. data/spec/integration/outputs/index_version_spec.rb +102 -0
  50. data/spec/integration/outputs/ingest_pipeline_spec.rb +74 -0
  51. data/spec/integration/outputs/metrics_spec.rb +70 -0
  52. data/spec/integration/outputs/no_es_on_startup_spec.rb +58 -0
  53. data/spec/integration/outputs/painless_update_spec.rb +189 -0
  54. data/spec/integration/outputs/parent_spec.rb +102 -0
  55. data/spec/integration/outputs/retry_spec.rb +169 -0
  56. data/spec/integration/outputs/routing_spec.rb +61 -0
  57. data/spec/integration/outputs/sniffer_spec.rb +133 -0
  58. data/spec/integration/outputs/templates_5x_spec.rb +98 -0
  59. data/spec/integration/outputs/templates_spec.rb +98 -0
  60. data/spec/integration/outputs/update_spec.rb +116 -0
  61. data/spec/support/elasticsearch/api/actions/delete_ilm_policy.rb +19 -0
  62. data/spec/support/elasticsearch/api/actions/get_alias.rb +18 -0
  63. data/spec/support/elasticsearch/api/actions/get_ilm_policy.rb +18 -0
  64. data/spec/support/elasticsearch/api/actions/put_alias.rb +24 -0
  65. data/spec/support/elasticsearch/api/actions/put_ilm_policy.rb +25 -0
  66. data/spec/unit/http_client_builder_spec.rb +185 -0
  67. data/spec/unit/outputs/elasticsearch/http_client/manticore_adapter_spec.rb +149 -0
  68. data/spec/unit/outputs/elasticsearch/http_client/pool_spec.rb +274 -0
  69. data/spec/unit/outputs/elasticsearch/http_client_spec.rb +250 -0
  70. data/spec/unit/outputs/elasticsearch/template_manager_spec.rb +25 -0
  71. data/spec/unit/outputs/elasticsearch_proxy_spec.rb +72 -0
  72. data/spec/unit/outputs/elasticsearch_spec.rb +675 -0
  73. data/spec/unit/outputs/elasticsearch_ssl_spec.rb +82 -0
  74. data/spec/unit/outputs/error_whitelist_spec.rb +54 -0
  75. metadata +300 -0
@@ -0,0 +1,495 @@
1
module LogStash; module Outputs; class ElasticSearch; class HttpClient;
  # Connection pool for the Elasticsearch output's HTTP client.
  #
  # Tracks per-URL health metadata, periodically resurrects dead connections,
  # optionally sniffs the cluster for new nodes, and hands out the
  # least-in-use live connection for each request.
  class Pool
    # Raised when every URL in the pool is dead; callers may retry on it.
    class NoConnectionAvailableError < Error; end

    # Raised when Elasticsearch answers with a non-success HTTP status.
    class BadResponseCodeError < Error
      attr_reader :url, :response_code, :request_body, :response_body

      def initialize(response_code, url, request_body, response_body)
        @response_code = response_code
        @url = url
        @request_body = request_body
        @response_body = response_body
      end

      def message
        "Got response code '#{response_code}' contacting Elasticsearch at URL '#{@url}'"
      end
    end

    # Raised when the transport adapter cannot reach a host at all.
    class HostUnreachableError < Error
      attr_reader :original_error, :url

      def initialize(original_error, url)
        @original_error = original_error
        @url = url
      end

      def message
        "Elasticsearch Unreachable: [#{@url}][#{original_error.class}] #{original_error.message}"
      end
    end

    attr_reader :logger, :adapter, :sniffing, :sniffer_delay, :resurrect_delay, :healthcheck_path, :sniffing_path, :bulk_path

    ROOT_URI_PATH = '/'.freeze
    LICENSE_PATH = '/_license'.freeze

    DEFAULT_OPTIONS = {
      :healthcheck_path => ROOT_URI_PATH,
      :sniffing_path => "/_nodes/http",
      :bulk_path => "/_bulk",
      :scheme => 'http',
      :resurrect_delay => 5,
      :sniffing => false,
      :sniffer_delay => 10,
    }.freeze

    # @param logger [Logger] logger shared with the output plugin
    # @param adapter [#perform_request, #close] transport adapter (e.g. Manticore wrapper)
    # @param initial_urls [Array] seed URLs for the pool
    # @param options [Hash] see DEFAULT_OPTIONS; :url_normalizer is mandatory,
    #   :metric is an optional metrics sink
    # @raise [ArgumentError] when no :url_normalizer is supplied
    def initialize(logger, adapter, initial_urls=[], options={})
      @logger = logger
      @adapter = adapter
      @metric = options[:metric]
      @initial_urls = initial_urls

      raise ArgumentError, "No URL Normalizer specified!" unless options[:url_normalizer]
      @url_normalizer = options[:url_normalizer]
      DEFAULT_OPTIONS.merge(options).tap do |merged|
        @bulk_path = merged[:bulk_path]
        @sniffing_path = merged[:sniffing_path]
        @healthcheck_path = merged[:healthcheck_path]
        @resurrect_delay = merged[:resurrect_delay]
        @sniffing = merged[:sniffing]
        @sniffer_delay = merged[:sniffer_delay]
      end

      # Used for all concurrent operations in this class
      @state_mutex = Mutex.new

      # Holds metadata about all URLs: url => {:in_use => Integer, :state => Symbol, ...}
      @url_info = {}
      @stopping = false
    end

    # True when running against the OSS distribution of this plugin/Logstash.
    def oss?
      LogStash::Outputs::ElasticSearch.oss?
    end

    # Seeds the pool and starts the background resurrectionist (and sniffer,
    # when enabled).
    def start
      update_initial_urls
      start_resurrectionist
      start_sniffer if @sniffing
    end

    def update_initial_urls
      update_urls(@initial_urls)
    end

    # Orderly shutdown: signal the background threads, wait for them and for
    # any in-flight requests, then close the adapter.
    def close
      @state_mutex.synchronize { @stopping = true }

      logger.debug "Stopping sniffer"
      stop_sniffer

      logger.debug "Stopping resurrectionist"
      stop_resurrectionist

      logger.debug "Waiting for in use manticore connections"
      wait_for_in_use_connections

      logger.debug("Closing adapter #{@adapter}")
      @adapter.close
    end

    # Blocks (polling once per second) until no connection is checked out.
    def wait_for_in_use_connections
      until in_use_connections.empty?
        logger.info "Blocked on shutdown to in use connections #{@state_mutex.synchronize {@url_info}}"
        sleep 1
      end
    end

    def in_use_connections
      @state_mutex.synchronize { @url_info.values.select {|v| v[:in_use] > 0 } }
    end

    def alive_urls_count
      @state_mutex.synchronize { @url_info.values.select {|v| v[:state] == :alive }.count }
    end

    def url_info
      @state_mutex.synchronize { @url_info }
    end

    # Highest ES major version observed across healthchecked nodes, or nil
    # before the first successful healthcheck.
    def maximum_seen_major_version
      @state_mutex.synchronize do
        @maximum_seen_major_version
      end
    end

    def urls
      url_info.keys
    end

    # Runs the given block roughly every +delay+ seconds until #close sets
    # @stopping. Errors from the block are logged and swallowed so the
    # background thread keeps running.
    def until_stopped(task_name, delay)
      last_done = Time.now
      until @state_mutex.synchronize { @stopping }
        begin
          now = Time.now
          if (now - last_done) >= delay
            last_done = now
            yield
          end
          sleep 1
        rescue => e
          logger.warn(
            "Error while performing #{task_name}",
            :error_message => e.message,
            :class => e.class.name,
            :backtrace => e.backtrace
          )
        end
      end
    end

    def start_sniffer
      @sniffer = Thread.new do
        until_stopped("sniffing", sniffer_delay) do
          begin
            sniff!
          rescue NoConnectionAvailableError => e
            @state_mutex.synchronize { # Synchronize around @url_info
              logger.warn("Elasticsearch output attempted to sniff for new connections but cannot. No living connections are detected. Pool contains the following current URLs", :url_info => @url_info) }
          end
        end
      end
    end

    # Sniffs the cluster then updates the internal URLs
    def sniff!
      update_urls(check_sniff)
    end

    ES1_SNIFF_RE_URL = /\[([^\/]*)?\/?([^:]*):([0-9]+)\]/
    ES2_AND_ABOVE_SNIFF_RE_URL = /([^\/]*)?\/?([^:]*):([0-9]+)/
    # Sniffs and returns the results. Does not update internal URLs!
    # Returns nil when no usable node list could be obtained.
    def check_sniff
      _, url_meta, resp = perform_request(:get, @sniffing_path)
      @metric.increment(:sniff_requests)
      parsed = LogStash::Json.load(resp.body)
      nodes = parsed['nodes']
      if !nodes || nodes.empty?
        @logger.warn("Sniff returned no nodes! Will not update hosts.")
        return nil
      else
        case major_version(url_meta[:version])
        when 5, 6, 7, 8
          sniff_5x_and_above(nodes)
        when 2, 1
          sniff_2x_1x(nodes)
        else
          @logger.warn("Could not determine version for nodes in ES cluster!")
          return nil
        end
      end
    end

    # "7.10.2" => 7
    def major_version(version_string)
      version_string.split('.').first.to_i
    end

    def sniff_5x_and_above(nodes)
      nodes.map do |id,info|
        # Skip master-only nodes
        next if info["roles"] && info["roles"] == ["master"]
        address_str_to_uri(info["http"]["publish_address"]) if info["http"]
      end.compact
    end

    # Parses a publish-address string ("host/ip:port" or "[host/ip:port]")
    # into a SafeURI, or nil when it doesn't match either known format.
    def address_str_to_uri(addr_str)
      matches = addr_str.match(ES1_SNIFF_RE_URL) || addr_str.match(ES2_AND_ABOVE_SNIFF_RE_URL)
      if matches
        # matches[1] is an optional capture and may be nil; the previous
        # `matches[1].empty?` raised NoMethodError in that case.
        host = matches[1].to_s.empty? ? matches[2] : matches[1]
        ::LogStash::Util::SafeURI.new("#{host}:#{matches[3]}")
      end
    end


    def sniff_2x_1x(nodes)
      nodes.map do |id,info|
        # TODO Make sure this works with shield. Does that listed
        # stuff as 'https_address?'

        # Skip hosts with HTTP disabled. (Previously the guard tested
        # `info['http_address'].to_s`, and `nil.to_s == ""` is truthy, so
        # HTTP-disabled nodes were never actually skipped.)
        next unless info['http_address']
        addr_str = info['http_address'].to_s

        # Only connect to nodes that serve data
        # this will skip connecting to client, tribe, and master only nodes
        # Note that if 'attributes' is NOT set, then that's just a regular node
        # with master + data + client enabled, so we allow that
        attributes = info['attributes']
        next if attributes && attributes['data'] == 'false'
        address_str_to_uri(addr_str)
      end.compact
    end

    def stop_sniffer
      @sniffer.join if @sniffer
    end

    def sniffer_alive?
      @sniffer ? @sniffer.alive? : nil
    end

    def start_resurrectionist
      @resurrectionist = Thread.new do
        until_stopped("resurrection", @resurrect_delay) do
          healthcheck!
        end
      end
    end

    def get_license(url)
      response = perform_request_to_url(url, :get, LICENSE_PATH)
      LogStash::Json.load(response.body)
    end

    # True when the node at +url+ reports an active license. Any error while
    # fetching/parsing the license is treated as "not valid" on purpose.
    def valid_es_license?(url)
      license = get_license(url)
      license.fetch("license", {}).fetch("status", nil) == "active"
    rescue => e
      false
    end

    def health_check_request(url)
      perform_request_to_url(url, :head, @healthcheck_path)
    end

    # Probes every non-alive URL; on success records the node's ES version,
    # updates the maximum seen major version, and marks the URL :alive.
    def healthcheck!
      # Try to keep locking granularity low such that we don't affect IO...
      @state_mutex.synchronize { @url_info.select {|url,meta| meta[:state] != :alive } }.each do |url,meta|
        begin
          logger.debug("Running health check to see if an Elasticsearch connection is working",
                        :healthcheck_url => url, :path => @healthcheck_path)
          health_check_request(url)
          # If no exception was raised it must have succeeded!
          logger.warn("Restored connection to ES instance", :url => url.sanitized.to_s)
          # We reconnected to this node, check its ES version
          es_version = get_es_version(url)
          @state_mutex.synchronize do
            meta[:version] = es_version
            major = major_version(es_version)
            if !@maximum_seen_major_version
              @logger.info("ES Output version determined", :es_version => major)
              set_new_major_version(major)
            elsif major > @maximum_seen_major_version
              @logger.warn("Detected a node with a higher major version than previously observed. This could be the result of an elasticsearch cluster upgrade.", :previous_major => @maximum_seen_major_version, :new_major => major, :node_url => url.sanitized.to_s)
              set_new_major_version(major)
            end
            if oss? || valid_es_license?(url)
              meta[:state] = :alive
            else
              # As this version is to be shipped with Logstash 7.x we won't mark the connection as unlicensed
              #
              # logger.error("Cannot connect to the Elasticsearch cluster configured in the Elasticsearch output. Logstash requires the default distribution of Elasticsearch. Please update to the default distribution of Elasticsearch for full access to all free features, or switch to the OSS distribution of Logstash.", :url => url.sanitized.to_s)
              # meta[:state] = :unlicensed
              #
              # Instead we'll log a deprecation warning and mark it as alive:
              #
              log_license_deprecation_warn(url)
              meta[:state] = :alive
            end
          end
        rescue HostUnreachableError, BadResponseCodeError => e
          logger.warn("Attempted to resurrect connection to dead ES instance, but got an error.", url: url.sanitized.to_s, error_type: e.class, error: e.message)
        end
      end
    end

    def stop_resurrectionist
      @resurrectionist.join if @resurrectionist
    end

    def log_license_deprecation_warn(url)
      logger.warn("DEPRECATION WARNING: Connecting to an OSS distribution of Elasticsearch using the default distribution of Logstash will stop working in Logstash 8.0.0. Please upgrade to the default distribution of Elasticsearch, or use the OSS distribution of Logstash", :url => url.sanitized.to_s)
    end

    def resurrectionist_alive?
      @resurrectionist ? @resurrectionist.alive? : nil
    end

    # Performs a request against a pool-chosen connection.
    # @return [Array(url, url_meta, response)]
    def perform_request(method, path, params={}, body=nil)
      with_connection do |url, url_meta|
        resp = perform_request_to_url(url, method, path, params, body)
        [url, url_meta, resp]
      end
    end

    # Convenience verb methods (#get, #put, ...) returning just the response.
    [:get, :put, :post, :delete, :patch, :head].each do |method|
      define_method(method) do |path, params={}, body=nil|
        _, _, response = perform_request(method, path, params, body)
        response
      end
    end

    # Low-level request against a specific URL; adapter-level connectivity
    # failures are normalized into HostUnreachableError.
    def perform_request_to_url(url, method, path, params={}, body=nil)
      res = @adapter.perform_request(url, method, path, params, body)
    rescue *@adapter.host_unreachable_exceptions => e
      raise HostUnreachableError.new(e, url), "Could not reach host #{e.class}: #{e.message}"
    end

    def normalize_url(uri)
      u = @url_normalizer.call(uri)
      if !u.is_a?(::LogStash::Util::SafeURI)
        raise "URL Normalizer returned a '#{u.class}' rather than a SafeURI! This shouldn't happen!"
      end
      u
    end

    # Reconciles the pool with +new_urls+: adds unknown URLs, removes URLs no
    # longer present, then runs an inline healthcheck. No-op when nil.
    def update_urls(new_urls)
      return if new_urls.nil?

      # Normalize URLs
      new_urls = new_urls.map(&method(:normalize_url))

      # Used for logging nicely
      state_changes = {:removed => [], :added => []}
      @state_mutex.synchronize do
        # Add new connections
        new_urls.each do |url|
          # URI objects don't have real hash equality! So, since this isn't perf sensitive we do a linear scan
          unless @url_info.keys.include?(url)
            state_changes[:added] << url
            add_url(url)
          end
        end

        # Delete connections not in the new list
        @url_info.each do |url,_|
          unless new_urls.include?(url)
            state_changes[:removed] << url
            remove_url(url)
          end
        end
      end

      if state_changes[:removed].size > 0 || state_changes[:added].size > 0
        if logger.info?
          logger.info("Elasticsearch pool URLs updated", :changes => state_changes)
        end
      end

      # Run an inline healthcheck anytime URLs are updated
      # This guarantees that during startup / post-startup
      # sniffing we don't have idle periods waiting for the
      # periodic sniffer to allow new hosts to come online
      healthcheck!
    end

    def size
      @state_mutex.synchronize { @url_info.size }
    end

    # FIXME(review): this is a copy-paste duplicate of #size and returns the
    # pool size, NOT the ES versions its name promises. Behavior is kept
    # unchanged here because callers are outside this file; confirm intent
    # before changing the return value (e.g. to the per-URL :version values).
    def es_versions
      @state_mutex.synchronize { @url_info.size }
    end

    def add_url(url)
      @url_info[url] ||= empty_url_meta
    end

    def remove_url(url)
      @url_info.delete(url)
    end

    def empty_url_meta
      {
        :in_use => 0,
        :state => :unknown
      }
    end

    # Checks out a connection, yields it, and always checks it back in.
    # Connections that raise HostUnreachableError are marked dead;
    # BadResponseCodeError is re-raised without killing the connection since
    # bad status codes are often transient.
    def with_connection
      url, url_meta = get_connection

      # Custom error class used here so that users may retry attempts if they receive this error
      # should they choose to
      raise NoConnectionAvailableError, "No Available connections" unless url
      yield url, url_meta
    rescue HostUnreachableError => e
      # Mark the connection as dead here since this is likely not transient
      mark_dead(url, e)
      raise e
    rescue BadResponseCodeError => e
      # These aren't discarded from the pool because these are often very transient
      # errors
      raise e
    ensure
      return_connection(url)
    end

    def mark_dead(url, error)
      @state_mutex.synchronize do
        meta = @url_info[url]
        # In case a sniff happened removing the metadata just before there's nothing to mark
        # This is an extreme edge case, but it can happen!
        return unless meta
        logger.warn("Marking url as dead. Last error: [#{error.class}] #{error.message}",
                    :url => url, :error_message => error.message, :error_class => error.class.name)
        meta[:state] = :dead
        meta[:last_error] = error
        meta[:last_errored_at] = Time.now
      end
    end

    def url_meta(url)
      @state_mutex.synchronize do
        @url_info[url]
      end
    end

    # Picks a random connection among the non-dead ones with the lowest
    # :in_use count and increments that count. Returns nil when none is live.
    def get_connection
      @state_mutex.synchronize do
        # The goal here is to pick a random connection from the least-in-use connections
        # We want some randomness so that we don't hit the same node over and over, but
        # we also want more 'fair' behavior in the event of high concurrency
        eligible_set = nil
        lowest_value_seen = nil
        @url_info.each do |url,meta|
          meta_in_use = meta[:in_use]
          next if meta[:state] == :dead

          if lowest_value_seen.nil? || meta_in_use < lowest_value_seen
            lowest_value_seen = meta_in_use
            eligible_set = [[url, meta]]
          elsif lowest_value_seen == meta_in_use
            eligible_set << [url, meta]
          end
        end

        return nil if eligible_set.nil?

        pick, pick_meta = eligible_set.sample
        pick_meta[:in_use] += 1

        [pick, pick_meta]
      end
    end

    def return_connection(url)
      @state_mutex.synchronize do
        if @url_info[url] # Guard against the condition where the connection has already been deleted
          @url_info[url][:in_use] -= 1
        end
      end
    end

    def get_es_version(url)
      request = perform_request_to_url(url, :get, ROOT_URI_PATH)
      LogStash::Json.load(request.body)["version"]["number"]
    end

    def set_new_major_version(version)
      @maximum_seen_major_version = version
      if @maximum_seen_major_version >= 6
        @logger.warn("Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type", :es_version => @maximum_seen_major_version)
      end
    end
  end
end; end; end; end;