logstash-output-opensearch 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/ADMINS.md +29 -0
  5. data/CODE_OF_CONDUCT.md +25 -0
  6. data/CONTRIBUTING.md +99 -0
  7. data/DEVELOPER_GUIDE.md +208 -0
  8. data/Gemfile +20 -0
  9. data/LICENSE +202 -0
  10. data/MAINTAINERS.md +71 -0
  11. data/NOTICE +2 -0
  12. data/README.md +37 -0
  13. data/RELEASING.md +36 -0
  14. data/SECURITY.md +3 -0
  15. data/lib/logstash/outputs/opensearch.rb +449 -0
  16. data/lib/logstash/outputs/opensearch/distribution_checker.rb +44 -0
  17. data/lib/logstash/outputs/opensearch/http_client.rb +465 -0
  18. data/lib/logstash/outputs/opensearch/http_client/manticore_adapter.rb +140 -0
  19. data/lib/logstash/outputs/opensearch/http_client/pool.rb +467 -0
  20. data/lib/logstash/outputs/opensearch/http_client_builder.rb +182 -0
  21. data/lib/logstash/outputs/opensearch/template_manager.rb +60 -0
  22. data/lib/logstash/outputs/opensearch/templates/ecs-disabled/1x.json +44 -0
  23. data/lib/logstash/outputs/opensearch/templates/ecs-disabled/7x.json +44 -0
  24. data/lib/logstash/plugin_mixins/opensearch/api_configs.rb +168 -0
  25. data/lib/logstash/plugin_mixins/opensearch/common.rb +294 -0
  26. data/lib/logstash/plugin_mixins/opensearch/noop_distribution_checker.rb +18 -0
  27. data/logstash-output-opensearch.gemspec +40 -0
  28. data/spec/fixtures/_nodes/nodes.json +74 -0
  29. data/spec/fixtures/htpasswd +2 -0
  30. data/spec/fixtures/nginx_reverse_proxy.conf +22 -0
  31. data/spec/fixtures/scripts/painless/scripted_update.painless +2 -0
  32. data/spec/fixtures/scripts/painless/scripted_update_nested.painless +1 -0
  33. data/spec/fixtures/scripts/painless/scripted_upsert.painless +1 -0
  34. data/spec/integration/outputs/compressed_indexing_spec.rb +76 -0
  35. data/spec/integration/outputs/create_spec.rb +76 -0
  36. data/spec/integration/outputs/delete_spec.rb +72 -0
  37. data/spec/integration/outputs/index_spec.rb +164 -0
  38. data/spec/integration/outputs/index_version_spec.rb +110 -0
  39. data/spec/integration/outputs/ingest_pipeline_spec.rb +82 -0
  40. data/spec/integration/outputs/metrics_spec.rb +75 -0
  41. data/spec/integration/outputs/no_opensearch_on_startup_spec.rb +67 -0
  42. data/spec/integration/outputs/painless_update_spec.rb +147 -0
  43. data/spec/integration/outputs/parent_spec.rb +103 -0
  44. data/spec/integration/outputs/retry_spec.rb +182 -0
  45. data/spec/integration/outputs/routing_spec.rb +70 -0
  46. data/spec/integration/outputs/sniffer_spec.rb +70 -0
  47. data/spec/integration/outputs/templates_spec.rb +105 -0
  48. data/spec/integration/outputs/update_spec.rb +123 -0
  49. data/spec/opensearch_spec_helper.rb +141 -0
  50. data/spec/spec_helper.rb +19 -0
  51. data/spec/unit/http_client_builder_spec.rb +194 -0
  52. data/spec/unit/outputs/error_whitelist_spec.rb +62 -0
  53. data/spec/unit/outputs/opensearch/http_client/manticore_adapter_spec.rb +159 -0
  54. data/spec/unit/outputs/opensearch/http_client/pool_spec.rb +306 -0
  55. data/spec/unit/outputs/opensearch/http_client_spec.rb +292 -0
  56. data/spec/unit/outputs/opensearch/template_manager_spec.rb +36 -0
  57. data/spec/unit/outputs/opensearch_proxy_spec.rb +112 -0
  58. data/spec/unit/outputs/opensearch_spec.rb +800 -0
  59. data/spec/unit/outputs/opensearch_ssl_spec.rb +179 -0
  60. metadata +289 -0
  61. metadata.gz.sig +0 -0
@@ -0,0 +1,294 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ #
3
+ # The OpenSearch Contributors require contributions made to
4
+ # this file be licensed under the Apache-2.0 license or a
5
+ # compatible open source license.
6
+ #
7
+ # Modifications Copyright OpenSearch Contributors. See
8
+ # GitHub history for details.
9
+
10
+ require "logstash/outputs/opensearch/template_manager"
11
+
12
module LogStash; module PluginMixins; module OpenSearch
  # This module defines common methods that can be reused by alternate
  # opensearch output plugins: client construction, host setup, bulk
  # submission with exponential-backoff retries, and DLQ handling.
  module Common

    attr_reader :hosts

    # These codes apply to documents, not at the request level
    DOC_DLQ_CODES = [400, 404]
    DOC_SUCCESS_CODES = [200, 201]
    DOC_CONFLICT_CODE = 409

    # Perform some OpenSearch options validations and build the HttpClient.
    # Note that this method may set the @user, @password, @hosts and @client ivars as a side effect.
    # @param distribution_checker [Object, nil] optional checker injected into the client params
    # @return [HttpClient] the new http client
    def build_client(distribution_checker=nil)
      params["distribution_checker"] = distribution_checker
      # host validation & setup must happen inside build_client because
      # logstash monitoring and management rely on directly calling build_client
      setup_hosts

      params["metric"] = metric
      if @proxy.eql?('')
        @logger.warn "Supplied proxy setting (proxy => '') has no effect"
      end
      ::LogStash::Outputs::OpenSearch::HttpClientBuilder.build(@logger, @hosts, params)
    end

    # Normalize @hosts to an Array, defaulting to localhost when none are configured.
    def setup_hosts
      @hosts = Array(@hosts)
      if @hosts.empty?
        @logger.info("No 'host' set in opensearch output. Defaulting to localhost")
        @hosts.replace(["localhost"])
      end
    end

    # @return [Boolean] true when hosts is exactly the config-default host object
    def hosts_default?(hosts)
      # NOTE: would be nice if pipeline allowed us a clean way to detect a config default :
      hosts.is_a?(Array) && hosts.size == 1 && hosts.first.equal?(LogStash::PluginMixins::OpenSearch::APIConfigs::DEFAULT_HOST)
    end
    private :hosts_default?


    # Plugin initialization extension point (after a successful OpenSearch connection).
    def finish_register
    end
    protected :finish_register

    def last_version
      client.last_version
    end

    def maximum_seen_major_version
      client.maximum_seen_major_version
    end

    # @return [Boolean] true once any node has reported a major version
    def successful_connection?
      !!maximum_seen_major_version
    end

    # launch a thread that waits for an initial successful connection to the OpenSearch cluster to call the given block
    # @param block [Proc] the block to execute upon initial successful connection
    # @return [Thread] the successful connection wait thread
    def after_successful_connection(&block)
      Thread.new do
        sleep_interval = @retry_initial_interval
        until successful_connection? || @stopping.true?
          @logger.debug("Waiting for connectivity to OpenSearch cluster, retrying in #{sleep_interval}s")
          sleep_interval = sleep_for_interval(sleep_interval)
        end
        block.call if successful_connection?
      end
    end
    private :after_successful_connection

    # Fetch the cluster uuid from the root endpoint and record it in plugin metadata.
    # Failures are logged but never raised — this is best-effort telemetry.
    def discover_cluster_uuid
      return unless defined?(plugin_metadata)
      cluster_info = client.get('/')
      plugin_metadata.set(:cluster_uuid, cluster_info['cluster_uuid'])
    rescue => e
      @logger.error("Unable to retrieve OpenSearch cluster uuid", message: e.message, exception: e.class, backtrace: e.backtrace)
    end

    # Submit actions, re-submitting any that failed with a retryable status
    # until everything succeeds (or we are stopping). Backoff doubles per pass.
    def retrying_submit(actions)
      # Initially we submit the full list of actions
      submit_actions = actions

      sleep_interval = @retry_initial_interval

      while submit_actions && submit_actions.length > 0

        # We retry with whatever didn't succeed
        begin
          submit_actions = submit(submit_actions)
          if submit_actions && submit_actions.size > 0
            @logger.info("Retrying individual bulk actions that failed or were rejected by the previous bulk request", count: submit_actions.size)
          end
        rescue => e
          @logger.error("Encountered an unexpected error submitting a bulk request, will retry",
                        message: e.message, exception: e.class, backtrace: e.backtrace)
        end

        # Everything was a success!
        break if !submit_actions || submit_actions.empty?

        # If we're retrying the action sleep for the recommended interval
        # Double the interval for the next time through to achieve exponential backoff
        sleep_interval = sleep_for_interval(sleep_interval)
      end
    end

    # Sleep for the given interval, then return the next (doubled, capped) interval.
    def sleep_for_interval(sleep_interval)
      stoppable_sleep(sleep_interval)
      next_sleep_interval(sleep_interval)
    end

    # Sleep that wakes up early if the plugin is stopping.
    def stoppable_sleep(interval)
      Stud.stoppable_sleep(interval) { @stopping.true? }
    end

    # Exponential backoff: double the interval, capped at @retry_max_interval.
    def next_sleep_interval(current_interval)
      doubled = current_interval * 2
      doubled > @retry_max_interval ? @retry_max_interval : doubled
    end

    # Route a permanently-failed document to the DLQ if one is configured,
    # otherwise log and drop it (previous behavior, kept for bwc).
    def handle_dlq_status(message, action, status, response)
      # To support bwc, we check if DLQ exists. otherwise we log and drop event (previous behavior)
      if @dlq_writer
        event, action = action.event, [action[0], action[1], action[2]]
        # TODO: Change this to send a map with { :status => status, :action => action } in the future
        @dlq_writer.write(event, "#{message} status: #{status}, action: #{action}, response: #{response}")
      else
        if dig_value(response, 'index', 'error', 'type') == 'invalid_index_name_exception'
          level = :error
        else
          level = :warn
        end
        @logger.send level, message, status: status, action: action, response: response
      end
    end

    private

    # Send one bulk request and classify each per-document response.
    # @return [Array, nil] actions that should be retried, or nil when everything
    #   succeeded (or we aborted because of shutdown)
    def submit(actions)
      bulk_response = safe_bulk(actions)

      # If the response is nil that means we were in a retry loop
      # and aborted since we're shutting down
      return if bulk_response.nil?

      # If it did return and there are no errors we're good as well
      if bulk_response["errors"]
        @bulk_request_metrics.increment(:with_errors)
      else
        @bulk_request_metrics.increment(:successes)
        @document_level_metrics.increment(:successes, actions.size)
        return
      end

      responses = bulk_response["items"]
      if responses.size != actions.size # can not map action -> response reliably
        # an ES bug (on 7.10.2, 7.11.1) where a _bulk request to index X documents would return Y (> X) items
        msg = "Sent #{actions.size} documents but OpenSearch returned #{responses.size} responses"
        @logger.warn(msg, actions: actions, responses: responses)
        fail("#{msg} (likely a bug with _bulk endpoint)")
      end

      actions_to_retry = []
      responses.each_with_index do |response,idx|
        action_type, action_props = response.first

        status = action_props["status"]
        error = action_props["error"]
        action = actions[idx]
        action_params = action[1]

        # Retry logic: If it is success, we move on. If it is a failure, we have 3 paths:
        # - For 409, we log and drop. there is nothing we can do
        # - For a mapping error, we send to dead letter queue for a human to intervene at a later point.
        # - For everything else there's mastercard. Yep, and we retry indefinitely. This should fix #572 and other transient network issues
        if DOC_SUCCESS_CODES.include?(status)
          @document_level_metrics.increment(:successes)
          next
        elsif DOC_CONFLICT_CODE == status
          @document_level_metrics.increment(:non_retryable_failures)
          @logger.warn "Failed action", status: status, action: action, response: response if log_failure_type?(error)
          next
        elsif DOC_DLQ_CODES.include?(status)
          handle_dlq_status("Could not index event to OpenSearch.", action, status, response)
          @document_level_metrics.increment(:non_retryable_failures)
          next
        else
          # only log what the user whitelisted
          @document_level_metrics.increment(:retryable_failures)
          @logger.info "Retrying failed action", status: status, action: action, error: error if log_failure_type?(error)
          actions_to_retry << action
        end
      end

      actions_to_retry
    end

    # @return [Boolean] false when the failure type is whitelisted from logging
    def log_failure_type?(failure)
      !failure_type_logging_whitelist.include?(failure["type"])
    end

    # Rescue retryable errors during bulk submission
    # @param actions a [action, params, event.to_hash] tuple
    # @return response [Hash] which contains 'errors' and processed 'items' entries
    def safe_bulk(actions)
      sleep_interval = @retry_initial_interval
      begin
        @client.bulk(actions) # returns { 'errors': ..., 'items': ... }
      rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::HostUnreachableError => e
        # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
        # and let the user sort it out from there
        @logger.error(
          "Attempted to send a bulk request but OpenSearch appears to be unreachable or down",
          message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
        )
        @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)

        # We retry until there are no errors! Errors should all go to the retry queue
        sleep_interval = sleep_for_interval(sleep_interval)
        @bulk_request_metrics.increment(:failures)
        retry unless @stopping.true?
      rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::NoConnectionAvailableError => e
        @logger.error(
          "Attempted to send a bulk request but there are no living connections in the pool " +
          "(perhaps OpenSearch is unreachable or down?)",
          message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
        )

        sleep_interval = sleep_for_interval(sleep_interval)
        @bulk_request_metrics.increment(:failures)
        retry unless @stopping.true?
      rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::BadResponseCodeError => e
        @bulk_request_metrics.increment(:failures)
        log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s, :content_length => e.request_body.bytesize}
        log_hash[:body] = e.response_body if @logger.debug? # Generally this is too verbose
        message = "Encountered a retryable error (will retry with exponential backoff)"

        # We treat 429s as a special case because these really aren't errors, but
        # rather just OpenSearch telling us to back off a bit, which we do.
        # The other retryable code is 503, which are true errors
        # Even though we retry the user should be made aware of these
        if e.response_code == 429
          logger.debug(message, log_hash)
        else
          logger.error(message, log_hash)
        end

        sleep_interval = sleep_for_interval(sleep_interval)
        # FIX: honor shutdown here too — previously this branch retried
        # unconditionally, so sustained 429/503 responses blocked pipeline shutdown
        # (every other rescue branch already guards on @stopping).
        retry unless @stopping.true?
      rescue => e # Stuff that should never happen - print out full connection issues
        @logger.error(
          "An unknown error occurred sending a bulk request to OpenSearch (will retry indefinitely)",
          message: e.message, exception: e.class, backtrace: e.backtrace
        )
        @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)

        sleep_interval = sleep_for_interval(sleep_interval)
        @bulk_request_metrics.increment(:failures)
        retry unless @stopping.true?
      end
    end

    # @return [Boolean] true when a real (non-dummy) DLQ writer is available
    def dlq_enabled?
      # TODO there should be a better way to query if DLQ is enabled
      # See more in: https://github.com/elastic/logstash/issues/8064
      respond_to?(:execution_context) && execution_context.respond_to?(:dlq_writer) &&
        !execution_context.dlq_writer.inner_writer.is_a?(::LogStash::Util::DummyDeadLetterQueueWriter)
    end

    # Recursively dig into nested Hashes; raises TypeError on non-Hash intermediates,
    # returns nil as soon as a key resolves to nil.
    def dig_value(val, first_key, *rest_keys)
      fail(TypeError, "cannot dig value from #{val.class}") unless val.kind_of?(Hash)
      val = val[first_key]
      return val if rest_keys.empty? || val == nil
      dig_value(val, *rest_keys)
    end
  end
end; end; end
@@ -0,0 +1,18 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ #
3
+ # The OpenSearch Contributors require contributions made to
4
+ # this file be licensed under the Apache-2.0 license or a
5
+ # compatible open source license.
6
+ #
7
+ # Modifications Copyright OpenSearch Contributors. See
8
+ # GitHub history for details.
9
+
10
module LogStash; module PluginMixins; module OpenSearch
  # Distribution checker that accepts every cluster unconditionally.
  # Used when distribution validation should be skipped entirely.
  class NoopDistributionChecker
    # Always report the target as supported, regardless of pool, url or version.
    # @return [true]
    def is_supported?(pool, url, major_version)
      true
    end

    # Shared stateless singleton — safe to reuse since the checker holds no state.
    INSTANCE = self.new
  end
end; end; end
@@ -0,0 +1,40 @@
1
Gem::Specification.new do |s|
  # Gem identity
  s.name = 'logstash-output-opensearch'
  s.version = '1.0.0'
  s.licenses = ['Apache-2.0']
  s.summary = "Stores logs in OpenSearch"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gem. This gem is not a stand-alone program"
  s.authors = ["Elastic", "OpenSearch Contributors"]
  s.email = 'opensearch@amazon.com'
  s.homepage = "https://opensearch.org/"
  s.require_paths = ["lib"]
  s.platform = RUBY_PLATFORM

  # Packaged files and the subset used as tests
  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","Gemfile","LICENSE","NOTICE", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = {
    "logstash_plugin" => "true",
    "logstash_group" => "output",
    "source_code_uri" => "https://github.com/opensearch-project/logstash-output-opensearch"
  }

  # Gem signing: only load the private key when actually building via `gem`
  s.cert_chain = ['public.pem']
  s.signing_key = File.expand_path("private.pem") if $0 =~ /gem\z/

  # Runtime dependencies
  s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0'
  s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~>1.0'

  # Development-only dependencies
  s.add_development_dependency 'logstash-codec-plain'
  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'flores'
  s.add_development_dependency 'cabin', ['~> 0.6']
end
@@ -0,0 +1,74 @@
1
+ {
2
+ "_nodes" : {
3
+ "total" : 3,
4
+ "successful" : 3,
5
+ "failed" : 0
6
+ },
7
+ "cluster_name" : "opensearch",
8
+ "nodes" : {
9
+ "kVPTh7ZvSgWmTRMy-4YExQ" : {
10
+ "name" : "kVPTh7Z",
11
+ "transport_address" : "127.0.0.1:9300",
12
+ "host" : "dev-master",
13
+ "ip" : "127.0.0.1",
14
+ "version" : "7.0.0",
15
+ "build_flavor" : "oss",
16
+ "build_type" : "tar",
17
+ "build_hash" : "b0e7036",
18
+ "roles" : [
19
+ "master"
20
+ ],
21
+ "http" : {
22
+ "bound_address" : [
23
+ "127.0.0.1:9200",
24
+ "[::1]:9200"
25
+ ],
26
+ "publish_address" : "dev-master/127.0.0.1:9200",
27
+ "max_content_length_in_bytes" : 104857600
28
+ }
29
+ },
30
+ "J47OFlfpSHGFwRJSF2hbcg" : {
31
+ "name" : "J47OFlf",
32
+ "transport_address" : "127.0.0.1:9301",
33
+ "host" : "dev-masterdata",
34
+ "ip" : "127.0.0.1",
35
+ "version" : "7.0.0",
36
+ "build_flavor" : "oss",
37
+ "build_type" : "tar",
38
+ "build_hash" : "b0e7036",
39
+ "roles" : [
40
+ "master",
41
+ "data"
42
+ ],
43
+ "http" : {
44
+ "bound_address" : [
45
+ "127.0.0.1:9201",
46
+ "[::1]:9201"
47
+ ],
48
+ "publish_address" : "dev-masterdata/127.0.0.1:9201",
49
+ "max_content_length_in_bytes" : 104857600
50
+ }
51
+ },
52
+ "pDYE99f0QmutVb8gvsf-yw" : {
53
+ "name" : "pDYE99f",
54
+ "transport_address" : "127.0.0.1:9302",
55
+ "host" : "dev-data",
56
+ "ip" : "127.0.0.1",
57
+ "version" : "7.0.0",
58
+ "build_flavor" : "oss",
59
+ "build_type" : "tar",
60
+ "build_hash" : "b0e7036",
61
+ "roles" : [
62
+ "data"
63
+ ],
64
+ "http" : {
65
+ "bound_address" : [
66
+ "127.0.0.1:9202",
67
+ "[::1]:9202"
68
+ ],
69
+ "publish_address" : "dev-data/127.0.0.1:9202",
70
+ "max_content_length_in_bytes" : 104857600
71
+ }
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,2 @@
1
+ fancyuser:$apr1$Eq3/Qh40$MRzg6mccKUVmx8HJvlqkK1
2
+ simpleuser:$apr1$hQQ4QWmo$ECyA1DFO3iCRs07zVXqAq1
@@ -0,0 +1,22 @@
1
# Test fixture: TLS-terminating, basic-auth reverse proxy in front of a
# local OpenSearch instance (used by the integration specs).
worker_processes 1;
daemon off; # run in foreground

events {
  worker_connections 1024;
}

http {
  server {
    # FIX: "ssl on;" is deprecated (and removed in nginx 1.25+); enable TLS
    # via the listen directive instead.
    listen 9900 ssl default_server;
    ssl_certificate server.crt;
    ssl_certificate_key server.key;
    client_max_body_size 200m; # allow large _bulk request bodies

    location / {
      proxy_pass http://localhost:9200;
      auth_basic "Restricted Content";
      auth_basic_user_file htpasswd;
    }
  }
}