logstash-output-opensearch 1.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/ADMINS.md +29 -0
  5. data/CODE_OF_CONDUCT.md +25 -0
  6. data/CONTRIBUTING.md +99 -0
  7. data/DEVELOPER_GUIDE.md +208 -0
  8. data/Gemfile +20 -0
  9. data/LICENSE +202 -0
  10. data/MAINTAINERS.md +71 -0
  11. data/NOTICE +2 -0
  12. data/README.md +37 -0
  13. data/RELEASING.md +36 -0
  14. data/SECURITY.md +3 -0
  15. data/lib/logstash/outputs/opensearch.rb +449 -0
  16. data/lib/logstash/outputs/opensearch/distribution_checker.rb +44 -0
  17. data/lib/logstash/outputs/opensearch/http_client.rb +465 -0
  18. data/lib/logstash/outputs/opensearch/http_client/manticore_adapter.rb +140 -0
  19. data/lib/logstash/outputs/opensearch/http_client/pool.rb +467 -0
  20. data/lib/logstash/outputs/opensearch/http_client_builder.rb +182 -0
  21. data/lib/logstash/outputs/opensearch/template_manager.rb +60 -0
  22. data/lib/logstash/outputs/opensearch/templates/ecs-disabled/1x.json +44 -0
  23. data/lib/logstash/outputs/opensearch/templates/ecs-disabled/7x.json +44 -0
  24. data/lib/logstash/plugin_mixins/opensearch/api_configs.rb +168 -0
  25. data/lib/logstash/plugin_mixins/opensearch/common.rb +294 -0
  26. data/lib/logstash/plugin_mixins/opensearch/noop_distribution_checker.rb +18 -0
  27. data/logstash-output-opensearch.gemspec +40 -0
  28. data/spec/fixtures/_nodes/nodes.json +74 -0
  29. data/spec/fixtures/htpasswd +2 -0
  30. data/spec/fixtures/nginx_reverse_proxy.conf +22 -0
  31. data/spec/fixtures/scripts/painless/scripted_update.painless +2 -0
  32. data/spec/fixtures/scripts/painless/scripted_update_nested.painless +1 -0
  33. data/spec/fixtures/scripts/painless/scripted_upsert.painless +1 -0
  34. data/spec/integration/outputs/compressed_indexing_spec.rb +76 -0
  35. data/spec/integration/outputs/create_spec.rb +76 -0
  36. data/spec/integration/outputs/delete_spec.rb +72 -0
  37. data/spec/integration/outputs/index_spec.rb +164 -0
  38. data/spec/integration/outputs/index_version_spec.rb +110 -0
  39. data/spec/integration/outputs/ingest_pipeline_spec.rb +82 -0
  40. data/spec/integration/outputs/metrics_spec.rb +75 -0
  41. data/spec/integration/outputs/no_opensearch_on_startup_spec.rb +67 -0
  42. data/spec/integration/outputs/painless_update_spec.rb +147 -0
  43. data/spec/integration/outputs/parent_spec.rb +103 -0
  44. data/spec/integration/outputs/retry_spec.rb +182 -0
  45. data/spec/integration/outputs/routing_spec.rb +70 -0
  46. data/spec/integration/outputs/sniffer_spec.rb +70 -0
  47. data/spec/integration/outputs/templates_spec.rb +105 -0
  48. data/spec/integration/outputs/update_spec.rb +123 -0
  49. data/spec/opensearch_spec_helper.rb +141 -0
  50. data/spec/spec_helper.rb +19 -0
  51. data/spec/unit/http_client_builder_spec.rb +194 -0
  52. data/spec/unit/outputs/error_whitelist_spec.rb +62 -0
  53. data/spec/unit/outputs/opensearch/http_client/manticore_adapter_spec.rb +159 -0
  54. data/spec/unit/outputs/opensearch/http_client/pool_spec.rb +306 -0
  55. data/spec/unit/outputs/opensearch/http_client_spec.rb +292 -0
  56. data/spec/unit/outputs/opensearch/template_manager_spec.rb +36 -0
  57. data/spec/unit/outputs/opensearch_proxy_spec.rb +112 -0
  58. data/spec/unit/outputs/opensearch_spec.rb +800 -0
  59. data/spec/unit/outputs/opensearch_ssl_spec.rb +179 -0
  60. metadata +289 -0
  61. metadata.gz.sig +0 -0
@@ -0,0 +1,294 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ #
3
+ # The OpenSearch Contributors require contributions made to
4
+ # this file be licensed under the Apache-2.0 license or a
5
+ # compatible open source license.
6
+ #
7
+ # Modifications Copyright OpenSearch Contributors. See
8
+ # GitHub history for details.
9
+
10
+ require "logstash/outputs/opensearch/template_manager"
11
+
12
+ module LogStash; module PluginMixins; module OpenSearch
13
+ module Common
14
+
15
+ # This module defines common methods that can be reused by alternate opensearch output plugins.
16
+
17
+ attr_reader :hosts
18
+
19
+ # These codes apply to documents, not at the request level
20
+ DOC_DLQ_CODES = [400, 404]
21
+ DOC_SUCCESS_CODES = [200, 201]
22
+ DOC_CONFLICT_CODE = 409
23
+
24
+ # Perform some OpenSearch options validations and Build the HttpClient.
25
+ # Note that this method may set the @user, @password, @hosts and @client ivars as a side effect.
26
+ # @return [HttpClient] the new http client
27
+ def build_client(distribution_checker=nil)
28
+ params["distribution_checker"] = distribution_checker
29
+ # the following 3 options validation & setup methods are called inside build_client
30
+ # because they must be executed prior to building the client and logstash
31
+ # monitoring and management rely on directly calling build_client
32
+ setup_hosts
33
+
34
+ params["metric"] = metric
35
+ if @proxy.eql?('')
36
+ @logger.warn "Supplied proxy setting (proxy => '') has no effect"
37
+ end
38
+ ::LogStash::Outputs::OpenSearch::HttpClientBuilder.build(@logger, @hosts, params)
39
+ end
40
+
41
+ def setup_hosts
42
+ @hosts = Array(@hosts)
43
+ if @hosts.empty?
44
+ @logger.info("No 'host' set in opensearch output. Defaulting to localhost")
45
+ @hosts.replace(["localhost"])
46
+ end
47
+ end
48
+
49
+ def hosts_default?(hosts)
50
+ # NOTE: would be nice if the pipeline allowed us a clean way to detect a config default.
51
+ hosts.is_a?(Array) && hosts.size == 1 && hosts.first.equal?(LogStash::PluginMixins::OpenSearch::APIConfigs::DEFAULT_HOST)
52
+ end
53
+ private :hosts_default?
54
+
55
+
56
+ # Plugin initialization extension point (after a successful OpenSearch connection).
57
+ def finish_register
58
+ end
59
+ protected :finish_register
60
+
61
+ def last_version
62
+ client.last_version
63
+ end
64
+
65
+ def maximum_seen_major_version
66
+ client.maximum_seen_major_version
67
+ end
68
+
69
+ def successful_connection?
70
+ !!maximum_seen_major_version
71
+ end
72
+
73
+ # launch a thread that waits for an initial successful connection to the OpenSearch cluster to call the given block
74
+ # @param block [Proc] the block to execute upon initial successful connection
75
+ # @return [Thread] the successful connection wait thread
76
+ def after_successful_connection(&block)
77
+ Thread.new do
78
+ sleep_interval = @retry_initial_interval
79
+ until successful_connection? || @stopping.true?
80
+ @logger.debug("Waiting for connectivity to OpenSearch cluster, retrying in #{sleep_interval}s")
81
+ sleep_interval = sleep_for_interval(sleep_interval)
82
+ end
83
+ block.call if successful_connection?
84
+ end
85
+ end
86
+ private :after_successful_connection
87
+
88
+ def discover_cluster_uuid
89
+ return unless defined?(plugin_metadata)
90
+ cluster_info = client.get('/')
91
+ plugin_metadata.set(:cluster_uuid, cluster_info['cluster_uuid'])
92
+ rescue => e
93
+ @logger.error("Unable to retrieve OpenSearch cluster uuid", message: e.message, exception: e.class, backtrace: e.backtrace)
94
+ end
95
+
96
+ def retrying_submit(actions)
97
+ # Initially we submit the full list of actions
98
+ submit_actions = actions
99
+
100
+ sleep_interval = @retry_initial_interval
101
+
102
+ while submit_actions && submit_actions.length > 0
103
+
104
+ # We retry with whatever didn't succeed
105
+ begin
106
+ submit_actions = submit(submit_actions)
107
+ if submit_actions && submit_actions.size > 0
108
+ @logger.info("Retrying individual bulk actions that failed or were rejected by the previous bulk request", count: submit_actions.size)
109
+ end
110
+ rescue => e
111
+ @logger.error("Encountered an unexpected error submitting a bulk request, will retry",
112
+ message: e.message, exception: e.class, backtrace: e.backtrace)
113
+ end
114
+
115
+ # Everything was a success!
116
+ break if !submit_actions || submit_actions.empty?
117
+
118
+ # If we're retrying the action sleep for the recommended interval
119
+ # Double the interval for the next time through to achieve exponential backoff
120
+ sleep_interval = sleep_for_interval(sleep_interval)
121
+ end
122
+ end
123
+
124
+ def sleep_for_interval(sleep_interval)
125
+ stoppable_sleep(sleep_interval)
126
+ next_sleep_interval(sleep_interval)
127
+ end
128
+
129
+ def stoppable_sleep(interval)
130
+ Stud.stoppable_sleep(interval) { @stopping.true? }
131
+ end
132
+
133
+ def next_sleep_interval(current_interval)
134
+ doubled = current_interval * 2
135
+ doubled > @retry_max_interval ? @retry_max_interval : doubled
136
+ end
137
+
138
+ def handle_dlq_status(message, action, status, response)
139
+ # To support backwards compatibility, we check if the DLQ exists. Otherwise we log and drop the event (previous behavior)
140
+ if @dlq_writer
141
+ event, action = action.event, [action[0], action[1], action[2]]
142
+ # TODO: Change this to send a map with { :status => status, :action => action } in the future
143
+ @dlq_writer.write(event, "#{message} status: #{status}, action: #{action}, response: #{response}")
144
+ else
145
+ if dig_value(response, 'index', 'error', 'type') == 'invalid_index_name_exception'
146
+ level = :error
147
+ else
148
+ level = :warn
149
+ end
150
+ @logger.send level, message, status: status, action: action, response: response
151
+ end
152
+ end
153
+
154
+ private
155
+
156
+ def submit(actions)
157
+ bulk_response = safe_bulk(actions)
158
+
159
+ # If the response is nil that means we were in a retry loop
160
+ # and aborted since we're shutting down
161
+ return if bulk_response.nil?
162
+
163
+ # If it did return and there are no errors we're good as well
164
+ if bulk_response["errors"]
165
+ @bulk_request_metrics.increment(:with_errors)
166
+ else
167
+ @bulk_request_metrics.increment(:successes)
168
+ @document_level_metrics.increment(:successes, actions.size)
169
+ return
170
+ end
171
+
172
+ responses = bulk_response["items"]
173
+ if responses.size != actions.size # can not map action -> response reliably
174
+ # an ES bug (on 7.10.2, 7.11.1) where a _bulk request to index X documents would return Y (> X) items
175
+ msg = "Sent #{actions.size} documents but OpenSearch returned #{responses.size} responses"
176
+ @logger.warn(msg, actions: actions, responses: responses)
177
+ fail("#{msg} (likely a bug with _bulk endpoint)")
178
+ end
179
+
180
+ actions_to_retry = []
181
+ responses.each_with_index do |response,idx|
182
+ action_type, action_props = response.first
183
+
184
+ status = action_props["status"]
185
+ error = action_props["error"]
186
+ action = actions[idx]
187
+ action_params = action[1]
188
+
189
+ # Retry logic: If it is success, we move on. If it is a failure, we have 3 paths:
190
+ # - For 409, we log and drop. there is nothing we can do
191
+ # - For a mapping error, we send to dead letter queue for a human to intervene at a later point.
192
+ # - For everything else, we retry indefinitely. This should fix #572 and other transient network issues
193
+ if DOC_SUCCESS_CODES.include?(status)
194
+ @document_level_metrics.increment(:successes)
195
+ next
196
+ elsif DOC_CONFLICT_CODE == status
197
+ @document_level_metrics.increment(:non_retryable_failures)
198
+ @logger.warn "Failed action", status: status, action: action, response: response if log_failure_type?(error)
199
+ next
200
+ elsif DOC_DLQ_CODES.include?(status)
201
+ handle_dlq_status("Could not index event to OpenSearch.", action, status, response)
202
+ @document_level_metrics.increment(:non_retryable_failures)
203
+ next
204
+ else
205
+ # only log what the user whitelisted
206
+ @document_level_metrics.increment(:retryable_failures)
207
+ @logger.info "Retrying failed action", status: status, action: action, error: error if log_failure_type?(error)
208
+ actions_to_retry << action
209
+ end
210
+ end
211
+
212
+ actions_to_retry
213
+ end
214
+
215
+ def log_failure_type?(failure)
216
+ !failure_type_logging_whitelist.include?(failure["type"])
217
+ end
218
+
219
+ # Rescue retryable errors during bulk submission
220
+ # @param actions a [action, params, event.to_hash] tuple
221
+ # @return response [Hash] which contains 'errors' and processed 'items' entries
222
+ def safe_bulk(actions)
223
+ sleep_interval = @retry_initial_interval
224
+ begin
225
+ @client.bulk(actions) # returns { 'errors': ..., 'items': ... }
226
+ rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::HostUnreachableError => e
227
+ # If we can't even connect to the server let's just print out the URL (:hosts is actually a URL)
228
+ # and let the user sort it out from there
229
+ @logger.error(
230
+ "Attempted to send a bulk request but OpenSearch appears to be unreachable or down",
231
+ message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
232
+ )
233
+ @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)
234
+
235
+ # We retry until there are no errors! Errors should all go to the retry queue
236
+ sleep_interval = sleep_for_interval(sleep_interval)
237
+ @bulk_request_metrics.increment(:failures)
238
+ retry unless @stopping.true?
239
+ rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::NoConnectionAvailableError => e
240
+ @logger.error(
241
+ "Attempted to send a bulk request but there are no living connections in the pool " +
242
+ "(perhaps OpenSearch is unreachable or down?)",
243
+ message: e.message, exception: e.class, will_retry_in_seconds: sleep_interval
244
+ )
245
+
246
+ sleep_interval = sleep_for_interval(sleep_interval)
247
+ @bulk_request_metrics.increment(:failures)
248
+ retry unless @stopping.true?
249
+ rescue ::LogStash::Outputs::OpenSearch::HttpClient::Pool::BadResponseCodeError => e
250
+ @bulk_request_metrics.increment(:failures)
251
+ log_hash = {:code => e.response_code, :url => e.url.sanitized.to_s, :content_length => e.request_body.bytesize}
252
+ log_hash[:body] = e.response_body if @logger.debug? # Generally this is too verbose
253
+ message = "Encountered a retryable error (will retry with exponential backoff)"
254
+
255
+ # We treat 429s as a special case because these really aren't errors, but
256
+ # rather just OpenSearch telling us to back off a bit, which we do.
257
+ # The other retryable code is 503, which are true errors
258
+ # Even though we retry the user should be made aware of these
259
+ if e.response_code == 429
260
+ logger.debug(message, log_hash)
261
+ else
262
+ logger.error(message, log_hash)
263
+ end
264
+
265
+ sleep_interval = sleep_for_interval(sleep_interval)
266
+ retry
267
+ rescue => e # Stuff that should never happen - print out full connection issues
268
+ @logger.error(
269
+ "An unknown error occurred sending a bulk request to OpenSearch (will retry indefinitely)",
270
+ message: e.message, exception: e.class, backtrace: e.backtrace
271
+ )
272
+ @logger.debug? && @logger.debug("Failed actions for last bad bulk request", :actions => actions)
273
+
274
+ sleep_interval = sleep_for_interval(sleep_interval)
275
+ @bulk_request_metrics.increment(:failures)
276
+ retry unless @stopping.true?
277
+ end
278
+ end
279
+
280
+ def dlq_enabled?
281
+ # TODO there should be a better way to query if DLQ is enabled
282
+ # See more in: https://github.com/elastic/logstash/issues/8064
283
+ respond_to?(:execution_context) && execution_context.respond_to?(:dlq_writer) &&
284
+ !execution_context.dlq_writer.inner_writer.is_a?(::LogStash::Util::DummyDeadLetterQueueWriter)
285
+ end
286
+
287
+ def dig_value(val, first_key, *rest_keys)
288
+ fail(TypeError, "cannot dig value from #{val.class}") unless val.kind_of?(Hash)
289
+ val = val[first_key]
290
+ return val if rest_keys.empty? || val == nil
291
+ dig_value(val, *rest_keys)
292
+ end
293
+ end
294
+ end; end; end
@@ -0,0 +1,18 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ #
3
+ # The OpenSearch Contributors require contributions made to
4
+ # this file be licensed under the Apache-2.0 license or a
5
+ # compatible open source license.
6
+ #
7
+ # Modifications Copyright OpenSearch Contributors. See
8
+ # GitHub history for details.
9
+
10
+ module LogStash; module PluginMixins; module OpenSearch
11
+ class NoopDistributionChecker
12
+ INSTANCE = self.new
13
+
14
+ def is_supported?(pool, url, major_version)
15
+ true
16
+ end
17
+ end
18
+ end; end; end
@@ -0,0 +1,40 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'logstash-output-opensearch'
3
+ s.version = '1.0.0'
4
+
5
+ s.licenses = ['Apache-2.0']
6
+ s.summary = "Stores logs in OpenSearch"
7
+ s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gem. This gem is not a stand-alone program"
8
+ s.authors = ["Elastic", "OpenSearch Contributors"]
9
+ s.email = 'opensearch@amazon.com'
10
+ s.homepage = "https://opensearch.org/"
11
+ s.require_paths = ["lib"]
12
+
13
+ s.platform = RUBY_PLATFORM
14
+
15
+ # Files
16
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","Gemfile","LICENSE","NOTICE", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
17
+
18
+ # Tests
19
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
20
+
21
+ # Special flag to let us know this is actually a logstash plugin
22
+ s.metadata = {
23
+ "logstash_plugin" => "true",
24
+ "logstash_group" => "output",
25
+ "source_code_uri" => "https://github.com/opensearch-project/logstash-output-opensearch"
26
+ }
27
+
28
+ s.cert_chain = ['public.pem']
29
+ s.signing_key = File.expand_path("private.pem") if $0 =~ /gem\z/
30
+
31
+ s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0'
32
+ s.add_runtime_dependency 'stud', ['>= 0.0.17', '~> 0.0']
33
+ s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
34
+ s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~>1.0'
35
+
36
+ s.add_development_dependency 'logstash-codec-plain'
37
+ s.add_development_dependency 'logstash-devutils'
38
+ s.add_development_dependency 'flores'
39
+ s.add_development_dependency 'cabin', ['~> 0.6']
40
+ end
@@ -0,0 +1,74 @@
1
+ {
2
+ "_nodes" : {
3
+ "total" : 3,
4
+ "successful" : 3,
5
+ "failed" : 0
6
+ },
7
+ "cluster_name" : "opensearch",
8
+ "nodes" : {
9
+ "kVPTh7ZvSgWmTRMy-4YExQ" : {
10
+ "name" : "kVPTh7Z",
11
+ "transport_address" : "127.0.0.1:9300",
12
+ "host" : "dev-master",
13
+ "ip" : "127.0.0.1",
14
+ "version" : "7.0.0",
15
+ "build_flavor" : "oss",
16
+ "build_type" : "tar",
17
+ "build_hash" : "b0e7036",
18
+ "roles" : [
19
+ "master"
20
+ ],
21
+ "http" : {
22
+ "bound_address" : [
23
+ "127.0.0.1:9200",
24
+ "[::1]:9200"
25
+ ],
26
+ "publish_address" : "dev-master/127.0.0.1:9200",
27
+ "max_content_length_in_bytes" : 104857600
28
+ }
29
+ },
30
+ "J47OFlfpSHGFwRJSF2hbcg" : {
31
+ "name" : "J47OFlf",
32
+ "transport_address" : "127.0.0.1:9301",
33
+ "host" : "dev-masterdata",
34
+ "ip" : "127.0.0.1",
35
+ "version" : "7.0.0",
36
+ "build_flavor" : "oss",
37
+ "build_type" : "tar",
38
+ "build_hash" : "b0e7036",
39
+ "roles" : [
40
+ "master",
41
+ "data"
42
+ ],
43
+ "http" : {
44
+ "bound_address" : [
45
+ "127.0.0.1:9201",
46
+ "[::1]:9201"
47
+ ],
48
+ "publish_address" : "dev-masterdata/127.0.0.1:9201",
49
+ "max_content_length_in_bytes" : 104857600
50
+ }
51
+ },
52
+ "pDYE99f0QmutVb8gvsf-yw" : {
53
+ "name" : "pDYE99f",
54
+ "transport_address" : "127.0.0.1:9302",
55
+ "host" : "dev-data",
56
+ "ip" : "127.0.0.1",
57
+ "version" : "7.0.0",
58
+ "build_flavor" : "oss",
59
+ "build_type" : "tar",
60
+ "build_hash" : "b0e7036",
61
+ "roles" : [
62
+ "data"
63
+ ],
64
+ "http" : {
65
+ "bound_address" : [
66
+ "127.0.0.1:9202",
67
+ "[::1]:9202"
68
+ ],
69
+ "publish_address" : "dev-data/127.0.0.1:9202",
70
+ "max_content_length_in_bytes" : 104857600
71
+ }
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,2 @@
1
+ fancyuser:$apr1$Eq3/Qh40$MRzg6mccKUVmx8HJvlqkK1
2
+ simpleuser:$apr1$hQQ4QWmo$ECyA1DFO3iCRs07zVXqAq1
@@ -0,0 +1,22 @@
1
+ worker_processes 1;
2
+ daemon off; # run in foreground
3
+
4
+ events {
5
+ worker_connections 1024;
6
+ }
7
+
8
+ http {
9
+ server {
10
+ listen 9900 default_server;
11
+ ssl on;
12
+ ssl_certificate server.crt;
13
+ ssl_certificate_key server.key;
14
+ client_max_body_size 200m;
15
+
16
+ location / {
17
+ proxy_pass http://localhost:9200;
18
+ auth_basic "Restricted Content";
19
+ auth_basic_user_file htpasswd;
20
+ }
21
+ }
22
+ }