logstash-filter-elasticsearch 3.18.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,9 @@ require_relative "elasticsearch/client"
13
13
 
14
14
  class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
15
15
 
16
+ require 'logstash/filters/elasticsearch/dsl_executor'
17
+ require 'logstash/filters/elasticsearch/esql_executor'
18
+
16
19
  include LogStash::PluginMixins::ECSCompatibilitySupport
17
20
  include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck
18
21
 
@@ -25,8 +28,13 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
25
28
  # Field substitution (e.g. `index-name-%{date_field}`) is available
26
29
  config :index, :validate => :string, :default => ""
27
30
 
28
- # Elasticsearch query string. Read the Elasticsearch query string documentation.
29
- # for more info at: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax
31
+ # The type of the query provided in `query`: `dsl` (the default) or `esql`.
32
+ config :query_type, :validate => %w[esql dsl], :default => "dsl"
33
+
34
+ # Elasticsearch query string. This can be in DSL or ES|QL query shape defined by @query_type.
35
+ # Read the Elasticsearch query string documentation.
36
+ # DSL: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax
37
+ # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html
30
38
  config :query, :validate => :string
31
39
 
32
40
  # File path to elasticsearch query in DSL format. Read the Elasticsearch query documentation
@@ -138,7 +146,7 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
138
146
  # Tags the event when the Elasticsearch lookup fails. This can be used in later analysis.
139
147
  config :tag_on_failure, :validate => :array, :default => ["_elasticsearch_lookup_failure"]
140
148
 
141
- # If set, the the result set will be nested under the target field
149
+ # If set, the result set will be nested under the target field
142
150
  config :target, :validate => :field_reference
143
151
 
144
152
  # How many times to retry on failure?
@@ -147,6 +155,16 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
147
155
  # What status codes to retry on?
148
156
  config :retry_on_status, :validate => :number, :list => true, :default => [500, 502, 503, 504]
149
157
 
158
+ # Named placeholders for the ES|QL query, given as a list of single-entry hashes.
159
+ # example,
160
+ # if the query is "FROM my-index | WHERE some_type = ?type AND depth > ?min_depth"
161
+ # named placeholders can be applied as the following in query_params:
162
+ # query_params => [
163
+ # {"type" => "%{[type]}"},
164
+ # {"min_depth" => "%{[depth]}"}
165
+ # ]
166
+ config :query_params, :validate => :array, :default => []
167
+
150
168
  # config :ca_trusted_fingerprint, :validate => :sha_256_hex
151
169
  include LogStash::PluginMixins::CATrustedFingerprintSupport
152
170
 
@@ -155,6 +173,9 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
155
173
  include MonitorMixin
156
174
  attr_reader :shared_client
157
175
 
176
+ LS_ESQL_SUPPORT_VERSION = "8.17.4" # first Logstash version that ships elasticsearch-ruby v8 (the client with ES|QL support)
177
+ ES_ESQL_SUPPORT_VERSION = "8.11.0"
178
+
158
179
  ##
159
180
  # @override to handle proxy => '' as if none was set
160
181
  # @param value [Array<Object>]
@@ -172,17 +193,22 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
172
193
  return super(value, :uri)
173
194
  end
174
195
 
196
+ attr_reader :query_dsl
197
+
175
198
  def register
176
- #Load query if it exists
177
- if @query_template
178
- if File.zero?(@query_template)
179
- raise "template is empty"
180
- end
181
- file = File.open(@query_template, 'r')
182
- @query_dsl = file.read
199
+ case @query_type
200
+ when "esql"
201
+ invalid_params_with_esql = original_params.keys & %w(index query_template sort fields docinfo_fields aggregation_fields enable_sort result_size)
202
+ raise LogStash::ConfigurationError, "Configured #{invalid_params_with_esql} params cannot be used with ES|QL query" if invalid_params_with_esql.any?
203
+
204
+ validate_ls_version_for_esql_support!
205
+ validate_esql_query_and_params!
206
+ @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger)
207
+ else # dsl
208
+ validate_dsl_query_settings!
209
+ @esql_executor ||= LogStash::Filters::Elasticsearch::DslExecutor.new(self, @logger)
183
210
  end
184
211
 
185
- validate_query_settings
186
212
  fill_hosts_from_cloud_id
187
213
  setup_ssl_params!
188
214
  validate_authentication
@@ -191,6 +217,7 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
191
217
  @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s
192
218
 
193
219
  test_connection!
220
+ validate_es_for_esql_support! if @query_type == "esql"
194
221
  setup_serverless
195
222
  if get_client.es_transport_client_type == "elasticsearch_transport"
196
223
  require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore"
@@ -198,71 +225,15 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
198
225
  end # def register
199
226
 
200
227
  def filter(event)
201
- matched = false
202
- begin
203
- params = { :index => event.sprintf(@index) }
204
-
205
- if @query_dsl
206
- query = LogStash::Json.load(event.sprintf(@query_dsl))
207
- params[:body] = query
208
- else
209
- query = event.sprintf(@query)
210
- params[:q] = query
211
- params[:size] = result_size
212
- params[:sort] = @sort if @enable_sort
213
- end
214
-
215
- @logger.debug("Querying elasticsearch for lookup", :params => params)
216
-
217
- results = get_client.search(params)
218
- raise "Elasticsearch query error: #{results["_shards"]["failures"]}" if results["_shards"].include? "failures"
219
-
220
- event.set("[@metadata][total_hits]", extract_total_from_hits(results['hits']))
221
-
222
- resultsHits = results["hits"]["hits"]
223
- if !resultsHits.nil? && !resultsHits.empty?
224
- matched = true
225
- @fields.each do |old_key, new_key|
226
- old_key_path = extract_path(old_key)
227
- extracted_hit_values = resultsHits.map do |doc|
228
- extract_value(doc["_source"], old_key_path)
229
- end
230
- value_to_set = extracted_hit_values.count > 1 ? extracted_hit_values : extracted_hit_values.first
231
- set_to_event_target(event, new_key, value_to_set)
232
- end
233
- @docinfo_fields.each do |old_key, new_key|
234
- old_key_path = extract_path(old_key)
235
- extracted_docs_info = resultsHits.map do |doc|
236
- extract_value(doc, old_key_path)
237
- end
238
- value_to_set = extracted_docs_info.count > 1 ? extracted_docs_info : extracted_docs_info.first
239
- set_to_event_target(event, new_key, value_to_set)
240
- end
241
- end
242
-
243
- resultsAggs = results["aggregations"]
244
- if !resultsAggs.nil? && !resultsAggs.empty?
245
- matched = true
246
- @aggregation_fields.each do |agg_name, ls_field|
247
- set_to_event_target(event, ls_field, resultsAggs[agg_name])
248
- end
249
- end
250
-
251
- rescue => e
252
- if @logger.trace?
253
- @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :query => query, :event => event.to_hash, :error => e.message, :backtrace => e.backtrace)
254
- elsif @logger.debug?
255
- @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message, :backtrace => e.backtrace)
256
- else
257
- @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message)
258
- end
259
- @tag_on_failure.each{|tag| event.tag(tag)}
260
- else
261
- filter_matched(event) if matched
262
- end
228
+ @esql_executor.process(get_client, event)
263
229
  end # def filter
264
230
 
265
- # public only to be reuse in testing
231
+ def decorate(event)
232
+ # this Elasticsearch class has access to `filter_matched`
233
+ filter_matched(event)
234
+ end
235
+
236
+ # public only to be reused in testing
266
237
  def prepare_user_agent
267
238
  os_name = java.lang.System.getProperty('os.name')
268
239
  os_version = java.lang.System.getProperty('os.version')
@@ -277,18 +248,6 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
277
248
 
278
249
  private
279
250
 
280
- # if @target is defined, creates a nested structure to inject result into target field
281
- # if not defined, directly sets to the top-level event field
282
- # @param event [LogStash::Event]
283
- # @param new_key [String] name of the field to set
284
- # @param value_to_set [Array] values to set
285
- # @return [void]
286
- def set_to_event_target(event, new_key, value_to_set)
287
- key_to_set = target ? "[#{target}][#{new_key}]" : new_key
288
-
289
- event.set(key_to_set, value_to_set)
290
- end
291
-
292
251
  def client_options
293
252
  @client_options ||= {
294
253
  :user => @user,
@@ -385,53 +344,10 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
385
344
  end
386
345
  end
387
346
 
388
- # get an array of path elements from a path reference
389
- def extract_path(path_reference)
390
- return [path_reference] unless path_reference.start_with?('[') && path_reference.end_with?(']')
391
-
392
- path_reference[1...-1].split('][')
393
- end
394
-
395
- # given a Hash and an array of path fragments, returns the value at the path
396
- # @param source [Hash{String=>Object}]
397
- # @param path [Array{String}]
398
- # @return [Object]
399
- def extract_value(source, path)
400
- path.reduce(source) do |memo, old_key_fragment|
401
- break unless memo.include?(old_key_fragment)
402
- memo[old_key_fragment]
403
- end
404
- end
405
-
406
- # Given a "hits" object from an Elasticsearch response, return the total number of hits in
407
- # the result set.
408
- # @param hits [Hash{String=>Object}]
409
- # @return [Integer]
410
- def extract_total_from_hits(hits)
411
- total = hits['total']
412
-
413
- # Elasticsearch 7.x produces an object containing `value` and `relation` in order
414
- # to enable unambiguous reporting when the total is only a lower bound; if we get
415
- # an object back, return its `value`.
416
- return total['value'] if total.kind_of?(Hash)
417
-
418
- total
419
- end
420
-
421
347
  def hosts_default?(hosts)
422
348
  hosts.is_a?(Array) && hosts.size == 1 && !original_params.key?('hosts')
423
349
  end
424
350
 
425
- def validate_query_settings
426
- unless @query || @query_template
427
- raise LogStash::ConfigurationError, "Both `query` and `query_template` are empty. Require either `query` or `query_template`."
428
- end
429
-
430
- if @query && @query_template
431
- raise LogStash::ConfigurationError, "Both `query` and `query_template` are set. Use either `query` or `query_template`."
432
- end
433
- end
434
-
435
351
  def validate_authentication
436
352
  authn_options = 0
437
353
  authn_options += 1 if @cloud_auth
@@ -560,4 +476,65 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base
560
476
  hosts.all? { |host| host && host.to_s.start_with?("https") }
561
477
  end
562
478
 
479
+ def validate_dsl_query_settings!
480
+ #Load query if it exists
481
+ if @query_template
482
+ if File.zero?(@query_template)
483
+ raise "template is empty"
484
+ end
485
+ file = File.open(@query_template, 'r')
486
+ @query_dsl = file.read
487
+ end
488
+
489
+ validate_query_settings
490
+ end
491
+
492
+ def validate_query_settings
493
+ unless @query || @query_template
494
+ raise LogStash::ConfigurationError, "Both `query` and `query_template` are empty. Require either `query` or `query_template`."
495
+ end
496
+
497
+ if @query && @query_template
498
+ raise LogStash::ConfigurationError, "Both `query` and `query_template` are set. Use either `query` or `query_template`."
499
+ end
500
+
501
+ if original_params.keys.include?("query_params")
502
+ raise LogStash::ConfigurationError, "`query_params` is not allowed when `query_type => 'dsl'`."
503
+ end
504
+ end
505
+
506
+ def validate_ls_version_for_esql_support!
507
+ if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LS_ESQL_SUPPORT_VERSION)
508
+ fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{LS_ESQL_SUPPORT_VERSION}")
509
+ end
510
+ end
511
+
512
+ def validate_esql_query_and_params!
513
+ # If Array, validate that query_params needs to contain only single-entry hashes, convert it to a Hash
514
+ if @query_params.kind_of?(Array)
515
+ illegal_entries = @query_params.reject {|e| e.kind_of?(Hash) && e.size == 1 }
516
+ raise LogStash::ConfigurationError, "`query_params` must contain only single-entry hashes. Illegal placeholders: #{illegal_entries}" if illegal_entries.any?
517
+
518
+ @query_params = @query_params.reduce({}, :merge)
519
+ end
520
+
521
+ illegal_keys = @query_params.keys.reject {|k| k[/^[a-z_][a-z0-9_]*$/] }
522
+ if illegal_keys.any?
523
+ message = "Illegal #{illegal_keys} placeholder names in `query_params`. A valid parameter name starts with a letter and contains letters, digits and underscores only;"
524
+ raise LogStash::ConfigurationError, message
525
+ end
526
+
527
+ placeholders = @query.scan(/(?<=[?])[a-z_][a-z0-9_]*/i)
528
+ placeholders.each do |placeholder|
529
+ raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless @query_params.include?(placeholder)
530
+ end
531
+ end
532
+
533
+ def validate_es_for_esql_support!
534
+ # make sure connected ES supports ES|QL (8.11+)
535
+ @es_version ||= get_client.es_version
536
+ es_supports_esql = Gem::Version.create(@es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION)
537
+ fail("Connected Elasticsearch #{@es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql
538
+ end
539
+
563
540
  end #class LogStash::Filters::Elasticsearch
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-elasticsearch'
4
- s.version = '3.18.0'
4
+ s.version = '3.19.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Copies fields from previous log events in Elasticsearch to current events "
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"