connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/dispatcher.rb +12 -0
  4. data/lib/app/preflight_check.rb +11 -0
  5. data/lib/connectors/base/connector.rb +19 -12
  6. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  7. data/lib/connectors/example/connector.rb +15 -0
  8. data/lib/connectors/gitlab/connector.rb +15 -1
  9. data/lib/connectors/mongodb/connector.rb +55 -36
  10. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  11. data/lib/core/configuration.rb +3 -1
  12. data/lib/core/connector_job.rb +137 -0
  13. data/lib/core/connector_settings.rb +24 -11
  14. data/lib/core/elastic_connector_actions.rb +263 -24
  15. data/lib/core/filtering/post_process_engine.rb +39 -0
  16. data/lib/core/filtering/post_process_result.rb +27 -0
  17. data/lib/core/filtering/simple_rule.rb +141 -0
  18. data/lib/core/filtering/validation_job_runner.rb +53 -0
  19. data/lib/core/filtering/validation_status.rb +17 -0
  20. data/lib/core/filtering.rb +17 -0
  21. data/lib/core/ingestion/es_sink.rb +59 -0
  22. data/lib/core/ingestion/ingester.rb +90 -0
  23. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  24. data/lib/core/scheduler.rb +40 -10
  25. data/lib/core/sync_job_runner.rb +65 -17
  26. data/lib/core.rb +2 -0
  27. data/lib/utility/bulk_queue.rb +85 -0
  28. data/lib/utility/constants.rb +2 -0
  29. data/lib/utility/filtering.rb +22 -0
  30. data/lib/utility/logger.rb +2 -1
  31. data/lib/utility.rb +5 -4
  32. metadata +16 -7
  33. data/lib/core/output_sink/base_sink.rb +0 -33
  34. data/lib/core/output_sink/combined_sink.rb +0 -38
  35. data/lib/core/output_sink/console_sink.rb +0 -51
  36. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/registry'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -34,13 +35,15 @@ module Core
34
35
  new(es_response, connectors_meta)
35
36
  end
36
37
 
37
- def initialize(es_response, connectors_meta)
38
- @elasticsearch_response = es_response.with_indifferent_access
39
- @connectors_meta = connectors_meta.with_indifferent_access
40
- end
41
-
42
38
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
43
- query = { term: { is_native: true } }
39
+ query = {
40
+ bool: {
41
+ filter: [
42
+ { term: { is_native: true } },
43
+ { terms: { service_type: Connectors::REGISTRY.registered_connectors } }
44
+ ]
45
+ }
46
+ }
44
47
  fetch_connectors_by_query(query, page_size)
45
48
  end
46
49
 
@@ -83,23 +86,26 @@ module Core
83
86
  end
84
87
 
85
88
  def filtering
86
- Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
89
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
90
+ filtering = @elasticsearch_response.dig(:_source, :filtering)
91
+
92
+ Utility::Filtering.extract_filter(filtering)
87
93
  end
88
94
 
89
95
  def request_pipeline
90
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
96
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
91
97
  end
92
98
 
93
99
  def extract_binary_content?
94
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
100
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
95
101
  end
96
102
 
97
103
  def reduce_whitespace?
98
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
104
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
99
105
  end
100
106
 
101
107
  def run_ml_inference?
102
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
108
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
103
109
  end
104
110
 
105
111
  def formatted
@@ -116,6 +122,13 @@ module Core
116
122
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
117
123
  end
118
124
 
125
+ private
126
+
127
+ def initialize(es_response, connectors_meta)
128
+ @elasticsearch_response = es_response.with_indifferent_access
129
+ @connectors_meta = connectors_meta.with_indifferent_access
130
+ end
131
+
119
132
  def self.fetch_connectors_by_query(query, page_size)
120
133
  connectors_meta = ElasticConnectorActions.connectors_meta
121
134
 
@@ -19,6 +19,12 @@ module Core
19
19
  end
20
20
  end
21
21
 
22
# Error raised when indexing a new sync job document does not report a
# 'created' result. The message carries the connector id and the raw
# index response so the failure can be diagnosed from logs.
class JobNotCreatedError < StandardError
  # @param connector_id [String] id of the connector the job was meant for
  # @param response [Object] response returned by the index call
  def initialize(connector_id, response)
    message = "Sync job for connector '#{connector_id}' could not be created. Response: #{response}"
    super(message)
  end
end
27
+
22
28
  class ConnectorVersionChangedError < StandardError
23
29
  def initialize(connector_id, seq_no, primary_term)
24
30
  super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
@@ -43,10 +49,17 @@ module Core
43
49
  end
44
50
 
45
51
  def get_connector(connector_id)
52
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
46
53
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
47
54
  end
48
55
 
56
# Fetches a sync job document by id from the jobs index.
# A 404 is ignored by the client call rather than raised.
#
# @param job_id [String] id of the job document
# @return the client response, wrapped with indifferent access
def get_job(job_id)
  # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
  response = client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404)
  response.with_indifferent_access
end
60
+
49
61
  def connectors_meta
62
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
50
63
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
51
64
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
52
65
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -65,6 +78,19 @@ module Core
65
78
  )
66
79
  end
67
80
 
81
# Runs a paginated search against the jobs index, sorted by 'created_at'.
# A 404 is ignored by the client call rather than raised.
#
# @param query [Hash] Elasticsearch query clause
# @param page_size [Integer] number of hits to request
# @param offset [Integer] index of the first hit to return
# @return the raw client search response
def search_jobs(query, page_size, offset)
  search_body = {
    :size => page_size,
    :from => offset,
    :query => query,
    :sort => ['created_at']
  }

  client.search(:index => Utility::Constants::JOB_INDEX, :ignore => 404, :body => search_body)
end
93
+
68
94
  def update_connector_configuration(connector_id, configuration)
69
95
  update_connector_fields(connector_id, :configuration => configuration)
70
96
  end
@@ -84,6 +110,28 @@ module Core
84
110
  update_connector_configuration(connector_id, payload)
85
111
  end
86
112
 
113
# Stores filtering validation results on a connector document.
#
# Reads the connector's current filtering, lets update_filter_validation
# merge the matching result into each filter, and writes the filtering back.
# The filtering may be a single filter Hash or an Array of filters; any other
# shape is logged and skipped.
#
# Nothing is written when no filter belongs to a validated domain. This holds
# for the single-Hash shape as well as for the Array shape, so an unchanged
# document is never re-indexed.
#
# @param connector_id [String] id of the connector document
# @param filter_validation_results [Hash, nil] validation results keyed by domain
# @return [nil, Object] nil when nothing was updated, otherwise the result of
#   update_connector_fields
def update_filtering_validation(connector_id, filter_validation_results)
  return if filter_validation_results.nil? || filter_validation_results.empty?

  filtering = get_connector(connector_id).dig(:_source, :filtering)

  unless filtering.is_a?(Hash) || filtering.is_a?(Array)
    Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
    return
  end

  # Treat a single filter object like a one-element list so both shapes share one path.
  filters = filtering.is_a?(Hash) ? [filtering] : filtering
  return unless should_update_validations?(filter_validation_results, filters)

  filters.each do |filter|
    update_filter_validation(filter, filter_validation_results)
  end

  # The filters were mutated in place, so `filtering` keeps its original shape.
  update_connector_fields(connector_id, { :filtering => filtering })
end
134
+
87
135
  def claim_job(connector_id)
88
136
  seq_no = nil
89
137
  primary_term = nil
@@ -111,24 +159,38 @@ module Core
111
159
  )
112
160
 
113
161
  body = {
114
- :connector_id => connector_id,
115
162
  :status => Connectors::SyncStatus::IN_PROGRESS,
116
163
  :worker_hostname => Socket.gethostname,
117
164
  :created_at => Time.now,
118
- :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
165
+ :started_at => Time.now,
166
+ :last_seen => Time.now,
167
+ :connector => {
168
+ :id => connector_id,
169
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
170
+ }
119
171
  }
120
172
 
121
- client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
173
+ index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
+ if index_response['result'] == 'created'
175
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
+ return client.get(
177
+ :index => Utility::Constants::JOB_INDEX,
178
+ :id => index_response['_id'],
179
+ :ignore => 404
180
+ ).with_indifferent_access
181
+ end
182
+ raise JobNotCreatedError.new(connector_id, index_response)
122
183
  end
123
184
 
124
185
  def convert_connector_filtering_to_job_filtering(connector_filtering)
125
186
  return [] unless connector_filtering
126
187
  connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
188
  connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
189
+ snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
128
190
  job_filtering << {
129
191
  'domain' => filtering_domain['domain'],
130
192
  'rules' => filtering_domain.dig('active', 'rules'),
131
- 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
193
+ 'advanced_snippet' => snippet['value'] || snippet,
132
194
  'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
195
  }
134
196
  end
@@ -145,22 +207,33 @@ module Core
145
207
  update_connector_fields(connector_id, body)
146
208
  end
147
209
 
148
- def complete_sync(connector_id, job_id, status)
149
- sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
210
# Records progress on a sync job: merges +metadata+ into the job document
# and sets its last_seen timestamp to the current time.
#
# @param job_id [String] id of the job document in the jobs index
# @param metadata [Hash, nil] fields to merge into the job document. nil is
#   treated as "no extra fields", matching how complete_sync handles it.
#   A :last_seen key in metadata overrides the generated timestamp.
# @return the client update response
def update_sync(job_id, metadata)
  doc = { :last_seen => Time.now }.merge(metadata || {})

  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => { :doc => doc })
end
216
+
217
+ def complete_sync(connector_id, job_id, metadata, error)
218
+ sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
219
+
220
+ metadata ||= {}
150
221
 
151
222
  update_connector_fields(connector_id,
152
223
  :last_sync_status => sync_status,
153
- :last_sync_error => status[:error],
154
- :error => status[:error],
224
+ :last_sync_error => error,
225
+ :error => error,
155
226
  :last_synced => Time.now,
156
- :last_indexed_document_count => status[:indexed_document_count],
157
- :last_deleted_document_count => status[:deleted_document_count])
227
+ :last_indexed_document_count => metadata[:indexed_document_count],
228
+ :last_deleted_document_count => metadata[:deleted_document_count])
158
229
 
159
230
  body = {
160
231
  :doc => {
161
232
  :status => sync_status,
162
- :completed_at => Time.now
163
- }.merge(status)
233
+ :completed_at => Time.now,
234
+ :last_seen => Time.now,
235
+ :error => error
236
+ }.merge(metadata)
164
237
  }
165
238
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
166
239
  end
@@ -248,12 +321,105 @@ module Core
248
321
  :properties => {
249
322
  :api_key_id => { :type => :keyword },
250
323
  :configuration => { :type => :object },
251
- :error => { :type => :text },
324
+ :description => { :type => :text },
325
+ :error => { :type => :keyword },
326
+ :features => {
327
+ :properties => {
328
+ :filtering_advanced_config => { :type => :boolean },
329
+ :filtering_rules => { :type => :boolean }
330
+ }
331
+ },
332
+ :filtering => {
333
+ :properties => {
334
+ :domain => { :type => :keyword },
335
+ :active => {
336
+ :properties => {
337
+ :rules => {
338
+ :properties => {
339
+ :id => { :type => :keyword },
340
+ :policy => { :type => :keyword },
341
+ :field => { :type => :keyword },
342
+ :rule => { :type => :keyword },
343
+ :value => { :type => :keyword },
344
+ :order => { :type => :short },
345
+ :created_at => { :type => :date },
346
+ :updated_at => { :type => :date }
347
+ }
348
+ },
349
+ :advanced_snippet => {
350
+ :properties => {
351
+ :value => { :type => :object },
352
+ :created_at => { :type => :date },
353
+ :updated_at => { :type => :date }
354
+ }
355
+ },
356
+ :validation => {
357
+ :properties => {
358
+ :state => { :type => :keyword },
359
+ :errors => {
360
+ :properties => {
361
+ :ids => { :type => :keyword },
362
+ :messages => { :type => :text }
363
+ }
364
+ }
365
+ }
366
+ }
367
+ }
368
+ },
369
+ :draft => {
370
+ :properties => {
371
+ :rules => {
372
+ :properties => {
373
+ :id => { :type => :keyword },
374
+ :policy => { :type => :keyword },
375
+ :field => { :type => :keyword },
376
+ :rule => { :type => :keyword },
377
+ :value => { :type => :keyword },
378
+ :order => { :type => :short },
379
+ :created_at => { :type => :date },
380
+ :updated_at => { :type => :date }
381
+ }
382
+ },
383
+ :advanced_snippet => {
384
+ :properties => {
385
+ :value => { :type => :object },
386
+ :created_at => { :type => :date },
387
+ :updated_at => { :type => :date }
388
+ }
389
+ },
390
+ :validation => {
391
+ :properties => {
392
+ :state => { :type => :keyword },
393
+ :errors => {
394
+ :properties => {
395
+ :ids => { :type => :keyword },
396
+ :messages => { :type => :text }
397
+ }
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+ },
252
405
  :index_name => { :type => :keyword },
406
+ :is_native => { :type => :boolean },
407
+ :language => { :type => :keyword },
253
408
  :last_seen => { :type => :date },
409
+ :last_sync_error => { :type => :keyword },
410
+ :last_sync_status => { :type => :keyword },
254
411
  :last_synced => { :type => :date },
255
- :last_indexed_document_count => { :type => :integer },
256
- :last_deleted_document_count => { :type => :integer },
412
+ :last_deleted_document_count => { :type => :long },
413
+ :last_indexed_document_count => { :type => :long },
414
+ :name => { :type => :keyword },
415
+ :pipeline => {
416
+ :properties => {
417
+ :extract_binary_content => { :type => :boolean },
418
+ :name => { :type => :keyword },
419
+ :reduce_whitespace => { :type => :boolean },
420
+ :run_ml_inference => { :type => :boolean }
421
+ }
422
+ },
257
423
  :scheduling => {
258
424
  :properties => {
259
425
  :enabled => { :type => :boolean },
@@ -262,9 +428,7 @@ module Core
262
428
  },
263
429
  :service_type => { :type => :keyword },
264
430
  :status => { :type => :keyword },
265
- :sync_error => { :type => :text },
266
- :sync_now => { :type => :boolean },
267
- :sync_status => { :type => :keyword }
431
+ :sync_now => { :type => :boolean }
268
432
  }
269
433
  }
270
434
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -275,14 +439,68 @@ module Core
275
439
  def ensure_job_index_exists
276
440
  mappings = {
277
441
  :properties => {
278
- :connector_id => { :type => :keyword },
279
- :status => { :type => :keyword },
442
+ :cancelation_requested_at => { :type => :date },
443
+ :canceled_at => { :type => :date },
444
+ :completed_at => { :type => :date },
445
+ :connector => {
446
+ :properties => {
447
+ :configuration => { :type => :object },
448
+ :filtering => {
449
+ :properties => {
450
+ :domain => { :type => :keyword },
451
+ :rules => {
452
+ :properties => {
453
+ :id => { :type => :keyword },
454
+ :policy => { :type => :keyword },
455
+ :field => { :type => :keyword },
456
+ :rule => { :type => :keyword },
457
+ :value => { :type => :keyword },
458
+ :order => { :type => :short },
459
+ :created_at => { :type => :date },
460
+ :updated_at => { :type => :date }
461
+ }
462
+ },
463
+ :advanced_snippet => {
464
+ :properties => {
465
+ :value => { :type => :object },
466
+ :created_at => { :type => :date },
467
+ :updated_at => { :type => :date }
468
+ }
469
+ },
470
+ :warnings => {
471
+ :properties => {
472
+ :ids => { :type => :keyword },
473
+ :messages => { :type => :text }
474
+ }
475
+ }
476
+ }
477
+ },
478
+ :id => { :type => :keyword },
479
+ :index_name => { :type => :keyword },
480
+ :language => { :type => :keyword },
481
+ :pipeline => {
482
+ :properties => {
483
+ :extract_binary_content => { :type => :boolean },
484
+ :name => { :type => :keyword },
485
+ :reduce_whitespace => { :type => :boolean },
486
+ :run_ml_inference => { :type => :boolean }
487
+ }
488
+ },
489
+ :service_type => { :type => :keyword }
490
+ }
491
+ },
492
+ :created_at => { :type => :date },
493
+ :deleted_document_count => { :type => :integer },
280
494
  :error => { :type => :text },
281
- :worker_hostname => { :type => :keyword },
282
495
  :indexed_document_count => { :type => :integer },
283
- :deleted_document_count => { :type => :integer },
284
- :created_at => { :type => :date },
285
- :completed_at => { :type => :date }
496
+ :indexed_document_volume => { :type => :integer },
497
+ :last_seen => { :type => :date },
498
+ :metadata => { :type => :object },
499
+ :started_at => { :type => :date },
500
+ :status => { :type => :keyword },
501
+ :total_document_count => { :type => :integer },
502
+ :trigger_method => { :type => :keyword },
503
+ :worker_hostname => { :type => :keyword }
286
504
  }
287
505
  }
288
506
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
@@ -313,8 +531,20 @@ module Core
313
531
  end
314
532
  end
315
533
 
534
# Returns the 'count' field of the client's count response for +index_name+.
#
# @param index_name [String] name of the index to count documents in
def document_count(index_name)
  count_response = client.count(:index => index_name)
  count_response['count']
end
537
+
316
538
  private
317
539
 
540
# Tells whether any filter in +filtering+ belongs to a domain that has a
# validation result.
#
# Membership is checked with Hash#key?, the same test update_filter_validation
# applies per filter, so the two helpers cannot disagree about which filters
# will be updated. Stops at the first match and allocates no intermediate arrays.
#
# @param domain_validations [Hash] validation results keyed by domain
# @param filtering [Array<Hash>] filter objects, each read via filter[:domain]
# @return [Boolean]
def should_update_validations?(domain_validations, filtering)
  filtering.any? { |filter| domain_validations.key?(filter[:domain]) }
end
547
+
318
548
  def client
319
549
  @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
320
550
  end
@@ -324,6 +554,15 @@ module Core
324
554
  index_version = index_versions.max # gets the largest suffix number
325
555
  "#{alias_name}-v#{index_version}"
326
556
  end
557
+
558
# Merges the validation result for a filter's domain into that filter's
# draft section. The filter is mutated in place; a filter whose domain has no
# entry in +domain_validations+ is left untouched and nil is returned.
#
# @param filter [Hash] filter object, read via filter[:domain]
# @param domain_validations [Hash] validation results keyed by domain
def update_filter_validation(filter, domain_validations)
  filter_domain = filter[:domain]
  return unless domain_validations.key?(filter_domain)

  filter.deep_merge!({ :draft => { :validation => domain_validations[filter_domain] } })
end
327
566
  end
328
567
  end
329
568
  end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering'
10
+ require 'utility/filtering'
11
+
12
module Core
  module Filtering
    # Applies a sync job's basic filtering rules to documents.
    #
    # The rules are read from the job description under 'connector' ->
    # 'filtering', ordered by their 'order' attribute, and evaluated first
    # match wins. The DEFAULT rule is never stored in the rule list; it is
    # the fallback result when no other rule matches.
    class PostProcessEngine
      # @return [Array<SimpleRule>] rules in evaluation order, without the DEFAULT rule
      attr_reader :rules

      # @param job_description [Hash] job document, read with string keys
      def initialize(job_description)
        @rules = ordered_rules(job_description.dig('connector', 'filtering'))
      end

      # Finds the first rule matching +document+.
      #
      # @param document [Hash] document with string or symbol keys
      # @return [PostProcessResult] the untouched document paired with the
      #   matching rule, or with SimpleRule::DEFAULT_RULE when nothing matched
      def process(document)
        # Rules read fields by string key; convert once rather than once per rule.
        stringified = document.transform_keys(&:to_s)
        matched_rule = @rules.find { |rule| rule.match?(stringified) }

        PostProcessResult.new(document, matched_rule || SimpleRule::DEFAULT_RULE)
      end

      private

      # Builds the ordered rule list from the job's filtering section.
      # A filter without a 'rules' entry yields an empty list.
      def ordered_rules(job_filtering)
        filter = Utility::Filtering.extract_filter(job_filtering) || {}
        job_rules = filter['rules'] || []

        # Drop the DEFAULT rule before sorting so its 'order' never takes part in the comparison.
        job_rules
          .reject { |rule| rule['id'] == SimpleRule::DEFAULT_RULE_ID }
          .sort_by { |rule| rule['order'] }
          .map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # Outcome of running a document through the post-process filtering rules:
    # the document together with the rule that matched it.
    class PostProcessResult
      attr_reader :document, :matching_rule

      # Stores the pair and logs the decision at debug level.
      #
      # @param document [Hash] the processed document
      # @param matching_rule [SimpleRule] the rule that matched the document
      def initialize(document, matching_rule)
        @document = document
        @matching_rule = matching_rule
        # The trailing 'd' turns the policy into a past participle, e.g. 'include' -> 'included'.
        Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
      end

      # @return [Boolean] whether the matching rule's policy keeps the document
      def is_include?
        matching_rule.is_include?
      end

      private

      # Id used in the log line. Documents are looked up by string key first
      # and by symbol key second, since the document is stored as given and
      # may use either kind of key.
      def document_id
        document['id'] || document[:id]
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # A single basic filtering rule: a policy (include/exclude), a document
    # field, a comparison operator and the value to compare against.
    #
    # Rule values are coerced to the type of the document value before
    # comparing, so a rule value of '5' can be matched against an Integer,
    # Float, String, boolean or date/time field.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      # Accepted values for a rule's 'policy'.
      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      # Accepted values for a rule's 'rule' (the comparison operator).
      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      # Spellings accepted by to_bool; anchored to the whole string.
      TRUE_PATTERN = /\A(true|t|yes|y|on|1)\z/i.freeze
      FALSE_PATTERN = /\A(false|f|no|n|off|0)\z/i.freeze
      private_constant :TRUE_PATTERN, :FALSE_PATTERN

      attr_reader :policy, :field, :rule, :value, :id

      # @param rule_hash [Hash] rule with the string keys 'policy', 'field',
      #   'rule', 'value' and 'id'
      # @raise [RuntimeError] "<key> is required" when one of those keys is missing
      def initialize(rule_hash)
        @policy = rule_hash.fetch('policy')
        @field = rule_hash.fetch('field')
        @rule = rule_hash.fetch('rule')
        @value = rule_hash.fetch('value')
        @id = rule_hash.fetch('id')
        @rule_hash = rule_hash
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Builds a rule from positional arguments instead of a hash.
      def self.from_args(id, policy, field, rule, value)
        SimpleRule.new(
          {
            'id' => id,
            'policy' => policy,
            'field' => field,
            'rule' => rule,
            'value' => value
          }
        )
      end

      # Catch-all rule: match? returns true for every document and the policy is 'include'.
      DEFAULT_RULE = SimpleRule.new(
        'policy' => 'include',
        'field' => '_',
        'rule' => 'regex',
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # Tells whether +document+ satisfies this rule.
      #
      # @param document [Hash] document whose keys are strings (fields are
      #   looked up with the rule's 'field' as given)
      # @return [Boolean] true when the rule matches. Always true for the
      #   DEFAULT rule, always false when the field is missing or nil, and
      #   false for an unknown operator.
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        doc_value = document[field]
        return false if doc_value.nil?

        coerced_value = coerce(doc_value)
        case rule
        when Rule::EQUALS
          equal_values?(doc_value, coerced_value)
        when Rule::STARTS_WITH
          doc_value.to_s.start_with?(value.to_s)
        when Rule::ENDS_WITH
          doc_value.to_s.end_with?(value.to_s)
        when Rule::CONTAINS
          doc_value.to_s.include?(value.to_s)
        when Rule::REGEX
          doc_value.to_s.match?(/#{value}/)
        when Rule::LESS_THAN
          left, right = ordering_operands(doc_value, coerced_value)
          left < right
        when Rule::GREATER_THAN
          left, right = ordering_operands(doc_value, coerced_value)
          left > right
        else
          false
        end
      end

      # Converts the rule value to the type of +doc_value+ so both can be compared.
      #
      # @param doc_value [Object] value found in the document
      # @return [Object] the rule value as String, Integer, Float, DateTime/Time,
      #   or 'true'/'false' for boolean document values. Falls back to the rule
      #   value's string form when the conversion raises.
      def coerce(doc_value)
        case doc_value
        when String
          value.to_s
        when Integer
          value.to_i
        when Float
          value.to_f
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      # @return [Hash] the hash this rule was built from
      def to_h
        @rule_hash
      end

      private

      # Equality for the 'equals' operator, by type of the coerced rule value:
      # numbers compare numerically, dates/times compare as instants with
      # second precision, everything else compares string forms.
      def equal_values?(doc_value, coerced_value)
        case coerced_value
        when Numeric
          doc_value == coerced_value
        when DateTime, Time
          # Time#to_s and DateTime#to_s use different formats, so compare instants instead of strings.
          doc_value.to_time.to_i == coerced_value.to_time.to_i
        else
          doc_value.to_s == coerced_value
        end
      end

      # Operands for '<' and '>'. Date/time values are converted to Time so a
      # Time document value can be ordered against a parsed DateTime rule value.
      def ordering_operands(doc_value, coerced_value)
        [doc_value, coerced_value].map { |operand| temporal?(operand) ? operand.to_time : operand }
      end

      def temporal?(candidate)
        candidate.is_a?(DateTime) || candidate.is_a?(Time)
      end

      # Parses common boolean spellings.
      #
      # @raise [ArgumentError] when +str+ is not a recognised boolean spelling
      def to_bool(str)
        text = str.to_s
        return true if str == true || TRUE_PATTERN.match?(text)
        return false if str == false || blank_value?(str) || FALSE_PATTERN.match?(text)

        raise ArgumentError, "invalid value for Boolean: \"#{str}\""
      end

      # True for nil, empty collections/strings and whitespace-only strings.
      def blank_value?(candidate)
        return true if candidate.nil?
        return true if candidate.respond_to?(:empty?) && candidate.empty?

        candidate.to_s.strip.empty?
      end

      # Parses +str+ as a date/time; when that fails, reads it as an integer timestamp.
      # NOTE(review): Time.at interprets its argument as seconds since the epoch;
      # the original comment called this fallback "millis" — confirm the intended unit.
      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError
        Time.at(str.to_i)
      end
    end
  end
end