connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/dispatcher.rb +12 -0
  4. data/lib/app/preflight_check.rb +11 -0
  5. data/lib/connectors/base/connector.rb +19 -12
  6. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  7. data/lib/connectors/example/connector.rb +15 -0
  8. data/lib/connectors/gitlab/connector.rb +15 -1
  9. data/lib/connectors/mongodb/connector.rb +55 -36
  10. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  11. data/lib/core/configuration.rb +3 -1
  12. data/lib/core/connector_job.rb +137 -0
  13. data/lib/core/connector_settings.rb +24 -11
  14. data/lib/core/elastic_connector_actions.rb +263 -24
  15. data/lib/core/filtering/post_process_engine.rb +39 -0
  16. data/lib/core/filtering/post_process_result.rb +27 -0
  17. data/lib/core/filtering/simple_rule.rb +141 -0
  18. data/lib/core/filtering/validation_job_runner.rb +53 -0
  19. data/lib/core/filtering/validation_status.rb +17 -0
  20. data/lib/core/filtering.rb +17 -0
  21. data/lib/core/ingestion/es_sink.rb +59 -0
  22. data/lib/core/ingestion/ingester.rb +90 -0
  23. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  24. data/lib/core/scheduler.rb +40 -10
  25. data/lib/core/sync_job_runner.rb +65 -17
  26. data/lib/core.rb +2 -0
  27. data/lib/utility/bulk_queue.rb +85 -0
  28. data/lib/utility/constants.rb +2 -0
  29. data/lib/utility/filtering.rb +22 -0
  30. data/lib/utility/logger.rb +2 -1
  31. data/lib/utility.rb +5 -4
  32. metadata +16 -7
  33. data/lib/core/output_sink/base_sink.rb +0 -33
  34. data/lib/core/output_sink/combined_sink.rb +0 -38
  35. data/lib/core/output_sink/console_sink.rb +0 -51
  36. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/registry'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -34,13 +35,15 @@ module Core
34
35
  new(es_response, connectors_meta)
35
36
  end
36
37
 
37
- def initialize(es_response, connectors_meta)
38
- @elasticsearch_response = es_response.with_indifferent_access
39
- @connectors_meta = connectors_meta.with_indifferent_access
40
- end
41
-
42
38
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
43
- query = { term: { is_native: true } }
39
+ query = {
40
+ bool: {
41
+ filter: [
42
+ { term: { is_native: true } },
43
+ { terms: { service_type: Connectors::REGISTRY.registered_connectors } }
44
+ ]
45
+ }
46
+ }
44
47
  fetch_connectors_by_query(query, page_size)
45
48
  end
46
49
 
@@ -83,23 +86,26 @@ module Core
83
86
  end
84
87
 
85
88
  def filtering
86
- Utility::Common.return_if_present(@elasticsearch_response[:filtering], DEFAULT_FILTERING)
89
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
90
+ filtering = @elasticsearch_response.dig(:_source, :filtering)
91
+
92
+ Utility::Filtering.extract_filter(filtering)
87
93
  end
88
94
 
89
95
  def request_pipeline
90
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
96
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
91
97
  end
92
98
 
93
99
  def extract_binary_content?
94
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
100
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
95
101
  end
96
102
 
97
103
  def reduce_whitespace?
98
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
104
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
99
105
  end
100
106
 
101
107
  def run_ml_inference?
102
- Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
108
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
103
109
  end
104
110
 
105
111
  def formatted
@@ -116,6 +122,13 @@ module Core
116
122
  index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
117
123
  end
118
124
 
125
+ private
126
+
127
+ def initialize(es_response, connectors_meta)
128
+ @elasticsearch_response = es_response.with_indifferent_access
129
+ @connectors_meta = connectors_meta.with_indifferent_access
130
+ end
131
+
119
132
  def self.fetch_connectors_by_query(query, page_size)
120
133
  connectors_meta = ElasticConnectorActions.connectors_meta
121
134
 
@@ -19,6 +19,12 @@ module Core
19
19
  end
20
20
  end
21
21
 
22
+ class JobNotCreatedError < StandardError
23
+ def initialize(connector_id, response)
24
+ super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
25
+ end
26
+ end
27
+
22
28
  class ConnectorVersionChangedError < StandardError
23
29
  def initialize(connector_id, seq_no, primary_term)
24
30
  super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
@@ -43,10 +49,17 @@ module Core
43
49
  end
44
50
 
45
51
  def get_connector(connector_id)
52
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
46
53
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
47
54
  end
48
55
 
56
+ def get_job(job_id)
57
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
58
+ client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
59
+ end
60
+
49
61
  def connectors_meta
62
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
50
63
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
51
64
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
52
65
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -65,6 +78,19 @@ module Core
65
78
  )
66
79
  end
67
80
 
81
+ def search_jobs(query, page_size, offset)
82
+ client.search(
83
+ :index => Utility::Constants::JOB_INDEX,
84
+ :ignore => 404,
85
+ :body => {
86
+ :size => page_size,
87
+ :from => offset,
88
+ :query => query,
89
+ :sort => ['created_at']
90
+ }
91
+ )
92
+ end
93
+
68
94
  def update_connector_configuration(connector_id, configuration)
69
95
  update_connector_fields(connector_id, :configuration => configuration)
70
96
  end
@@ -84,6 +110,28 @@ module Core
84
110
  update_connector_configuration(connector_id, payload)
85
111
  end
86
112
 
113
+ def update_filtering_validation(connector_id, filter_validation_results)
114
+ return if filter_validation_results.empty?
115
+
116
+ filtering = get_connector(connector_id).dig(:_source, :filtering)
117
+
118
+ case filtering
119
+ when Hash
120
+ update_filter_validation(filtering, filter_validation_results)
121
+ when Array
122
+ return unless should_update_validations?(filter_validation_results, filtering)
123
+
124
+ filtering.each do |filter|
125
+ update_filter_validation(filter, filter_validation_results)
126
+ end
127
+ else
128
+ Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
129
+ return
130
+ end
131
+
132
+ update_connector_fields(connector_id, { :filtering => filtering })
133
+ end
134
+
87
135
  def claim_job(connector_id)
88
136
  seq_no = nil
89
137
  primary_term = nil
@@ -111,24 +159,38 @@ module Core
111
159
  )
112
160
 
113
161
  body = {
114
- :connector_id => connector_id,
115
162
  :status => Connectors::SyncStatus::IN_PROGRESS,
116
163
  :worker_hostname => Socket.gethostname,
117
164
  :created_at => Time.now,
118
- :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
165
+ :started_at => Time.now,
166
+ :last_seen => Time.now,
167
+ :connector => {
168
+ :id => connector_id,
169
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
170
+ }
119
171
  }
120
172
 
121
- client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
173
+ index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
+ if index_response['result'] == 'created'
175
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
+ return client.get(
177
+ :index => Utility::Constants::JOB_INDEX,
178
+ :id => index_response['_id'],
179
+ :ignore => 404
180
+ ).with_indifferent_access
181
+ end
182
+ raise JobNotCreatedError.new(connector_id, index_response)
122
183
  end
123
184
 
124
185
  def convert_connector_filtering_to_job_filtering(connector_filtering)
125
186
  return [] unless connector_filtering
126
187
  connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
127
188
  connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
189
+ snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
128
190
  job_filtering << {
129
191
  'domain' => filtering_domain['domain'],
130
192
  'rules' => filtering_domain.dig('active', 'rules'),
131
- 'advanced_snippet' => filtering_domain.dig('active', 'advanced_snippet'),
193
+ 'advanced_snippet' => snippet['value'] || snippet,
132
194
  'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
133
195
  }
134
196
  end
@@ -145,22 +207,33 @@ module Core
145
207
  update_connector_fields(connector_id, body)
146
208
  end
147
209
 
148
- def complete_sync(connector_id, job_id, status)
149
- sync_status = status[:error] ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
210
+ def update_sync(job_id, metadata)
211
+ body = {
212
+ :doc => { :last_seen => Time.now }.merge(metadata)
213
+ }
214
+ client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
215
+ end
216
+
217
+ def complete_sync(connector_id, job_id, metadata, error)
218
+ sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
219
+
220
+ metadata ||= {}
150
221
 
151
222
  update_connector_fields(connector_id,
152
223
  :last_sync_status => sync_status,
153
- :last_sync_error => status[:error],
154
- :error => status[:error],
224
+ :last_sync_error => error,
225
+ :error => error,
155
226
  :last_synced => Time.now,
156
- :last_indexed_document_count => status[:indexed_document_count],
157
- :last_deleted_document_count => status[:deleted_document_count])
227
+ :last_indexed_document_count => metadata[:indexed_document_count],
228
+ :last_deleted_document_count => metadata[:deleted_document_count])
158
229
 
159
230
  body = {
160
231
  :doc => {
161
232
  :status => sync_status,
162
- :completed_at => Time.now
163
- }.merge(status)
233
+ :completed_at => Time.now,
234
+ :last_seen => Time.now,
235
+ :error => error
236
+ }.merge(metadata)
164
237
  }
165
238
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
166
239
  end
@@ -248,12 +321,105 @@ module Core
248
321
  :properties => {
249
322
  :api_key_id => { :type => :keyword },
250
323
  :configuration => { :type => :object },
251
- :error => { :type => :text },
324
+ :description => { :type => :text },
325
+ :error => { :type => :keyword },
326
+ :features => {
327
+ :properties => {
328
+ :filtering_advanced_config => { :type => :boolean },
329
+ :filtering_rules => { :type => :boolean }
330
+ }
331
+ },
332
+ :filtering => {
333
+ :properties => {
334
+ :domain => { :type => :keyword },
335
+ :active => {
336
+ :properties => {
337
+ :rules => {
338
+ :properties => {
339
+ :id => { :type => :keyword },
340
+ :policy => { :type => :keyword },
341
+ :field => { :type => :keyword },
342
+ :rule => { :type => :keyword },
343
+ :value => { :type => :keyword },
344
+ :order => { :type => :short },
345
+ :created_at => { :type => :date },
346
+ :updated_at => { :type => :date }
347
+ }
348
+ },
349
+ :advanced_snippet => {
350
+ :properties => {
351
+ :value => { :type => :object },
352
+ :created_at => { :type => :date },
353
+ :updated_at => { :type => :date }
354
+ }
355
+ },
356
+ :validation => {
357
+ :properties => {
358
+ :state => { :type => :keyword },
359
+ :errors => {
360
+ :properties => {
361
+ :ids => { :type => :keyword },
362
+ :messages => { :type => :text }
363
+ }
364
+ }
365
+ }
366
+ }
367
+ }
368
+ },
369
+ :draft => {
370
+ :properties => {
371
+ :rules => {
372
+ :properties => {
373
+ :id => { :type => :keyword },
374
+ :policy => { :type => :keyword },
375
+ :field => { :type => :keyword },
376
+ :rule => { :type => :keyword },
377
+ :value => { :type => :keyword },
378
+ :order => { :type => :short },
379
+ :created_at => { :type => :date },
380
+ :updated_at => { :type => :date }
381
+ }
382
+ },
383
+ :advanced_snippet => {
384
+ :properties => {
385
+ :value => { :type => :object },
386
+ :created_at => { :type => :date },
387
+ :updated_at => { :type => :date }
388
+ }
389
+ },
390
+ :validation => {
391
+ :properties => {
392
+ :state => { :type => :keyword },
393
+ :errors => {
394
+ :properties => {
395
+ :ids => { :type => :keyword },
396
+ :messages => { :type => :text }
397
+ }
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+ },
252
405
  :index_name => { :type => :keyword },
406
+ :is_native => { :type => :boolean },
407
+ :language => { :type => :keyword },
253
408
  :last_seen => { :type => :date },
409
+ :last_sync_error => { :type => :keyword },
410
+ :last_sync_status => { :type => :keyword },
254
411
  :last_synced => { :type => :date },
255
- :last_indexed_document_count => { :type => :integer },
256
- :last_deleted_document_count => { :type => :integer },
412
+ :last_deleted_document_count => { :type => :long },
413
+ :last_indexed_document_count => { :type => :long },
414
+ :name => { :type => :keyword },
415
+ :pipeline => {
416
+ :properties => {
417
+ :extract_binary_content => { :type => :boolean },
418
+ :name => { :type => :keyword },
419
+ :reduce_whitespace => { :type => :boolean },
420
+ :run_ml_inference => { :type => :boolean }
421
+ }
422
+ },
257
423
  :scheduling => {
258
424
  :properties => {
259
425
  :enabled => { :type => :boolean },
@@ -262,9 +428,7 @@ module Core
262
428
  },
263
429
  :service_type => { :type => :keyword },
264
430
  :status => { :type => :keyword },
265
- :sync_error => { :type => :text },
266
- :sync_now => { :type => :boolean },
267
- :sync_status => { :type => :keyword }
431
+ :sync_now => { :type => :boolean }
268
432
  }
269
433
  }
270
434
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -275,14 +439,68 @@ module Core
275
439
  def ensure_job_index_exists
276
440
  mappings = {
277
441
  :properties => {
278
- :connector_id => { :type => :keyword },
279
- :status => { :type => :keyword },
442
+ :cancelation_requested_at => { :type => :date },
443
+ :canceled_at => { :type => :date },
444
+ :completed_at => { :type => :date },
445
+ :connector => {
446
+ :properties => {
447
+ :configuration => { :type => :object },
448
+ :filtering => {
449
+ :properties => {
450
+ :domain => { :type => :keyword },
451
+ :rules => {
452
+ :properties => {
453
+ :id => { :type => :keyword },
454
+ :policy => { :type => :keyword },
455
+ :field => { :type => :keyword },
456
+ :rule => { :type => :keyword },
457
+ :value => { :type => :keyword },
458
+ :order => { :type => :short },
459
+ :created_at => { :type => :date },
460
+ :updated_at => { :type => :date }
461
+ }
462
+ },
463
+ :advanced_snippet => {
464
+ :properties => {
465
+ :value => { :type => :object },
466
+ :created_at => { :type => :date },
467
+ :updated_at => { :type => :date }
468
+ }
469
+ },
470
+ :warnings => {
471
+ :properties => {
472
+ :ids => { :type => :keyword },
473
+ :messages => { :type => :text }
474
+ }
475
+ }
476
+ }
477
+ },
478
+ :id => { :type => :keyword },
479
+ :index_name => { :type => :keyword },
480
+ :language => { :type => :keyword },
481
+ :pipeline => {
482
+ :properties => {
483
+ :extract_binary_content => { :type => :boolean },
484
+ :name => { :type => :keyword },
485
+ :reduce_whitespace => { :type => :boolean },
486
+ :run_ml_inference => { :type => :boolean }
487
+ }
488
+ },
489
+ :service_type => { :type => :keyword }
490
+ }
491
+ },
492
+ :created_at => { :type => :date },
493
+ :deleted_document_count => { :type => :integer },
280
494
  :error => { :type => :text },
281
- :worker_hostname => { :type => :keyword },
282
495
  :indexed_document_count => { :type => :integer },
283
- :deleted_document_count => { :type => :integer },
284
- :created_at => { :type => :date },
285
- :completed_at => { :type => :date }
496
+ :indexed_document_volume => { :type => :integer },
497
+ :last_seen => { :type => :date },
498
+ :metadata => { :type => :object },
499
+ :started_at => { :type => :date },
500
+ :status => { :type => :keyword },
501
+ :total_document_count => { :type => :integer },
502
+ :trigger_method => { :type => :keyword },
503
+ :worker_hostname => { :type => :keyword }
286
504
  }
287
505
  }
288
506
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
@@ -313,8 +531,20 @@ module Core
313
531
  end
314
532
  end
315
533
 
534
+ def document_count(index_name)
535
+ client.count(:index => index_name)['count']
536
+ end
537
+
316
538
  private
317
539
 
540
+ def should_update_validations?(domain_validations, filtering)
541
+ domains_present = filtering.collect { |filter| filter[:domain] }
542
+ domains_to_update = domain_validations.keys
543
+
544
+ # non-empty intersection -> domains to update present
545
+ !(domains_present & domains_to_update).empty?
546
+ end
547
+
318
548
  def client
319
549
  @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
320
550
  end
@@ -324,6 +554,15 @@ module Core
324
554
  index_version = index_versions.max # gets the largest suffix number
325
555
  "#{alias_name}-v#{index_version}"
326
556
  end
557
+
558
+ def update_filter_validation(filter, domain_validations)
559
+ domain = filter[:domain]
560
+
561
+ if domain_validations.key?(domain)
562
+ new_validation_state = { :draft => { :validation => domain_validations[domain] } }
563
+ filter.deep_merge!(new_validation_state)
564
+ end
565
+ end
327
566
  end
328
567
  end
329
568
  end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering'
10
+ require 'utility/filtering'
11
+
12
module Core
  module Filtering
    # Applies the simple filtering rules stored on a sync job to individual
    # documents and reports which rule decided the outcome for each document.
    class PostProcessEngine
      # Rules in evaluation order (SimpleRule instances, default rule excluded).
      attr_reader :rules

      # job_description - Hash-like job record; the filtering definition is read
      #                   from job_description['connector']['filtering'].
      def initialize(job_description)
        @rules = ordered_rules(job_description.dig('connector', 'filtering'))
      end

      # Evaluates the rules in order against the document.
      #
      # Returns a PostProcessResult carrying the first matching rule, or
      # SimpleRule::DEFAULT_RULE when no rule matches.
      def process(document)
        # Rules look fields up by string key; convert once instead of once per rule.
        normalized_document = document.stringify_keys
        matching_rule = @rules.find { |rule| rule.match?(normalized_document) } || SimpleRule::DEFAULT_RULE

        PostProcessResult.new(document, matching_rule)
      end

      private

      # Builds SimpleRule objects sorted by their 'order' attribute, dropping the
      # default rule (it is applied implicitly as the fallback in #process).
      # A missing filter or a filter without 'rules' yields an empty rule list.
      def ordered_rules(job_filtering)
        filter = Utility::Filtering.extract_filter(job_filtering) || {}
        job_rules = filter['rules'] || []

        job_rules
          .sort_by { |rule| rule['order'] }
          .reject { |rule| rule['id'] == Core::Filtering::SimpleRule::DEFAULT_RULE_ID }
          .map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # Outcome of running one document through the post-process filtering rules:
    # the document itself plus the rule that decided its fate.
    class PostProcessResult
      attr_reader :document, :matching_rule

      # document      - the document that was evaluated (Hash-like).
      # matching_rule - the rule that matched; must respond to #id, #policy and #is_include?.
      #
      # Emits a debug log line describing the decision.
      def initialize(document, matching_rule)
        @document = document
        @matching_rule = matching_rule
        # The engine hands over the raw document, whose keys may be symbols,
        # so look the id up under both key styles for the log message.
        document_id = document['id'] || document[:id]
        Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
      end

      # True when the matching rule's policy is to include the document.
      def is_include?
        matching_rule.is_include?
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # A single basic filtering rule: a policy (include/exclude) that applies to
    # a document when one of its fields satisfies a comparison with the rule value.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      # Supported policy names.
      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      # Supported comparison operators.
      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      attr_reader :policy, :field, :rule, :value, :id

      # rule_hash - Hash with the string keys 'policy', 'field', 'rule', 'value' and 'id'.
      #
      # Raises RuntimeError ("<key> is required") when one of the keys is absent.
      def initialize(rule_hash)
        @policy = rule_hash.fetch('policy')
        @field = rule_hash.fetch('field')
        @rule = rule_hash.fetch('rule')
        @value = rule_hash.fetch('value')
        @id = rule_hash.fetch('id')
        @rule_hash = rule_hash
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Convenience constructor taking the rule attributes positionally.
      def self.from_args(id, policy, field, rule, value)
        SimpleRule.new(
          'id' => id,
          'policy' => policy,
          'field' => field,
          'rule' => rule,
          'value' => value
        )
      end

      # Fallback rule: includes every document.
      DEFAULT_RULE = SimpleRule.new(
        'policy' => Policy::INCLUDE,
        'field' => '_',
        'rule' => Rule::REGEX,
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # Tells whether this rule applies to the document.
      #
      # document - Hash keyed by field name (same key type as #field).
      #
      # Returns true/false. The default rule always matches; a document without
      # the rule's field never matches; an unknown operator never matches.
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        doc_value = document[field]
        return false if doc_value.nil?

        coerced_value = coerce(doc_value)
        case rule
        when Rule::EQUALS
          equal_to?(doc_value, coerced_value)
        when Rule::STARTS_WITH
          doc_value.to_s.start_with?(value)
        when Rule::ENDS_WITH
          doc_value.to_s.end_with?(value)
        when Rule::CONTAINS
          doc_value.to_s.include?(value)
        when Rule::REGEX
          # match? yields a real boolean and skips building MatchData.
          doc_value.to_s.match?(pattern)
        when Rule::LESS_THAN
          compare(doc_value, coerced_value) { |left, right| left < right }
        when Rule::GREATER_THAN
          compare(doc_value, coerced_value) { |left, right| left > right }
        else
          false
        end
      end

      # Converts the rule value to a type comparable with the document value:
      # String -> String, Integer -> Integer, Float -> Float,
      # DateTime/Time -> parsed date, true/false -> 'true'/'false'.
      # Anything else, or any conversion failure, falls back to value.to_s.
      def coerce(doc_value)
        case doc_value
        when String
          value.to_s
        when Integer
          value.to_i
        when Float
          # Without this, '<' and '>' on float fields compared Float with String and raised.
          value.to_f
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      # Returns the hash this rule was built from.
      def to_h
        @rule_hash
      end

      private

      # Compiled form of the rule value, built lazily and reused across documents.
      def pattern
        @pattern ||= /#{value}/
      end

      # Equality check for the 'equals' operator.
      def equal_to?(doc_value, coerced_value)
        case coerced_value
        when Integer, Float
          doc_value == coerced_value
        when DateTime, Time
          # Compare instants at second precision; Time#to_s and DateTime#to_s
          # use different formats, so comparing their strings never matches.
          doc_value.to_time.to_i == coerced_value.to_time.to_i
        else
          doc_value.to_s == coerced_value
        end
      end

      # Runs an ordering comparison, yielding the (possibly normalised) operands.
      # Date-like operands are both converted to Time first. Operands that cannot
      # be ordered against each other make the rule not match instead of raising.
      def compare(doc_value, coerced_value)
        if coerced_value.is_a?(DateTime) || coerced_value.is_a?(Time)
          yield(doc_value.to_time, coerced_value.to_time)
        else
          yield(doc_value, coerced_value)
        end
      rescue ArgumentError, NoMethodError => e
        Utility::Logger.debug("Cannot compare document value '#{doc_value}' (#{doc_value.class}) with rule value '#{coerced_value}' (#{coerced_value.class}): #{e.class}: #{e.message}")
        false
      end

      def to_bool(str)
        return true if str == true || str =~ (/^(true|t|yes|y|on|1)$/i)
        return false if str == false || str.blank? || str =~ (/^(false|f|no|n|off|0)$/i)
        raise ArgumentError.new("invalid value for Boolean: \"#{str}\"")
      end

      # Parses a date string; when it is not parseable, treats it as an integer
      # string of epoch seconds (Time.at takes seconds, not milliseconds).
      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError
        Time.at(str.to_i)
      end
    end
  end
end