connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -10,8 +10,27 @@ require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
11
  require 'connectors/sync_status'
12
12
  require 'utility'
13
+ require 'elastic-transport'
13
14
 
14
15
  module Core
16
# Error signalling that a sync job cannot be claimed because the connector's
# last sync status is still in progress.
class JobAlreadyRunningError < StandardError
  # @param connector_id [String] id of the connector whose sync is already running
  def initialize(connector_id)
    message = "Sync job for connector '#{connector_id}' is already running."
    super(message)
  end
end
21
+
22
# Error signalling that indexing a new sync job document did not report a
# 'created' result.
class JobNotCreatedError < StandardError
  # @param connector_id [String] id of the connector the job was meant for
  # @param response [Object] the index response, interpolated into the message
  def initialize(connector_id, response)
    message = "Sync job for connector '#{connector_id}' could not be created. Response: #{response}"
    super(message)
  end
end
27
+
28
# Error signalling that a conditional connector update hit a version conflict,
# i.e. the supplied seq_no / primary_term no longer match the stored document.
class ConnectorVersionChangedError < StandardError
  # @param connector_id [String] id of the connector that was being updated
  # @param seq_no [Integer, nil] sequence number the update was conditioned on
  # @param primary_term [Integer, nil] primary term the update was conditioned on
  def initialize(connector_id, seq_no, primary_term)
    message = "Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'."
    super(message)
  end
end
33
+
15
34
  class ElasticConnectorActions
16
35
  class << self
17
36
 
@@ -30,10 +49,17 @@ module Core
30
49
  end
31
50
 
32
51
  def get_connector(connector_id)
52
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
33
53
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
34
54
  end
35
55
 
56
# Fetches the sync job document with the given id from the job index.
# The request passes :ignore => 404, so a missing document is not expected to raise.
#
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
#
# @param job_id [String] id of the job document
# @return [ActiveSupport::HashWithIndifferentAccess] the raw get response
def get_job(job_id)
  response = client.get(
    :index => Utility::Constants::JOB_INDEX,
    :id => job_id,
    :ignore => 404
  )
  response.with_indifferent_access
end
60
+
36
61
  def connectors_meta
62
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
37
63
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
38
64
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
39
65
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -52,6 +78,19 @@ module Core
52
78
  )
53
79
  end
54
80
 
81
# Runs a paginated search against the job index, sorted by 'created_at'.
#
# @param query [Hash] Elasticsearch query placed under :query in the request body
# @param page_size [Integer] value sent as :size
# @param offset [Integer] value sent as :from
# @return [Object] whatever the client returns for the search request
def search_jobs(query, page_size, offset)
  search_body = {
    :size => page_size,
    :from => offset,
    :query => query,
    :sort => ['created_at']
  }
  client.search(
    :index => Utility::Constants::JOB_INDEX,
    :ignore => 404,
    :body => search_body
  )
end
93
+
55
94
  def update_connector_configuration(connector_id, configuration)
56
95
  update_connector_fields(connector_id, :configuration => configuration)
57
96
  end
@@ -71,21 +110,90 @@ module Core
71
110
  update_connector_configuration(connector_id, payload)
72
111
  end
73
112
 
113
# Merges validation results into the connector's stored filtering and writes
# the filtering back to the connector document.
#
# The stored filtering may be a single Hash or an Array of per-domain Hashes;
# any other shape is logged and left untouched.
#
# @param connector_id [String] id of the connector to update
# @param filter_validation_results [Hash] validation results keyed by domain
# @return [Object, nil] result of the connector update, or nil when nothing was written
def update_filtering_validation(connector_id, filter_validation_results)
  return if filter_validation_results.empty?

  filtering = get_connector(connector_id).dig(:_source, :filtering)

  if filtering.is_a?(Hash)
    update_filter_validation(filtering, filter_validation_results)
  elsif filtering.is_a?(Array)
    # Skip the write entirely when none of the validated domains is stored on the connector.
    return unless should_update_validations?(filter_validation_results, filtering)

    filtering.each { |filter| update_filter_validation(filter, filter_validation_results) }
  else
    Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
    return
  end

  update_connector_fields(connector_id, { :filtering => filtering })
end
134
+
74
135
  def claim_job(connector_id)
75
- update_connector_fields(connector_id,
76
- :sync_now => false,
77
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
78
- :last_synced => Time.now)
136
+ seq_no = nil
137
+ primary_term = nil
138
+ sync_in_progress = false
139
+ connector_record = client.get(
140
+ :index => Utility::Constants::CONNECTORS_INDEX,
141
+ :id => connector_id,
142
+ :ignore => 404,
143
+ :refresh => true
144
+ ).tap do |response|
145
+ seq_no = response['_seq_no']
146
+ primary_term = response['_primary_term']
147
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
148
+ end
149
+ if sync_in_progress
150
+ raise JobAlreadyRunningError.new(connector_id)
151
+ end
152
+ update_connector_fields(
153
+ connector_id,
154
+ { :sync_now => false,
155
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
156
+ :last_synced => Time.now },
157
+ seq_no,
158
+ primary_term
159
+ )
79
160
 
80
161
  body = {
81
- :connector_id => connector_id,
82
162
  :status => Connectors::SyncStatus::IN_PROGRESS,
83
163
  :worker_hostname => Socket.gethostname,
84
- :created_at => Time.now
164
+ :created_at => Time.now,
165
+ :started_at => Time.now,
166
+ :last_seen => Time.now,
167
+ :connector => {
168
+ :id => connector_id,
169
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
170
+ }
85
171
  }
86
- job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
87
172
 
88
- job['_id']
173
+ index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
+ if index_response['result'] == 'created'
175
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
+ return client.get(
177
+ :index => Utility::Constants::JOB_INDEX,
178
+ :id => index_response['_id'],
179
+ :ignore => 404
180
+ ).with_indifferent_access
181
+ end
182
+ raise JobNotCreatedError.new(connector_id, index_response)
183
+ end
184
+
185
# Converts the filtering stored on a connector document into the filtering
# structure stored on a sync job: one entry per filtering domain, built from
# that domain's 'active' rules and advanced snippet.
#
# @param connector_filtering [Array<Hash>, Hash, nil] string-keyed filtering from the connector document
# @return [Array<Hash>] one hash per domain with 'domain', 'rules', 'advanced_snippet' and 'warnings'
def convert_connector_filtering_to_job_filtering(connector_filtering)
  return [] unless connector_filtering

  # A single filtering object is treated as a one-element list of domains.
  filtering_domains = connector_filtering.is_a?(Array) ? connector_filtering : [connector_filtering]

  filtering_domains.map do |filtering_domain|
    snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
    {
      'domain' => filtering_domain['domain'],
      # Always hand the job a list, so code iterating or sorting the job's rules does not hit nil.
      'rules' => filtering_domain.dig('active', 'rules') || [],
      # Prefer the snippet's 'value'; fall back to the snippet itself when it has none.
      'advanced_snippet' => snippet['value'] || snippet,
      'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
    }
  end
end
90
198
 
91
199
  def update_connector_status(connector_id, status, error_message = nil)
@@ -99,22 +207,33 @@ module Core
99
207
  update_connector_fields(connector_id, body)
100
208
  end
101
209
 
102
- def complete_sync(connector_id, job_id, status)
103
- sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
210
# Partially updates a sync job document: stamps :last_seen with the current
# time and merges in the supplied metadata (metadata keys win on collision).
#
# @param job_id [String] id of the job document
# @param metadata [Hash] additional fields to write to the job document
# @return [Object] whatever the client returns for the update request
def update_sync(job_id, metadata)
  heartbeat = { :last_seen => Time.now }
  client.update(
    :index => Utility::Constants::JOB_INDEX,
    :id => job_id,
    :body => { :doc => heartbeat.merge(metadata) }
  )
end
216
+
217
+ def complete_sync(connector_id, job_id, metadata, error)
218
+ sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
219
+
220
+ metadata ||= {}
104
221
 
105
222
  update_connector_fields(connector_id,
106
223
  :last_sync_status => sync_status,
107
- :last_sync_error => status[:error],
108
- :error => status[:error],
224
+ :last_sync_error => error,
225
+ :error => error,
109
226
  :last_synced => Time.now,
110
- :last_indexed_document_count => status[:indexed_document_count],
111
- :last_deleted_document_count => status[:deleted_document_count])
227
+ :last_indexed_document_count => metadata[:indexed_document_count],
228
+ :last_deleted_document_count => metadata[:deleted_document_count])
112
229
 
113
230
  body = {
114
231
  :doc => {
115
232
  :status => sync_status,
116
- :completed_at => Time.now
117
- }.merge(status)
233
+ :completed_at => Time.now,
234
+ :last_seen => Time.now,
235
+ :error => error
236
+ }.merge(metadata)
118
237
  }
119
238
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
120
239
  end
@@ -136,7 +255,7 @@ module Core
136
255
  }
137
256
  loop do
138
257
  response = client.search(:body => body)
139
- hits = response['hits']['hits']
258
+ hits = response.dig('hits', 'hits') || []
140
259
 
141
260
  ids = hits.map { |h| h['_id'] }
142
261
  result += ids
@@ -202,12 +321,105 @@ module Core
202
321
  :properties => {
203
322
  :api_key_id => { :type => :keyword },
204
323
  :configuration => { :type => :object },
205
- :error => { :type => :text },
324
+ :description => { :type => :text },
325
+ :error => { :type => :keyword },
326
+ :features => {
327
+ :properties => {
328
+ :filtering_advanced_config => { :type => :boolean },
329
+ :filtering_rules => { :type => :boolean }
330
+ }
331
+ },
332
+ :filtering => {
333
+ :properties => {
334
+ :domain => { :type => :keyword },
335
+ :active => {
336
+ :properties => {
337
+ :rules => {
338
+ :properties => {
339
+ :id => { :type => :keyword },
340
+ :policy => { :type => :keyword },
341
+ :field => { :type => :keyword },
342
+ :rule => { :type => :keyword },
343
+ :value => { :type => :keyword },
344
+ :order => { :type => :short },
345
+ :created_at => { :type => :date },
346
+ :updated_at => { :type => :date }
347
+ }
348
+ },
349
+ :advanced_snippet => {
350
+ :properties => {
351
+ :value => { :type => :object },
352
+ :created_at => { :type => :date },
353
+ :updated_at => { :type => :date }
354
+ }
355
+ },
356
+ :validation => {
357
+ :properties => {
358
+ :state => { :type => :keyword },
359
+ :errors => {
360
+ :properties => {
361
+ :ids => { :type => :keyword },
362
+ :messages => { :type => :text }
363
+ }
364
+ }
365
+ }
366
+ }
367
+ }
368
+ },
369
+ :draft => {
370
+ :properties => {
371
+ :rules => {
372
+ :properties => {
373
+ :id => { :type => :keyword },
374
+ :policy => { :type => :keyword },
375
+ :field => { :type => :keyword },
376
+ :rule => { :type => :keyword },
377
+ :value => { :type => :keyword },
378
+ :order => { :type => :short },
379
+ :created_at => { :type => :date },
380
+ :updated_at => { :type => :date }
381
+ }
382
+ },
383
+ :advanced_snippet => {
384
+ :properties => {
385
+ :value => { :type => :object },
386
+ :created_at => { :type => :date },
387
+ :updated_at => { :type => :date }
388
+ }
389
+ },
390
+ :validation => {
391
+ :properties => {
392
+ :state => { :type => :keyword },
393
+ :errors => {
394
+ :properties => {
395
+ :ids => { :type => :keyword },
396
+ :messages => { :type => :text }
397
+ }
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+ },
206
405
  :index_name => { :type => :keyword },
406
+ :is_native => { :type => :boolean },
407
+ :language => { :type => :keyword },
207
408
  :last_seen => { :type => :date },
409
+ :last_sync_error => { :type => :keyword },
410
+ :last_sync_status => { :type => :keyword },
208
411
  :last_synced => { :type => :date },
209
- :last_indexed_document_count => { :type => :integer },
210
- :last_deleted_document_count => { :type => :integer },
412
+ :last_deleted_document_count => { :type => :long },
413
+ :last_indexed_document_count => { :type => :long },
414
+ :name => { :type => :keyword },
415
+ :pipeline => {
416
+ :properties => {
417
+ :extract_binary_content => { :type => :boolean },
418
+ :name => { :type => :keyword },
419
+ :reduce_whitespace => { :type => :boolean },
420
+ :run_ml_inference => { :type => :boolean }
421
+ }
422
+ },
211
423
  :scheduling => {
212
424
  :properties => {
213
425
  :enabled => { :type => :boolean },
@@ -216,9 +428,7 @@ module Core
216
428
  },
217
429
  :service_type => { :type => :keyword },
218
430
  :status => { :type => :keyword },
219
- :sync_error => { :type => :text },
220
- :sync_now => { :type => :boolean },
221
- :sync_status => { :type => :keyword }
431
+ :sync_now => { :type => :boolean }
222
432
  }
223
433
  }
224
434
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -229,32 +439,112 @@ module Core
229
439
  def ensure_job_index_exists
230
440
  mappings = {
231
441
  :properties => {
232
- :connector_id => { :type => :keyword },
233
- :status => { :type => :keyword },
442
+ :cancelation_requested_at => { :type => :date },
443
+ :canceled_at => { :type => :date },
444
+ :completed_at => { :type => :date },
445
+ :connector => {
446
+ :properties => {
447
+ :configuration => { :type => :object },
448
+ :filtering => {
449
+ :properties => {
450
+ :domain => { :type => :keyword },
451
+ :rules => {
452
+ :properties => {
453
+ :id => { :type => :keyword },
454
+ :policy => { :type => :keyword },
455
+ :field => { :type => :keyword },
456
+ :rule => { :type => :keyword },
457
+ :value => { :type => :keyword },
458
+ :order => { :type => :short },
459
+ :created_at => { :type => :date },
460
+ :updated_at => { :type => :date }
461
+ }
462
+ },
463
+ :advanced_snippet => {
464
+ :properties => {
465
+ :value => { :type => :object },
466
+ :created_at => { :type => :date },
467
+ :updated_at => { :type => :date }
468
+ }
469
+ },
470
+ :warnings => {
471
+ :properties => {
472
+ :ids => { :type => :keyword },
473
+ :messages => { :type => :text }
474
+ }
475
+ }
476
+ }
477
+ },
478
+ :id => { :type => :keyword },
479
+ :index_name => { :type => :keyword },
480
+ :language => { :type => :keyword },
481
+ :pipeline => {
482
+ :properties => {
483
+ :extract_binary_content => { :type => :boolean },
484
+ :name => { :type => :keyword },
485
+ :reduce_whitespace => { :type => :boolean },
486
+ :run_ml_inference => { :type => :boolean }
487
+ }
488
+ },
489
+ :service_type => { :type => :keyword }
490
+ }
491
+ },
492
+ :created_at => { :type => :date },
493
+ :deleted_document_count => { :type => :integer },
234
494
  :error => { :type => :text },
235
- :worker_hostname => { :type => :keyword },
236
495
  :indexed_document_count => { :type => :integer },
237
- :deleted_document_count => { :type => :integer },
238
- :created_at => { :type => :date },
239
- :completed_at => { :type => :date }
496
+ :indexed_document_volume => { :type => :integer },
497
+ :last_seen => { :type => :date },
498
+ :metadata => { :type => :object },
499
+ :started_at => { :type => :date },
500
+ :status => { :type => :keyword },
501
+ :total_document_count => { :type => :integer },
502
+ :trigger_method => { :type => :keyword },
503
+ :worker_hostname => { :type => :keyword }
240
504
  }
241
505
  }
242
506
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
243
507
  end
244
508
 
245
- def update_connector_fields(connector_id, doc = {})
509
+ def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
246
510
  return if doc.empty?
247
- client.update(
511
+ update_args = {
248
512
  :index => Utility::Constants::CONNECTORS_INDEX,
249
513
  :id => connector_id,
250
514
  :body => { :doc => doc },
251
515
  :refresh => true,
252
516
  :retry_on_conflict => 3
253
- )
517
+ }
518
+ # seq_no and primary_term are used for optimistic concurrency control
519
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
520
+ if seq_no && primary_term
521
+ update_args[:if_seq_no] = seq_no
522
+ update_args[:if_primary_term] = primary_term
523
+ update_args.delete(:retry_on_conflict)
524
+ end
525
+ begin
526
+ client.update(update_args)
527
+ rescue Elastic::Transport::Transport::Errors::Conflict
528
+ # VersionConflictException
529
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
530
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
531
+ end
532
+ end
533
+
534
# Returns the 'count' field of the client's count response for the given index.
#
# @param index_name [String] name of the index to count documents in
# @return [Object] the value stored under 'count' in the response
def document_count(index_name)
  count_response = client.count(:index => index_name)
  count_response['count']
end
255
537
 
256
538
  private
257
539
 
540
# Tells whether at least one domain that has validation results is also
# present in the connector's stored filtering.
#
# @param domain_validations [Hash] validation results keyed by domain
# @param filtering [Array<Hash>] stored filtering entries, each read via filter[:domain]
# @return [Boolean] true when the two sets of domains overlap
def should_update_validations?(domain_validations, filtering)
  stored_domains = filtering.map { |filter| filter[:domain] }
  shared_domains = stored_domains & domain_validations.keys

  # a non-empty intersection means there is something to update
  shared_domains.size.positive?
end
547
+
258
548
  def client
259
549
  @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
260
550
  end
@@ -264,6 +554,15 @@ module Core
264
554
  index_version = index_versions.max # gets the largest suffix number
265
555
  "#{alias_name}-v#{index_version}"
266
556
  end
557
+
558
# Deep-merges the validation result for this filter's domain into the filter's
# :draft => :validation entry. Filters whose domain has no result are left as is.
# The filter is modified in place.
#
# @param filter [Hash] one stored filtering entry, read via filter[:domain]
# @param domain_validations [Hash] validation results keyed by domain
# @return [Hash, nil] the merged filter, or nil when the domain has no result
def update_filter_validation(filter, domain_validations)
  domain = filter[:domain]
  return unless domain_validations.key?(domain)

  filter.deep_merge!({ :draft => { :validation => domain_validations[domain] } })
end
267
566
  end
268
567
  end
269
568
  end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering'
10
+ require 'utility/filtering'
11
+
12
module Core
  module Filtering
    # Applies a job's basic filtering rules to documents after they have been
    # fetched. Rules are evaluated in ascending 'order'; the first rule that
    # matches decides the outcome, and SimpleRule::DEFAULT_RULE is used when
    # none matches.
    class PostProcessEngine
      # @return [Array<SimpleRule>] the job's rules in evaluation order, without the default rule
      attr_reader :rules

      # @param job_description [Hash] job document; rules are read from its 'connector' => 'filtering' entry
      def initialize(job_description)
        @rules = ordered_rules(job_description.dig('connector', 'filtering'))
      end

      # Finds the first rule matching the document.
      #
      # @param document [Hash] document with string or symbol keys
      # @return [PostProcessResult] the untouched document paired with the matching rule
      def process(document)
        # Rules look fields up by string key; convert once rather than once per rule.
        stringified_document = document.stringify_keys
        matching_rule = @rules.find { |rule| rule.match?(stringified_document) } || SimpleRule::DEFAULT_RULE
        PostProcessResult.new(document, matching_rule)
      end

      private

      # Builds the ordered rule list from the job's filtering.
      #
      # @param job_filtering [Array<Hash>, Hash, nil] filtering as stored on the job
      # @return [Array<SimpleRule>]
      def ordered_rules(job_filtering)
        # The extracted filter may carry no 'rules' entry at all; treat that as "no rules".
        job_rules = Utility::Filtering.extract_filter(job_filtering)['rules'] || []

        # The default rule is dropped before sorting: it is applied separately as the fallback in #process.
        job_rules
          .reject { |rule| rule['id'] == SimpleRule::DEFAULT_RULE_ID }
          .sort_by { |rule| rule['order'] }
          .map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # Outcome of running one document through the post-process filtering
    # rules: the document together with the rule that decided its fate.
    class PostProcessResult
      attr_reader :document, :matching_rule

      # @param document [Hash] the processed document
      # @param matching_rule [#id, #policy, #is_include?] the rule that matched the document
      def initialize(document, matching_rule)
        @document = document
        @matching_rule = matching_rule

        # PostProcessEngine#process stringifies keys only for rule matching and passes the
        # original document here, so the id may sit under a symbol key.
        document_id = document['id'] || document[:id]
        Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
      end

      # @return [Boolean] whether the matching rule says the document should be kept
      def is_include?
        matching_rule.is_include?
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # A single basic filtering rule: a policy (include/exclude), the document
    # field it inspects, the comparison to perform and the value to compare with.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      # Spellings accepted when the rule value is compared against a boolean field.
      TRUE_PATTERN = /\A(true|t|yes|y|on|1)\z/i
      FALSE_PATTERN = /\A(false|f|no|n|off|0)\z/i

      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      attr_reader :policy, :field, :rule, :value, :id

      # @param rule_hash [Hash] string-keyed hash with 'policy', 'field', 'rule', 'value' and 'id'
      # @raise [RuntimeError] "<key> is required" when one of those keys is missing
      def initialize(rule_hash)
        @policy = rule_hash.fetch('policy')
        @field = rule_hash.fetch('field')
        @rule = rule_hash.fetch('rule')
        @value = rule_hash.fetch('value')
        @id = rule_hash.fetch('id')
        @rule_hash = rule_hash
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Convenience constructor taking the rule attributes positionally.
      def self.from_args(id, policy, field, rule, value)
        SimpleRule.new(
          {
            'id' => id,
            'policy' => policy,
            'field' => field,
            'rule' => rule,
            'value' => value
          }
        )
      end

      # Catch-all rule: includes every document.
      DEFAULT_RULE = SimpleRule.new(
        'policy' => 'include',
        'field' => '_',
        'rule' => 'regex',
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # Tells whether the document satisfies this rule.
      #
      # The rule value is first coerced to the type of the document's field
      # value (see #coerce) and then compared according to #rule.
      #
      # @param document [Hash] string-keyed document
      # @return [Boolean] false when the field is absent/nil, the rule type is unknown,
      #   or the operands of '<' / '>' cannot be ordered against each other
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        doc_value = document[field]
        return false if doc_value.nil?

        coerced_value = coerce(doc_value)
        case rule
        when Rule::EQUALS
          values_equal?(doc_value, coerced_value)
        when Rule::STARTS_WITH
          doc_value.to_s.start_with?(value.to_s)
        when Rule::ENDS_WITH
          doc_value.to_s.end_with?(value.to_s)
        when Rule::CONTAINS
          doc_value.to_s.include?(value.to_s)
        when Rule::REGEX
          # match? yields a real boolean and skips building MatchData.
          doc_value.to_s.match?(pattern)
        when Rule::LESS_THAN
          order = ordering(doc_value, coerced_value)
          !order.nil? && order.negative?
        when Rule::GREATER_THAN
          order = ordering(doc_value, coerced_value)
          !order.nil? && order.positive?
        else
          false
        end
      end

      # Converts the rule value to a type comparable with the document value.
      # Falls back to the value's string form when the conversion fails.
      #
      # @param doc_value [Object] the document's value for #field
      # @return [String, Integer, Float, DateTime, Time] the coerced rule value
      def coerce(doc_value)
        case doc_value
        when String
          value.to_s
        when Integer
          value.to_i
        when Float
          value.to_f
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      # @return [Hash] the hash this rule was built from
      def to_h
        @rule_hash
      end

      private

      # Compiled form of the rule value, built once per rule instead of once per document.
      def pattern
        @pattern ||= Regexp.new(value.to_s)
      end

      # Equality for the 'equals' rule, chosen by the type of the coerced rule value.
      def values_equal?(doc_value, coerced_value)
        case coerced_value
        when Numeric
          doc_value == coerced_value
        when DateTime, Time
          # Compare instants at second resolution: Time#to_s and DateTime#to_s use
          # different formats, so comparing their strings would never match across types.
          epoch_seconds(doc_value) == epoch_seconds(coerced_value)
        else
          doc_value.to_s == coerced_value
        end
      end

      # Three-way comparison for '<' and '>'.
      #
      # @return [Integer, nil] nil when the two values cannot be ordered against each other
      def ordering(doc_value, coerced_value)
        as_comparable(doc_value) <=> as_comparable(coerced_value)
      end

      # DateTime is converted to Time so date values compare regardless of which class each side uses.
      def as_comparable(candidate)
        candidate.is_a?(DateTime) ? candidate.to_time : candidate
      end

      def epoch_seconds(date_value)
        date_value.to_time.to_i
      end

      def to_bool(str)
        return str if str == true || str == false

        text = str.to_s
        return true if TRUE_PATTERN.match?(text)
        return false if text.strip.empty? || FALSE_PATTERN.match?(text)

        raise ArgumentError.new("invalid value for Boolean: \"#{str}\"")
      end

      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError, TypeError
        # Not a parseable date string (or not a string at all): treat it as a numeric epoch.
        # NOTE(review): Time.at interprets the number as seconds; the original comment here
        # said "millis" - confirm which unit rule values are expected to use.
        Time.at(str.to_i)
      end
    end
  end
end