connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -10,8 +10,27 @@ require 'active_support/core_ext/hash'
10
10
  require 'connectors/connector_status'
11
11
  require 'connectors/sync_status'
12
12
  require 'utility'
13
+ require 'elastic-transport'
13
14
 
14
15
  module Core
16
# Error signalling that a sync job was requested for a connector that is
# already syncing.
class JobAlreadyRunningError < StandardError
  # @param connector_id [String] id of the connector, embedded in the message
  def initialize(connector_id)
    message = "Sync job for connector '#{connector_id}' is already running."
    super(message)
  end
end
21
+
22
# Error signalling that indexing a new sync job document did not report
# a successful creation.
class JobNotCreatedError < StandardError
  # @param connector_id [String] id of the connector the job was meant for
  # @param response [Object] the index response, interpolated into the message
  def initialize(connector_id, response)
    message = "Sync job for connector '#{connector_id}' could not be created. Response: #{response}"
    super(message)
  end
end
27
+
28
# Error signalling that a conditional update of a connector document was
# rejected because the supplied seq_no / primary_term no longer match.
class ConnectorVersionChangedError < StandardError
  # @param connector_id [String] id of the connector document
  # @param seq_no [Integer, nil] sequence number the update was conditioned on
  # @param primary_term [Integer, nil] primary term the update was conditioned on
  def initialize(connector_id, seq_no, primary_term)
    message = "Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'."
    super(message)
  end
end
33
+
15
34
  class ElasticConnectorActions
16
35
  class << self
17
36
 
@@ -30,10 +49,17 @@ module Core
30
49
  end
31
50
 
32
51
  def get_connector(connector_id)
52
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
33
53
  client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
34
54
  end
35
55
 
56
# Fetches the raw document of a sync job from the job index.
#
# @param job_id [String] id of the job document
# @return [Hash] the client response wrapped with indifferent access
def get_job(job_id)
  # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
  job_response = client.get(
    :index => Utility::Constants::JOB_INDEX,
    :id => job_id,
    :ignore => 404
  )
  job_response.with_indifferent_access
end
60
+
36
61
  def connectors_meta
62
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
37
63
  alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
38
64
  index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
39
65
  alias_mappings.dig(index, 'mappings', '_meta') || {}
@@ -52,6 +78,19 @@ module Core
52
78
  )
53
79
  end
54
80
 
81
# Runs a paginated search against the job index, sorted by 'created_at'.
#
# @param query [Hash] query clause placed under :query in the request body
# @param page_size [Integer] value sent as :size
# @param offset [Integer] value sent as :from
# @return [Object] whatever the client returns for the search call
def search_jobs(query, page_size, offset)
  search_body = {
    :size => page_size,
    :from => offset,
    :query => query,
    :sort => ['created_at']
  }
  client.search(
    :index => Utility::Constants::JOB_INDEX,
    :ignore => 404,
    :body => search_body
  )
end
93
+
55
94
  def update_connector_configuration(connector_id, configuration)
56
95
  update_connector_fields(connector_id, :configuration => configuration)
57
96
  end
@@ -71,21 +110,90 @@ module Core
71
110
  update_connector_configuration(connector_id, payload)
72
111
  end
73
112
 
113
# Merges validation results into the stored filtering of a connector and
# writes the filtering back to the connector document.
#
# Does nothing when there are no results, when the stored filtering is an
# Array without any domain covered by the results, or when the stored
# filtering is neither a Hash nor an Array (a warning is logged instead).
#
# @param connector_id [String] id of the connector document
# @param filter_validation_results [Hash] validation results keyed by domain
def update_filtering_validation(connector_id, filter_validation_results)
  return if filter_validation_results.empty?

  filtering = get_connector(connector_id).dig(:_source, :filtering)

  if filtering.is_a?(Hash)
    update_filter_validation(filtering, filter_validation_results)
  elsif filtering.is_a?(Array)
    return unless should_update_validations?(filter_validation_results, filtering)

    filtering.each { |filter| update_filter_validation(filter, filter_validation_results) }
  else
    Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
    return
  end

  update_connector_fields(connector_id, { :filtering => filtering })
end
134
+
74
135
  def claim_job(connector_id)
75
- update_connector_fields(connector_id,
76
- :sync_now => false,
77
- :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
78
- :last_synced => Time.now)
136
+ seq_no = nil
137
+ primary_term = nil
138
+ sync_in_progress = false
139
+ connector_record = client.get(
140
+ :index => Utility::Constants::CONNECTORS_INDEX,
141
+ :id => connector_id,
142
+ :ignore => 404,
143
+ :refresh => true
144
+ ).tap do |response|
145
+ seq_no = response['_seq_no']
146
+ primary_term = response['_primary_term']
147
+ sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
148
+ end
149
+ if sync_in_progress
150
+ raise JobAlreadyRunningError.new(connector_id)
151
+ end
152
+ update_connector_fields(
153
+ connector_id,
154
+ { :sync_now => false,
155
+ :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
156
+ :last_synced => Time.now },
157
+ seq_no,
158
+ primary_term
159
+ )
79
160
 
80
161
  body = {
81
- :connector_id => connector_id,
82
162
  :status => Connectors::SyncStatus::IN_PROGRESS,
83
163
  :worker_hostname => Socket.gethostname,
84
- :created_at => Time.now
164
+ :created_at => Time.now,
165
+ :started_at => Time.now,
166
+ :last_seen => Time.now,
167
+ :connector => {
168
+ :id => connector_id,
169
+ :filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
170
+ }
85
171
  }
86
- job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
87
172
 
88
- job['_id']
173
+ index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
174
+ if index_response['result'] == 'created'
175
+ # TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
176
+ return client.get(
177
+ :index => Utility::Constants::JOB_INDEX,
178
+ :id => index_response['_id'],
179
+ :ignore => 404
180
+ ).with_indifferent_access
181
+ end
182
+ raise JobNotCreatedError.new(connector_id, index_response)
183
+ end
184
+
185
# Translates the filtering stored on a connector into the shape stored on
# a job: one entry per domain, carrying the active rules and the active
# advanced snippet.
#
# @param connector_filtering [Hash, Array<Hash>, nil] a single filtering
#   domain or a list of them, with string keys
# @return [Array<Hash>] one job filtering entry per domain; empty when no
#   filtering was given
def convert_connector_filtering_to_job_filtering(connector_filtering)
  return [] unless connector_filtering

  domains = connector_filtering.is_a?(Array) ? connector_filtering : [connector_filtering]
  domains.map do |domain_filtering|
    active_snippet = domain_filtering.dig('active', 'advanced_snippet') || {}
    {
      'domain' => domain_filtering['domain'],
      'rules' => domain_filtering.dig('active', 'rules'),
      # use the snippet's 'value' when it has one, otherwise the snippet itself
      'advanced_snippet' => active_snippet['value'] || active_snippet,
      'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
    }
  end
end
90
198
 
91
199
  def update_connector_status(connector_id, status, error_message = nil)
@@ -99,22 +207,33 @@ module Core
99
207
  update_connector_fields(connector_id, body)
100
208
  end
101
209
 
102
- def complete_sync(connector_id, job_id, status)
103
- sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED
210
# Sends a partial update to a job document: stamps :last_seen with the
# current time and merges in the given metadata.
#
# Keys in metadata win over the generated :last_seen on collision.
#
# @param job_id [String] id of the job document
# @param metadata [Hash, nil] extra fields to write; nil means none
# @return [Object] whatever the client returns for the update call
def update_sync(job_id, metadata)
  # Hash#merge raises TypeError on nil, so treat missing metadata as "no extra fields".
  doc = { :last_seen => Time.now }.merge(metadata || {})
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => { :doc => doc })
end
216
+
217
+ def complete_sync(connector_id, job_id, metadata, error)
218
+ sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
219
+
220
+ metadata ||= {}
104
221
 
105
222
  update_connector_fields(connector_id,
106
223
  :last_sync_status => sync_status,
107
- :last_sync_error => status[:error],
108
- :error => status[:error],
224
+ :last_sync_error => error,
225
+ :error => error,
109
226
  :last_synced => Time.now,
110
- :last_indexed_document_count => status[:indexed_document_count],
111
- :last_deleted_document_count => status[:deleted_document_count])
227
+ :last_indexed_document_count => metadata[:indexed_document_count],
228
+ :last_deleted_document_count => metadata[:deleted_document_count])
112
229
 
113
230
  body = {
114
231
  :doc => {
115
232
  :status => sync_status,
116
- :completed_at => Time.now
117
- }.merge(status)
233
+ :completed_at => Time.now,
234
+ :last_seen => Time.now,
235
+ :error => error
236
+ }.merge(metadata)
118
237
  }
119
238
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
120
239
  end
@@ -136,7 +255,7 @@ module Core
136
255
  }
137
256
  loop do
138
257
  response = client.search(:body => body)
139
- hits = response['hits']['hits']
258
+ hits = response.dig('hits', 'hits') || []
140
259
 
141
260
  ids = hits.map { |h| h['_id'] }
142
261
  result += ids
@@ -202,12 +321,105 @@ module Core
202
321
  :properties => {
203
322
  :api_key_id => { :type => :keyword },
204
323
  :configuration => { :type => :object },
205
- :error => { :type => :text },
324
+ :description => { :type => :text },
325
+ :error => { :type => :keyword },
326
+ :features => {
327
+ :properties => {
328
+ :filtering_advanced_config => { :type => :boolean },
329
+ :filtering_rules => { :type => :boolean }
330
+ }
331
+ },
332
+ :filtering => {
333
+ :properties => {
334
+ :domain => { :type => :keyword },
335
+ :active => {
336
+ :properties => {
337
+ :rules => {
338
+ :properties => {
339
+ :id => { :type => :keyword },
340
+ :policy => { :type => :keyword },
341
+ :field => { :type => :keyword },
342
+ :rule => { :type => :keyword },
343
+ :value => { :type => :keyword },
344
+ :order => { :type => :short },
345
+ :created_at => { :type => :date },
346
+ :updated_at => { :type => :date }
347
+ }
348
+ },
349
+ :advanced_snippet => {
350
+ :properties => {
351
+ :value => { :type => :object },
352
+ :created_at => { :type => :date },
353
+ :updated_at => { :type => :date }
354
+ }
355
+ },
356
+ :validation => {
357
+ :properties => {
358
+ :state => { :type => :keyword },
359
+ :errors => {
360
+ :properties => {
361
+ :ids => { :type => :keyword },
362
+ :messages => { :type => :text }
363
+ }
364
+ }
365
+ }
366
+ }
367
+ }
368
+ },
369
+ :draft => {
370
+ :properties => {
371
+ :rules => {
372
+ :properties => {
373
+ :id => { :type => :keyword },
374
+ :policy => { :type => :keyword },
375
+ :field => { :type => :keyword },
376
+ :rule => { :type => :keyword },
377
+ :value => { :type => :keyword },
378
+ :order => { :type => :short },
379
+ :created_at => { :type => :date },
380
+ :updated_at => { :type => :date }
381
+ }
382
+ },
383
+ :advanced_snippet => {
384
+ :properties => {
385
+ :value => { :type => :object },
386
+ :created_at => { :type => :date },
387
+ :updated_at => { :type => :date }
388
+ }
389
+ },
390
+ :validation => {
391
+ :properties => {
392
+ :state => { :type => :keyword },
393
+ :errors => {
394
+ :properties => {
395
+ :ids => { :type => :keyword },
396
+ :messages => { :type => :text }
397
+ }
398
+ }
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+ },
206
405
  :index_name => { :type => :keyword },
406
+ :is_native => { :type => :boolean },
407
+ :language => { :type => :keyword },
207
408
  :last_seen => { :type => :date },
409
+ :last_sync_error => { :type => :keyword },
410
+ :last_sync_status => { :type => :keyword },
208
411
  :last_synced => { :type => :date },
209
- :last_indexed_document_count => { :type => :integer },
210
- :last_deleted_document_count => { :type => :integer },
412
+ :last_deleted_document_count => { :type => :long },
413
+ :last_indexed_document_count => { :type => :long },
414
+ :name => { :type => :keyword },
415
+ :pipeline => {
416
+ :properties => {
417
+ :extract_binary_content => { :type => :boolean },
418
+ :name => { :type => :keyword },
419
+ :reduce_whitespace => { :type => :boolean },
420
+ :run_ml_inference => { :type => :boolean }
421
+ }
422
+ },
211
423
  :scheduling => {
212
424
  :properties => {
213
425
  :enabled => { :type => :boolean },
@@ -216,9 +428,7 @@ module Core
216
428
  },
217
429
  :service_type => { :type => :keyword },
218
430
  :status => { :type => :keyword },
219
- :sync_error => { :type => :text },
220
- :sync_now => { :type => :boolean },
221
- :sync_status => { :type => :keyword }
431
+ :sync_now => { :type => :boolean }
222
432
  }
223
433
  }
224
434
  ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
@@ -229,32 +439,112 @@ module Core
229
439
  def ensure_job_index_exists
230
440
  mappings = {
231
441
  :properties => {
232
- :connector_id => { :type => :keyword },
233
- :status => { :type => :keyword },
442
+ :cancelation_requested_at => { :type => :date },
443
+ :canceled_at => { :type => :date },
444
+ :completed_at => { :type => :date },
445
+ :connector => {
446
+ :properties => {
447
+ :configuration => { :type => :object },
448
+ :filtering => {
449
+ :properties => {
450
+ :domain => { :type => :keyword },
451
+ :rules => {
452
+ :properties => {
453
+ :id => { :type => :keyword },
454
+ :policy => { :type => :keyword },
455
+ :field => { :type => :keyword },
456
+ :rule => { :type => :keyword },
457
+ :value => { :type => :keyword },
458
+ :order => { :type => :short },
459
+ :created_at => { :type => :date },
460
+ :updated_at => { :type => :date }
461
+ }
462
+ },
463
+ :advanced_snippet => {
464
+ :properties => {
465
+ :value => { :type => :object },
466
+ :created_at => { :type => :date },
467
+ :updated_at => { :type => :date }
468
+ }
469
+ },
470
+ :warnings => {
471
+ :properties => {
472
+ :ids => { :type => :keyword },
473
+ :messages => { :type => :text }
474
+ }
475
+ }
476
+ }
477
+ },
478
+ :id => { :type => :keyword },
479
+ :index_name => { :type => :keyword },
480
+ :language => { :type => :keyword },
481
+ :pipeline => {
482
+ :properties => {
483
+ :extract_binary_content => { :type => :boolean },
484
+ :name => { :type => :keyword },
485
+ :reduce_whitespace => { :type => :boolean },
486
+ :run_ml_inference => { :type => :boolean }
487
+ }
488
+ },
489
+ :service_type => { :type => :keyword }
490
+ }
491
+ },
492
+ :created_at => { :type => :date },
493
+ :deleted_document_count => { :type => :integer },
234
494
  :error => { :type => :text },
235
- :worker_hostname => { :type => :keyword },
236
495
  :indexed_document_count => { :type => :integer },
237
- :deleted_document_count => { :type => :integer },
238
- :created_at => { :type => :date },
239
- :completed_at => { :type => :date }
496
+ :indexed_document_volume => { :type => :integer },
497
+ :last_seen => { :type => :date },
498
+ :metadata => { :type => :object },
499
+ :started_at => { :type => :date },
500
+ :status => { :type => :keyword },
501
+ :total_document_count => { :type => :integer },
502
+ :trigger_method => { :type => :keyword },
503
+ :worker_hostname => { :type => :keyword }
240
504
  }
241
505
  }
242
506
  ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
243
507
  end
244
508
 
245
- def update_connector_fields(connector_id, doc = {})
509
+ def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
246
510
  return if doc.empty?
247
- client.update(
511
+ update_args = {
248
512
  :index => Utility::Constants::CONNECTORS_INDEX,
249
513
  :id => connector_id,
250
514
  :body => { :doc => doc },
251
515
  :refresh => true,
252
516
  :retry_on_conflict => 3
253
- )
517
+ }
518
+ # seq_no and primary_term are used for optimistic concurrency control
519
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
520
+ if seq_no && primary_term
521
+ update_args[:if_seq_no] = seq_no
522
+ update_args[:if_primary_term] = primary_term
523
+ update_args.delete(:retry_on_conflict)
524
+ end
525
+ begin
526
+ client.update(update_args)
527
+ rescue Elastic::Transport::Transport::Errors::Conflict
528
+ # VersionConflictException
529
+ # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
530
+ raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
531
+ end
532
+ end
533
+
534
# Asks the client for the document count of an index.
#
# @param index_name [String] index to count documents in
# @return [Object] the 'count' entry of the client response
def document_count(index_name)
  count_response = client.count(:index => index_name)
  count_response['count']
end
255
537
 
256
538
  private
257
539
 
540
# Tells whether any domain present in the stored filtering is covered by
# the given validation results.
#
# @param domain_validations [Hash] validation results keyed by domain
# @param filtering [Array<Hash>] filters, each read through filter[:domain]
# @return [Boolean] true when the two sets of domains intersect
def should_update_validations?(domain_validations, filtering)
  shared_domains = filtering.map { |filter| filter[:domain] } & domain_validations.keys

  # non-empty intersection -> domains to update present
  !shared_domains.empty?
end
547
+
258
548
  def client
259
549
  @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
260
550
  end
@@ -264,6 +554,15 @@ module Core
264
554
  index_version = index_versions.max # gets the largest suffix number
265
555
  "#{alias_name}-v#{index_version}"
266
556
  end
557
+
558
# Writes the validation result for a filter's domain into the filter's
# draft section, in place. Filters whose domain has no result are left
# untouched.
#
# @param filter [Hash] a filtering entry, read through filter[:domain];
#   must respond to deep_merge!
# @param domain_validations [Hash] validation results keyed by domain
# @return [Hash, nil] the merged filter, or nil when nothing was merged
def update_filter_validation(filter, domain_validations)
  domain = filter[:domain]
  return unless domain_validations.key?(domain)

  filter.deep_merge!({ :draft => { :validation => domain_validations[domain] } })
end
267
566
  end
268
567
  end
269
568
  end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering'
10
+ require 'utility/filtering'
11
+
12
module Core
  module Filtering
    # Decides, for each document, which simple filtering rule applies.
    #
    # The rules come from the job description ('connector' -> 'filtering'),
    # are ordered by their 'order' field, and exclude the rule whose id is
    # SimpleRule::DEFAULT_RULE_ID; SimpleRule::DEFAULT_RULE is the fallback
    # when no other rule matches.
    class PostProcessEngine
      # @return [Array<SimpleRule>] the rules in evaluation order
      attr_reader :rules

      # @param job_description [Hash] job document with string keys
      def initialize(job_description)
        @rules = ordered_rules(job_description.dig('connector', 'filtering'))
      end

      # Finds the first rule matching the document.
      #
      # @param document [Hash] document with string or symbol keys
      # @return [PostProcessResult] the original document paired with the
      #   first matching rule, or with the default rule when none matches
      def process(document)
        # Rules look fields up by string key; normalise the keys once
        # rather than once per rule.
        normalized = document.transform_keys(&:to_s)
        matching_rule = @rules.find { |rule| rule.match?(normalized) } || SimpleRule::DEFAULT_RULE
        PostProcessResult.new(document, matching_rule)
      end

      private

      # Builds the rule objects from the job filtering.
      #
      # A filter without a 'rules' entry yields no rules instead of failing.
      def ordered_rules(job_filtering)
        job_rules = Utility::Filtering.extract_filter(job_filtering)['rules'] || []
        job_rules
          .sort_by { |rule| rule['order'] }
          .reject { |rule| rule['id'] == SimpleRule::DEFAULT_RULE_ID }
          .map { |rule| SimpleRule.new(rule) }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # Pairs a document with the filtering rule that matched it.
    # Creating a result writes a debug log line naming both.
    class PostProcessResult
      attr_reader :document, :matching_rule

      # @param document [Hash] the document, with string or symbol keys
      # @param matching_rule [#id, #policy, #is_include?] the rule that matched
      def initialize(document, matching_rule)
        @document = document
        @matching_rule = matching_rule
        # Look the id up under both key styles so symbol-keyed documents
        # do not log an empty id.
        document_id = document['id'] || document[:id]
        Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
      end

      # @return [Boolean] whether the matching rule is an include rule
      def is_include?
        matching_rule.is_include?
      end
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Filtering
    # One simple filtering rule: a policy (include/exclude) applied to the
    # documents whose `field` satisfies `rule` against `value`.
    #
    # The rule value is coerced to the type of the document value before
    # comparing, so that e.g. the string "5" can be compared with the
    # integer 5.
    class SimpleRule
      DEFAULT_RULE_ID = 'DEFAULT'

      class Policy
        INCLUDE = 'include'
        EXCLUDE = 'exclude'
      end

      class Rule
        REGEX = 'regex'
        EQUALS = 'equals'
        STARTS_WITH = 'starts_with'
        ENDS_WITH = 'ends_with'
        CONTAINS = 'contains'
        LESS_THAN = '<'
        GREATER_THAN = '>'
      end

      # Spellings accepted by to_bool. Anchored with \A/\z so that only the
      # whole value can match, not a single line of a multi-line value.
      TRUE_VALUES = /\A(true|t|yes|y|on|1)\z/i
      FALSE_VALUES = /\A(false|f|no|n|off|0)\z/i
      BLANK_VALUE = /\A[[:space:]]*\z/
      private_constant :TRUE_VALUES, :FALSE_VALUES, :BLANK_VALUE

      attr_reader :policy, :field, :rule, :value, :id

      # @param rule_hash [Hash] must contain the string keys 'policy',
      #   'field', 'rule', 'value' and 'id'
      # @raise [RuntimeError] "<key> is required" when one of them is missing
      def initialize(rule_hash)
        @policy = rule_hash.fetch('policy')
        @field = rule_hash.fetch('field')
        @rule = rule_hash.fetch('rule')
        @value = rule_hash.fetch('value')
        @id = rule_hash.fetch('id')
        @rule_hash = rule_hash
      rescue KeyError => e
        raise "#{e.key} is required"
      end

      # Builds a rule from positional arguments instead of a hash.
      def self.from_args(id, policy, field, rule, value)
        SimpleRule.new(
          {
            'id' => id,
            'policy' => policy,
            'field' => field,
            'rule' => rule,
            'value' => value
          }
        )
      end

      DEFAULT_RULE = SimpleRule.new(
        'policy' => 'include',
        'field' => '_',
        'rule' => 'regex',
        'value' => '.*',
        'id' => SimpleRule::DEFAULT_RULE_ID
      )

      # Tells whether the document satisfies this rule.
      #
      # The default rule (by id) matches everything. A document without a
      # value for `field` never matches. An unknown `rule` never matches.
      #
      # @param document [Hash] document with string keys
      # @return [Boolean]
      def match?(document)
        return true if id == DEFAULT_RULE_ID

        doc_value = document[field]
        return false if doc_value.nil?

        coerced_value = coerce(doc_value)
        case rule
        when Rule::EQUALS
          equal_values?(doc_value, coerced_value)
        when Rule::STARTS_WITH
          doc_value.to_s.start_with?(value)
        when Rule::ENDS_WITH
          doc_value.to_s.end_with?(value)
        when Rule::CONTAINS
          doc_value.to_s.include?(value)
        when Rule::REGEX
          # match? so that the predicate returns a boolean, not MatchData
          doc_value.to_s.match?(/#{value}/)
        when Rule::LESS_THAN
          ordered?(doc_value, coerced_value, :<)
        when Rule::GREATER_THAN
          ordered?(doc_value, coerced_value, :>)
        else
          false
        end
      end

      # Converts the rule value to the type of the given document value.
      # Falls back to the value's string form (and logs at debug level)
      # when the conversion raises.
      #
      # @param doc_value [Object] the document value the rule is compared to
      # @return [String, Integer, DateTime, Time] the coerced rule value
      def coerce(doc_value)
        case doc_value
        when String
          value.to_s
        when Integer
          value.to_i
        when DateTime, Time
          to_date(value)
        when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
          to_bool(value).to_s
        else
          value.to_s
        end
      rescue StandardError => e
        Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
        value.to_s
      end

      def is_include?
        policy == Policy::INCLUDE
      end

      def is_exclude?
        policy == Policy::EXCLUDE
      end

      # @return [Hash] the hash this rule was built from
      def to_h
        @rule_hash
      end

      private

      # Equality for the 'equals' rule, keyed on the coerced rule value.
      def equal_values?(doc_value, coerced_value)
        case coerced_value
        when Integer
          doc_value == coerced_value
        when DateTime, Time
          # Compare instants: Time#to_s and DateTime#to_s render the same
          # moment differently, so comparing the strings would never match.
          doc_value.to_time == coerced_value.to_time
        else
          doc_value.to_s == coerced_value
        end
      end

      # Applies `operator` (:< or :>) to the document value and the coerced
      # rule value. Values that cannot be ordered against each other (for
      # example a boolean document value) do not match instead of raising.
      def ordered?(doc_value, coerced_value, operator)
        left = doc_value
        right = coerced_value
        if temporal?(left) && temporal?(right)
          # Time and DateTime are not comparable with each other in plain Ruby
          left = left.to_time
          right = right.to_time
        end
        left.public_send(operator, right)
      rescue StandardError => e
        Utility::Logger.debug("Failed to compare document value '#{doc_value}' (#{doc_value.class}) with '#{coerced_value}' (#{coerced_value.class}) using '#{operator}' due to error: #{e.class}: #{e.message}")
        false
      end

      def temporal?(candidate)
        candidate.is_a?(DateTime) || candidate.is_a?(Time)
      end

      # Interprets common textual spellings of a boolean. Blank input is false.
      #
      # @raise [ArgumentError] when the value is not a recognised spelling
      def to_bool(str)
        return str if str == true || str == false

        text = str.to_s
        return true if TRUE_VALUES.match?(text)
        return false if BLANK_VALUE.match?(text) || FALSE_VALUES.match?(text)

        raise ArgumentError.new("invalid value for Boolean: \"#{str}\"")
      end

      # Parses a date string; input DateTime.parse rejects is read as an
      # integer timestamp instead.
      def to_date(str)
        DateTime.parse(str)
      rescue ArgumentError
        # NOTE(review): Time.at reads an Integer as seconds since the epoch,
        # not milliseconds - confirm which unit rule values are expected in.
        Time.at(str.to_i)
      end
    end
  end
end