connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
data/lib/core/filtering/validation_job_runner.rb ADDED
@@ -0,0 +1,53 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ # frozen_string_literal: true
+
+ require 'connectors/connector_status'
+ require 'connectors/registry'
+
+ module Core
+   module Filtering
+     DEFAULT_DOMAIN = 'DEFAULT'
+
+     class ValidationJobRunner
+       def initialize(connector_settings)
+         @connector_settings = connector_settings
+         @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
+         @validation_finished = false
+         @status = { :error => nil }
+       end
+
+       def execute
+         Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")
+
+         validation_result = @connector_class.validate_filtering(@connector_settings.filtering)
+
+         # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
+         ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })
+
+         @validation_finished = true
+       rescue StandardError => e
+         Utility::ExceptionTracking.log_exception(e)
+         validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
+                                      :errors => [
+                                        { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
+                                      ] }
+         ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
+       ensure
+         if !@validation_finished && !@status[:error].present?
+           @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
+         end
+
+         if @status[:error]
+           Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
+         else
+           Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
+         end
+       end
+     end
+   end
+ end
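
For orientation (not part of the diff): the runner is constructed with one connector's settings and executed once per validation cycle. A minimal sketch of how it might be driven, assuming a connector document with id 'connector-id' exists in the connectors index:

    require 'core'

    # Hypothetical driver: fetch one connector's settings, then run a single
    # filtering-validation pass. fetch_by_id is the same lookup the single
    # scheduler uses elsewhere in this diff.
    connector_settings = Core::ConnectorSettings.fetch_by_id('connector-id')
    Core::Filtering::ValidationJobRunner.new(connector_settings).execute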
data/lib/{connectors_app/// → core/filtering/validation_status.rb} RENAMED
@@ -4,10 +4,14 @@
  # you may not use this file except in compliance with the Elastic License.
  #
 
- module ConnectorsApp
-   module Errors
-     INVALID_API_KEY = 'INVALID_API_KEY'
-     UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
-     INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
+ # frozen_string_literal: true
+
+ module Core
+   module Filtering
+     class ValidationStatus
+       INVALID = 'invalid'
+       VALID = 'valid'
+       EDITED = 'edited'
+     end
    end
  end
data/lib/core/filtering.rb ADDED
@@ -0,0 +1,17 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ # frozen_string_literal: true
+
+ require 'core/filtering/post_process_engine'
+ require 'core/filtering/post_process_result'
+ require 'core/filtering/simple_rule'
+ require 'core/filtering/validation_job_runner'
+ require 'core/filtering/validation_status'
+
+ module Core::Filtering
+   DEFAULT_DOMAIN = 'DEFAULT'
+ end
data/lib/core/ingestion/es_sink.rb ADDED
@@ -0,0 +1,59 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ # frozen_string_literal: true
+
+ require 'app/config'
+ require 'utility/bulk_queue'
+ require 'utility/es_client'
+ require 'utility/logger'
+ require 'elasticsearch/api'
+
+ module Core
+   module Ingestion
+     class EsSink
+       def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
+         @client = Utility::EsClient.new(App::Config[:elasticsearch])
+         @index_name = index_name
+         @request_pipeline = request_pipeline
+         @operation_queue = bulk_queue
+       end
+
+       def ingest(id, serialized_document)
+         index_op = serialize({ 'index' => { '_index' => index_name, '_id' => id } })
+
+         flush unless @operation_queue.will_fit?(index_op, serialized_document)
+
+         @operation_queue.add(
+           index_op,
+           serialized_document
+         )
+       end
+
+       def delete(doc_id)
+         delete_op = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })
+         flush unless @operation_queue.will_fit?(delete_op)
+
+         @operation_queue.add(delete_op)
+       end
+
+       def flush
+         data = @operation_queue.pop_all
+         return if data.empty?
+
+         @client.bulk(:body => data, :pipeline => @request_pipeline)
+       end
+
+       def serialize(obj)
+         Elasticsearch::API.serializer.dump(obj)
+       end
+
+       private
+
+       attr_accessor :index_name
+     end
+   end
+ end
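
Useful context (not part of the diff): the sink accumulates serialized bulk-API operation pairs and only talks to Elasticsearch when the queue reports the next operation won't fit, or when `flush` is called explicitly. A minimal sketch, assuming `App::Config[:elasticsearch]` points at a reachable cluster; the index and pipeline names are placeholders:

    sink = Core::Ingestion::EsSink.new('search-example', 'ent-search-generic-ingestion')

    serialized = sink.serialize({ 'id' => '1', 'title' => 'Hello world' })
    sink.ingest('1', serialized)  # queued only; nothing is sent yet
    sink.delete('2')              # queues a delete operation for the same index
    sink.flush                    # sends both operations in a single _bulk request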
data/lib/core/ingestion/ingester.rb ADDED
@@ -0,0 +1,90 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ # frozen_string_literal: true
+
+ require 'utility/logger'
+
+ module Core
+   module Ingestion
+     class Ingester
+       def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
+         @sink_strategy = sink_strategy
+         @max_allowed_document_size = max_allowed_document_size
+
+         @ingested_count = 0
+         @ingested_volume = 0
+         @deleted_count = 0
+       end
+
+       def ingest(document)
+         unless document&.any?
+           Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
+           return
+         end
+
+         serialized_document = @sink_strategy.serialize(document)
+         document_size = serialized_document.bytesize
+
+         if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
+           Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
+           return
+         end
+
+         @sink_strategy.ingest(document['id'], serialized_document)
+
+         @ingested_count += 1
+         @ingested_volume += document_size
+       end
+
+       def ingest_multiple(documents)
+         documents.each { |doc| ingest(doc) }
+       end
+
+       def delete(id)
+         return if id.nil?
+
+         @sink_strategy.delete(id)
+
+         @deleted_count += 1
+       end
+
+       def delete_multiple(ids)
+         ids.each { |id| delete(id) }
+       end
+
+       def flush
+         @sink_strategy.flush
+       end
+
+       def ingestion_stats
+         {
+           :indexed_document_count => @ingested_count,
+           :indexed_document_volume => @ingested_volume,
+           :deleted_document_count => @deleted_count
+         }
+       end
+
+       private
+
+       def do_ingest(_id, _serialized_document)
+         raise NotImplementedError
+       end
+
+       def do_delete(_id)
+         raise NotImplementedError
+       end
+
+       def do_flush
+         raise NotImplementedError
+       end
+
+       def do_serialize(_document)
+         raise NotImplementedError
+       end
+     end
+   end
+ end
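
Again for context: `Ingester` fronts a sink, skipping empty and oversized documents and tracking the counters that `ingestion_stats` reports. A sketch of the wiring the sync job runner sets up below, with illustrative index and pipeline names:

    sink = Core::Ingestion::EsSink.new('search-example', 'ent-search-generic-ingestion')
    ingester = Core::Ingestion::Ingester.new(sink)

    ingester.ingest_multiple([
      { 'id' => '1', 'title' => 'first' },
      { 'id' => '2', 'title' => 'second' }
    ])
    ingester.delete('3')
    ingester.flush

    ingester.ingestion_stats
    # => { :indexed_document_count => 2, :indexed_document_volume => <bytes>, :deleted_document_count => 1 }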
data/lib/core/{output_sink.rb → ingestion.rb} RENAMED
@@ -6,8 +6,5 @@
 
  # frozen_string_literal: true
 
- require 'core/output_sink/es_sink'
- require 'core/output_sink/console_sink'
- require 'core/output_sink/combined_sink'
-
- module Core::OutputSink; end
+ require 'core/ingestion/ingester'
+ require 'core/ingestion/es_sink'
data/lib/core/native_scheduler.rb CHANGED
@@ -16,6 +16,9 @@ module Core
  class NativeScheduler < Core::Scheduler
    def connector_settings
      Core::ConnectorSettings.fetch_native_connectors || []
+   rescue *Utility::AUTHORIZATION_ERRORS => e
+     # should be handled by the general scheduler
+     raise e
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, 'Could not retrieve native connectors due to unexpected error.')
      []
data/lib/core/scheduler.rb CHANGED
@@ -9,6 +9,8 @@
  require 'time'
  require 'fugit'
  require 'core/connector_settings'
+ require 'core/elastic_connector_actions'
+ require 'core/filtering/validation_status'
  require 'utility/cron'
  require 'utility/logger'
  require 'utility/exception_tracking'
@@ -37,13 +39,18 @@ module Core
        if configuration_triggered?(cs)
          yield cs, :configuration
        end
+       if filtering_validation_triggered?(cs)
+         yield cs, :filter_validation
+       end
      end
-     if @is_shutting_down
-       break
-     end
+   rescue *Utility::AUTHORIZATION_ERRORS => e
+     Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
    ensure
+     if @is_shutting_down
+       break
+     end
      if @poll_interval > 0 && !@is_shutting_down
        Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
        sleep(@poll_interval)
@@ -59,8 +66,6 @@ module Core
    private
 
    def sync_triggered?(connector_settings)
-     return false unless connector_registered?(connector_settings.service_type)
-
      unless connector_settings.valid_index_name?
        Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
        return false
@@ -126,8 +131,6 @@ module Core
    end
 
    def heartbeat_triggered?(connector_settings)
-     return false unless connector_registered?(connector_settings.service_type)
-
      last_seen = connector_settings[:last_seen]
      return true if last_seen.nil? || last_seen.empty?
      last_seen = begin
@@ -141,11 +144,41 @@ module Core
    end
 
    def configuration_triggered?(connector_settings)
-     if connector_settings.needs_service_type? || connector_registered?(connector_settings.service_type)
-       return connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
+     connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
+   end
+
+   def filtering_validation_triggered?(connector_settings)
+     filtering = connector_settings.filtering
+
+     unless filtering.present?
+       Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
+
+       return false
      end
 
-     false
+     draft_filters = filtering[:draft]
+
+     unless draft_filters.present?
+       Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
+
+       return false
+     end
+
+     validation = draft_filters[:validation]
+
+     unless validation.present?
+       Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
+
+       return false
+     end
+
+     unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
+       Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
+
+       return false
+     end
+
+     true
    end
 
    def connector_registered?(service_type)
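
`filtering_validation_triggered?` walks `filtering → :draft → :validation → :state` and only fires when the draft's validation state is `edited`. Illustratively, the connector document shape it expects looks roughly like this (field values assumed for the example, not taken from the diff):

    connector_settings.filtering
    # => {
    #      :draft => {
    #        :rules      => [...],
    #        :validation => { :state => 'edited', :errors => [] }  # ValidationStatus::EDITED
    #      }
    #    }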
data/lib/core/single_scheduler.rb CHANGED
@@ -21,6 +21,9 @@ module Core
    def connector_settings
      connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
      [connector_settings]
+   rescue *Utility::AUTHORIZATION_ERRORS => e
+     # should be handled by the general scheduler
+     raise e
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
      []
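
Both schedulers use Ruby's splat in the rescue clause: `rescue *Utility::AUTHORIZATION_ERRORS` expands an array of exception classes inline, so authorization failures bubble up to the polling loop while anything else degrades to an empty result. A self-contained sketch of the idiom with made-up error classes (the real list lives in `utility/errors.rb`):

    class InvalidApiKeyError < StandardError; end
    class UnsupportedAuthSchemeError < StandardError; end
    AUTH_ERRORS = [InvalidApiKeyError, UnsupportedAuthSchemeError].freeze

    def fetch_settings
      raise InvalidApiKeyError, 'bad API key'
    rescue *AUTH_ERRORS => e
      raise e   # let the caller's polling loop decide how to react
    rescue StandardError
      []        # anything else degrades to "no connectors"
    end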
data/lib/core/sync_job_runner.rb CHANGED
@@ -8,7 +8,9 @@
 
  require 'connectors/connector_status'
  require 'connectors/registry'
- require 'core/output_sink'
+ require 'core/filtering/post_process_engine'
+ require 'core/ingestion'
+ require 'core/filtering/validation_status'
  require 'utility'
 
  module Core
@@ -19,14 +21,18 @@ module Core
    end
 
    class SyncJobRunner
+     JOB_REPORTING_INTERVAL = 10
+
      def initialize(connector_settings)
        @connector_settings = connector_settings
-       @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
+       @ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
-       @connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
+       @sync_finished = false
+       @sync_error = nil
        @status = {
          :indexed_document_count => 0,
          :deleted_document_count => 0,
+         :indexed_document_volume => 0,
          :error => nil
        }
      end
@@ -39,9 +45,12 @@ module Core
      private
 
      def do_sync!
-       Utility::Logger.info("Starting sync for connector #{@connector_settings.id}.")
+       Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
 
-       job_id = ElasticConnectorActions.claim_job(@connector_settings.id)
+       job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
+       job_description = job_record['_source']
+       job_id = job_record['_id']
+       job_description['_id'] = job_id
 
        unless job_id.present?
          Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -51,18 +60,34 @@ module Core
        begin
          Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
 
-         @connector_instance.do_health_check!
+         Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
+         validate_filtering(job_description.dig(:connector, :filtering))
+         Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
+
+         connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
+
+         connector_instance.do_health_check!
 
          incoming_ids = []
          existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)
 
          Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
 
-         @connector_instance.yield_documents do |document|
+         post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
+         reporting_cycle_start = Time.now
+         Utility::Logger.info('Yielding documents...')
+         connector_instance.yield_documents do |document|
            document = add_ingest_metadata(document)
-           @sink.ingest(document)
-           incoming_ids << document[:id]
-           @status[:indexed_document_count] += 1
+           post_process_result = post_processing_engine.process(document)
+           if post_process_result.is_include?
+             @ingester.ingest(document)
+             incoming_ids << document['id']
+           end
+
+           if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+             ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
+             reporting_cycle_start = Time.now
+           end
          end
 
          ids_to_delete = existing_ids - incoming_ids.uniq
@@ -70,23 +95,48 @@ module Core
          Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
 
          ids_to_delete.each do |id|
-           @sink.delete(id)
-           @status[:deleted_document_count] += 1
+           @ingester.delete(id)
+
+           if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
+             ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
+             reporting_cycle_start = Time.now
+           end
          end
 
-         @sink.flush
+         @ingester.flush
+
+         # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
+         # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
+         @sync_finished = true
        rescue StandardError => e
-         @status[:error] = e.message
+         @sync_error = e.message
          Utility::ExceptionTracking.log_exception(e)
-         ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
       ensure
+         stats = @ingester.ingestion_stats
+
+         Utility::Logger.debug("Sync stats are: #{stats}")
+
+         @status[:indexed_document_count] = stats[:indexed_document_count]
+         @status[:deleted_document_count] = stats[:deleted_document_count]
+         @status[:indexed_document_volume] = stats[:indexed_document_volume]
+
         Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
         Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
 
-         ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
+         # Make sure to not override a previous error message
+         if !@sync_finished && @sync_error.nil?
+           @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
+         end
+
+         unless connector_instance.nil?
+           metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
+           metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
+         end
+
+         ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
 
-         if @status[:error]
-           Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@status[:error]}.")
+         if @sync_error
+           Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
         else
           Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
         end
@@ -107,5 +157,15 @@ module Core
 
        raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
      end
+
+     def validate_filtering(filtering)
+       validation_result = @connector_class.validate_filtering(filtering)
+
+       wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+       raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
+
+       errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
+       raise errors_present_error if validation_result[:errors].present?
+     end
    end
  end
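
One detail worth calling out from the hunks above: intermediate progress is now written back at most once per `JOB_REPORTING_INTERVAL` (10 seconds) rather than once per document. The idiom in isolation, with hypothetical `items`/`process`/`report_progress` stand-ins for the real yield loop and `update_sync` call:

    REPORTING_INTERVAL = 10  # seconds, mirrors JOB_REPORTING_INTERVAL

    reporting_cycle_start = Time.now
    items.each do |item|
      process(item)  # cheap per-item work happens on every iteration

      # expensive reporting happens at most once per interval
      if Time.now - reporting_cycle_start >= REPORTING_INTERVAL
        report_progress
        reporting_cycle_start = Time.now
      end
    end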
data/lib/core.rb CHANGED
@@ -7,8 +7,10 @@
  # frozen_string_literal: true
 
  require 'core/configuration'
+ require 'core/connector_job'
  require 'core/connector_settings'
  require 'core/elastic_connector_actions'
+ require 'core/filtering'
  require 'core/heartbeat'
  require 'core/scheduler'
  require 'core/single_scheduler'
data/lib/utility/bulk_queue.rb ADDED
@@ -0,0 +1,85 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ require 'json'
+
+ module Utility
+   class BulkQueue
+     class QueueOverflowError < StandardError; end
+
+     # 500 items or 5MB
+     def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
+       @operation_count_threshold = operation_count_threshold.freeze
+       @size_threshold = size_threshold.freeze
+
+       @buffer = ''
+
+       @current_operation_count = 0
+
+       @current_buffer_size = 0
+       @current_data_size = 0
+     end
+
+     def pop_all
+       result = @buffer
+
+       reset
+
+       result
+     end
+
+     def add(operation, payload = nil)
+       raise QueueOverflowError unless will_fit?(operation, payload)
+
+       operation_size = get_size(operation)
+       payload_size = get_size(payload)
+
+       @current_operation_count += 1
+       @current_buffer_size += operation_size
+       @current_buffer_size += payload_size
+       @current_data_size += payload_size
+
+       @buffer << operation
+       @buffer << "\n"
+
+       if payload
+         @buffer << payload
+         @buffer << "\n"
+       end
+     end
+
+     def will_fit?(operation, payload = nil)
+       return false if @current_operation_count + 1 > @operation_count_threshold
+
+       operation_size = get_size(operation)
+       payload_size = get_size(payload)
+
+       @current_buffer_size + operation_size + payload_size < @size_threshold
+     end
+
+     def current_stats
+       {
+         :current_operation_count => @current_operation_count,
+         :current_buffer_size => @current_buffer_size
+       }
+     end
+
+     private
+
+     def get_size(str)
+       return 0 unless str
+       str.bytesize
+     end
+
+     def reset
+       @current_operation_count = 0
+       @current_buffer_size = 0
+       @current_data_size = 0
+
+       @buffer = ''
+     end
+   end
+ end
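
A quick sketch of the queue's contract (thresholds shrunk for illustration): `add` raises `QueueOverflowError` unless the caller checks `will_fit?` first, and `pop_all` hands back the accumulated NDJSON buffer and resets the queue:

    queue = Utility::BulkQueue.new(2, 1024)  # tiny thresholds for the example

    op  = '{"index":{"_index":"search-example","_id":"1"}}'
    doc = '{"id":"1","title":"Hello"}'

    queue.add(op, doc) if queue.will_fit?(op, doc)
    queue.current_stats  # => { :current_operation_count => 1, :current_buffer_size => ... }

    payload = queue.pop_all  # NDJSON string for the _bulk API; the queue is now empty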
data/lib/utility/common.rb ADDED
@@ -0,0 +1,20 @@
+ #
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ # or more contributor license agreements. Licensed under the Elastic License;
+ # you may not use this file except in compliance with the Elastic License.
+ #
+
+ # frozen_string_literal: true
+
+ module Utility
+   class Common
+     class << self
+       def return_if_present(*args)
+         args.each do |arg|
+           return arg unless arg.nil?
+         end
+         nil
+       end
+     end
+   end
+ end
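
`return_if_present` is a small coalescing helper: it returns the first non-nil argument, or nil when every argument is nil. For example:

    Utility::Common.return_if_present(nil, 'fallback', 'ignored')  # => 'fallback'
    Utility::Common.return_if_present(nil, nil)                    # => nil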
data/lib/utility/constants.rb CHANGED
@@ -16,5 +16,7 @@ module Utility
      JOB_INDEX = '.elastic-connectors-sync-jobs'
      CONTENT_INDEX_PREFIX = 'search-'
      CRAWLER_SERVICE_TYPE = 'elastic-crawler'
+     FILTERING_RULES_FEATURE = 'filtering_rules'
+     FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
    end
  end