connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+
12
module Core
  module Filtering
    # Domain key under which validation results are reported.
    # Guarded so that loading another file that declares the same constant
    # does not trigger an "already initialized constant" warning.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)

    # Validates the filtering stored in the connector settings with the
    # connector class registered for the settings' service type, and reports
    # the outcome through ElasticConnectorActions.update_filtering_validation.
    class ValidationJobRunner
      # @param connector_settings settings object; this class reads its
      #   +service_type+, +id+ and +filtering+
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Runs the validation once.
      #
      # On success the connector's validation result is stored under
      # DEFAULT_DOMAIN. On a StandardError the exception is logged, remembered
      # for the final log line and a generic INVALID result is stored instead.
      # The ensure section always logs the outcome.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        # Remember the real cause first, so the summary below reports it
        # instead of the generic "did not finish" message.
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        validation_failed_result = {
          :state => Core::Filtering::ValidationStatus::INVALID,
          :errors => [
            { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
          ]
        }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # Reached without success and without a recorded error: the run was
        # cut short by something the StandardError rescue did not capture.
        if !@validation_finished && !@status[:error].present?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
@@ -4,10 +4,14 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
- module ConnectorsApp
8
- module Errors
9
- INVALID_API_KEY = 'INVALID_API_KEY'
10
- UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'
11
- INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'
7
+ # frozen_string_literal: true
8
+
9
+ module Core
10
+ module Filtering
11
+ class ValidationStatus
12
+ INVALID = 'invalid'
13
+ VALID = 'valid'
14
+ EDITED = 'edited'
15
+ end
12
16
  end
13
17
  end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering/post_process_engine'
10
+ require 'core/filtering/post_process_result'
11
+ require 'core/filtering/simple_rule'
12
+ require 'core/filtering/validation_job_runner'
13
+ require 'core/filtering/validation_status'
14
+
15
# Shared constants for the filtering components required above.
module Core
  module Filtering
    # Domain key under which validation results are reported.
    # core/filtering/validation_job_runner (required above) declares the same
    # constant; only define it here when it is still missing, so loading this
    # file does not emit an "already initialized constant" warning.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)
  end
end
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'app/config'
10
+ require 'utility/bulk_queue'
11
+ require 'utility/es_client'
12
+ require 'utility/logger'
13
+ require 'elasticsearch/api'
14
+
15
module Core
  module Ingestion
    # Sink that queues serialized index/delete operations and sends them to
    # Elasticsearch with the client's bulk call. The queue is flushed whenever
    # it reports that the next operation will not fit, or when #flush is
    # called explicitly.
    class EsSink
      # @param index_name name written into the '_index' field of every operation
      # @param request_pipeline value passed as :pipeline to the bulk call
      # @param bulk_queue queue responding to will_fit?, add and pop_all
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an index operation for an already serialized document.
      def ingest(id, serialized_document)
        index_op = serialize({ 'index' => { '_index' => index_name, '_id' => id } })

        flush unless @operation_queue.will_fit?(index_op, serialized_document)

        @operation_queue.add(index_op, serialized_document)
      end

      # Queues a delete operation for the given document id.
      def delete(doc_id)
        delete_op = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })

        flush unless @operation_queue.will_fit?(delete_op)

        @operation_queue.add(delete_op)
      end

      # Sends everything currently queued in one bulk request.
      #
      # @return the bulk response, or nil when there was nothing to send
      def flush
        data = @operation_queue.pop_all
        return if data.empty?

        response = @client.bulk(:body => data, :pipeline => @request_pipeline)
        warn_about_failed_operations(response)
        response
      end

      # Serializes an object with the Elasticsearch API serializer.
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      attr_reader :index_name

      # A bulk request can succeed as a whole while individual operations
      # fail; the response then carries a truthy 'errors' entry. Log it so
      # partial failures are not silently dropped. Responses that are not
      # hash-like are ignored.
      def warn_about_failed_operations(response)
        return unless response.respond_to?(:key?) && response['errors']

        Utility::Logger.warn("Bulk request to index #{index_name} completed with errors for some operations.")
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Ingestion
    # Front end for a sink strategy: skips empty and oversized documents,
    # forwards everything else to the sink and keeps running counters of what
    # was handed over.
    class Ingester
      # @param sink_strategy object responding to serialize, ingest, delete and flush
      # @param max_allowed_document_size maximum serialized size in bytes;
      #   a value of 0 or less disables the size check
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Serializes the document and passes it to the sink under document['id'].
      # Nil/empty documents and documents above the size limit are skipped
      # with a warning and are not counted.
      def ingest(document)
        unless document&.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        serialized_document = @sink_strategy.serialize(document)
        document_size = serialized_document.bytesize

        if too_large?(document_size)
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], serialized_document)

        @ingested_count += 1
        @ingested_volume += document_size
      end

      # Ingests every document of the collection, one by one.
      def ingest_multiple(documents)
        documents.each { |doc| ingest(doc) }
      end

      # Passes a delete for the id to the sink; nil ids are ignored.
      def delete(id)
        return if id.nil?

        @sink_strategy.delete(id)

        @deleted_count += 1
      end

      # Deletes every id of the collection, one by one.
      def delete_multiple(ids)
        ids.each { |id| delete(id) }
      end

      # Asks the sink to flush.
      def flush
        @sink_strategy.flush
      end

      # @return [Hash] counters of documents and bytes handed to the sink
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end

      private

      # True when a size limit is configured and the document exceeds it.
      def too_large?(document_size)
        @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
      end
    end
  end
end
@@ -6,8 +6,5 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'core/output_sink/es_sink'
10
- require 'core/output_sink/console_sink'
11
- require 'core/output_sink/combined_sink'
12
-
13
- module Core::OutputSink; end
9
+ require 'core/ingestion/ingester'
10
+ require 'core/ingestion/es_sink'
@@ -16,6 +16,9 @@ module Core
16
16
  class NativeScheduler < Core::Scheduler
17
17
  def connector_settings
18
18
  Core::ConnectorSettings.fetch_native_connectors || []
19
+ rescue *Utility::AUTHORIZATION_ERRORS => e
20
+ # should be handled by the general scheduler
21
+ raise e
19
22
  rescue StandardError => e
20
23
  Utility::ExceptionTracking.log_exception(e, 'Could not retrieve native connectors due to unexpected error.')
21
24
  []
@@ -9,6 +9,8 @@
9
9
  require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility/cron'
13
15
  require 'utility/logger'
14
16
  require 'utility/exception_tracking'
@@ -37,13 +39,18 @@ module Core
37
39
  if configuration_triggered?(cs)
38
40
  yield cs, :configuration
39
41
  end
42
+ if filtering_validation_triggered?(cs)
43
+ yield cs, :filter_validation
44
+ end
40
45
  end
41
- if @is_shutting_down
42
- break
43
- end
46
+ rescue *Utility::AUTHORIZATION_ERRORS => e
47
+ Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
44
48
  rescue StandardError => e
45
49
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
46
50
  ensure
51
+ if @is_shutting_down
52
+ break
53
+ end
47
54
  if @poll_interval > 0 && !@is_shutting_down
48
55
  Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
49
56
  sleep(@poll_interval)
@@ -59,8 +66,6 @@ module Core
59
66
  private
60
67
 
61
68
  def sync_triggered?(connector_settings)
62
- return false unless connector_registered?(connector_settings.service_type)
63
-
64
69
  unless connector_settings.valid_index_name?
65
70
  Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
66
71
  return false
@@ -126,8 +131,6 @@ module Core
126
131
  end
127
132
 
128
133
  def heartbeat_triggered?(connector_settings)
129
- return false unless connector_registered?(connector_settings.service_type)
130
-
131
134
  last_seen = connector_settings[:last_seen]
132
135
  return true if last_seen.nil? || last_seen.empty?
133
136
  last_seen = begin
@@ -141,11 +144,41 @@ module Core
141
144
  end
142
145
 
143
146
  def configuration_triggered?(connector_settings)
144
- if connector_settings.needs_service_type? || connector_registered?(connector_settings.service_type)
145
- return connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
147
+ connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
148
+ end
149
+
150
+ def filtering_validation_triggered?(connector_settings)
151
+ filtering = connector_settings.filtering
152
+
153
+ unless filtering.present?
154
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
155
+
156
+ return false
146
157
  end
147
158
 
148
- false
159
+ draft_filters = filtering[:draft]
160
+
161
+ unless draft_filters.present?
162
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
163
+
164
+ return false
165
+ end
166
+
167
+ validation = draft_filters[:validation]
168
+
169
+ unless validation.present?
170
+ Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
171
+
172
+ return false
173
+ end
174
+
175
+ unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
176
+ Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
177
+
178
+ return false
179
+ end
180
+
181
+ true
149
182
  end
150
183
 
151
184
  def connector_registered?(service_type)
@@ -21,6 +21,9 @@ module Core
21
21
  def connector_settings
22
22
  connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
23
23
  [connector_settings]
24
+ rescue *Utility::AUTHORIZATION_ERRORS => e
25
+ # should be handled by the general scheduler
26
+ raise e
24
27
  rescue StandardError => e
25
28
  Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
26
29
  []
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,14 +21,18 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
24
+ JOB_REPORTING_INTERVAL = 10
25
+
22
26
  def initialize(connector_settings)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
25
29
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
- @connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
30
+ @sync_finished = false
31
+ @sync_error = nil
27
32
  @status = {
28
33
  :indexed_document_count => 0,
29
34
  :deleted_document_count => 0,
35
+ :indexed_document_volume => 0,
30
36
  :error => nil
31
37
  }
32
38
  end
@@ -39,9 +45,12 @@ module Core
39
45
  private
40
46
 
41
47
  def do_sync!
42
- Utility::Logger.info("Starting sync for connector #{@connector_settings.id}.")
48
+ Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
49
 
44
- job_id = ElasticConnectorActions.claim_job(@connector_settings.id)
50
+ job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
51
+ job_description = job_record['_source']
52
+ job_id = job_record['_id']
53
+ job_description['_id'] = job_id
45
54
 
46
55
  unless job_id.present?
47
56
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -51,18 +60,34 @@ module Core
51
60
  begin
52
61
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
53
62
 
54
- @connector_instance.do_health_check!
63
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
64
+ validate_filtering(job_description.dig(:connector, :filtering))
65
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
66
+
67
+ connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
68
+
69
+ connector_instance.do_health_check!
55
70
 
56
71
  incoming_ids = []
57
72
  existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)
58
73
 
59
74
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
60
75
 
61
- @connector_instance.yield_documents do |document|
76
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
77
+ reporting_cycle_start = Time.now
78
+ Utility::Logger.info('Yielding documents...')
79
+ connector_instance.yield_documents do |document|
62
80
  document = add_ingest_metadata(document)
63
- @sink.ingest(document)
64
- incoming_ids << document[:id]
65
- @status[:indexed_document_count] += 1
81
+ post_process_result = post_processing_engine.process(document)
82
+ if post_process_result.is_include?
83
+ @ingester.ingest(document)
84
+ incoming_ids << document['id']
85
+ end
86
+
87
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
88
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
89
+ reporting_cycle_start = Time.now
90
+ end
66
91
  end
67
92
 
68
93
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -70,23 +95,48 @@ module Core
70
95
  Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
71
96
 
72
97
  ids_to_delete.each do |id|
73
- @sink.delete(id)
74
- @status[:deleted_document_count] += 1
98
+ @ingester.delete(id)
99
+
100
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
101
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
102
+ reporting_cycle_start = Time.now
103
+ end
75
104
  end
76
105
 
77
- @sink.flush
106
+ @ingester.flush
107
+
108
+ # We use this mechanism to check whether an interrupt (or something else that led to the thread not finishing)
109
+ # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
110
+ @sync_finished = true
78
111
  rescue StandardError => e
79
- @status[:error] = e.message
112
+ @sync_error = e.message
80
113
  Utility::ExceptionTracking.log_exception(e)
81
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
82
114
  ensure
115
+ stats = @ingester.ingestion_stats
116
+
117
+ Utility::Logger.debug("Sync stats are: #{stats}")
118
+
119
+ @status[:indexed_document_count] = stats[:indexed_document_count]
120
+ @status[:deleted_document_count] = stats[:deleted_document_count]
121
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
122
+
83
123
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
84
124
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
85
125
 
86
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
126
+ # Make sure to not override a previous error message
127
+ if !@sync_finished && @sync_error.nil?
128
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
129
+ end
130
+
131
+ unless connector_instance.nil?
132
+ metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
133
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
134
+ end
135
+
136
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
87
137
 
88
- if @status[:error]
89
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@status[:error]}.")
138
+ if @sync_error
139
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
90
140
  else
91
141
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
92
142
  end
@@ -107,5 +157,15 @@ module Core
107
157
 
108
158
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
109
159
  end
160
+
161
# Validates the given filtering with the connector class and raises when the
# result cannot be used for a sync.
#
# @param filtering filtering passed to the connector class' validate_filtering
# @raise [Utility::InvalidFilterConfigError] when the reported state is not
#   VALID, or when the state is VALID but errors are present
# @return [nil] when the filtering is valid and free of errors
def validate_filtering(filtering)
  validation_result = @connector_class.validate_filtering(filtering)
  state = validation_result[:state]

  # Messages are only built on the failing paths.
  unless state == Core::Filtering::ValidationStatus::VALID
    raise Utility::InvalidFilterConfigError, "Active filtering is not in valid state (current state: #{state}) for connector #{@connector_settings.id}. Please check active filtering in connectors index."
  end

  errors = validation_result[:errors]
  return unless errors.present?

  raise Utility::InvalidFilterConfigError, "Active filtering is in valid state, but errors were detected (errors: #{errors}) for connector #{@connector_settings.id}. Please check active filtering in connectors index."
end
110
170
  end
111
171
  end
data/lib/core.rb CHANGED
@@ -7,8 +7,10 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
module Utility
  # Accumulates newline-delimited bulk operations in a string buffer until an
  # operation-count or byte-size threshold would be exceeded.
  class BulkQueue
    # Raised by #add when the operation does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # 500 items or 5MB
    #
    # @param operation_count_threshold maximum number of queued operations
    # @param size_threshold byte limit; the summed size of operations and
    #   payloads has to stay strictly below it
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns the buffered data and empties the queue.
    #
    # @return [String] the buffer content ('' when nothing is queued)
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, when given, a payload line.
    #
    # @param operation [String] serialized operation
    # @param payload [String, nil] serialized document belonging to the operation
    # @raise [QueueOverflowError] when will_fit? is false for the arguments
    def add(operation, payload = nil)
      unless will_fit?(operation, payload)
        raise QueueOverflowError, "Operation does not fit into the queue (limits: #{@operation_count_threshold} operations, #{@size_threshold} bytes)"
      end

      @current_operation_count += 1
      @current_buffer_size += get_size(operation) + get_size(payload)

      @buffer << operation << "\n"
      @buffer << payload << "\n" if payload
    end

    # Tells whether the operation (plus optional payload) can be added
    # without exceeding either threshold. The newline separators written by
    # #add are not part of the size calculation.
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + get_size(operation) + get_size(payload) < @size_threshold
    end

    # @return [Hash] number of queued operations and their accounted byte size
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of the string, 0 for nil/false.
    def get_size(str)
      return 0 unless str

      str.bytesize
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0

      # Unary plus yields an unfrozen string, so appending keeps working when
      # string literals are frozen.
      @buffer = +''
    end
  end
end
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  # Small general-purpose helpers.
  class Common
    class << self
      # Returns the first argument that is not nil.
      #
      # Only nil is skipped: false, empty strings and empty collections are
      # returned like any other value.
      #
      # @param args candidate values, checked in order
      # @return the first non-nil argument, or nil when there is none
      def return_if_present(*args)
        args.find { |arg| !arg.nil? }
      end
    end
  end
end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end