connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/dispatcher.rb +12 -0
  4. data/lib/app/preflight_check.rb +11 -0
  5. data/lib/connectors/base/connector.rb +19 -12
  6. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  7. data/lib/connectors/example/connector.rb +15 -0
  8. data/lib/connectors/gitlab/connector.rb +15 -1
  9. data/lib/connectors/mongodb/connector.rb +55 -36
  10. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  11. data/lib/core/configuration.rb +3 -1
  12. data/lib/core/connector_job.rb +137 -0
  13. data/lib/core/connector_settings.rb +24 -11
  14. data/lib/core/elastic_connector_actions.rb +263 -24
  15. data/lib/core/filtering/post_process_engine.rb +39 -0
  16. data/lib/core/filtering/post_process_result.rb +27 -0
  17. data/lib/core/filtering/simple_rule.rb +141 -0
  18. data/lib/core/filtering/validation_job_runner.rb +53 -0
  19. data/lib/core/filtering/validation_status.rb +17 -0
  20. data/lib/core/filtering.rb +17 -0
  21. data/lib/core/ingestion/es_sink.rb +59 -0
  22. data/lib/core/ingestion/ingester.rb +90 -0
  23. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  24. data/lib/core/scheduler.rb +40 -10
  25. data/lib/core/sync_job_runner.rb +65 -17
  26. data/lib/core.rb +2 -0
  27. data/lib/utility/bulk_queue.rb +85 -0
  28. data/lib/utility/constants.rb +2 -0
  29. data/lib/utility/filtering.rb +22 -0
  30. data/lib/utility/logger.rb +2 -1
  31. data/lib/utility.rb +5 -4
  32. metadata +16 -7
  33. data/lib/core/output_sink/base_sink.rb +0 -33
  34. data/lib/core/output_sink/combined_sink.rb +0 -38
  35. data/lib/core/output_sink/console_sink.rb +0 -51
  36. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+
12
module Core
  module Filtering
    # Key under which validation results are reported. core/filtering.rb declares the
    # same constant, so only define it here when it has not been defined yet; this avoids
    # an "already initialized constant" warning when both files are loaded.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)

    # Validates the filtering of a single connector by delegating to the connector class
    # registered for its service type, and reports the outcome through
    # ElasticConnectorActions.update_filtering_validation.
    class ValidationJobRunner
      # @param connector_settings settings object of the connector to validate; this class
      #   reads its +id+, +service_type+ and +filtering+
      def initialize(connector_settings)
        @connector_settings = connector_settings
        @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
        @validation_finished = false
        @status = { :error => nil }
      end

      # Runs the validation and stores its result for the DEFAULT domain.
      #
      # Any StandardError is logged, remembered in @status[:error] and reported as an
      # INVALID validation result with a generic error message. A summary line is always
      # logged at the end, whether the job succeeded, failed or was interrupted.
      def execute
        Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")

        validation_result = @connector_class.validate_filtering(@connector_settings.filtering)

        # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })

        @validation_finished = true
      rescue StandardError => e
        # Remember the real cause first, so the summary below does not claim that the
        # validation thread "did not finish" when it actually failed with an exception.
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
                                     :errors => [
                                       { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
                                     ] }
        ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
      ensure
        # Reached without success and without a recorded error: execution was cut short by
        # something that is not a StandardError. Never override an already recorded error.
        if !@validation_finished && @status[:error].nil?
          @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
        end

        if @status[:error]
          Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
        else
          Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Core
  module Filtering
    # String states a filtering validation object can carry in its :state field.
    class ValidationStatus
      # A draft in this state is picked up by the scheduler for validation.
      EDITED = 'edited'
      # Active filtering has to be in this state for a sync job to proceed.
      VALID = 'valid'
      # State written when validation fails.
      INVALID = 'invalid'
    end
  end
end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering/post_process_engine'
10
+ require 'core/filtering/post_process_result'
11
+ require 'core/filtering/simple_rule'
12
+ require 'core/filtering/validation_job_runner'
13
+ require 'core/filtering/validation_status'
14
+
15
module Core
  module Filtering
    # Domain key used for filtering validation results. core/filtering/validation_job_runner
    # (required above) declares the same constant, so only define it when it is still
    # missing; redefining it would emit an "already initialized constant" warning.
    DEFAULT_DOMAIN = 'DEFAULT' unless const_defined?(:DEFAULT_DOMAIN, false)
  end
end
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'app/config'
10
+ require 'utility/bulk_queue'
11
+ require 'utility/es_client'
12
+ require 'utility/logger'
13
+ require 'elasticsearch/api'
14
+
15
module Core
  module Ingestion
    # Sink that queues index/delete operations for a single index and sends the queued
    # data through the client's bulk call whenever the queue cannot take another operation
    # or #flush is called explicitly.
    class EsSink
      # @param index_name name of the index the operations target
      # @param request_pipeline pipeline passed along with every bulk request
      # @param bulk_queue queue buffering the serialized operations
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an index operation for an already serialized document, flushing first when
      # the queue reports that the operation would not fit.
      def ingest(id, serialized_document)
        action_line = bulk_action('index', id)
        flush if !@operation_queue.will_fit?(action_line, serialized_document)

        @operation_queue.add(action_line, serialized_document)
      end

      # Queues a delete operation for the given document id, flushing first when needed.
      def delete(doc_id)
        action_line = bulk_action('delete', doc_id)
        flush if !@operation_queue.will_fit?(action_line)

        @operation_queue.add(action_line)
      end

      # Sends everything currently queued in one bulk request; does nothing (and returns
      # nil) when the queue is empty.
      def flush
        pending = @operation_queue.pop_all

        @client.bulk(:body => pending, :pipeline => @request_pipeline) unless pending.empty?
      end

      # Serializes an object with the serializer of Elasticsearch::API.
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      attr_accessor :index_name

      # Builds the serialized action line ('index' or 'delete') for a document id.
      def bulk_action(verb, id)
        serialize({ verb => { '_index' => index_name, '_id' => id } })
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Ingestion
    # Front-end for a sink strategy: skips empty and oversized documents, forwards the
    # rest to the sink and keeps running totals of what was ingested and deleted.
    class Ingester
      # @param sink_strategy object responding to serialize, ingest, delete and flush
      # @param max_allowed_document_size maximum serialized size in bytes (5 MiB by
      #   default); a value of 0 or less turns the size check off
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Serializes and forwards one document to the sink. Empty documents and documents
      # whose serialized form exceeds the size limit are skipped with a warning.
      def ingest(document)
        if document.nil? || !document.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        payload = @sink_strategy.serialize(document)
        payload_bytes = payload.bytesize

        if too_large?(payload_bytes)
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{payload_bytes}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], payload)

        @ingested_count += 1
        @ingested_volume += payload_bytes
      end

      # Ingests every document of the given collection, one by one.
      def ingest_multiple(documents)
        documents.each do |document|
          ingest(document)
        end
      end

      # Forwards a delete for the given id to the sink; nil ids are ignored.
      def delete(id)
        unless id.nil?
          @sink_strategy.delete(id)

          @deleted_count += 1
        end
      end

      # Deletes every id of the given collection, one by one.
      def delete_multiple(ids)
        ids.each do |doc_id|
          delete(doc_id)
        end
      end

      # Asks the sink to flush whatever it has buffered.
      def flush
        @sink_strategy.flush
      end

      # @return [Hash] counters collected so far: indexed document count, indexed volume
      #   in bytes and deleted document count
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end

      private

      # True when the size check is enabled and the given byte size exceeds the limit.
      def too_large?(byte_size)
        @max_allowed_document_size > 0 && byte_size > @max_allowed_document_size
      end

      # The do_* hooks below are not invoked anywhere within this class; each one only
      # raises NotImplementedError.
      def do_ingest(_id, _serialized_document)
        raise NotImplementedError
      end

      def do_delete(_id)
        raise NotImplementedError
      end

      def do_flush
        raise NotImplementedError
      end

      def do_serialize(_document)
        raise NotImplementedError
      end
    end
  end
end
@@ -6,8 +6,5 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'core/output_sink/es_sink'
10
- require 'core/output_sink/console_sink'
11
- require 'core/output_sink/combined_sink'
12
-
13
- module Core::OutputSink; end
9
+ require 'core/ingestion/ingester'
10
+ require 'core/ingestion/es_sink'
@@ -10,6 +10,7 @@ require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering/validation_status'
13
14
  require 'utility/cron'
14
15
  require 'utility/logger'
15
16
  require 'utility/exception_tracking'
@@ -38,15 +39,18 @@ module Core
38
39
  if configuration_triggered?(cs)
39
40
  yield cs, :configuration
40
41
  end
41
- end
42
- if @is_shutting_down
43
- break
42
+ if filtering_validation_triggered?(cs)
43
+ yield cs, :filter_validation
44
+ end
44
45
  end
45
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
46
47
  Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
48
  rescue StandardError => e
48
49
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
50
  ensure
51
+ if @is_shutting_down
52
+ break
53
+ end
50
54
  if @poll_interval > 0 && !@is_shutting_down
51
55
  Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
52
56
  sleep(@poll_interval)
@@ -62,8 +66,6 @@ module Core
62
66
  private
63
67
 
64
68
  def sync_triggered?(connector_settings)
65
- return false unless connector_registered?(connector_settings.service_type)
66
-
67
69
  unless connector_settings.valid_index_name?
68
70
  Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
69
71
  return false
@@ -129,8 +131,6 @@ module Core
129
131
  end
130
132
 
131
133
  def heartbeat_triggered?(connector_settings)
132
- return false unless connector_registered?(connector_settings.service_type)
133
-
134
134
  last_seen = connector_settings[:last_seen]
135
135
  return true if last_seen.nil? || last_seen.empty?
136
136
  last_seen = begin
@@ -144,11 +144,41 @@ module Core
144
144
  end
145
145
 
146
146
  def configuration_triggered?(connector_settings)
147
- if connector_settings.needs_service_type? || connector_registered?(connector_settings.service_type)
148
- return connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
147
+ connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
148
+ end
149
+
150
+ def filtering_validation_triggered?(connector_settings)
151
+ filtering = connector_settings.filtering
152
+
153
+ unless filtering.present?
154
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
155
+
156
+ return false
149
157
  end
150
158
 
151
- false
159
+ draft_filters = filtering[:draft]
160
+
161
+ unless draft_filters.present?
162
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
163
+
164
+ return false
165
+ end
166
+
167
+ validation = draft_filters[:validation]
168
+
169
+ unless validation.present?
170
+ Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
171
+
172
+ return false
173
+ end
174
+
175
+ unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
176
+ Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
177
+
178
+ return false
179
+ end
180
+
181
+ true
152
182
  end
153
183
 
154
184
  def connector_registered?(service_type)
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,14 +21,18 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
24
+ JOB_REPORTING_INTERVAL = 10
25
+
22
26
  def initialize(connector_settings)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
25
29
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
30
  @sync_finished = false
31
+ @sync_error = nil
27
32
  @status = {
28
33
  :indexed_document_count => 0,
29
34
  :deleted_document_count => 0,
35
+ :indexed_document_volume => 0,
30
36
  :error => nil
31
37
  }
32
38
  end
@@ -41,8 +47,10 @@ module Core
41
47
  def do_sync!
42
48
  Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
49
 
44
- job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
45
- job_id = job_description['_id']
50
+ job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
51
+ job_description = job_record['_source']
52
+ job_id = job_record['_id']
53
+ job_description['_id'] = job_id
46
54
 
47
55
  unless job_id.present?
48
56
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +60,10 @@ module Core
52
60
  begin
53
61
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
54
62
 
63
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
64
+ validate_filtering(job_description.dig(:connector, :filtering))
65
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
66
+
55
67
  connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
56
68
 
57
69
  connector_instance.do_health_check!
@@ -61,11 +73,21 @@ module Core
61
73
 
62
74
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
63
75
 
76
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
77
+ reporting_cycle_start = Time.now
78
+ Utility::Logger.info('Yielding documents...')
64
79
  connector_instance.yield_documents do |document|
65
80
  document = add_ingest_metadata(document)
66
- @sink.ingest(document)
67
- incoming_ids << document['id']
68
- @status[:indexed_document_count] += 1
81
+ post_process_result = post_processing_engine.process(document)
82
+ if post_process_result.is_include?
83
+ @ingester.ingest(document)
84
+ incoming_ids << document['id']
85
+ end
86
+
87
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
88
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
89
+ reporting_cycle_start = Time.now
90
+ end
69
91
  end
70
92
 
71
93
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -73,32 +95,48 @@ module Core
73
95
  Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
74
96
 
75
97
  ids_to_delete.each do |id|
76
- @sink.delete(id)
77
- @status[:deleted_document_count] += 1
98
+ @ingester.delete(id)
99
+
100
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
101
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
102
+ reporting_cycle_start = Time.now
103
+ end
78
104
  end
79
105
 
80
- @sink.flush
106
+ @ingester.flush
81
107
 
82
108
  # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
83
109
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
84
110
  @sync_finished = true
85
111
  rescue StandardError => e
86
- @status[:error] = e.message
112
+ @sync_error = e.message
87
113
  Utility::ExceptionTracking.log_exception(e)
88
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
89
114
  ensure
115
+ stats = @ingester.ingestion_stats
116
+
117
+ Utility::Logger.debug("Sync stats are: #{stats}")
118
+
119
+ @status[:indexed_document_count] = stats[:indexed_document_count]
120
+ @status[:deleted_document_count] = stats[:deleted_document_count]
121
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
122
+
90
123
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
91
124
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
92
125
 
93
126
  # Make sure to not override a previous error message
94
- if !@sync_finished && @status[:error].nil?
95
- @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
127
+ if !@sync_finished && @sync_error.nil?
128
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
96
129
  end
97
130
 
98
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
131
+ unless connector_instance.nil?
132
+ metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
133
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
134
+ end
135
+
136
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
99
137
 
100
- if @status[:error]
101
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
138
+ if @sync_error
139
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
102
140
  else
103
141
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
104
142
  end
@@ -119,5 +157,15 @@ module Core
119
157
 
120
158
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
121
159
  end
160
+
161
+ def validate_filtering(filtering)
162
+ validation_result = @connector_class.validate_filtering(filtering)
163
+
164
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
165
+ raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
166
+
167
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
168
+ raise errors_present_error if validation_result[:errors].present?
169
+ end
122
170
  end
123
171
  end
data/lib/core.rb CHANGED
@@ -7,8 +7,10 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
module Utility
  # In-memory buffer of newline-terminated bulk operations, bounded by an operation count
  # and by a byte-size threshold. Callers are expected to ask #will_fit? before #add and to
  # drain the buffer with #pop_all when an operation does not fit.
  class BulkQueue
    # Raised by #add when the operation does not fit into the queue.
    class QueueOverflowError < StandardError; end

    # 500 items or 5MB
    #
    # @param operation_count_threshold [Integer] maximum number of queued operations
    # @param size_threshold [Integer] byte-size limit; the tracked size has to stay
    #   strictly below it
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns everything buffered so far and starts over with an empty buffer.
    #
    # @return [String] the buffered data ('' when nothing was queued)
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends an operation line and, when given, its payload line.
    #
    # @param operation [String] serialized operation
    # @param payload [String, nil] serialized document belonging to the operation
    # @raise [QueueOverflowError] when #will_fit? is false for the given arguments
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      operation_size = get_size(operation)
      payload_size = get_size(payload)

      # Only the bytes of operation and payload are tracked; the newline separators
      # appended below are not part of the accounted size.
      @current_operation_count += 1
      @current_buffer_size += operation_size
      @current_buffer_size += payload_size
      @current_data_size += payload_size

      @buffer << operation
      @buffer << "\n"

      if payload
        @buffer << payload
        @buffer << "\n"
      end
    end

    # @return [Boolean] whether one more operation (plus optional payload) stays within
    #   both the operation count threshold and the size threshold
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      operation_size = get_size(operation)
      payload_size = get_size(payload)

      @current_buffer_size + operation_size + payload_size < @size_threshold
    end

    # @return [Hash] number of queued operations and tracked buffer size in bytes
    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Byte size of the given string, 0 for nil.
    def get_size(str)
      return 0 unless str
      str.bytesize
    end

    # Clears all counters and starts a fresh buffer.
    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      # The buffer is mutated with <<, so it must be an explicitly mutable string: a bare
      # '' literal is frozen under `frozen_string_literal: true` and triggers deprecation
      # warnings when mutated on Ruby 3.4+.
      @buffer = +''
    end
  end
end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  # Helpers for working with filtering data.
  class Filtering
    # Picks the single filter object out of a filtering value.
    #
    # @param filtering an array of filter objects, a single filter object, or nothing
    # @return the first element when an array is given, the value itself otherwise;
    #   an empty hash whenever the input or the picked filter is not present
    def self.extract_filter(filtering)
      return {} if !filtering.present?

      # assume for now, that first object in filtering array or a filter object itself is the only filtering object
      candidate =
        if filtering.is_a?(Array)
          filtering.first
        else
          filtering
        end

      return candidate if candidate.present?

      {}
    end
  end
end
@@ -4,6 +4,7 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'config'
7
8
  require 'logger'
8
9
  require 'active_support/core_ext/module'
9
10
  require 'active_support/core_ext/string/filters'
@@ -23,7 +24,7 @@ module Utility
23
24
  end
24
25
 
25
26
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
28
  end
28
29
 
29
30
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,15 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'utility/bulk_queue'
8
+ require 'utility/common'
7
9
  require 'utility/constants'
8
10
  require 'utility/cron'
9
- require 'utility/common'
11
+ require 'utility/elasticsearch/index/mappings'
12
+ require 'utility/elasticsearch/index/text_analysis_settings'
13
+ require 'utility/environment'
10
14
  require 'utility/errors'
11
15
  require 'utility/es_client'
12
- require 'utility/environment'
13
16
  require 'utility/exception_tracking'
14
17
  require 'utility/extension_mapping_util'
15
18
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'