connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/dispatcher.rb +12 -0
  4. data/lib/app/preflight_check.rb +11 -0
  5. data/lib/connectors/base/connector.rb +19 -12
  6. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  7. data/lib/connectors/example/connector.rb +15 -0
  8. data/lib/connectors/gitlab/connector.rb +15 -1
  9. data/lib/connectors/mongodb/connector.rb +55 -36
  10. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  11. data/lib/core/configuration.rb +3 -1
  12. data/lib/core/connector_job.rb +137 -0
  13. data/lib/core/connector_settings.rb +24 -11
  14. data/lib/core/elastic_connector_actions.rb +263 -24
  15. data/lib/core/filtering/post_process_engine.rb +39 -0
  16. data/lib/core/filtering/post_process_result.rb +27 -0
  17. data/lib/core/filtering/simple_rule.rb +141 -0
  18. data/lib/core/filtering/validation_job_runner.rb +53 -0
  19. data/lib/core/filtering/validation_status.rb +17 -0
  20. data/lib/core/filtering.rb +17 -0
  21. data/lib/core/ingestion/es_sink.rb +59 -0
  22. data/lib/core/ingestion/ingester.rb +90 -0
  23. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  24. data/lib/core/scheduler.rb +40 -10
  25. data/lib/core/sync_job_runner.rb +65 -17
  26. data/lib/core.rb +2 -0
  27. data/lib/utility/bulk_queue.rb +85 -0
  28. data/lib/utility/constants.rb +2 -0
  29. data/lib/utility/filtering.rb +22 -0
  30. data/lib/utility/logger.rb +2 -1
  31. data/lib/utility.rb +5 -4
  32. metadata +16 -7
  33. data/lib/core/output_sink/base_sink.rb +0 -33
  34. data/lib/core/output_sink/combined_sink.rb +0 -38
  35. data/lib/core/output_sink/console_sink.rb +0 -51
  36. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,53 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+
12
+ module Core
13
+ module Filtering
14
+ DEFAULT_DOMAIN = 'DEFAULT'
15
+
16
+ class ValidationJobRunner
17
+ def initialize(connector_settings)
18
+ @connector_settings = connector_settings
19
+ @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
20
+ @validation_finished = false
21
+ @status = { :error => nil }
22
+ end
23
+
24
+ def execute
25
+ Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")
26
+
27
+ validation_result = @connector_class.validate_filtering(@connector_settings.filtering)
28
+
29
+ # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
30
+ ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })
31
+
32
+ @validation_finished = true
33
+ rescue StandardError => e
34
+ Utility::ExceptionTracking.log_exception(e)
35
+ validation_failed_result = { :state => Core::Filtering::ValidationStatus::INVALID,
36
+ :errors => [
37
+ { :ids => [], :messages => ['Unknown problem occurred while validating, see logs for details.'] }
38
+ ] }
39
+ ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_failed_result })
40
+ ensure
41
+ if !@validation_finished && !@status[:error].present?
42
+ @status[:error] = 'Validation thread did not finish execution. Check connector logs for more details.'
43
+ end
44
+
45
+ if @status[:error]
46
+ Utility::Logger.warn("Failed to validate filtering for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
47
+ else
48
+ Utility::Logger.info("Successfully validated filtering for connector #{@connector_settings.id}.")
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Core
  module Filtering
    # Enumeration of the states a filtering validation can be in,
    # mirroring the string values stored in the connectors index.
    class ValidationStatus
      VALID = 'valid'       # filtering passed validation
      INVALID = 'invalid'   # filtering failed validation
      EDITED = 'edited'     # draft filtering changed and awaits validation
    end
  end
end
@@ -0,0 +1,17 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/filtering/post_process_engine'
10
+ require 'core/filtering/post_process_result'
11
+ require 'core/filtering/simple_rule'
12
+ require 'core/filtering/validation_job_runner'
13
+ require 'core/filtering/validation_status'
14
+
15
# Shared namespace constants for filtering.
module Core
  module Filtering
    # Connectors currently support a single filtering domain.
    DEFAULT_DOMAIN = 'DEFAULT'
  end
end
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'app/config'
10
+ require 'utility/bulk_queue'
11
+ require 'utility/es_client'
12
+ require 'utility/logger'
13
+ require 'elasticsearch/api'
14
+
15
module Core
  module Ingestion
    # Sink that writes ingestion operations into an Elasticsearch index via
    # the bulk API. Operations are accumulated in a BulkQueue and sent as a
    # single bulk request once the next operation would no longer fit.
    class EsSink
      # @param index_name [String] target content index
      # @param request_pipeline ingest pipeline passed on each bulk request
      # @param bulk_queue buffer used to batch bulk operations
      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
        @client = Utility::EsClient.new(App::Config[:elasticsearch])
        @index_name = index_name
        @request_pipeline = request_pipeline
        @operation_queue = bulk_queue
      end

      # Queues an upsert of the given pre-serialized document, flushing the
      # current batch first if the new entry would overflow the queue.
      def ingest(id, serialized_document)
        index_op = serialize({ 'index' => { '_index' => index_name, '_id' => id } })

        flush unless @operation_queue.will_fit?(index_op, serialized_document)

        @operation_queue.add(
          index_op,
          serialized_document
        )
      end

      # Queues a deletion of the document with the given id, flushing the
      # current batch first if needed.
      def delete(doc_id)
        delete_op = serialize({ 'delete' => { '_index' => index_name, '_id' => doc_id } })
        flush unless @operation_queue.will_fit?(delete_op)

        @operation_queue.add(delete_op)
      end

      # Sends all queued operations in one bulk request. No-op when empty.
      def flush
        data = @operation_queue.pop_all
        return if data.empty?

        @client.bulk(:body => data, :pipeline => @request_pipeline)
      end

      # Serializes an object with the same serializer the ES client uses,
      # so queued sizes match what is actually sent.
      def serialize(obj)
        Elasticsearch::API.serializer.dump(obj)
      end

      private

      # Fix: was `attr_accessor`, which also defined a writer that nothing
      # uses; only read access is needed internally.
      attr_reader :index_name
    end
  end
end
@@ -0,0 +1,90 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Core
  module Ingestion
    # Coordinates document ingestion into a sink (e.g. EsSink): validates
    # and size-checks documents, delegates the work to the sink strategy,
    # and tracks ingestion statistics for job reporting.
    class Ingester
      # @param sink_strategy object responding to #serialize, #ingest, #delete and #flush
      # @param max_allowed_document_size [Integer] per-document byte limit; <= 0 disables the check
      def initialize(sink_strategy, max_allowed_document_size = 5 * 1024 * 1024)
        @sink_strategy = sink_strategy
        @max_allowed_document_size = max_allowed_document_size

        @ingested_count = 0
        @ingested_volume = 0
        @deleted_count = 0
      end

      # Serializes and forwards one document to the sink. Empty and
      # oversized documents are skipped with a warning instead of raising.
      def ingest(document)
        unless document&.any?
          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
          return
        end

        serialized_document = @sink_strategy.serialize(document)
        document_size = serialized_document.bytesize

        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
          return
        end

        @sink_strategy.ingest(document['id'], serialized_document)

        @ingested_count += 1
        @ingested_volume += document_size
      end

      def ingest_multiple(documents)
        documents.each { |doc| ingest(doc) }
      end

      # Forwards a deletion to the sink; nil ids are ignored.
      def delete(id)
        return if id.nil?

        @sink_strategy.delete(id)

        @deleted_count += 1
      end

      def delete_multiple(ids)
        ids.each { |id| delete(id) }
      end

      # Flushes whatever the sink has buffered.
      def flush
        @sink_strategy.flush
      end

      # Counters reported back to the connectors/jobs indices.
      def ingestion_stats
        {
          :indexed_document_count => @ingested_count,
          :indexed_document_volume => @ingested_volume,
          :deleted_document_count => @deleted_count
        }
      end

      # Fix: removed the former private do_ingest/do_delete/do_flush/
      # do_serialize NotImplementedError stubs — they were dead code left
      # over from the sink refactor (never called; all work is delegated
      # to @sink_strategy above).
    end
  end
end
@@ -6,8 +6,5 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'core/output_sink/es_sink'
10
- require 'core/output_sink/console_sink'
11
- require 'core/output_sink/combined_sink'
12
-
13
- module Core::OutputSink; end
9
+ require 'core/ingestion/ingester'
10
+ require 'core/ingestion/es_sink'
@@ -10,6 +10,7 @@ require 'time'
10
10
  require 'fugit'
11
11
  require 'core/connector_settings'
12
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering/validation_status'
13
14
  require 'utility/cron'
14
15
  require 'utility/logger'
15
16
  require 'utility/exception_tracking'
@@ -38,15 +39,18 @@ module Core
38
39
  if configuration_triggered?(cs)
39
40
  yield cs, :configuration
40
41
  end
41
- end
42
- if @is_shutting_down
43
- break
42
+ if filtering_validation_triggered?(cs)
43
+ yield cs, :filter_validation
44
+ end
44
45
  end
45
46
  rescue *Utility::AUTHORIZATION_ERRORS => e
46
47
  Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
47
48
  rescue StandardError => e
48
49
  Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
49
50
  ensure
51
+ if @is_shutting_down
52
+ break
53
+ end
50
54
  if @poll_interval > 0 && !@is_shutting_down
51
55
  Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
52
56
  sleep(@poll_interval)
@@ -62,8 +66,6 @@ module Core
62
66
  private
63
67
 
64
68
  def sync_triggered?(connector_settings)
65
- return false unless connector_registered?(connector_settings.service_type)
66
-
67
69
  unless connector_settings.valid_index_name?
68
70
  Utility::Logger.warn("The index name of #{connector_settings.formatted} is invalid.")
69
71
  return false
@@ -129,8 +131,6 @@ module Core
129
131
  end
130
132
 
131
133
  def heartbeat_triggered?(connector_settings)
132
- return false unless connector_registered?(connector_settings.service_type)
133
-
134
134
  last_seen = connector_settings[:last_seen]
135
135
  return true if last_seen.nil? || last_seen.empty?
136
136
  last_seen = begin
@@ -144,11 +144,41 @@ module Core
144
144
  end
145
145
 
146
146
  def configuration_triggered?(connector_settings)
147
- if connector_settings.needs_service_type? || connector_registered?(connector_settings.service_type)
148
- return connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
147
+ connector_settings.needs_service_type? || connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
148
+ end
149
+
150
+ def filtering_validation_triggered?(connector_settings)
151
+ filtering = connector_settings.filtering
152
+
153
+ unless filtering.present?
154
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain filtering to be validated.")
155
+
156
+ return false
149
157
  end
150
158
 
151
- false
159
+ draft_filters = filtering[:draft]
160
+
161
+ unless draft_filters.present?
162
+ Utility::Logger.debug("#{connector_settings.formatted} does not contain a draft filter to be validated.")
163
+
164
+ return false
165
+ end
166
+
167
+ validation = draft_filters[:validation]
168
+
169
+ unless validation.present?
170
+ Utility::Logger.warn("#{connector_settings.formatted} does not contain a validation object inside draft filtering. Check connectors index.")
171
+
172
+ return false
173
+ end
174
+
175
+ unless validation[:state] == Core::Filtering::ValidationStatus::EDITED
176
+ Utility::Logger.debug("#{connector_settings.formatted} filtering validation needs to be in state #{Core::Filtering::ValidationStatus::EDITED} to be able to validate it.")
177
+
178
+ return false
179
+ end
180
+
181
+ true
152
182
  end
153
183
 
154
184
  def connector_registered?(service_type)
@@ -8,7 +8,9 @@
8
8
 
9
9
  require 'connectors/connector_status'
10
10
  require 'connectors/registry'
11
- require 'core/output_sink'
11
+ require 'core/filtering/post_process_engine'
12
+ require 'core/ingestion'
13
+ require 'core/filtering/validation_status'
12
14
  require 'utility'
13
15
 
14
16
  module Core
@@ -19,14 +21,18 @@ module Core
19
21
  end
20
22
 
21
23
  class SyncJobRunner
24
+ JOB_REPORTING_INTERVAL = 10
25
+
22
26
  def initialize(connector_settings)
23
27
  @connector_settings = connector_settings
24
- @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
28
+ @ingester = Core::Ingestion::Ingester.new(Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline))
25
29
  @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
26
30
  @sync_finished = false
31
+ @sync_error = nil
27
32
  @status = {
28
33
  :indexed_document_count => 0,
29
34
  :deleted_document_count => 0,
35
+ :indexed_document_volume => 0,
30
36
  :error => nil
31
37
  }
32
38
  end
@@ -41,8 +47,10 @@ module Core
41
47
  def do_sync!
42
48
  Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
43
49
 
44
- job_description = ElasticConnectorActions.claim_job(@connector_settings.id)
45
- job_id = job_description['_id']
50
+ job_record = ElasticConnectorActions.claim_job(@connector_settings.id)
51
+ job_description = job_record['_source']
52
+ job_id = job_record['_id']
53
+ job_description['_id'] = job_id
46
54
 
47
55
  unless job_id.present?
48
56
  Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
@@ -52,6 +60,10 @@ module Core
52
60
  begin
53
61
  Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")
54
62
 
63
+ Utility::Logger.info("Checking active filtering for sync job #{job_id} for connector #{@connector_settings.id}.")
64
+ validate_filtering(job_description.dig(:connector, :filtering))
65
+ Utility::Logger.debug("Active filtering for sync job #{job_id} for connector #{@connector_settings.id} is valid.")
66
+
55
67
  connector_instance = Connectors::REGISTRY.connector(@connector_settings.service_type, @connector_settings.configuration, job_description: job_description)
56
68
 
57
69
  connector_instance.do_health_check!
@@ -61,11 +73,21 @@ module Core
61
73
 
62
74
  Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")
63
75
 
76
+ post_processing_engine = Core::Filtering::PostProcessEngine.new(job_description)
77
+ reporting_cycle_start = Time.now
78
+ Utility::Logger.info('Yielding documents...')
64
79
  connector_instance.yield_documents do |document|
65
80
  document = add_ingest_metadata(document)
66
- @sink.ingest(document)
67
- incoming_ids << document['id']
68
- @status[:indexed_document_count] += 1
81
+ post_process_result = post_processing_engine.process(document)
82
+ if post_process_result.is_include?
83
+ @ingester.ingest(document)
84
+ incoming_ids << document['id']
85
+ end
86
+
87
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
88
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
89
+ reporting_cycle_start = Time.now
90
+ end
69
91
  end
70
92
 
71
93
  ids_to_delete = existing_ids - incoming_ids.uniq
@@ -73,32 +95,48 @@ module Core
73
95
  Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
74
96
 
75
97
  ids_to_delete.each do |id|
76
- @sink.delete(id)
77
- @status[:deleted_document_count] += 1
98
+ @ingester.delete(id)
99
+
100
+ if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
101
+ ElasticConnectorActions.update_sync(job_id, @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata))
102
+ reporting_cycle_start = Time.now
103
+ end
78
104
  end
79
105
 
80
- @sink.flush
106
+ @ingester.flush
81
107
 
82
108
  # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
83
109
  # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
84
110
  @sync_finished = true
85
111
  rescue StandardError => e
86
- @status[:error] = e.message
112
+ @sync_error = e.message
87
113
  Utility::ExceptionTracking.log_exception(e)
88
- ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
89
114
  ensure
115
+ stats = @ingester.ingestion_stats
116
+
117
+ Utility::Logger.debug("Sync stats are: #{stats}")
118
+
119
+ @status[:indexed_document_count] = stats[:indexed_document_count]
120
+ @status[:deleted_document_count] = stats[:deleted_document_count]
121
+ @status[:indexed_document_volume] = stats[:indexed_document_volume]
122
+
90
123
  Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
91
124
  Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents into #{@connector_settings.index_name}.")
92
125
 
93
126
  # Make sure to not override a previous error message
94
- if !@sync_finished && @status[:error].nil?
95
- @status[:error] = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
127
+ if !@sync_finished && @sync_error.nil?
128
+ @sync_error = 'Sync thread didn\'t finish execution. Check connector logs for more details.'
96
129
  end
97
130
 
98
- ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)
131
+ unless connector_instance.nil?
132
+ metadata = @ingester.ingestion_stats.merge(:metadata => connector_instance.metadata)
133
+ metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
134
+ end
135
+
136
+ ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, metadata, @sync_error)
99
137
 
100
- if @status[:error]
101
- Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@status[:error]}'.")
138
+ if @sync_error
139
+ Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error '#{@sync_error}'.")
102
140
  else
103
141
  Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
104
142
  end
@@ -119,5 +157,15 @@ module Core
119
157
 
120
158
  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
121
159
  end
160
+
161
+ def validate_filtering(filtering)
162
+ validation_result = @connector_class.validate_filtering(filtering)
163
+
164
+ wrong_state_error = Utility::InvalidFilterConfigError.new("Active filtering is not in valid state (current state: #{validation_result[:state]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
165
+ raise wrong_state_error if validation_result[:state] != Core::Filtering::ValidationStatus::VALID
166
+
167
+ errors_present_error = Utility::InvalidFilterConfigError.new("Active filtering is in valid state, but errors were detected (errors: #{validation_result[:errors]}) for connector #{@connector_settings.id}. Please check active filtering in connectors index.")
168
+ raise errors_present_error if validation_result[:errors].present?
169
+ end
122
170
  end
123
171
  end
data/lib/core.rb CHANGED
@@ -7,8 +7,10 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'core/configuration'
10
+ require 'core/connector_job'
10
11
  require 'core/connector_settings'
11
12
  require 'core/elastic_connector_actions'
13
+ require 'core/filtering'
12
14
  require 'core/heartbeat'
13
15
  require 'core/scheduler'
14
16
  require 'core/single_scheduler'
@@ -0,0 +1,85 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'json'
8
+
9
module Utility
  # Accumulates Elasticsearch bulk-API operations as an NDJSON buffer until
  # either the operation-count or the byte-size threshold would be exceeded.
  class BulkQueue
    class QueueOverflowError < StandardError; end

    # 500 items or 5MB
    def initialize(operation_count_threshold = 500, size_threshold = 5 * 1024 * 1024)
      @operation_count_threshold = operation_count_threshold
      @size_threshold = size_threshold

      reset
    end

    # Returns the accumulated NDJSON payload and empties the queue.
    def pop_all
      result = @buffer

      reset

      result
    end

    # Appends one operation line (plus optional payload line) to the buffer.
    # @raise [QueueOverflowError] when the entry does not fit (callers should
    #   check #will_fit? first and flush)
    def add(operation, payload = nil)
      raise QueueOverflowError unless will_fit?(operation, payload)

      @current_operation_count += 1
      # Fix: include the trailing newline appended after every line, so the
      # tracked size matches the actual bulk request body size (the old code
      # undercounted by one byte per buffered line).
      @current_buffer_size += serialized_size(operation)
      @current_buffer_size += serialized_size(payload)
      @current_data_size += get_size(payload)

      @buffer << operation
      @buffer << "\n"

      if payload
        @buffer << payload
        @buffer << "\n"
      end
    end

    # True when the operation (and optional payload) can be queued without
    # exceeding the operation-count or byte-size thresholds.
    def will_fit?(operation, payload = nil)
      return false if @current_operation_count + 1 > @operation_count_threshold

      @current_buffer_size + serialized_size(operation) + serialized_size(payload) < @size_threshold
    end

    def current_stats
      {
        :current_operation_count => @current_operation_count,
        :current_buffer_size => @current_buffer_size
      }
    end

    private

    # Raw byte size of a string, 0 for nil.
    def get_size(str)
      return 0 unless str
      str.bytesize
    end

    # Bytes a string occupies in the buffer, including its newline.
    def serialized_size(str)
      return 0 unless str
      str.bytesize + 1
    end

    def reset
      @current_operation_count = 0
      @current_buffer_size = 0
      @current_data_size = 0

      @buffer = +''
    end
  end
end
@@ -16,5 +16,7 @@ module Utility
16
16
  JOB_INDEX = '.elastic-connectors-sync-jobs'
17
17
  CONTENT_INDEX_PREFIX = 'search-'
18
18
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
19
+ FILTERING_RULES_FEATURE = 'filtering_rules'
20
+ FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
19
21
  end
20
22
  end
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  class Filtering
    # Extracts the single effective filter object from a `filtering`
    # payload, which may be a lone filter hash or an array of filters.
    # Returns an empty hash when nothing usable is present.
    def self.extract_filter(filtering)
      return {} unless filtering.present?

      # assume for now, that first object in filtering array or a filter object itself is the only filtering object
      first_filter = filtering.is_a?(Array) ? filtering.first : filtering

      first_filter.present? ? first_filter : {}
    end
  end
end
@@ -4,6 +4,7 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'config'
7
8
  require 'logger'
8
9
  require 'active_support/core_ext/module'
9
10
  require 'active_support/core_ext/string/filters'
@@ -23,7 +24,7 @@ module Utility
23
24
  end
24
25
 
25
26
  def logger
26
- @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
+ @logger ||= defined?(::Settings) && ::Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
27
28
  end
28
29
 
29
30
  SUPPORTED_LOG_LEVELS.each do |level|
data/lib/utility.rb CHANGED
@@ -4,14 +4,15 @@
4
4
  # you may not use this file except in compliance with the Elastic License.
5
5
  #
6
6
 
7
+ require 'utility/bulk_queue'
8
+ require 'utility/common'
7
9
  require 'utility/constants'
8
10
  require 'utility/cron'
9
- require 'utility/common'
11
+ require 'utility/elasticsearch/index/mappings'
12
+ require 'utility/elasticsearch/index/text_analysis_settings'
13
+ require 'utility/environment'
10
14
  require 'utility/errors'
11
15
  require 'utility/es_client'
12
- require 'utility/environment'
13
16
  require 'utility/exception_tracking'
14
17
  require 'utility/extension_mapping_util'
15
18
  require 'utility/logger'
16
- require 'utility/elasticsearch/index/mappings'
17
- require 'utility/elasticsearch/index/text_analysis_settings'