connectors_service 8.5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,142 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash/indifferent_access'
10
+ require 'connectors/connector_status'
11
+ require 'core/elastic_connector_actions'
12
+ require 'utility'
13
+
14
module Core
  # Read-only wrapper around a connector document fetched from Elasticsearch,
  # paired with the `_meta` section of the connectors index mapping (used as a
  # source of defaults for the ingest-pipeline settings).
  class ConnectorSettings

    DEFAULT_REQUEST_PIPELINE = 'ent-search-generic-ingestion'
    DEFAULT_EXTRACT_BINARY_CONTENT = true
    DEFAULT_REDUCE_WHITESPACE = true
    DEFAULT_RUN_ML_INFERENCE = true

    DEFAULT_PAGE_SIZE = 100

    # Error Classes
    class ConnectorNotFoundError < StandardError; end

    # Loads a single connector by its document id.
    #
    # @param connector_id [String] id of the connector document
    # @return [Core::ConnectorSettings]
    # @raise [ConnectorNotFoundError] when the response is not flagged as found
    def self.fetch_by_id(connector_id)
      es_response = ElasticConnectorActions.get_connector(connector_id)
      connectors_meta = ElasticConnectorActions.connectors_meta

      raise ConnectorNotFoundError, "Connector with id=#{connector_id} was not found." unless es_response[:found]

      new(es_response, connectors_meta)
    end

    # @return [Array<Core::ConnectorSettings>] connectors flagged `is_native: true`
    def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
      query = { term: { is_native: true } }
      fetch_connectors_by_query(query, page_size)
    end

    # @return [Array<Core::ConnectorSettings>] connectors whose service type is the crawler one
    def self.fetch_crawler_connectors(page_size = DEFAULT_PAGE_SIZE)
      query = { term: { service_type: Utility::Constants::CRAWLER_SERVICE_TYPE } }
      fetch_connectors_by_query(query, page_size)
    end

    # Pages through all connectors matching +query+ and wraps every hit.
    #
    # NOTE: this method used to sit below `private`, which has no effect on
    # `def self.` methods; it is therefore (and remains) publicly callable.
    #
    # @param query [Hash] Elasticsearch query
    # @param page_size [Integer] number of hits requested per search call
    # @return [Array<Core::ConnectorSettings>]
    def self.fetch_connectors_by_query(query, page_size)
      connectors_meta = ElasticConnectorActions.connectors_meta

      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_connectors(query, page_size, offset)

        # The search ignores 404s, so the response may carry no `hits` section at all.
        hits_section = response['hits'] || {}
        hits = hits_section['hits'] || []
        total = hits_section.dig('total', 'value') || 0

        results.concat(hits.map { |hit| Core::ConnectorSettings.new(hit, connectors_meta) })

        # An empty page means there is nothing more to read, even if the reported
        # total has not been reached; without this check the loop never ends.
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    # @param es_response [Hash] raw Elasticsearch hit / get response (`_id`, `_source`, ...)
    # @param connectors_meta [Hash] `_meta` of the connectors index mapping
    def initialize(es_response, connectors_meta)
      @elasticsearch_response = es_response.with_indifferent_access
      @connectors_meta = connectors_meta.with_indifferent_access
    end

    def id
      @elasticsearch_response[:_id]
    end

    # Reads a property of the connector document.
    #
    # @return [Object, nil] the value, or nil when the property (or `_source`) is missing
    def [](property_name)
      @elasticsearch_response.dig(:_source, property_name)
    end

    def index_name
      self[:index_name]
    end

    def connector_status
      self[:status]
    end

    def connector_status_allows_sync?
      Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC.include?(connector_status)
    end

    def service_type
      self[:service_type]
    end

    def configuration
      self[:configuration]
    end

    def scheduling_settings
      self[:scheduling]
    end

    # Name of the ingest pipeline: connector setting, else index meta, else the default.
    def request_pipeline
      pipeline_setting(:name, :default_name, DEFAULT_REQUEST_PIPELINE)
    end

    def extract_binary_content?
      pipeline_setting(:extract_binary_content, :default_extract_binary_content, DEFAULT_EXTRACT_BINARY_CONTENT)
    end

    def reduce_whitespace?
      pipeline_setting(:reduce_whitespace, :default_reduce_whitespace, DEFAULT_REDUCE_WHITESPACE)
    end

    def run_ml_inference?
      pipeline_setting(:run_ml_inference, :default_run_ml_inference, DEFAULT_RUN_ML_INFERENCE)
    end

    # @return [String] short human-readable description used in log messages
    def formatted
      properties = ["ID: #{id}"]
      properties << "Service type: #{service_type}" if service_type
      "connector (#{properties.join(', ')})"
    end

    def needs_service_type?
      service_type.to_s.strip.empty?
    end

    def valid_index_name?
      index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
    end

    private

    # Resolves one pipeline setting. Document fields live under `_source`, so
    # that location is consulted first; the top-level lookup is retained only
    # for backward compatibility with callers that pass such a structure.
    def pipeline_setting(name, meta_name, default)
      return_if_present(
        @elasticsearch_response.dig(:_source, :pipeline, name),
        @elasticsearch_response.dig(:pipeline, name),
        @connectors_meta.dig(:pipeline, meta_name),
        default
      )
    end

    # Returns the first argument that is not nil (false is a valid value), or nil.
    def return_if_present(*args)
      args.find { |arg| !arg.nil? }
    end
  end
end
@@ -0,0 +1,269 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+ #
9
+ require 'active_support/core_ext/hash'
10
+ require 'connectors/connector_status'
11
+ require 'connectors/sync_status'
12
+ require 'utility'
13
+
14
module Core
  # Collection of class-level helpers that read and write connector and sync-job
  # documents (and their indices) through a shared Elasticsearch client.
  class ElasticConnectorActions
    class << self

      # Enables scheduling and raises the `sync_now` flag on the connector document.
      def force_sync(connector_id)
        update_connector_fields(connector_id, :scheduling => { :enabled => true }, :sync_now => true)
      end

      # Indexes a new connector document.
      #
      # @return [String] id of the created document
      def create_connector(index_name, service_type)
        body = {
          :scheduling => { :enabled => true },
          :index_name => index_name,
          :service_type => service_type
        }
        response = client.index(:index => Utility::Constants::CONNECTORS_INDEX, :body => body)
        response['_id']
      end

      # Fetches a connector document; a 404 is not raised but returned as the response.
      def get_connector(connector_id)
        client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
      end

      # @return [Hash] `_meta` of the highest-versioned index behind the connectors alias, or {}
      def connectors_meta
        alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
        index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
        alias_mappings.dig(index, 'mappings', '_meta') || {}
      end

      # Runs one page of a connector search, sorted by name. 404s are ignored.
      def search_connectors(query, page_size, offset)
        client.search(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :ignore => 404,
          :body => {
            :size => page_size,
            :from => offset,
            :query => query,
            :sort => ['name']
          }
        )
      end

      def update_connector_configuration(connector_id, configuration)
        update_connector_fields(connector_id, :configuration => configuration)
      end

      def enable_connector_scheduling(connector_id, cron_expression)
        payload = { :enabled => true, :interval => cron_expression }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      def disable_connector_scheduling(connector_id)
        payload = { :enabled => false }
        update_connector_fields(connector_id, :scheduling => payload)
      end

      def set_configurable_field(connector_id, field_name, label, value)
        payload = { field_name => { :value => value, :label => label } }
        update_connector_configuration(connector_id, payload)
      end

      # Marks the connector as syncing and creates the matching job document.
      #
      # @return [String] id of the created job document
      def claim_job(connector_id)
        update_connector_fields(connector_id,
                                :sync_now => false,
                                :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
                                :last_synced => Time.now)

        # NOTE(review): Socket is used here but this file does not require 'socket';
        # presumably it is loaded by a dependency - confirm.
        body = {
          :connector_id => connector_id,
          :status => Connectors::SyncStatus::IN_PROGRESS,
          :worker_hostname => Socket.gethostname,
          :created_at => Time.now
        }
        job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)

        job['_id']
      end

      # Stores the connector status; the error field is cleared unless the status is ERROR.
      #
      # @raise [ArgumentError] when status is ERROR and no error_message is given
      def update_connector_status(connector_id, status, error_message = nil)
        if status == Connectors::ConnectorStatus::ERROR && error_message.nil?
          raise ArgumentError, 'error_message is required when status is error'
        end
        body = {
          :status => status,
          :error => status == Connectors::ConnectorStatus::ERROR ? error_message : nil
        }
        update_connector_fields(connector_id, body)
      end

      # Records the outcome of a sync on both the connector and the job document.
      #
      # @param status [Hash] reads :error, :indexed_document_count and
      #   :deleted_document_count; the whole hash is also merged into the job document
      def complete_sync(connector_id, job_id, status)
        sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED

        update_connector_fields(connector_id,
                                :last_sync_status => sync_status,
                                :last_sync_error => status[:error],
                                :error => status[:error],
                                :last_synced => Time.now,
                                :last_indexed_document_count => status[:indexed_document_count],
                                :last_deleted_document_count => status[:deleted_document_count])

        body = {
          :doc => {
            :status => sync_status,
            :completed_at => Time.now
          }.merge(status)
        }
        client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
      end

      # Collects the `_id` of every document in +index_name+ by paging through a
      # point in time with `search_after`.
      #
      # @return [Array<String>]
      def fetch_document_ids(index_name)
        page_size = 1000
        result = []
        pit_id = nil
        begin
          pit_id = client.open_point_in_time(:index => index_name, :keep_alive => '1m', :expand_wildcards => 'all')['id']
          body = {
            :query => { :match_all => {} },
            :sort => [{ :id => { :order => :asc } }],
            :pit => {
              :id => pit_id,
              :keep_alive => '1m'
            },
            :size => page_size,
            :_source => false
          }
          loop do
            response = client.search(:body => body)
            hits = response['hits']['hits']

            result.concat(hits.map { |h| h['_id'] })
            break if hits.size < page_size

            body[:search_after] = hits.last['sort']
            # Continue with (and later close) the most recently returned PIT id.
            pit_id = response['pit_id'] || pit_id
            body[:pit][:id] = pit_id
          end
        ensure
          # Nothing to close when opening the point in time failed; attempting it
          # would raise again and hide the original error.
          client.close_point_in_time(:index => index_name, :body => { :id => pit_id }) unless pit_id.nil?
        end

        result
      end

      # Ensures a content index exists, using the default text-field mappings and
      # language-specific analysis settings.
      def ensure_content_index_exists(index_name, use_icu_locale = false, language_code = nil)
        settings = Utility::Elasticsearch::Index::TextAnalysisSettings.new(:language_code => language_code, :analysis_icu => use_icu_locale).to_h
        mappings = Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(:connectors_index => true)

        body_payload = { settings: settings, mappings: mappings }
        ensure_index_exists(index_name, body_payload)
      end

      # Creates the index with +body+ when it is missing. For an existing index,
      # the given mappings are applied only if the index has no mappings yet.
      def ensure_index_exists(index_name, body = {})
        if client.indices.exists?(:index => index_name)
          return unless body[:mappings]
          Utility::Logger.debug("Index #{index_name} already exists. Checking mappings...")
          Utility::Logger.debug("New mappings: #{body[:mappings]}")
          response = client.indices.get_mapping(:index => index_name)
          existing = response[index_name]['mappings']
          if existing.empty?
            Utility::Logger.debug("Index #{index_name} has no mappings. Adding mappings...")
            client.indices.put_mapping(:index => index_name, :body => body[:mappings], :expand_wildcards => 'all')
            Utility::Logger.debug("Index #{index_name} mappings added.")
          else
            Utility::Logger.debug("Index #{index_name} already has mappings: #{existing}. Skipping...")
          end
        else
          client.indices.create(:index => index_name, :body => body)
          Utility::Logger.debug("Created index #{index_name}")
        end
      end

      # Builds the creation body for a hidden index.
      #
      # @param alias_name [String, nil] when non-empty, added as a write alias
      # @param mappings [Hash, nil] when given, added as the index mappings
      # @return [Hash]
      def system_index_body(alias_name: nil, mappings: nil)
        body = {
          :settings => {
            :index => {
              :hidden => true,
              :number_of_replicas => 0,
              :auto_expand_replicas => '0-5'
            }
          }
        }
        body[:aliases] = { alias_name => { :is_write_index => true } } unless alias_name.nil? || alias_name.empty?
        body[:mappings] = mappings unless mappings.nil?
        body
      end

      # DO NOT USE this method
      # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_connectors_index_exists
        mappings = {
          :properties => {
            :api_key_id => { :type => :keyword },
            :configuration => { :type => :object },
            :error => { :type => :text },
            :index_name => { :type => :keyword },
            :last_seen => { :type => :date },
            :last_synced => { :type => :date },
            :last_indexed_document_count => { :type => :integer },
            :last_deleted_document_count => { :type => :integer },
            :scheduling => {
              :properties => {
                :enabled => { :type => :boolean },
                :interval => { :type => :text }
              }
            },
            :service_type => { :type => :keyword },
            :status => { :type => :keyword },
            :sync_error => { :type => :text },
            :sync_now => { :type => :boolean },
            :sync_status => { :type => :keyword }
          }
        }
        ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
      end

      # DO NOT USE this method
      # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
      def ensure_job_index_exists
        mappings = {
          :properties => {
            :connector_id => { :type => :keyword },
            :status => { :type => :keyword },
            :error => { :type => :text },
            :worker_hostname => { :type => :keyword },
            :indexed_document_count => { :type => :integer },
            :deleted_document_count => { :type => :integer },
            :created_at => { :type => :date },
            :completed_at => { :type => :date }
          }
        }
        ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
      end

      # Partially updates the connector document; a no-op for an empty +doc+.
      def update_connector_fields(connector_id, doc = {})
        return if doc.empty?
        client.update(
          :index => Utility::Constants::CONNECTORS_INDEX,
          :id => connector_id,
          :body => { :doc => doc },
          :refresh => true,
          :retry_on_conflict => 3
        )
      end

      private

      # Lazily built client, shared by all class-level calls.
      def client
        @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
      end

      # Picks the index name with the largest numeric `-v<N>` suffix.
      def get_latest_index_in_alias(alias_name, indices)
        index_versions = indices.map { |index| index.gsub("#{alias_name}-v", '').to_i }
        index_version = index_versions.max # gets the largest suffix number
        "#{alias_name}-v#{index_version}"
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+ require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
13
+
14
module Core
  # Publishes a "last seen" timestamp for a connector and, when the connector's
  # status allows syncing, the result of a health check of the backing service.
  class Heartbeat
    class << self
      # @param connector_settings [Core::ConnectorSettings] connector to report on
      # @return whatever ElasticConnectorActions.update_connector_fields returns
      def send(connector_settings)
        connector_id = connector_settings.id
        payload = { :last_seen => Time.now }

        if connector_settings.connector_status_allows_sync?
          service_type = connector_settings.service_type
          connector = Connectors::REGISTRY.connector(service_type, connector_settings.configuration)

          if connector.is_healthy?
            payload[:status] = Connectors::ConnectorStatus::CONNECTED
            payload[:error] = nil
          else
            payload[:status] = Connectors::ConnectorStatus::ERROR
            payload[:error] = "Health check for 3d party service failed for connector [#{connector_id}], service type [#{service_type}]. Check the application logs for more information."
          end
        end

        Core::ElasticConnectorActions.update_connector_fields(connector_id, payload)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/scheduler'
10
+ require 'core/connector_settings'
11
+ require 'core/elastic_connector_actions'
12
+ require 'utility/logger'
13
+ require 'utility/exception_tracking'
14
+
15
module Core
  # Scheduler variant whose set of connectors is "all native connectors".
  class NativeScheduler < Core::Scheduler
    # @return [Array<Core::ConnectorSettings>] the native connectors; an empty
    #   array when none are returned or when fetching them raises a StandardError
    #   (the error is logged through Utility::ExceptionTracking, not re-raised)
    def connector_settings
      begin
        native_connectors = Core::ConnectorSettings.fetch_native_connectors
        native_connectors || []
      rescue StandardError => error
        Utility::ExceptionTracking.log_exception(error, 'Could not retrieve native connectors due to unexpected error.')
        []
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Core
  module OutputSink
    # Abstract sink: lists the operations a concrete output sink is expected to
    # provide. Every method here raises until a subclass overrides it.
    class BaseSink
      # Accepts a single document.
      def ingest(_document)
        raise 'not implemented'
      end

      # Accepts a collection of documents.
      def ingest_multiple(_documents)
        raise 'not implemented'
      end

      # Accepts the id of a single document to delete.
      def delete(_id)
        raise 'not implemented'
      end

      # Accepts a collection of document ids to delete.
      def delete_multiple(_ids)
        raise 'not implemented'
      end

      # Flushes pending operations.
      #
      # The sinks in this file define and call this as `flush(size:)`, so that
      # keyword is accepted here too; previously it failed with an ArgumentError
      # (unknown keyword) instead of 'not implemented'. `_size:` stays accepted
      # for backward compatibility.
      def flush(size: nil, _size: nil)
        raise 'not implemented'
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/base_sink'
10
+ require 'utility/logger'
11
+
12
module Core::OutputSink
  # Sink that forwards every operation, in order, to each of the sinks it wraps.
  class CombinedSink < Core::OutputSink::BaseSink
    # @param sinks [Array] sinks that receive every forwarded operation
    def initialize(sinks = [])
      @sinks = sinks
    end

    def ingest(document)
      each_sink { |target| target.ingest(document) }
    end

    def flush(size: nil)
      each_sink { |target| target.flush(size: size) }
    end

    def ingest_multiple(documents)
      each_sink { |target| target.ingest_multiple(documents) }
    end

    def delete(id)
      each_sink { |target| target.delete(id) }
    end

    def delete_multiple(ids)
      each_sink { |target| target.delete_multiple(ids) }
    end

    private

    # Yields every wrapped sink in registration order and returns the sink list.
    def each_sink(&block)
      @sinks.each(&block)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink'
10
+ require 'utility/logger'
11
+
12
module Core::OutputSink
  # Sink that writes every operation it receives to standard output instead of
  # sending it anywhere.
  class ConsoleSink < Core::OutputSink::BaseSink
    # Prints a single document.
    def ingest(document)
      print_header('Got a single document:')
      puts(document)
    end

    # Prints the requested flush size.
    def flush(size: nil)
      print_header('Flushing')
      puts("Flush size: #{size}")
    end

    # Prints a collection of documents.
    def ingest_multiple(documents)
      print_header('Got multiple documents:')
      puts(documents)
    end

    # Prints the id of a document to delete.
    def delete(id)
      print_header("Deleting single id: #{id}")
      puts(id)
    end

    # Prints the ids of the documents to delete.
    def delete_multiple(ids)
      print_header("Deleting several ids: #{ids}")
      puts(ids)
    end

    private

    # Prints a horizontal separator line.
    def print_delim
      puts('----------------------------------------------------')
    end

    # Prints +header+ framed by two separator lines.
    def print_header(header)
      print_delim
      puts(header)
      print_delim
    end
  end
end
@@ -0,0 +1,74 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/numeric/time'
10
+ require 'app/config'
11
+ require 'core/output_sink/base_sink'
12
+ require 'utility/es_client'
13
+ require 'utility/logger'
14
+
15
module Core::OutputSink
  # Sink that queues index/delete operations and sends them to Elasticsearch
  # through the bulk API once the queue reaches the flush threshold.
  class EsSink < Core::OutputSink::BaseSink
    # @param index_name [String] index that receives the operations
    # @param request_pipeline [String] ingest pipeline passed with every bulk request
    # @param flush_threshold [Integer] queue length that triggers an automatic flush
    def initialize(index_name, request_pipeline, flush_threshold = 50)
      super()
      @client = Utility::EsClient.new(App::Config[:elasticsearch])
      @index_name = index_name
      @request_pipeline = request_pipeline
      @operation_queue = []
      @flush_threshold = flush_threshold
    end

    # Queues an index operation for +document+ (its :id becomes the document id).
    # Blank documents are ignored.
    def ingest(document)
      return if document.blank?

      @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
      flush if ready_to_flush?
    end

    # Queues a delete operation for +doc_id+; nil ids are ignored.
    def delete(doc_id)
      return if doc_id.nil?

      @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
      flush if ready_to_flush?
    end

    # Sends all queued operations in batches of at most +size+ (default: the
    # flush threshold).
    #
    # Batches are taken from the FRONT of the queue so operations reach
    # Elasticsearch in the order they were queued; taking them from the back
    # sent the newest batch first whenever the queue exceeded the batch size.
    # A non-positive batch size sends the whole queue at once, since a batch of
    # zero would never drain the queue.
    def flush(size: nil)
      flush_size = size || @flush_threshold
      flush_size = @operation_queue.size unless flush_size.positive?

      send_data(@operation_queue.shift(flush_size)) until @operation_queue.empty?
    end

    # Queues every document of +documents+; nil is treated as "nothing to do".
    def ingest_multiple(documents)
      Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
      documents&.each { |doc| ingest(doc) }
    end

    # Queues a delete for every id of +ids+; nil is treated as "nothing to do".
    def delete_multiple(ids)
      Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
      ids&.each { |id| delete(id) }
    end

    private

    attr_accessor :index_name

    # Sends one batch of operations through the bulk API and logs its size.
    def send_data(ops)
      return if ops.empty?

      @client.bulk(:body => ops, :pipeline => @request_pipeline)
      Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
    end

    def ready_to_flush?
      @operation_queue.size >= @flush_threshold
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/es_sink'
10
+ require 'core/output_sink/console_sink'
11
+ require 'core/output_sink/combined_sink'
12
+
13
# Namespace for the output sinks loaded by the requires in this file
# (Elasticsearch, console and combined sinks).
module Core::OutputSink
end