connectors_service 8.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,142 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash/indifferent_access'
10
+ require 'connectors/connector_status'
11
+ require 'core/elastic_connector_actions'
12
+ require 'utility'
13
+
14
+ module Core
15
# Read-only view over one connector document (an Elasticsearch hit carrying
# `_id` and `_source`) combined with the `_meta` section of the connectors
# index mappings, which supplies fallback pipeline defaults.
class ConnectorSettings

  DEFAULT_REQUEST_PIPELINE = 'ent-search-generic-ingestion'
  DEFAULT_EXTRACT_BINARY_CONTENT = true
  DEFAULT_REDUCE_WHITESPACE = true
  DEFAULT_RUN_ML_INFERENCE = true

  DEFAULT_PAGE_SIZE = 100

  # Error Classes
  class ConnectorNotFoundError < StandardError; end

  # Loads a single connector by id.
  #
  # Raises ConnectorNotFoundError when the response is not flagged as found.
  # The index meta is only requested once the connector is known to exist.
  def self.fetch_by_id(connector_id)
    es_response = ElasticConnectorActions.get_connector(connector_id)
    raise ConnectorNotFoundError, "Connector with id=#{connector_id} was not found." unless es_response[:found]

    new(es_response, ElasticConnectorActions.connectors_meta)
  end

  # Returns settings for every connector whose `is_native` field is true.
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
    query = { term: { is_native: true } }
    fetch_connectors_by_query(query, page_size)
  end

  # Returns settings for every connector with the crawler service type.
  def self.fetch_crawler_connectors(page_size = DEFAULT_PAGE_SIZE)
    query = { term: { service_type: Utility::Constants::CRAWLER_SERVICE_TYPE } }
    fetch_connectors_by_query(query, page_size)
  end

  # Pages through the connectors matching +query+, +page_size+ hits at a time,
  # and wraps each hit in a ConnectorSettings.
  #
  # NOTE: this used to sit below a bare `private`, which has no effect on
  # `def self.` methods, so it has always been publicly callable. It stays
  # public here so existing callers keep working.
  def self.fetch_connectors_by_query(query, page_size)
    connectors_meta = ElasticConnectorActions.connectors_meta

    results = []
    offset = 0
    loop do
      response = ElasticConnectorActions.search_connectors(query, page_size, offset)

      # search_connectors ignores 404s, so the body may carry no hits at all
      hits = response.dig('hits', 'hits') || []
      # an empty page means nothing more can be read, whatever `total` says;
      # without this check the loop would spin forever
      break if hits.empty?

      total = response.dig('hits', 'total', 'value').to_i
      results.concat(hits.map { |hit| new(hit, connectors_meta) })
      break if results.size >= total

      offset += hits.size
    end

    results
  end

  # es_response     - hash with at least `_id` and `_source`
  # connectors_meta - hash with the `_meta` of the connectors index mappings
  def initialize(es_response, connectors_meta)
    @elasticsearch_response = es_response.with_indifferent_access
    @connectors_meta = connectors_meta.with_indifferent_access
  end

  def id
    @elasticsearch_response[:_id]
  end

  # Reads a property from the document `_source`; returns nil when either the
  # `_source` or the property is missing.
  def [](property_name)
    @elasticsearch_response.dig(:_source, property_name)
  end

  def index_name
    self[:index_name]
  end

  def connector_status
    self[:status]
  end

  def connector_status_allows_sync?
    Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC.include?(connector_status)
  end

  def service_type
    self[:service_type]
  end

  def configuration
    self[:configuration]
  end

  def scheduling_settings
    self[:scheduling]
  end

  # The four pipeline readers below resolve their value in the same order:
  # connector document, then index meta default, then the class constant.
  def request_pipeline
    return_if_present(pipeline_setting(:name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
  end

  def extract_binary_content?
    return_if_present(pipeline_setting(:extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
  end

  def reduce_whitespace?
    return_if_present(pipeline_setting(:reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
  end

  def run_ml_inference?
    return_if_present(pipeline_setting(:run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
  end

  # Short human-readable description, e.g. for log messages.
  def formatted
    properties = ["ID: #{id}"]
    properties << "Service type: #{service_type}" if service_type
    "connector (#{properties.join(', ')})"
  end

  def needs_service_type?
    service_type.to_s.strip.empty?
  end

  def valid_index_name?
    index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
  end

  private

  # Looks up a pipeline setting of this connector. The setting is part of the
  # document, so `_source` is consulted first; the top level of the response
  # is still checked afterwards for backward compatibility with the previous
  # lookup location.
  def pipeline_setting(key)
    from_source = @elasticsearch_response.dig(:_source, :pipeline, key)
    return from_source unless from_source.nil?

    @elasticsearch_response.dig(:pipeline, key)
  end

  # Returns the first argument that is not nil (false counts as present),
  # or nil when all of them are nil.
  def return_if_present(*args)
    args.find { |arg| !arg.nil? }
  end
end
142
+ end
@@ -0,0 +1,269 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+ #
9
+ require 'active_support/core_ext/hash'
10
+ require 'connectors/connector_status'
11
+ require 'connectors/sync_status'
12
+ require 'utility'
13
+
14
+ module Core
15
# Class-level helpers that read and update connector and sync-job documents,
# and create the indices they live in, through one memoized Elasticsearch
# client.
class ElasticConnectorActions
  class << self

    # Enables scheduling for the connector and flags it to sync now.
    def force_sync(connector_id)
      update_connector_fields(connector_id, :scheduling => { :enabled => true }, :sync_now => true)
    end

    # Indexes a new connector document and returns its generated id.
    def create_connector(index_name, service_type)
      body = {
        :scheduling => { :enabled => true },
        :index_name => index_name,
        :service_type => service_type
      }
      response = client.index(:index => Utility::Constants::CONNECTORS_INDEX, :body => body)
      response['_id']
    end

    # Fetches the raw connector document; a 404 is not raised but returned as
    # part of the (indifferent-access) response hash.
    def get_connector(connector_id)
      client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
    end

    # Returns the `_meta` section of the mappings of the highest-versioned
    # index behind the connectors alias, or an empty hash when there is none.
    def connectors_meta
      alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
      index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
      alias_mappings.dig(index, 'mappings', '_meta') || {}
    end

    # Runs +query+ against the connectors index, sorted by name, returning
    # +page_size+ hits starting at +offset+. 404 responses are not raised.
    def search_connectors(query, page_size, offset)
      client.search(
        :index => Utility::Constants::CONNECTORS_INDEX,
        :ignore => 404,
        :body => {
          :size => page_size,
          :from => offset,
          :query => query,
          :sort => ['name']
        }
      )
    end

    def update_connector_configuration(connector_id, configuration)
      update_connector_fields(connector_id, :configuration => configuration)
    end

    def enable_connector_scheduling(connector_id, cron_expression)
      payload = { :enabled => true, :interval => cron_expression }
      update_connector_fields(connector_id, :scheduling => payload)
    end

    def disable_connector_scheduling(connector_id)
      payload = { :enabled => false }
      update_connector_fields(connector_id, :scheduling => payload)
    end

    # Stores one configuration field as a { value, label } pair.
    def set_configurable_field(connector_id, field_name, label, value)
      payload = { field_name => { :value => value, :label => label } }
      update_connector_configuration(connector_id, payload)
    end

    # Marks the connector as syncing and records a new in-progress job
    # document. Returns the id of the created job.
    #
    # NOTE(review): relies on Socket being loaded; this file does not require
    # 'socket' itself - confirm it is required elsewhere.
    def claim_job(connector_id)
      update_connector_fields(connector_id,
                              :sync_now => false,
                              :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
                              :last_synced => Time.now)

      body = {
        :connector_id => connector_id,
        :status => Connectors::SyncStatus::IN_PROGRESS,
        :worker_hostname => Socket.gethostname,
        :created_at => Time.now
      }
      job = client.index(:index => Utility::Constants::JOB_INDEX, :body => body)

      job['_id']
    end

    # Updates the connector status; the error field is cleared unless the new
    # status is ERROR.
    #
    # Raises ArgumentError when status is ERROR and no error_message is given.
    def update_connector_status(connector_id, status, error_message = nil)
      if status == Connectors::ConnectorStatus::ERROR && error_message.nil?
        raise ArgumentError, 'error_message is required when status is error'
      end
      body = {
        :status => status,
        :error => status == Connectors::ConnectorStatus::ERROR ? error_message : nil
      }
      update_connector_fields(connector_id, body)
    end

    # Writes the outcome of a sync to both the connector document and the job
    # document. +status+ is a hash; its :error entry decides between FAILED
    # and COMPLETED, and all of its entries are merged into the job update.
    def complete_sync(connector_id, job_id, status)
      sync_status = status[:error] ? Connectors::SyncStatus::FAILED : Connectors::SyncStatus::COMPLETED

      update_connector_fields(connector_id,
                              :last_sync_status => sync_status,
                              :last_sync_error => status[:error],
                              :error => status[:error],
                              :last_synced => Time.now,
                              :last_indexed_document_count => status[:indexed_document_count],
                              :last_deleted_document_count => status[:deleted_document_count])

      body = {
        :doc => {
          :status => sync_status,
          :completed_at => Time.now
        }.merge(status)
      }
      client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
    end

    # Returns the `_id` of every document in +index_name+, read in pages of
    # 1000 through a point in time (PIT) combined with `search_after`.
    def fetch_document_ids(index_name)
      page_size = 1000
      result = []
      pit_id = nil
      begin
        pit_id = client.open_point_in_time(:index => index_name, :keep_alive => '1m', :expand_wildcards => 'all')['id']
        body = {
          :query => { :match_all => {} },
          :sort => [{ :id => { :order => :asc } }],
          :pit => {
            :id => pit_id,
            :keep_alive => '1m'
          },
          :size => page_size,
          :_source => false
        }
        loop do
          response = client.search(:body => body)
          hits = response['hits']['hits']

          result.concat(hits.map { |h| h['_id'] })

          # the PIT id may change between requests: always carry the latest
          # one forward so the next search and the final close use it
          pit_id = response['pit_id'] || pit_id
          break if hits.size < page_size

          body[:search_after] = hits.last['sort']
          body[:pit][:id] = pit_id
        end
      ensure
        # nothing to close (and no reason to mask the original error with a
        # failing close request) when the PIT could not be opened
        client.close_point_in_time(:index => index_name, :body => { :id => pit_id }) if pit_id
      end

      result
    end

    # Makes sure a content index exists, using the text analysis settings for
    # +language_code+ and the default text field mappings.
    def ensure_content_index_exists(index_name, use_icu_locale = false, language_code = nil)
      settings = Utility::Elasticsearch::Index::TextAnalysisSettings.new(:language_code => language_code, :analysis_icu => use_icu_locale).to_h
      mappings = Utility::Elasticsearch::Index::Mappings.default_text_fields_mappings(:connectors_index => true)

      body_payload = { settings: settings, mappings: mappings }
      ensure_index_exists(index_name, body_payload)
    end

    # Creates +index_name+ with +body+ when it does not exist. When it does
    # exist and +body+ carries :mappings, those mappings are applied only if
    # the index currently has none; existing mappings are left untouched.
    def ensure_index_exists(index_name, body = {})
      if client.indices.exists?(:index => index_name)
        return unless body[:mappings]
        Utility::Logger.debug("Index #{index_name} already exists. Checking mappings...")
        Utility::Logger.debug("New mappings: #{body[:mappings]}")
        response = client.indices.get_mapping(:index => index_name)
        existing = response[index_name]['mappings']
        if existing.empty?
          Utility::Logger.debug("Index #{index_name} has no mappings. Adding mappings...")
          client.indices.put_mapping(:index => index_name, :body => body[:mappings], :expand_wildcards => 'all')
          Utility::Logger.debug("Index #{index_name} mappings added.")
        else
          Utility::Logger.debug("Index #{index_name} already has mappings: #{existing}. Skipping...")
        end
      else
        client.indices.create(:index => index_name, :body => body)
        Utility::Logger.debug("Created index #{index_name}")
      end
    end

    # Builds the creation body for a hidden index, optionally registering it
    # as the write index of +alias_name+ and attaching +mappings+.
    def system_index_body(alias_name: nil, mappings: nil)
      body = {
        :settings => {
          :index => {
            :hidden => true,
            :number_of_replicas => 0,
            :auto_expand_replicas => '0-5'
          }
        }
      }
      body[:aliases] = { alias_name => { :is_write_index => true } } unless alias_name.nil? || alias_name.empty?
      body[:mappings] = mappings unless mappings.nil?
      body
    end

    # DO NOT USE this method
    # Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
    def ensure_connectors_index_exists
      mappings = {
        :properties => {
          :api_key_id => { :type => :keyword },
          :configuration => { :type => :object },
          :error => { :type => :text },
          :index_name => { :type => :keyword },
          :last_seen => { :type => :date },
          :last_synced => { :type => :date },
          :last_indexed_document_count => { :type => :integer },
          :last_deleted_document_count => { :type => :integer },
          :scheduling => {
            :properties => {
              :enabled => { :type => :boolean },
              :interval => { :type => :text }
            }
          },
          :service_type => { :type => :keyword },
          :status => { :type => :keyword },
          :sync_error => { :type => :text },
          :sync_now => { :type => :boolean },
          :sync_status => { :type => :keyword }
        }
      }
      ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
    end

    # DO NOT USE this method
    # Creation of job index should be handled by Kibana, this method is only used by ftest.rb
    def ensure_job_index_exists
      mappings = {
        :properties => {
          :connector_id => { :type => :keyword },
          :status => { :type => :keyword },
          :error => { :type => :text },
          :worker_hostname => { :type => :keyword },
          :indexed_document_count => { :type => :integer },
          :deleted_document_count => { :type => :integer },
          :created_at => { :type => :date },
          :completed_at => { :type => :date }
        }
      }
      ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
    end

    # Partially updates the connector document with +doc+ (refreshing the
    # index and retrying up to 3 times on version conflicts). Does nothing
    # when +doc+ is empty.
    def update_connector_fields(connector_id, doc = {})
      return if doc.empty?
      client.update(
        :index => Utility::Constants::CONNECTORS_INDEX,
        :id => connector_id,
        :body => { :doc => doc },
        :refresh => true,
        :retry_on_conflict => 3
      )
    end

    private

    def client
      @client ||= Utility::EsClient.new(App::Config[:elasticsearch])
    end

    # Given index names shaped like "<alias>-v<N>", returns the name carrying
    # the largest N.
    def get_latest_index_in_alias(alias_name, indices)
      index_versions = indices.map { |index| index.gsub("#{alias_name}-v", '').to_i }
      index_version = index_versions.max # gets the largest suffix number
      "#{alias_name}-v#{index_version}"
    end
  end
end
269
+ end
@@ -0,0 +1,32 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+ require 'core/connector_settings'
12
+ require 'core/elastic_connector_actions'
13
+
14
+ module Core
15
# Reports that a connector is still alive by stamping `last_seen` on its
# document and, when its status allows syncing, refreshing status and error
# from a health check of the connector implementation.
class Heartbeat
  class << self
    # Sends one heartbeat for +connector_settings+.
    #
    # The health check runs only when
    # `connector_settings.connector_status_allows_sync?` is true; otherwise
    # only `last_seen` is updated. Returns whatever
    # Core::ElasticConnectorActions.update_connector_fields returns.
    #
    # NOTE(review): defining `send` on the singleton shadows Object#send for
    # this class, so `Heartbeat.send(:some_method)` does not dispatch
    # dynamically. The name is part of the public interface and is kept.
    def send(connector_settings)
      doc = { :last_seen => Time.now }

      if connector_settings.connector_status_allows_sync?
        connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
        if connector_instance.is_healthy?
          doc[:status] = Connectors::ConnectorStatus::CONNECTED
          # explicitly reset any error left over from a previous failure
          doc[:error] = nil
        else
          doc[:status] = Connectors::ConnectorStatus::ERROR
          doc[:error] = "Health check for 3rd-party service failed for connector [#{connector_settings.id}], service type [#{connector_settings.service_type}]. Check the application logs for more information."
        end
      end

      Core::ElasticConnectorActions.update_connector_fields(connector_settings.id, doc)
    end
  end
end
32
+ end
@@ -0,0 +1,24 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/scheduler'
10
+ require 'core/connector_settings'
11
+ require 'core/elastic_connector_actions'
12
+ require 'utility/logger'
13
+ require 'utility/exception_tracking'
14
+
15
+ module Core
16
# Scheduler variant whose connector list is the set of native connectors.
class NativeScheduler < Core::Scheduler
  # Returns the result of Core::ConnectorSettings.fetch_native_connectors,
  # falling back to an empty array when that result is nil/false or when a
  # StandardError is raised (the error is handed to
  # Utility::ExceptionTracking.log_exception).
  def connector_settings
    begin
      native_connectors = Core::ConnectorSettings.fetch_native_connectors
      native_connectors || []
    rescue StandardError => error
      Utility::ExceptionTracking.log_exception(error, 'Could not retrieve native connectors due to unexpected error.')
      []
    end
  end
end
24
+ end
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module Core
10
+ module OutputSink
11
# Interface shared by all output sinks. Every operation raises a
# RuntimeError ('not implemented') until a subclass overrides it.
class BaseSink
  # Accepts a single document.
  def ingest(_document)
    raise RuntimeError, 'not implemented'
  end

  # Accepts a collection of documents.
  def ingest_multiple(_documents)
    raise RuntimeError, 'not implemented'
  end

  # Accepts the id of a single document to remove.
  def delete(_id)
    raise RuntimeError, 'not implemented'
  end

  # Accepts a collection of document ids to remove.
  def delete_multiple(_ids)
    raise RuntimeError, 'not implemented'
  end

  # Accepts an optional batch size for flushing.
  def flush(_size: nil)
    raise RuntimeError, 'not implemented'
  end
end
32
+ end
33
+ end
@@ -0,0 +1,38 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/base_sink'
10
+ require 'utility/logger'
11
+
12
+ module Core::OutputSink
13
# Sink that forwards every operation, in order, to each sink in the list it
# was built with.
class CombinedSink < Core::OutputSink::BaseSink
  # sinks - the sinks to forward to (defaults to an empty list)
  def initialize(sinks = [])
    @sinks = sinks
  end

  # Passes the document on to every sink.
  def ingest(document)
    @sinks.each do |target|
      target.ingest(document)
    end
  end

  # Asks every sink to flush, handing over the same size.
  def flush(size: nil)
    @sinks.each do |target|
      target.flush(size: size)
    end
  end

  # Passes the documents on to every sink.
  def ingest_multiple(documents)
    @sinks.each do |target|
      target.ingest_multiple(documents)
    end
  end

  # Passes the id on to every sink.
  def delete(id)
    @sinks.each do |target|
      target.delete(id)
    end
  end

  # Passes the ids on to every sink.
  def delete_multiple(ids)
    @sinks.each do |target|
      target.delete_multiple(ids)
    end
  end
end
38
+ end
@@ -0,0 +1,51 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink'
10
+ require 'utility/logger'
11
+
12
+ module Core::OutputSink
13
# Sink that prints every operation to standard output instead of storing it.
class ConsoleSink < Core::OutputSink::BaseSink
  # Prints a header followed by the document.
  def ingest(document)
    print_header('Got a single document:')
    puts(document)
  end

  # Prints a header followed by the requested flush size.
  def flush(size: nil)
    print_header('Flushing')
    puts("Flush size: #{size}")
  end

  # Prints a header followed by the documents.
  def ingest_multiple(documents)
    print_header('Got multiple documents:')
    puts(documents)
  end

  # Prints a header naming the id, followed by the id itself.
  def delete(id)
    print_header("Deleting single id: #{id}")
    puts(id)
  end

  # Prints a header naming the ids, followed by the ids themselves.
  def delete_multiple(ids)
    print_header("Deleting several ids: #{ids}")
    puts(ids)
  end

  private

  # Prints the horizontal rule used above and below headers.
  def print_delim
    puts('----------------------------------------------------')
  end

  # Prints +header+ framed by two horizontal rules.
  def print_header(header)
    print_delim
    puts(header)
    print_delim
  end
end
51
+ end
@@ -0,0 +1,74 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/numeric/time'
10
+ require 'app/config'
11
+ require 'core/output_sink/base_sink'
12
+ require 'utility/es_client'
13
+ require 'utility/logger'
14
+
15
+ module Core::OutputSink
16
# Sink that queues index/delete operations and sends them to Elasticsearch
# through the bulk API, using the configured ingest pipeline.
class EsSink < Core::OutputSink::BaseSink
  # index_name       - index every queued operation targets
  # request_pipeline - pipeline name passed along with each bulk request
  # flush_threshold  - queue length at which the queue is flushed
  #                    automatically, and the default bulk batch size
  def initialize(index_name, request_pipeline, flush_threshold = 50)
    super()
    @client = Utility::EsClient.new(App::Config[:elasticsearch])
    @index_name = index_name
    @request_pipeline = request_pipeline
    @operation_queue = []
    @flush_threshold = flush_threshold
  end

  # Queues an index operation for +document+ (blank documents are skipped)
  # and flushes once the threshold is reached.
  def ingest(document)
    return if document.blank?

    @operation_queue << { :index => { :_index => index_name, :_id => document[:id], :data => document } }
    flush if ready_to_flush?
  end

  # Queues a delete operation for +doc_id+ (nil ids are skipped) and flushes
  # once the threshold is reached.
  def delete(doc_id)
    return if doc_id.nil?

    @operation_queue << { :delete => { :_index => index_name, :_id => doc_id } }
    flush if ready_to_flush?
  end

  # Sends everything queued so far, in batches of +size+ operations
  # (defaults to the flush threshold).
  def flush(size: nil)
    flush_size = size || @flush_threshold
    # a batch size below 1 would never drain the queue: send it all at once
    flush_size = @operation_queue.size if flush_size < 1

    until @operation_queue.empty?
      # shift takes from the head of the queue, so operations reach
      # Elasticsearch in the order they were enqueued (pop would send the
      # newest batch first)
      send_data(@operation_queue.shift(flush_size))
    end
  end

  # Queues every document of +documents+; nil is treated as an empty list.
  def ingest_multiple(documents)
    Utility::Logger.debug "Enqueueing #{documents&.size} documents to the index #{index_name}."
    (documents || []).each { |doc| ingest(doc) }
  end

  # Queues a delete for every id of +ids+; nil is treated as an empty list.
  def delete_multiple(ids)
    Utility::Logger.debug "Enqueueing #{ids&.size} ids to delete from the index #{index_name}."
    (ids || []).each { |id| delete(id) }
  end

  private

  attr_accessor :index_name

  # Issues one bulk request for +ops+; does nothing for an empty batch.
  def send_data(ops)
    return if ops.empty?

    @client.bulk(:body => ops, :pipeline => @request_pipeline)
    Utility::Logger.info "Applied #{ops.size} upsert/delete operations to the index #{index_name}."
  end

  def ready_to_flush?
    @operation_queue.size >= @flush_threshold
  end
end
74
+ end
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/output_sink/es_sink'
10
+ require 'core/output_sink/console_sink'
11
+ require 'core/output_sink/combined_sink'
12
+
13
+ module Core::OutputSink; end