connectors_service 8.5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/bin/connectors_service +4 -0
- data/bin/list_connectors +4 -0
- data/config/connectors.yml +25 -0
- data/lib/app/app.rb +25 -0
- data/lib/app/config.rb +132 -0
- data/lib/app/console_app.rb +278 -0
- data/lib/app/dispatcher.rb +121 -0
- data/lib/app/menu.rb +104 -0
- data/lib/app/preflight_check.rb +134 -0
- data/lib/app/version.rb +10 -0
- data/lib/connectors/base/adapter.rb +119 -0
- data/lib/connectors/base/connector.rb +57 -0
- data/lib/connectors/base/custom_client.rb +111 -0
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +32 -0
- data/lib/connectors/example/connector.rb +57 -0
- data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
- data/lib/connectors/gitlab/adapter.rb +50 -0
- data/lib/connectors/gitlab/connector.rb +67 -0
- data/lib/connectors/gitlab/custom_client.rb +44 -0
- data/lib/connectors/gitlab/extractor.rb +69 -0
- data/lib/connectors/mongodb/connector.rb +138 -0
- data/lib/connectors/registry.rb +52 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors.rb +16 -0
- data/lib/connectors_app/// +13 -0
- data/lib/connectors_service.rb +24 -0
- data/lib/connectors_utility.rb +16 -0
- data/lib/core/configuration.rb +48 -0
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/heartbeat.rb +32 -0
- data/lib/core/native_scheduler.rb +24 -0
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/output_sink.rb +13 -0
- data/lib/core/scheduler.rb +158 -0
- data/lib/core/single_scheduler.rb +29 -0
- data/lib/core/sync_job_runner.rb +111 -0
- data/lib/core.rb +16 -0
- data/lib/list_connectors.rb +22 -0
- data/lib/stubs/app_config.rb +35 -0
- data/lib/stubs/connectors/stats.rb +35 -0
- data/lib/stubs/service_type.rb +13 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/elasticsearch/index/language_data.yml +111 -0
- data/lib/utility/elasticsearch/index/mappings.rb +104 -0
- data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +84 -0
- data/lib/utility/middleware/basic_auth.rb +27 -0
- data/lib/utility/middleware/bearer_auth.rb +27 -0
- data/lib/utility/middleware/restrict_hostnames.rb +73 -0
- data/lib/utility.rb +16 -0
- metadata +487 -0
@@ -0,0 +1,158 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'time'
|
10
|
+
require 'fugit'
|
11
|
+
require 'core/connector_settings'
|
12
|
+
require 'utility/cron'
|
13
|
+
require 'utility/logger'
|
14
|
+
require 'utility/exception_tracking'
|
15
|
+
|
16
|
+
module Core
|
17
|
+
class Scheduler
|
18
|
+
# Creates a scheduler that is not shutting down.
#
# poll_interval      - pause between polling rounds; #when_triggered passes it to +sleep+
#                      and skips the pause when it is not greater than zero.
# heartbeat_interval - amount added to a connector's last_seen time when deciding whether
#                      a heartbeat is due.
def initialize(poll_interval, heartbeat_interval)
  @poll_interval, @heartbeat_interval = poll_interval, heartbeat_interval
  @is_shutting_down = false
end
|
23
|
+
|
24
|
+
# Abstract hook: subclasses return the collection of connector settings that
# #when_triggered iterates over on every polling round.
#
# Raises RuntimeError ('Not implemented') in this base class.
def connector_settings
  raise(RuntimeError, 'Not implemented')
end
|
27
|
+
|
28
|
+
# Polling loop. On every round each entry of #connector_settings is checked and the
# given block is called with (settings, action) for each action that is due; the
# actions are :sync, :heartbeat and :configuration, in that order.
#
# The loop ends once #shutdown has been requested. Any StandardError raised during a
# round is logged and the loop carries on with the next round.
def when_triggered
  loop do
    connector_settings.each do |cs|
      yield cs, :sync if sync_triggered?(cs)
      yield cs, :heartbeat if heartbeat_triggered?(cs)
      yield cs, :configuration if configuration_triggered?(cs)
    end

    break if @is_shutting_down
  rescue StandardError => e
    Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
  ensure
    # Runs after every round, including failed ones; no pause once shutdown was requested.
    pause_needed = @poll_interval > 0 && !@is_shutting_down
    if pause_needed
      Utility::Logger.info("Sleeping for #{@poll_interval} seconds in #{self.class}.")
      sleep(@poll_interval)
    end
  end
end
|
53
|
+
|
54
|
+
# Requests the polling loop to stop: logs the request and raises the shutdown flag
# that #when_triggered checks at the end of each round.
def shutdown
  scheduler_name = self.class.name
  Utility::Logger.info("Shutting down scheduler #{scheduler_name}.")
  @is_shutting_down = true
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
# Decides whether a content sync should start now for the given connector.
#
# Checks, in order:
#   1. the service type is registered;
#   2. the index name is valid;
#   3. the connector status allows syncing;
#   4. the sync_now flag (forces a sync);
#   5. scheduling is present and enabled;
#   6. the connector has never synced (forces an initial sync);
#   7. a schedule interval is configured, converts from Quartz to crontab and parses;
#   8. the first trigger time after last_synced is already in the past.
#
# Returns true when a sync should run, false otherwise. A last_synced value that cannot
# be parsed as a time is logged and answered with false, so that one malformed connector
# document does not abort the evaluation of the remaining connectors in the same poll.
def sync_triggered?(connector_settings)
  return false unless connector_registered?(connector_settings.service_type)

  unless connector_settings.valid_index_name?
    Utility::Logger.info("The index name of #{connector_settings.formatted} is invalid.")
    return false
  end

  unless connector_settings.connector_status_allows_sync?
    Utility::Logger.info("#{connector_settings.formatted.capitalize} is in status \"#{connector_settings.connector_status}\" and won't sync yet. Connector needs to be in one of the following statuses: #{Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC} to run.")
    return false
  end

  # Sync when sync_now flag is true for the connector
  if connector_settings[:sync_now] == true
    Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
    return true
  end

  # Don't sync if sync is explicitly disabled
  scheduling_settings = connector_settings.scheduling_settings
  unless scheduling_settings.present? && scheduling_settings[:enabled] == true
    Utility::Logger.info("#{connector_settings.formatted.capitalize} scheduling is disabled.")
    return false
  end

  # We want to sync when sync never actually happened
  last_synced = connector_settings[:last_synced]
  if last_synced.nil? || last_synced.empty?
    Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
    return true
  end

  current_schedule = scheduling_settings[:interval]

  # Don't sync if there is no actual scheduling interval
  if current_schedule.nil? || current_schedule.empty?
    Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
    return false
  end

  current_schedule = begin
    Utility::Cron.quartz_to_crontab(current_schedule)
  rescue StandardError => e
    # current_schedule still holds the Quartz expression here: the assignment never happened.
    Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
    return false
  end
  cron_parser = Fugit::Cron.parse(current_schedule)

  # Don't sync if the scheduling interval is non-parsable
  unless cron_parser
    Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
    return false
  end

  # Don't sync (and don't raise) if the stored last sync time is not a parsable time
  last_synced_time = begin
    Time.parse(last_synced)
  rescue StandardError => e
    Utility::ExceptionTracking.log_exception(e, "Unable to parse last_synced (#{last_synced}) for #{connector_settings.formatted}.")
    return false
  end

  next_trigger_time = cron_parser.next_time(last_synced_time)

  # Sync if next trigger for the connector is in past
  if next_trigger_time < Time.now
    Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
    return true
  end

  false
end
|
127
|
+
|
128
|
+
# Tells whether a heartbeat is due for the given connector.
#
# Returns false for an unregistered service type. Returns true when last_seen is
# missing, empty or cannot be parsed as a time; otherwise returns whether
# last_seen plus the heartbeat interval lies before the current time.
def heartbeat_triggered?(connector_settings)
  return false unless connector_registered?(connector_settings.service_type)

  raw_last_seen = connector_settings[:last_seen]
  return true if raw_last_seen.nil? || raw_last_seen.empty?

  seen_at =
    begin
      Time.parse(raw_last_seen)
    rescue StandardError
      Utility::Logger.warn("Unable to parse last_seen #{raw_last_seen}")
      nil
    end
  # An unparsable timestamp counts as "never seen".
  return true unless seen_at

  seen_at + @heartbeat_interval < Time.now
end
|
142
|
+
|
143
|
+
# Tells whether the connector still needs its configuration step: true only when the
# service type is registered and the connector status equals
# Connectors::ConnectorStatus::CREATED.
def configuration_triggered?(connector_settings)
  if connector_registered?(connector_settings.service_type)
    current_status = connector_settings.connector_status
    current_status == Connectors::ConnectorStatus::CREATED
  else
    false
  end
end
|
148
|
+
|
149
|
+
# Returns true when Connectors::REGISTRY knows the service type; otherwise logs that
# the service type is not supported and returns false.
def connector_registered?(service_type)
  return true if Connectors::REGISTRY.registered?(service_type)

  Utility::Logger.info("The service type (#{service_type}) is not supported.")
  false
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/scheduler'
|
10
|
+
require 'core/connector_settings'
|
11
|
+
require 'utility/logger'
|
12
|
+
require 'utility/exception_tracking'
|
13
|
+
|
14
|
+
module Core
|
15
|
+
# Scheduler that evaluates a single connector, looked up by id each time
# #connector_settings is called.
class SingleScheduler < Core::Scheduler
  # connector_id       - id handed to Core::ConnectorSettings.fetch_by_id.
  # poll_interval      - forwarded to Core::Scheduler.
  # heartbeat_interval - forwarded to Core::Scheduler.
  def initialize(connector_id, poll_interval, heartbeat_interval)
    super(poll_interval, heartbeat_interval)
    @connector_id = connector_id
  end

  # Returns a one-element array with the fetched settings, or an empty array when the
  # lookup raises a StandardError (the error is logged).
  def connector_settings
    [Core::ConnectorSettings.fetch_by_id(@connector_id)]
  rescue StandardError => e
    Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
    []
  end
end
|
29
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/connector_status'
|
10
|
+
require 'connectors/registry'
|
11
|
+
require 'core/output_sink'
|
12
|
+
require 'utility'
|
13
|
+
|
14
|
+
module Core
|
15
|
+
# Raised when the configurable fields declared by a connector class differ from the
# fields stored in the connector's configuration.
class IncompatibleConfigurableFieldsError < StandardError
  # service_type    - service type of the connector class.
  # expected_fields - field names the connector class declares.
  # actual_fields   - field names found in the stored configuration.
  def initialize(service_type, expected_fields, actual_fields)
    message = "Connector of service_type '#{service_type}' expected configurable fields: #{expected_fields}, actual stored fields: #{actual_fields}"
    super(message)
  end
end
|
20
|
+
|
21
|
+
class SyncJobRunner
|
22
|
+
# Prepares a sync run for one connector: keeps the settings, builds the Elasticsearch
# sink for the connector's index and request pipeline, resolves the connector class and
# a connector instance from the registry, and resets the run counters.
def initialize(connector_settings)
  @connector_settings = connector_settings
  @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, connector_settings.request_pipeline)

  registry = Connectors::REGISTRY
  @connector_class = registry.connector_class(connector_settings.service_type)
  @connector_instance = registry.connector(connector_settings.service_type, connector_settings.configuration)

  @status = { indexed_document_count: 0, deleted_document_count: 0, error: nil }
end
|
33
|
+
|
34
|
+
# Entry point of a sync run: validates the stored configuration first (which raises
# when it does not match the connector class) and then performs the sync.
def execute
  validate_configuration!
  do_sync!
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# Runs one sync job for the connector.
#
# Claims a job (returning early when none could be claimed), health-checks the
# connector, ingests every document the connector yields, deletes the documents that
# exist in the index but were not yielded this time, and flushes the sink.
#
# Any StandardError during the run is recorded in @status[:error], logged, and the
# connector is moved to the ERROR status. Whatever the outcome, the job is completed
# with a copy of the final status.
def do_sync!
  Utility::Logger.info("Starting sync for connector #{@connector_settings.id}.")

  job_id = ElasticConnectorActions.claim_job(@connector_settings.id)

  unless job_id.present?
    Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
    return
  end

  begin
    Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")

    @connector_instance.do_health_check!

    incoming_ids = []
    existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)

    Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")

    @connector_instance.yield_documents do |document|
      document = add_ingest_metadata(document)
      @sink.ingest(document)
      incoming_ids << document[:id]
      @status[:indexed_document_count] += 1
    end

    # Array difference already ignores duplicates on the right-hand side.
    ids_to_delete = existing_ids - incoming_ids

    Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")

    ids_to_delete.each do |id|
      @sink.delete(id)
      @status[:deleted_document_count] += 1
    end

    @sink.flush
  rescue StandardError => e
    @status[:error] = e.message
    Utility::ExceptionTracking.log_exception(e)
    ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
  ensure
    Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
    Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents from #{@connector_settings.index_name}.")

    ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)

    if @status[:error]
      Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@status[:error]}.")
    else
      Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
    end
  end
end
|
95
|
+
|
96
|
+
# Adds ingest flags to the document (mutating it) for every option that is switched on
# in the connector settings; flags that are off are left out entirely.
# Returns the same document object.
def add_ingest_metadata(document)
  settings = @connector_settings

  if settings.extract_binary_content?
    document['_extract_binary_content'] = settings.extract_binary_content?
  end
  if settings.reduce_whitespace?
    document['_reduce_whitespace'] = settings.reduce_whitespace?
  end
  if settings.run_ml_inference?
    document['_run_ml_inference'] = settings.run_ml_inference?
  end

  document
end
|
103
|
+
|
104
|
+
# Compares the field names the connector class declares as configurable with the field
# names of the stored configuration (both as sorted strings).
#
# Raises IncompatibleConfigurableFieldsError when the two lists differ.
def validate_configuration!
  sorted_names = ->(fields) { fields.keys.map(&:to_s).sort }

  expected_fields = sorted_names.call(@connector_class.configurable_fields)
  actual_fields = sorted_names.call(@connector_settings.configuration)
  return if expected_fields == actual_fields

  raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields)
end
|
110
|
+
end
|
111
|
+
end
|
data/lib/core.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/configuration'
|
10
|
+
require 'core/connector_settings'
|
11
|
+
require 'core/elastic_connector_actions'
|
12
|
+
require 'core/heartbeat'
|
13
|
+
require 'core/scheduler'
|
14
|
+
require 'core/single_scheduler'
|
15
|
+
require 'core/native_scheduler'
|
16
|
+
require 'core/sync_job_runner'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors/registry'
|
10
|
+
require 'utility'
|
11
|
+
|
12
|
+
# Command that logs the display name of every connector in Connectors::REGISTRY.
class ListConnectors
  class << self
    # Runs inside the execution environment set up from App::Config and logs a header,
    # one line per registered connector, and a closing line.
    def run!
      Utility::Environment.set_execution_environment(App::Config) do
        Utility::Logger.info('Registered connectors:')

        registry = Connectors::REGISTRY
        registry.registered_connectors.each do |service_type|
          display_name = registry.connector_class(service_type).display_name
          Utility::Logger.info("- #{display_name}")
        end

        Utility::Logger.info('Bye')
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# Stub configuration object: every reader returns a fixed value.
class AppConfig
  # Returns a new Hash on every call.
  def self.connectors
    { 'transient_server_error_retry_delay_minutes' => 5 }
  end

  def self.content_source_sync_max_errors
    1000
  end

  def self.content_source_sync_max_consecutive_errors
    10
  end

  def self.content_source_sync_max_error_ratio
    0.15
  end

  def self.content_source_sync_error_ratio_window_size
    100
  end

  def self.content_source_sync_thumbnails_enabled?
    true
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/inflector'
|
10
|
+
|
11
|
+
module Connectors
|
12
|
+
# Stub statistics module: nothing is recorded.
module Stats
  class << self
    # Calls the given block and returns its result; key and value are ignored.
    def measure(_key, _value = nil, &block)
      block.call
    end

    # Does nothing and returns nil.
    def increment(key, value = 1)
      # no op
    end

    # Prepends the "connectors." namespace to the key.
    def prefix_key(key)
      "connectors.#{key}"
    end

    # Derives an underscored key from a class name, e.g. for Connectors::GoogleDrive::Adapter:
    #   deconstantize (optional) -> Connectors::GoogleDrive
    #   demodulize               -> GoogleDrive
    #   underscore               -> google_drive
    def class_key(klass, deconstantize = true)
      inflector = ActiveSupport::Inflector
      qualified_name = klass.name
      qualified_name = inflector.deconstantize(qualified_name) if deconstantize
      inflector.underscore(inflector.demodulize(qualified_name))
    end
  end
end
|
35
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
# Stub service type whose #classify always answers the fixed string 'classify'.
class ServiceType
  # Returns 'classify'.
  def classify
    'classify'
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
# Shared field names, index names and service type identifiers.
class Constants
  # Reserved document field names (frozen lists).
  THUMBNAIL_FIELDS = %w[_thumbnail_80x100 _thumbnail_310x430].freeze
  SUBEXTRACTOR_RESERVED_FIELDS = %w[_subextracted_as_of _subextracted_version].freeze

  # Permission field names.
  ALLOW_FIELD = '_allow_permissions'
  DENY_FIELD = '_deny_permissions'

  # Index names and the prefix used for content index names.
  CONNECTORS_INDEX = '.elastic-connectors'
  JOB_INDEX = '.elastic-connectors-sync-jobs'
  CONTENT_INDEX_PREFIX = 'search-'

  # Service type identifier of the crawler.
  CRAWLER_SERVICE_TYPE = 'elastic-crawler'
end
|
20
|
+
end
|
data/lib/utility/cron.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
# taken from https://regex101.com/r/cU7zG2/1
|
13
|
+
# previous regexp allowed days of the week as [0-6], but it's not correct because the Kibana scheduler
|
14
|
+
# is using [1-7] for days of the week, aligned with the Quartz scheduler: see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
|
15
|
+
# But just replacing with [1-7] would also be incorrect, since according to the Cron spec, the days of the week
|
16
|
+
# are 1-6 for Monday-Saturday, and 0 or 7 for Sunday, 7 being non-standard but still widely used. So, we need to
|
17
|
+
# allow for 0-7.
|
18
|
+
CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/
|
19
|
+
|
20
|
+
# see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
|
21
|
+
module Cron
|
22
|
+
# Rejects Quartz-only modifiers that cannot be expressed in crontab syntax.
#
# expr - one field of a cron expression (String).
#
# Returns expr unchanged when it is supported.
# Raises StandardError when the field contains '#', or an 'L' or 'W' modifier.
def self.check(expr)
  raise StandardError.new("Unsupported expression #{expr} with #") if expr.include?('#')

  # Month and day names are removed before looking for modifiers, so that the letters
  # inside them (the L of JUL, the W of WED) are not mistaken for the L/W modifiers.
  without_names = expr.gsub(/JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC|MON|TUE|WED|THU|FRI|SAT|SUN/, '')
  raise StandardError.new("Unsupported expression #{expr} with L") if without_names.include?('L')
  raise StandardError.new("Unsupported expression #{expr} with W") if without_names.include?('W')

  expr
end
|
29
|
+
|
30
|
+
# Converts a Quartz cron expression (seconds minutes hours day-of-month month
# day-of-week [year]) into a five-field crontab expression (minutes hours day-of-month
# month day-of-week). Seconds and year are dropped, '?' is treated as '*', and the
# day-of-week field is passed through scheduler_dow_to_crontab.
#
# expression - the Quartz expression (String).
#
# Returns the crontab expression (String).
# Raises StandardError when the expression does not match CRON_REGEXP or uses a
# modifier rejected by check.
def self.quartz_to_crontab(expression)
  # ? is not supported
  normalized = expression.tr('?', '*')

  # CRON_REGEXP also matches blank lines, comments and "name =" lines; in those cases
  # the field groups are nil, which is not a schedule either.
  fields = normalized.match(CRON_REGEXP)
  raise StandardError.new("Unknown format #{expression}") unless fields && fields[2]

  # Groups: 2 = seconds, 3 = minutes, 4 = hours, 5 = day of month, 6 = month,
  # 7 = day of week, 9 = year.
  minutes = fields[3]
  hours = fields[4]
  day_of_month = check(fields[5])
  month = check(fields[6])
  day_of_week = scheduler_dow_to_crontab(check(fields[7])).to_s

  # Unix cron has five: minute, hour, day, month, and dayofweek
  # Quartz adds seconds and year
  converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

  Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

  converted_expression
end
|
64
|
+
|
65
|
+
# As described above, Quartz uses 1-7 for days of the week, starting with Sunday,
|
66
|
+
# while Unix cron uses 0-6, starting with Sunday, and also 7 as an extra non-standard index for Sunday.
|
67
|
+
# (see https://en.wikipedia.org/wiki/Cron for more details)
|
68
|
+
# This means that we need to shift the Quartz day of week that are between 1 and 7 by minus one, but we also allow 0
|
69
|
+
# in case it's not a quartz expression but already the cron standard.
|
70
|
+
# See also the code in connectors-python that does the same thing: https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
|
71
|
+
# Shifts the day numbers of a Quartz day-of-week field (1-7) to crontab numbering by
# subtracting one from each of them, including inside ranges ("2-6") and lists
# ("2,4,6"). Step values (the number after "/") are left untouched.
#
# A field without digits (names, "*") is returned unchanged, and so is a field that
# contains a 0 day, since that is taken to be crontab numbering already.
#
# day - the day-of-week field (String).
#
# Returns an Integer for a bare number (e.g. "3" gives 2) and a String in all
# other cases.
def self.scheduler_dow_to_crontab(day)
  return day unless /\d/.match?(day)

  # Numbers that are not preceded by "/" (or by another digit) are day numbers.
  day_number = %r{(?<![/\d])\d+}
  day_numbers = day.scan(day_number)
  return day if day_numbers.empty? || day_numbers.any? { |number| number.to_i.zero? }
  return day.to_i - 1 if /\A\d+\z/.match?(day)

  day.gsub(day_number) { |number| (number.to_i - 1).to_s }
end
|
80
|
+
end
|
81
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
---
|
2
|
+
da:
|
3
|
+
name: Danish
|
4
|
+
stemmer: danish
|
5
|
+
stop_words: _danish_
|
6
|
+
de:
|
7
|
+
name: German
|
8
|
+
stemmer: light_german
|
9
|
+
stop_words: _german_
|
10
|
+
en:
|
11
|
+
name: English
|
12
|
+
stemmer: light_english
|
13
|
+
stop_words: _english_
|
14
|
+
es:
|
15
|
+
name: Spanish
|
16
|
+
stemmer: light_spanish
|
17
|
+
stop_words: _spanish_
|
18
|
+
fr:
|
19
|
+
name: French
|
20
|
+
stemmer: light_french
|
21
|
+
stop_words: _french_
|
22
|
+
custom_filter_definitions:
|
23
|
+
fr-elision:
|
24
|
+
type: elision
|
25
|
+
articles:
|
26
|
+
- l
|
27
|
+
- m
|
28
|
+
- t
|
29
|
+
- qu
|
30
|
+
- n
|
31
|
+
- s
|
32
|
+
- j
|
33
|
+
- d
|
34
|
+
- c
|
35
|
+
- jusqu
|
36
|
+
- quoiqu
|
37
|
+
- lorsqu
|
38
|
+
- puisqu
|
39
|
+
articles_case: true
|
40
|
+
prepended_filters:
|
41
|
+
- fr-elision
|
42
|
+
it:
|
43
|
+
name: Italian
|
44
|
+
stemmer: light_italian
|
45
|
+
stop_words: _italian_
|
46
|
+
custom_filter_definitions:
|
47
|
+
it-elision:
|
48
|
+
type: elision
|
49
|
+
articles:
|
50
|
+
- c
|
51
|
+
- l
|
52
|
+
- all
|
53
|
+
- dall
|
54
|
+
- dell
|
55
|
+
- nell
|
56
|
+
- sull
|
57
|
+
- coll
|
58
|
+
- pell
|
59
|
+
- gl
|
60
|
+
- agl
|
61
|
+
- dagl
|
62
|
+
- degl
|
63
|
+
- negl
|
64
|
+
- sugl
|
65
|
+
- un
|
66
|
+
- m
|
67
|
+
- t
|
68
|
+
- s
|
69
|
+
- v
|
70
|
+
- d
|
71
|
+
articles_case: true
|
72
|
+
prepended_filters:
|
73
|
+
- it-elision
|
74
|
+
ja:
|
75
|
+
name: Japanese
|
76
|
+
stemmer: light_english
|
77
|
+
stop_words: _english_
|
78
|
+
postpended_filters:
|
79
|
+
- cjk_bigram
|
80
|
+
ko:
|
81
|
+
name: Korean
|
82
|
+
stemmer: light_english
|
83
|
+
stop_words: _english_
|
84
|
+
postpended_filters:
|
85
|
+
- cjk_bigram
|
86
|
+
nl:
|
87
|
+
name: Dutch
|
88
|
+
stemmer: dutch
|
89
|
+
stop_words: _dutch_
|
90
|
+
pt:
|
91
|
+
name: Portuguese
|
92
|
+
stemmer: light_portuguese
|
93
|
+
stop_words: _portuguese_
|
94
|
+
pt-br:
|
95
|
+
name: Portuguese (Brazil)
|
96
|
+
stemmer: brazilian
|
97
|
+
stop_words: _brazilian_
|
98
|
+
ru:
|
99
|
+
name: Russian
|
100
|
+
stemmer: russian
|
101
|
+
stop_words: _russian_
|
102
|
+
th:
|
103
|
+
name: Thai
|
104
|
+
stemmer: light_english
|
105
|
+
stop_words: _thai_
|
106
|
+
zh:
|
107
|
+
name: Chinese
|
108
|
+
stemmer: light_english
|
109
|
+
stop_words: _english_
|
110
|
+
postpended_filters:
|
111
|
+
- cjk_bigram
|