connectors_service 8.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67):
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,158 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'fugit'
11
+ require 'core/connector_settings'
12
+ require 'utility/cron'
13
+ require 'utility/logger'
14
+ require 'utility/exception_tracking'
15
+
16
module Core
  # Base polling scheduler. On every polling round it walks the connector
  # settings returned by #connector_settings and yields each connector together
  # with the action that is due for it (:sync, :heartbeat or :configuration).
  #
  # Subclasses are expected to override #connector_settings; the base
  # implementation raises.
  class Scheduler
    # @param poll_interval [Numeric] seconds to sleep between polling rounds;
    #   no sleep happens when the value is not positive
    # @param heartbeat_interval [Numeric] seconds added to a connector's
    #   last_seen time to decide whether a heartbeat is due
    def initialize(poll_interval, heartbeat_interval)
      @poll_interval = poll_interval
      @heartbeat_interval = heartbeat_interval
      @is_shutting_down = false
    end

    # @return [Enumerable] the connector settings to inspect in one polling round
    # @raise [RuntimeError] always, in this base class
    def connector_settings
      raise 'Not implemented'
    end

    # Runs the polling loop until #shutdown has been called.
    #
    # @yieldparam cs [Object] one element of #connector_settings
    # @yieldparam action [Symbol] :sync, :heartbeat or :configuration
    def when_triggered
      loop do
        connector_settings.each do |cs|
          yield cs, :sync if sync_triggered?(cs)
          yield cs, :heartbeat if heartbeat_triggered?(cs)
          yield cs, :configuration if configuration_triggered?(cs)
        end
        break if @is_shutting_down
      rescue StandardError => e
        # Any error (including one raised by the caller's block) is logged and
        # the loop carries on with the next polling round.
        Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
      ensure
        if @poll_interval > 0 && !@is_shutting_down
          Utility::Logger.info("Sleeping for #{@poll_interval} seconds in #{self.class}.")
          sleep(@poll_interval)
        end
      end
    end

    # Flags the scheduler as shutting down; the polling loop exits at the end
    # of the round that is currently running and skips the trailing sleep.
    def shutdown
      Utility::Logger.info("Shutting down scheduler #{self.class.name}.")
      @is_shutting_down = true
    end

    private

    # Decides whether a sync has to be started for the given connector.
    #
    # @param connector_settings [Object] settings of a single connector
    # @return [Boolean]
    def sync_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      unless connector_settings.valid_index_name?
        Utility::Logger.info("The index name of #{connector_settings.formatted} is invalid.")
        return false
      end

      unless connector_settings.connector_status_allows_sync?
        Utility::Logger.info("#{connector_settings.formatted.capitalize} is in status \"#{connector_settings.connector_status}\" and won't sync yet. Connector needs to be in one of the following statuses: #{Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC} to run.")

        return false
      end

      # Sync when sync_now flag is true for the connector
      if connector_settings[:sync_now] == true
        Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
        return true
      end

      # Don't sync if sync is explicitly disabled
      scheduling_settings = connector_settings.scheduling_settings
      unless scheduling_settings.present? && scheduling_settings[:enabled] == true
        Utility::Logger.info("#{connector_settings.formatted.capitalize} scheduling is disabled.")
        return false
      end

      # We want to sync when sync never actually happened
      last_synced = connector_settings[:last_synced]
      if last_synced.nil? || last_synced.empty?
        Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
        return true
      end

      current_schedule = scheduling_settings[:interval]

      # Don't sync if there is no actual scheduling interval
      if current_schedule.nil? || current_schedule.empty?
        Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
        return false
      end

      current_schedule = begin
        Utility::Cron.quartz_to_crontab(current_schedule)
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
        return false
      end
      cron_parser = Fugit::Cron.parse(current_schedule)

      # Don't sync if the scheduling interval is non-parsable
      unless cron_parser
        Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
        return false
      end

      # A malformed last_synced value must only affect this check: letting the
      # parse error escape would abort the whole polling round, skipping the
      # heartbeat/configuration checks of this and all following connectors.
      last_synced_time = begin
        Time.parse(last_synced)
      rescue StandardError
        Utility::Logger.warn("Unable to parse last_synced #{last_synced} for #{connector_settings.formatted}.")
        return false
      end

      next_trigger_time = cron_parser.next_time(last_synced_time)

      # Sync if next trigger for the connector is in past
      if next_trigger_time < Time.now
        Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
        return true
      end

      false
    end

    # A heartbeat is due when the connector was never seen, when its last_seen
    # value cannot be parsed, or when last_seen is older than the heartbeat interval.
    #
    # @param connector_settings [Object] settings of a single connector
    # @return [Boolean]
    def heartbeat_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      last_seen = connector_settings[:last_seen]
      return true if last_seen.nil? || last_seen.empty?

      last_seen = begin
        Time.parse(last_seen)
      rescue StandardError
        Utility::Logger.warn("Unable to parse last_seen #{last_seen}")
        nil
      end
      return true unless last_seen

      last_seen + @heartbeat_interval < Time.now
    end

    # Configuration is due for registered connectors that are still in the CREATED status.
    #
    # @param connector_settings [Object] settings of a single connector
    # @return [Boolean]
    def configuration_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
    end

    # @param service_type [Object] service type looked up in Connectors::REGISTRY
    # @return [Boolean] whether the registry knows the service type (logs when it does not)
    def connector_registered?(service_type)
      if Connectors::REGISTRY.registered?(service_type)
        true
      else
        Utility::Logger.info("The service type (#{service_type}) is not supported.")
        false
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/scheduler'
10
+ require 'core/connector_settings'
11
+ require 'utility/logger'
12
+ require 'utility/exception_tracking'
13
+
14
module Core
  # Scheduler that polls exactly one connector, identified by its id.
  class SingleScheduler < Core::Scheduler
    # @param connector_id [Object] id handed to Core::ConnectorSettings.fetch_by_id
    # @param poll_interval [Numeric] forwarded to Core::Scheduler
    # @param heartbeat_interval [Numeric] forwarded to Core::Scheduler
    def initialize(connector_id, poll_interval, heartbeat_interval)
      super(poll_interval, heartbeat_interval)
      @connector_id = connector_id
    end

    # Looks the connector up on every call.
    #
    # @return [Array] a one-element array holding the fetched settings, or an
    #   empty array when the lookup raised (the error is logged, not re-raised)
    def connector_settings
      [Core::ConnectorSettings.fetch_by_id(@connector_id)]
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
      []
    end
  end
end
@@ -0,0 +1,111 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+ require 'core/output_sink'
12
+ require 'utility'
13
+
14
module Core
  # Raised when the configuration stored for a connector does not have exactly
  # the configurable fields declared by the connector class.
  class IncompatibleConfigurableFieldsError < StandardError
    # @param service_type [Object] service type of the connector class
    # @param expected_fields [Array<String>] field names declared by the connector class
    # @param actual_fields [Array<String>] field names found in the stored configuration
    def initialize(service_type, expected_fields, actual_fields)
      super("Connector of service_type '#{service_type}' expected configurable fields: #{expected_fields}, actual stored fields: #{actual_fields}")
    end
  end

  # Runs one sync job for a connector: claims the job, pushes every document
  # yielded by the connector into the sink, deletes documents that are in the
  # index but were not yielded, and reports the outcome.
  class SyncJobRunner
    # @param connector_settings [Object] settings of the connector to sync
    def initialize(connector_settings)
      @connector_settings = connector_settings
      @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, connector_settings.request_pipeline)
      @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
      @connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
      @status = {
        :indexed_document_count => 0,
        :deleted_document_count => 0,
        :error => nil
      }
    end

    # Validates the stored configuration, then runs the sync.
    #
    # @raise [IncompatibleConfigurableFieldsError] when stored and declared
    #   configurable fields differ
    def execute
      validate_configuration!
      do_sync!
    end

    private

    # Claims the job and runs the sync. Errors raised while syncing are recorded
    # in @status and reported on the connector; they are not re-raised.
    def do_sync!
      Utility::Logger.info("Starting sync for connector #{@connector_settings.id}.")

      job_id = ElasticConnectorActions.claim_job(@connector_settings.id)

      unless job_id.present?
        Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
        return
      end

      begin
        Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")

        @connector_instance.do_health_check!
        sync_documents
      rescue StandardError => e
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
      ensure
        # Runs for successful and failed syncs alike so a claimed job is always completed.
        report_outcome(job_id)
      end
    end

    # Ingests every document yielded by the connector, deletes the ids that
    # are in the index but were not yielded, then flushes the sink.
    def sync_documents
      incoming_ids = []
      existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)

      Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")

      @connector_instance.yield_documents do |document|
        document = add_ingest_metadata(document)
        @sink.ingest(document)
        incoming_ids << document[:id]
        @status[:indexed_document_count] += 1
      end

      # Array difference ignores duplicates in its argument, so no uniq is needed.
      ids_to_delete = existing_ids - incoming_ids

      Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")

      ids_to_delete.each do |id|
        @sink.delete(id)
        @status[:deleted_document_count] += 1
      end

      @sink.flush
    end

    # Logs the document counters, completes the job and logs the final result.
    #
    # @param job_id [Object] id returned when the job was claimed
    def report_outcome(job_id)
      Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
      Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents from #{@connector_settings.index_name}.")

      ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)

      if @status[:error]
        Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@status[:error]}.")
      else
        Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
      end
    end

    # Adds the ingest flags that are switched on in the connector settings to
    # the document (mutating it); flags that are off are left out entirely.
    #
    # @param document [Hash] document yielded by the connector
    # @return [Hash] the same document
    def add_ingest_metadata(document)
      document.tap do |doc|
        doc['_extract_binary_content'] = @connector_settings.extract_binary_content? if @connector_settings.extract_binary_content?
        doc['_reduce_whitespace'] = @connector_settings.reduce_whitespace? if @connector_settings.reduce_whitespace?
        doc['_run_ml_inference'] = @connector_settings.run_ml_inference? if @connector_settings.run_ml_inference?
      end
    end

    # Compares the sorted, stringified field names of the stored configuration
    # with those declared by the connector class.
    #
    # @raise [IncompatibleConfigurableFieldsError] when the two lists differ
    def validate_configuration!
      expected_fields = @connector_class.configurable_fields.keys.map(&:to_s).sort
      actual_fields = @connector_settings.configuration.keys.map(&:to_s).sort

      raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
    end
  end
end
data/lib/core.rb ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/configuration'
10
+ require 'core/connector_settings'
11
+ require 'core/elastic_connector_actions'
12
+ require 'core/heartbeat'
13
+ require 'core/scheduler'
14
+ require 'core/single_scheduler'
15
+ require 'core/native_scheduler'
16
+ require 'core/sync_job_runner'
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/registry'
10
+ require 'utility'
11
+
12
# Logs the display name of every connector known to Connectors::REGISTRY.
class ListConnectors
  class << self
    # Runs inside the block handed to Utility::Environment.set_execution_environment
    # (called with App::Config) and writes one log line per registered connector.
    def run!
      Utility::Environment.set_execution_environment(App::Config) do
        Utility::Logger.info('Registered connectors:')
        registry = Connectors::REGISTRY
        registry.registered_connectors.each do |service_type|
          display_name = registry.connector_class(service_type).display_name
          Utility::Logger.info("- #{display_name}")
        end
        Utility::Logger.info('Bye')
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
# Holder of fixed configuration values, exposed as class-level readers.
# Every reader returns a hard-coded value.
class AppConfig
  # @return [Hash] connector-level settings (a fresh Hash on every call)
  def self.connectors
    { 'transient_server_error_retry_delay_minutes' => 5 }
  end

  # @return [Integer]
  def self.content_source_sync_max_errors
    1000
  end

  # @return [Integer]
  def self.content_source_sync_max_consecutive_errors
    10
  end

  # @return [Float]
  def self.content_source_sync_max_error_ratio
    0.15
  end

  # @return [Integer]
  def self.content_source_sync_error_ratio_window_size
    100
  end

  # @return [Boolean]
  def self.content_source_sync_thumbnails_enabled?
    true
  end
end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/inflector'
10
+
11
module Connectors
  # Metrics facade whose recording methods do nothing: measure only runs the
  # given block and increment is a no-op. The key helpers build metric names.
  module Stats
    class << self
      # Runs the block and returns its result; key and value are ignored.
      def measure(_key, _value = nil, &block)
        block.call
      end

      # Intentionally does nothing.
      def increment(key, value = 1)
        # no op
      end

      # @return [String] the key prefixed with "connectors."
      def prefix_key(key)
        "connectors.#{key}"
      end

      # Derives a snake_case key from a class name, e.g.
      # Connectors::GoogleDrive::Adapter -> google_drive.
      #
      # @param klass [Class] class whose name is converted
      # @param deconstantize [Boolean] drop the last constant segment first
      #   (Connectors::GoogleDrive::Adapter -> Connectors::GoogleDrive)
      # @return [String]
      def class_key(klass, deconstantize = true)
        inflector = ActiveSupport::Inflector
        qualified = klass.name
        qualified = inflector.deconstantize(qualified) if deconstantize
        # Connectors::GoogleDrive -> GoogleDrive -> google_drive
        inflector.underscore(inflector.demodulize(qualified))
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
# Minimal ServiceType class exposing a single method, #classify.
class ServiceType
  # @return [String] always the fixed string 'classify'
  def classify
    'classify'
  end
end
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  # Shared field names, index names and service type identifiers.
  class Constants
    # Document field names
    THUMBNAIL_FIELDS = %w[
      _thumbnail_80x100
      _thumbnail_310x430
    ].freeze
    SUBEXTRACTOR_RESERVED_FIELDS = %w[
      _subextracted_as_of
      _subextracted_version
    ].freeze
    ALLOW_FIELD = '_allow_permissions'
    DENY_FIELD = '_deny_permissions'

    # Index names and the content index prefix
    CONNECTORS_INDEX = '.elastic-connectors'
    JOB_INDEX = '.elastic-connectors-sync-jobs'
    CONTENT_INDEX_PREFIX = 'search-'

    # Service type identifier
    CRAWLER_SERVICE_TYPE = 'elastic-crawler'
  end
end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
+ module Utility
12
+ # taken from https://regex101.com/r/cU7zG2/1
13
+ # previous regexp allowed days of the week as [0-6], but it's not correct because the Kibana scheduler
14
+ # is using [1-7] for days of the week, aligned with the Quartz scheduler: see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
15
+ # But just replacing with [1-7] would also be incorrect, since according to the Cron spec, the days of the week
16
+ # are 1-6 for Monday-Saturday, and 0 or 7 for Sunday, 7 being a non-standard but still widely used. So, we need to
17
+ # allow for 0-7.
18
+ CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/
19
+
20
+ # see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
21
module Cron
  # Rejects the Quartz-only modifiers '#', 'L' and 'W' in a single cron field.
  # The names JUL and WED are ordinary month / day names and are not treated
  # as the 'L' / 'W' modifiers.
  #
  # @param expr [String] one field of a cron expression
  # @return [String] expr unchanged when it is supported
  # @raise [StandardError] when the field contains an unsupported modifier
  def self.check(expr)
    raise StandardError.new("Unsupported expression #{expr} with #") if expr.include?('#')

    # Remove the names that legitimately contain the letters before looking for modifiers.
    modifiers = expr.gsub(/JUL|WED/, '')
    raise StandardError.new("Unsupported expression #{expr} with L") if modifiers.include?('L')
    raise StandardError.new("Unsupported expression #{expr} with W") if modifiers.include?('W')

    expr
  end

  # Converts a Quartz cron expression (seconds minutes hours day-of-month month
  # day-of-week [year]) into a five-field crontab expression
  # (minutes hours day-of-month month day-of-week). Seconds and year are dropped.
  #
  # @param expression [String] Quartz cron expression
  # @return [String] crontab expression
  # @raise [StandardError] when the expression does not match CRON_REGEXP with
  #   all cron fields present, or uses an unsupported modifier
  def self.quartz_to_crontab(expression)
    # ? is not supported
    converted_expression = expression.tr('?', '*')

    # CRON_REGEXP also matches blank lines, comments and assignments; those
    # carry no cron fields, so they are rejected as an unknown format.
    matched = converted_expression.match(CRON_REGEXP)
    raise StandardError.new("Unknown format #{expression}") unless matched && matched[5]

    minutes = matched[3]
    hours = matched[4]
    day_of_month = check(matched[5])
    month = check(matched[6])
    day_of_week = scheduler_dow_to_crontab(check(matched[7])).to_s

    # Unix cron has five: minute, hour, day, month, and dayofweek
    # Quartz adds seconds and year
    converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

    Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

    converted_expression
  end

  # Quartz numbers the days of the week 1-7 starting with Sunday, while Unix
  # cron numbers them 0-6 starting with Sunday and also accepts 7 as a
  # non-standard alias for Sunday (see https://en.wikipedia.org/wiki/Cron).
  # Quartz numbers are therefore shifted down by one. An expression containing
  # the day 0 cannot be Quartz, so it is assumed to be crontab already and is
  # returned untouched. Every day number of a list or range is shifted
  # ('2-6' becomes '1-5'); the value after '/' is a step and is kept as is.
  # See also the code in connectors-python that does the same thing: https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
  #
  # @param day [String] day-of-week field of a cron expression
  # @return [Integer, String] Integer for a single shifted day number
  #   (historical behavior), otherwise a String
  def self.scheduler_dow_to_crontab(day)
    return day unless /\d/.match?(day)

    if /\A\d+\z/.match?(day)
      return day if day.to_i <= 0

      return day.to_i - 1
    end

    # Split every list item into [days, '/', step]; only the days part holds day numbers.
    items = day.split(',', -1).map { |item| item.partition('/') }
    already_crontab = items.any? { |days, _slash, _step| days.scan(/\d+/).any? { |value| value.to_i.zero? } }
    return day if already_crontab

    items.map { |days, slash, step| "#{days.gsub(/\d+/) { |value| (value.to_i - 1).to_s }}#{slash}#{step}" }.join(',')
  end
end
81
+ end
@@ -0,0 +1,111 @@
1
+ ---
2
+ da:
3
+ name: Danish
4
+ stemmer: danish
5
+ stop_words: _danish_
6
+ de:
7
+ name: German
8
+ stemmer: light_german
9
+ stop_words: _german_
10
+ en:
11
+ name: English
12
+ stemmer: light_english
13
+ stop_words: _english_
14
+ es:
15
+ name: Spanish
16
+ stemmer: light_spanish
17
+ stop_words: _spanish_
18
+ fr:
19
+ name: French
20
+ stemmer: light_french
21
+ stop_words: _french_
22
+ custom_filter_definitions:
23
+ fr-elision:
24
+ type: elision
25
+ articles:
26
+ - l
27
+ - m
28
+ - t
29
+ - qu
30
+ - n
31
+ - s
32
+ - j
33
+ - d
34
+ - c
35
+ - jusqu
36
+ - quoiqu
37
+ - lorsqu
38
+ - puisqu
39
+ articles_case: true
40
+ prepended_filters:
41
+ - fr-elision
42
+ it:
43
+ name: Italian
44
+ stemmer: light_italian
45
+ stop_words: _italian_
46
+ custom_filter_definitions:
47
+ it-elision:
48
+ type: elision
49
+ articles:
50
+ - c
51
+ - l
52
+ - all
53
+ - dall
54
+ - dell
55
+ - nell
56
+ - sull
57
+ - coll
58
+ - pell
59
+ - gl
60
+ - agl
61
+ - dagl
62
+ - degl
63
+ - negl
64
+ - sugl
65
+ - un
66
+ - m
67
+ - t
68
+ - s
69
+ - v
70
+ - d
71
+ articles_case: true
72
+ prepended_filters:
73
+ - it-elision
74
+ ja:
75
+ name: Japanese
76
+ stemmer: light_english
77
+ stop_words: _english_
78
+ postpended_filters:
79
+ - cjk_bigram
80
+ ko:
81
+ name: Korean
82
+ stemmer: light_english
83
+ stop_words: _english_
84
+ postpended_filters:
85
+ - cjk_bigram
86
+ nl:
87
+ name: Dutch
88
+ stemmer: dutch
89
+ stop_words: _dutch_
90
+ pt:
91
+ name: Portuguese
92
+ stemmer: light_portuguese
93
+ stop_words: _portuguese_
94
+ pt-br:
95
+ name: Portuguese (Brazil)
96
+ stemmer: brazilian
97
+ stop_words: _brazilian_
98
+ ru:
99
+ name: Russian
100
+ stemmer: russian
101
+ stop_words: _russian_
102
+ th:
103
+ name: Thai
104
+ stemmer: light_english
105
+ stop_words: _thai_
106
+ zh:
107
+ name: Chinese
108
+ stemmer: light_english
109
+ stop_words: _english_
110
+ postpended_filters:
111
+ - cjk_bigram