connectors_service 8.5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,158 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'time'
10
+ require 'fugit'
11
+ require 'core/connector_settings'
12
+ require 'utility/cron'
13
+ require 'utility/logger'
14
+ require 'utility/exception_tracking'
15
+
16
module Core
  # Base class for polling schedulers.
  #
  # On every polling cycle it walks the connector settings returned by
  # #connector_settings (which subclasses must implement) and yields each
  # connector together with the task that is currently due for it:
  # :sync, :heartbeat and/or :configuration.
  class Scheduler
    # @param poll_interval [Numeric] seconds to sleep between polling cycles (no sleep when <= 0)
    # @param heartbeat_interval [Numeric] seconds added to a connector's last_seen time
    #   before a new heartbeat is considered due
    def initialize(poll_interval, heartbeat_interval)
      @poll_interval = poll_interval
      @heartbeat_interval = heartbeat_interval
      @is_shutting_down = false
    end

    # Connector settings to inspect on each cycle. Must be overridden by subclasses.
    #
    # @raise [RuntimeError] always, in this base implementation
    def connector_settings
      raise 'Not implemented'
    end

    # Runs the polling loop until #shutdown has been called.
    #
    # Any StandardError raised while processing a cycle (including errors raised by
    # the given block) is logged and the loop continues with the next cycle.
    #
    # @yieldparam cs [Object] one element of #connector_settings
    # @yieldparam task [Symbol] :sync, :heartbeat or :configuration
    def when_triggered
      loop do
        connector_settings.each do |cs|
          yield cs, :sync if sync_triggered?(cs)
          yield cs, :heartbeat if heartbeat_triggered?(cs)
          yield cs, :configuration if configuration_triggered?(cs)
        end
        break if @is_shutting_down
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
      ensure
        # Runs after every cycle, successful or not; skipped once shutdown was requested.
        if @poll_interval > 0 && !@is_shutting_down
          Utility::Logger.info("Sleeping for #{@poll_interval} seconds in #{self.class}.")
          sleep(@poll_interval)
        end
      end
    end

    # Requests the polling loop to stop; the loop exits at the end of the current cycle.
    def shutdown
      Utility::Logger.info("Shutting down scheduler #{self.class.name}.")
      @is_shutting_down = true
    end

    private

    # Decides whether a sync should be started for the given connector.
    #
    # @return [Boolean] true when the connector is flagged to sync now, has never
    #   synced, or its cron schedule produced a trigger time that is already in the past
    def sync_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      unless connector_settings.valid_index_name?
        Utility::Logger.info("The index name of #{connector_settings.formatted} is invalid.")
        return false
      end

      unless connector_settings.connector_status_allows_sync?
        Utility::Logger.info("#{connector_settings.formatted.capitalize} is in status \"#{connector_settings.connector_status}\" and won't sync yet. Connector needs to be in one of the following statuses: #{Connectors::ConnectorStatus::STATUSES_ALLOWING_SYNC} to run.")

        return false
      end

      # Sync when sync_now flag is true for the connector
      if connector_settings[:sync_now] == true
        Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
        return true
      end

      # Don't sync if sync is explicitly disabled
      scheduling_settings = connector_settings.scheduling_settings
      unless scheduling_settings.present? && scheduling_settings[:enabled] == true
        Utility::Logger.info("#{connector_settings.formatted.capitalize} scheduling is disabled.")
        return false
      end

      # We want to sync when sync never actually happened
      last_synced = connector_settings[:last_synced]
      if last_synced.nil? || last_synced.empty?
        Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
        return true
      end

      current_schedule = scheduling_settings[:interval]

      # Don't sync if there is no actual scheduling interval
      if current_schedule.nil? || current_schedule.empty?
        Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
        return false
      end

      current_schedule = begin
        Utility::Cron.quartz_to_crontab(current_schedule)
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
        return false
      end
      cron_parser = Fugit::Cron.parse(current_schedule)

      # Don't sync if the scheduling interval is non-parsable
      unless cron_parser
        Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
        return false
      end

      # A malformed last_synced value must not escape from here: the exception would
      # abort the whole polling cycle in #when_triggered and skip every remaining
      # connector. Log it and report "not triggered" for this connector only.
      last_synced_time = begin
        Time.parse(last_synced)
      rescue StandardError => e
        Utility::ExceptionTracking.log_exception(e, "Unable to parse last_synced (#{last_synced}) for #{connector_settings.formatted}.")
        return false
      end

      next_trigger_time = cron_parser.next_time(last_synced_time)

      # Sync if next trigger for the connector is in past
      if next_trigger_time < Time.now
        Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
        return true
      end

      false
    end

    # Decides whether a heartbeat is due for the given connector.
    #
    # @return [Boolean] true when last_seen is missing, empty or unparsable, or when
    #   last_seen + heartbeat_interval lies in the past
    def heartbeat_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      last_seen = connector_settings[:last_seen]
      return true if last_seen.nil? || last_seen.empty?

      last_seen = begin
        Time.parse(last_seen)
      rescue StandardError
        Utility::Logger.warn("Unable to parse last_seen #{last_seen}")
        nil
      end
      return true unless last_seen

      last_seen + @heartbeat_interval < Time.now
    end

    # @return [Boolean] true when the connector is registered and still in CREATED status
    def configuration_triggered?(connector_settings)
      return false unless connector_registered?(connector_settings.service_type)

      connector_settings.connector_status == Connectors::ConnectorStatus::CREATED
    end

    # @return [Boolean] whether the registry knows the service type; logs when it does not
    def connector_registered?(service_type)
      if Connectors::REGISTRY.registered?(service_type)
        true
      else
        Utility::Logger.info("The service type (#{service_type}) is not supported.")
        false
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/scheduler'
10
+ require 'core/connector_settings'
11
+ require 'utility/logger'
12
+ require 'utility/exception_tracking'
13
+
14
module Core
  # Scheduler that polls a single connector, looked up by its id on every cycle.
  class SingleScheduler < Core::Scheduler
    # @param connector_id [Object] id handed to Core::ConnectorSettings.fetch_by_id
    # @param poll_interval [Numeric] forwarded to Core::Scheduler
    # @param heartbeat_interval [Numeric] forwarded to Core::Scheduler
    def initialize(connector_id, poll_interval, heartbeat_interval)
      super(poll_interval, heartbeat_interval)
      @connector_id = connector_id
    end

    # Fetches the settings of the configured connector.
    #
    # @return [Array] one-element array with the fetched settings, or an empty
    #   array when the lookup raised a StandardError (which is logged)
    def connector_settings
      [Core::ConnectorSettings.fetch_by_id(@connector_id)]
    rescue StandardError => e
      Utility::ExceptionTracking.log_exception(e, "Could not retrieve the connector by id #{@connector_id} due to unexpected error.")
      []
    end
  end
end
@@ -0,0 +1,111 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/connector_status'
10
+ require 'connectors/registry'
11
+ require 'core/output_sink'
12
+ require 'utility'
13
+
14
module Core
  # Raised when the configuration stored for a connector does not contain exactly
  # the configurable fields declared by the connector class.
  class IncompatibleConfigurableFieldsError < StandardError
    def initialize(service_type, expected_fields, actual_fields)
      super("Connector of service_type '#{service_type}' expected configurable fields: #{expected_fields}, actual stored fields: #{actual_fields}")
    end
  end

  # Runs one sync job for a connector: claims the job, streams the connector's
  # documents into the sink, deletes documents that are no longer produced and
  # reports the outcome.
  class SyncJobRunner
    # @param connector_settings [Object] settings of the connector to sync; must expose
    #   id, index_name, request_pipeline, service_type, configuration and the
    #   extract_binary_content?/reduce_whitespace?/run_ml_inference? flags
    def initialize(connector_settings)
      @connector_settings = connector_settings
      @sink = Core::OutputSink::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
      @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
      @connector_instance = Connectors::REGISTRY.connector(connector_settings.service_type, connector_settings.configuration)
      @status = {
        indexed_document_count: 0,
        deleted_document_count: 0,
        error: nil
      }
    end

    # Validates the stored configuration and then performs the sync.
    #
    # @raise [IncompatibleConfigurableFieldsError] when the stored configuration keys
    #   differ from the connector class' configurable fields
    def execute
      validate_configuration!
      do_sync!
    end

    private

    # Claims a job and runs the sync. Errors raised during the sync are recorded in
    # the status, logged and reflected in the connector status; the job is always
    # completed (with the collected status) once it has been claimed.
    def do_sync!
      Utility::Logger.info("Starting sync for connector #{@connector_settings.id}.")

      job_id = ElasticConnectorActions.claim_job(@connector_settings.id)

      unless job_id.present?
        Utility::Logger.error("Failed to claim the job for #{@connector_settings.id}. Please check the logs for the cause of this error.")
        return
      end

      begin
        Utility::Logger.debug("Successfully claimed job for connector #{@connector_settings.id}.")

        @connector_instance.do_health_check!

        incoming_ids = []
        existing_ids = ElasticConnectorActions.fetch_document_ids(@connector_settings.index_name)

        Utility::Logger.debug("#{existing_ids.size} documents are present in index #{@connector_settings.index_name}.")

        @connector_instance.yield_documents do |document|
          document = add_ingest_metadata(document)
          @sink.ingest(document)
          incoming_ids << document[:id]
          @status[:indexed_document_count] += 1
        end

        # Everything that was in the index before but was not produced by this run is stale.
        ids_to_delete = existing_ids - incoming_ids.uniq

        Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")

        ids_to_delete.each do |id|
          @sink.delete(id)
          @status[:deleted_document_count] += 1
        end

        @sink.flush
      rescue StandardError => e
        @status[:error] = e.message
        Utility::ExceptionTracking.log_exception(e)
        ElasticConnectorActions.update_connector_status(@connector_settings.id, Connectors::ConnectorStatus::ERROR, Utility::Logger.abbreviated_message(e.message))
      ensure
        Utility::Logger.info("Upserted #{@status[:indexed_document_count]} documents into #{@connector_settings.index_name}.")
        Utility::Logger.info("Deleted #{@status[:deleted_document_count]} documents from #{@connector_settings.index_name}.")

        ElasticConnectorActions.complete_sync(@connector_settings.id, job_id, @status.dup)

        if @status[:error]
          Utility::Logger.info("Failed to sync for connector #{@connector_settings.id} with error #{@status[:error]}.")
        else
          Utility::Logger.info("Successfully synced for connector #{@connector_settings.id}.")
        end
      end
    end

    # Adds the ingest flags that are enabled in the connector settings to the
    # document (mutating it); disabled flags are not written at all.
    #
    # @return [Object] the same document instance
    def add_ingest_metadata(document)
      document.tap do |it|
        it['_extract_binary_content'] = @connector_settings.extract_binary_content? if @connector_settings.extract_binary_content?
        it['_reduce_whitespace'] = @connector_settings.reduce_whitespace? if @connector_settings.reduce_whitespace?
        it['_run_ml_inference'] = @connector_settings.run_ml_inference? if @connector_settings.run_ml_inference?
      end
    end

    # Compares the sorted, stringified configuration keys against the connector
    # class' configurable fields.
    #
    # @raise [IncompatibleConfigurableFieldsError] when they differ
    def validate_configuration!
      expected_fields = @connector_class.configurable_fields.keys.map(&:to_s).sort
      actual_fields = @connector_settings.configuration.keys.map(&:to_s).sort

      raise IncompatibleConfigurableFieldsError.new(@connector_class.service_type, expected_fields, actual_fields) if expected_fields != actual_fields
    end
  end
end
data/lib/core.rb ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'core/configuration'
10
+ require 'core/connector_settings'
11
+ require 'core/elastic_connector_actions'
12
+ require 'core/heartbeat'
13
+ require 'core/scheduler'
14
+ require 'core/single_scheduler'
15
+ require 'core/native_scheduler'
16
+ require 'core/sync_job_runner'
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/registry'
10
+ require 'utility'
11
+
12
# Command-line helper that logs the display name of every registered connector.
class ListConnectors
  # Sets up the execution environment from App::Config and logs one line per
  # connector known to Connectors::REGISTRY.
  def self.run!
    Utility::Environment.set_execution_environment(App::Config) do
      Utility::Logger.info('Registered connectors:')
      registry = Connectors::REGISTRY
      registry.registered_connectors.each do |service_type|
        display_name = registry.connector_class(service_type).display_name
        Utility::Logger.info("- #{display_name}")
      end
      Utility::Logger.info('Bye')
    end
  end
end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
# Stub configuration object exposing fixed values through class-level readers.
class AppConfig
  # @return [Hash] connector-level settings (a new Hash on every call)
  def self.connectors
    { 'transient_server_error_retry_delay_minutes' => 5 }
  end

  # @return [Integer]
  def self.content_source_sync_max_errors
    1000
  end

  # @return [Integer]
  def self.content_source_sync_max_consecutive_errors
    10
  end

  # @return [Float]
  def self.content_source_sync_max_error_ratio
    0.15
  end

  # @return [Integer]
  def self.content_source_sync_error_ratio_window_size
    100
  end

  # @return [Boolean]
  def self.content_source_sync_thumbnails_enabled?
    true
  end
end
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/inflector'
10
+
11
module Connectors
  # Stub metrics facade: nothing is recorded, measured blocks are simply executed.
  module Stats
    class << self
      # Calls the given block and returns its result; key and value are ignored.
      def measure(_key, _value = nil, &block)
        block.call
      end

      # Intentionally does nothing.
      def increment(key, value = 1)
        # no op
      end

      # @return [String] the key prefixed with "connectors."
      def prefix_key(key)
        format('connectors.%s', key)
      end

      # Derives a snake_case key from a class name.
      #
      # @param klass [Class, Module] anything responding to #name
      # @param deconstantize [Boolean] when true the last constant segment is dropped
      #   first (Connectors::GoogleDrive::Adapter -> Connectors::GoogleDrive)
      # @return [String] e.g. "google_drive"
      def class_key(klass, deconstantize = true)
        inflector = ActiveSupport::Inflector
        qualified_name = klass.name
        qualified_name = inflector.deconstantize(qualified_name) if deconstantize
        # Connectors::GoogleDrive -> GoogleDrive -> google_drive
        inflector.underscore(inflector.demodulize(qualified_name))
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
# Stub whose #classify always answers with a fixed string.
class ServiceType
  # @return [String] always 'classify'
  def classify
    'classify'
  end
end
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  # Shared field and index names.
  class Constants
    # Document fields
    THUMBNAIL_FIELDS = ['_thumbnail_80x100', '_thumbnail_310x430'].freeze
    SUBEXTRACTOR_RESERVED_FIELDS = ['_subextracted_as_of', '_subextracted_version'].freeze
    ALLOW_FIELD = '_allow_permissions'
    DENY_FIELD = '_deny_permissions'

    # Index names and prefixes
    CONNECTORS_INDEX = '.elastic-connectors'
    JOB_INDEX = '.elastic-connectors-sync-jobs'
    CONTENT_INDEX_PREFIX = 'search-'

    # Service types
    CRAWLER_SERVICE_TYPE = 'elastic-crawler'
  end
end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
module Utility
  # taken from https://regex101.com/r/cU7zG2/1
  # previous regexp allowed days of the week as [0-6], but it's not correct because the Kibana scheduler
  # is using [1-7] for days of the week, aligned with the Quartz scheduler: see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
  # But just replacing with [1-7] would also be incorrect, since according to the Cron spec, the days of the week
  # are 1-6 for Monday-Saturday, and 0 or 7 for Sunday, 7 being a non-standard but still widely used. So, we need to
  # allow for 0-7.
  CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/

  # Converts Quartz cron expressions (seconds and optional year fields, 1-based
  # days of the week) into five-field crontab expressions.
  #
  # see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
  module Cron
    # A day-of-week number inside a day-of-week field. Numbers directly following
    # "/" are step values, not days, and are therefore not matched.
    DAY_OF_WEEK_NUMBER = %r{(?<![/\d])\d+}.freeze
    private_constant :DAY_OF_WEEK_NUMBER

    # Rejects the Quartz-only modifiers "#", "L" and "W" ("W" is tolerated as part of "WED").
    #
    # @param expr [String] a single cron field
    # @return [String] the unchanged field
    # @raise [StandardError] when the field uses an unsupported modifier
    def self.check(expr)
      raise StandardError, "Unsupported expression #{expr} with #" if expr.include?('#')
      raise StandardError, "Unsupported expression #{expr} with L" if expr.include?('L')
      raise StandardError, "Unsupported expression #{expr} with W" if expr.include?('W') && !expr.include?('WED')

      expr
    end

    # Converts a Quartz expression into "minute hour day-of-month month day-of-week".
    #
    # @param expression [String] Quartz cron expression
    # @return [String] the equivalent crontab expression
    # @raise [StandardError] when the expression does not look like a cron
    #   definition or uses an unsupported modifier
    def self.quartz_to_crontab(expression)
      # ? is not supported
      match = CRON_REGEXP.match(expression.tr('?', '*'))

      # CRON_REGEXP also accepts blank lines, "#" and "NAME=" lines; those carry no
      # cron fields (the field groups are nil), so they are treated as unknown too.
      raise StandardError, "Unknown format #{expression}" if match.nil? || match[2].nil?

      # Unix cron has five: minute, hour, day, month, and dayofweek
      # Quartz adds seconds (group 2) and year (group 9), which are dropped here.
      minutes = match[3]
      hours = match[4]
      day_of_month = check(match[5])
      month = check(match[6])
      day_of_week = scheduler_dow_to_crontab(check(match[7])).to_s

      converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

      Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

      converted_expression
    end

    # Quartz uses 1-7 for days of the week, starting with Sunday, while Unix cron
    # uses 0-6, also starting with Sunday (7 being an extra non-standard index for Sunday).
    # (see https://en.wikipedia.org/wiki/Cron for more details)
    # This means that Quartz day numbers between 1 and 7 are shifted by minus one. An
    # expression that contains day 0 cannot be a Quartz expression, so it is assumed
    # to be standard cron already and is returned unchanged.
    # See also the code in connectors-python that does the same thing: https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
    #
    # @param day [String] day-of-week field ("3", "2-6", "1,3,5", "MON-FRI", "*", ...)
    # @return [String, Integer] the crontab day-of-week field; a single shifted day is
    #   returned as an Integer, every other case as a String
    def self.scheduler_dow_to_crontab(day)
      return day unless /\d/.match?(day)

      # Single value: Integer return kept for backward compatibility.
      return day.to_i <= 0 ? day : day.to_i - 1 unless day.match?(%r{[,/-]})

      # Lists, ranges and steps: shift every day number, not just the leading one.
      return day if day.scan(DAY_OF_WEEK_NUMBER).any? { |number| number.to_i.zero? }

      day.gsub(DAY_OF_WEEK_NUMBER) { |number| (number.to_i - 1).to_s }
    end
  end
end
@@ -0,0 +1,111 @@
1
+ ---
2
+ da:
3
+ name: Danish
4
+ stemmer: danish
5
+ stop_words: _danish_
6
+ de:
7
+ name: German
8
+ stemmer: light_german
9
+ stop_words: _german_
10
+ en:
11
+ name: English
12
+ stemmer: light_english
13
+ stop_words: _english_
14
+ es:
15
+ name: Spanish
16
+ stemmer: light_spanish
17
+ stop_words: _spanish_
18
+ fr:
19
+ name: French
20
+ stemmer: light_french
21
+ stop_words: _french_
22
+ custom_filter_definitions:
23
+ fr-elision:
24
+ type: elision
25
+ articles:
26
+ - l
27
+ - m
28
+ - t
29
+ - qu
30
+ - n
31
+ - s
32
+ - j
33
+ - d
34
+ - c
35
+ - jusqu
36
+ - quoiqu
37
+ - lorsqu
38
+ - puisqu
39
+ articles_case: true
40
+ prepended_filters:
41
+ - fr-elision
42
+ it:
43
+ name: Italian
44
+ stemmer: light_italian
45
+ stop_words: _italian_
46
+ custom_filter_definitions:
47
+ it-elision:
48
+ type: elision
49
+ articles:
50
+ - c
51
+ - l
52
+ - all
53
+ - dall
54
+ - dell
55
+ - nell
56
+ - sull
57
+ - coll
58
+ - pell
59
+ - gl
60
+ - agl
61
+ - dagl
62
+ - degl
63
+ - negl
64
+ - sugl
65
+ - un
66
+ - m
67
+ - t
68
+ - s
69
+ - v
70
+ - d
71
+ articles_case: true
72
+ prepended_filters:
73
+ - it-elision
74
+ ja:
75
+ name: Japanese
76
+ stemmer: light_english
77
+ stop_words: _english_
78
+ postpended_filters:
79
+ - cjk_bigram
80
+ ko:
81
+ name: Korean
82
+ stemmer: light_english
83
+ stop_words: _english_
84
+ postpended_filters:
85
+ - cjk_bigram
86
+ nl:
87
+ name: Dutch
88
+ stemmer: dutch
89
+ stop_words: _dutch_
90
+ pt:
91
+ name: Portuguese
92
+ stemmer: light_portuguese
93
+ stop_words: _portuguese_
94
+ pt-br:
95
+ name: Portuguese (Brazil)
96
+ stemmer: brazilian
97
+ stop_words: _brazilian_
98
+ ru:
99
+ name: Russian
100
+ stemmer: russian
101
+ stop_words: _russian_
102
+ th:
103
+ name: Thai
104
+ stemmer: light_english
105
+ stop_words: _thai_
106
+ zh:
107
+ name: Chinese
108
+ stemmer: light_english
109
+ stop_words: _english_
110
+ postpended_filters:
111
+ - cjk_bigram