connectors_utility 8.4.0.1 → 8.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +26 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors_utility.rb +8 -2
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/scheduler.rb +138 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +58 -0
- data/lib/utility.rb +16 -0
- metadata +118 -5
data/lib/utility/cron.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
# taken from https://regex101.com/r/cU7zG2/1
|
13
|
+
# previous regexp allowed days of the week as [0-6], but it's not correct because the Kibana scheduler
|
14
|
+
# is using [1-7] for days of the week, aligned with the Quartz scheduler: see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
|
15
|
+
# But just replacing with [1-7] would also be incorrect, since according to the Cron spec, the days of the week
|
16
|
+
# are 1-6 for Monday-Saturday, and 0 or 7 for Sunday, 7 being a non-standard but still widely used. So, we need to
|
17
|
+
# allow for 0-7.
|
18
|
+
CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/
|
19
|
+
|
20
|
+
# see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
|
21
|
+
module Cron
  # Month and day-of-week names accepted by CRON_REGEXP. Several of them contain
  # the letters L or W (JUL, WED), which must not be mistaken for the Quartz
  # "last" (L) and "weekday" (W) modifiers.
  NAMED_VALUES = /JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC|MON|TUE|WED|THU|FRI|SAT|SUN/.freeze
  private_constant :NAMED_VALUES

  # Validates a single field of a Quartz expression.
  #
  # Returns the field unchanged when it is supported.
  # Raises StandardError when the field uses the Quartz-only '#', 'L' or 'W'
  # modifiers, which have no crontab equivalent here.
  def self.check(expr)
    raise StandardError.new("Unsupported expression #{expr} with #") if expr.include?('#')

    # Drop month/day names first so that e.g. 'JUL' or 'WED' are not rejected
    # just because they contain an L or a W.
    modifiers = expr.gsub(NAMED_VALUES, '')
    raise StandardError.new("Unsupported expression #{expr} with L") if modifiers.include?('L')
    raise StandardError.new("Unsupported expression #{expr} with W") if modifiers.include?('W')

    expr
  end

  # Converts a Quartz cron expression (seconds and optional year included) into
  # a five-field crontab expression: minute, hour, day of month, month, day of week.
  #
  # Returns the converted expression as a String.
  # Raises StandardError when the expression does not describe a schedule or
  # uses unsupported modifiers (see .check).
  def self.quartz_to_crontab(expression)
    # ? is not supported
    normalized = expression.tr('?', '*')

    fields = normalized.match(CRON_REGEXP)
    # CRON_REGEXP also matches blank lines, comments and assignments; in those
    # cases the schedule groups are nil and there is nothing to convert.
    raise StandardError.new("Unknown format #{expression}") unless fields && fields[2]

    # Unix cron has five: minute, hour, day, month, and dayofweek
    # Quartz adds seconds and year, which are dropped here.
    minutes = fields[3]
    hours = fields[4]
    day_of_month = check(fields[5])
    month = check(fields[6])
    day_of_week = scheduler_dow_to_crontab(check(fields[7])).to_s

    converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

    Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

    converted_expression
  end

  # Quartz uses 1-7 for days of the week, starting with Sunday, while Unix cron
  # uses 0-6, also starting with Sunday, and accepts 7 as an extra non-standard
  # index for Sunday (see https://en.wikipedia.org/wiki/Cron for more details).
  # Quartz days therefore have to be shifted by minus one. A 0 can only come
  # from an expression that is already in crontab form, so any field containing
  # a 0 day is returned untouched.
  # See also the code in connectors-python that does the same thing:
  # https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
  #
  # Returns:
  # - the input itself when it has no digits (names, '*') or is already crontab;
  # - an Integer for a single numeric day (kept for backward compatibility);
  # - a String for ranges, lists and steps, with every day shifted and the step
  #   value (the part after '/') left as is.
  def self.scheduler_dow_to_crontab(day)
    return day unless /\d/.match?(day)

    if /\A\d+\z/.match?(day)
      value = day.to_i
      return value <= 0 ? day : value - 1
    end

    entries = day.split(',', -1)
    day_values = entries.flat_map { |entry| entry.partition('/').first.scan(/\d+/) }
    return day if day_values.any? { |value| value.to_i <= 0 }

    shifted_entries = entries.map do |entry|
      days, separator, step = entry.partition('/')
      shifted = days.gsub(/\d+/) { |value| (value.to_i - 1).to_s }
      separator.empty? ? shifted : "#{shifted}/#{step}"
    end
    shifted_entries.join(',')
  end
end
|
81
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
|
8
|
+
require 'utility/logger'
|
9
|
+
require 'active_support/core_ext/module'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
module Environment
  # Prepares the process for running and then executes the given block.
  #
  # Forces the TZ environment variable to UTC, applies config[:log_level] to
  # Logger and runs the block through Kernel.silence_warnings or
  # Kernel.enable_warnings depending on config[:elasticsearch][:disable_warnings]
  # (warnings are disabled when the key is absent).
  #
  # Returns whatever the chosen Kernel helper returns for the block.
  def self.set_execution_environment(config, &block)
    # Set UTC as the timezone
    ENV['TZ'] = 'UTC'
    Logger.level = config[:log_level]

    es_settings = config[:elasticsearch]
    warnings_disabled = es_settings.has_key?(:disable_warnings) ? es_settings[:disable_warnings] : true

    unless warnings_disabled
      Logger.info('Enabling warnings')
      return Kernel.enable_warnings(&block)
    end

    Logger.info('Disabling warnings')
    Kernel.silence_warnings(&block)
  end
end
|
33
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'active_support/core_ext/string'
|
8
|
+
|
9
|
+
module Utility
|
10
|
+
# Serializable description of an error attached to a document.
class DocumentError
  attr_accessor :error_class, :error_message, :stack_trace, :error_id

  # error_class   - class (or class name) of the error
  # error_message - human readable message
  # stack_trace   - stack trace text; may be nil when no trace is available
  # error_id      - identifier used to correlate the error with the logs
  def initialize(error_class, error_message, stack_trace, error_id)
    @error_class = error_class
    @error_message = error_message
    @error_id = error_id

    # keywords must be < 32kb, UTF-8 chars can be up to 3 bytes, thus 32k/3 ~= 10k
    # See https://github.com/elastic/workplace-search-team/issues/1723
    # A missing stack trace is kept as nil instead of raising NoMethodError.
    @stack_trace = stack_trace&.truncate(10_000)
  end

  # Returns the error as a Hash with String keys.
  def to_h
    {
      'error_class' => error_class,
      'error_message' => error_message,
      'stack_trace' => stack_trace,
      'error_id' => error_id
    }
  end
end
|
32
|
+
|
33
|
+
class ClientError < StandardError; end
|
34
|
+
class EvictionWithNoProgressError < StandardError; end
|
35
|
+
class EvictionError < StandardError
|
36
|
+
attr_accessor :cursors
|
37
|
+
|
38
|
+
def initialize(message = nil, cursors: nil)
|
39
|
+
super(message)
|
40
|
+
@cursors = cursors
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class SuspendedJobError < StandardError
|
45
|
+
attr_accessor :suspend_until, :cursors
|
46
|
+
|
47
|
+
def initialize(message = nil, suspend_until:, cursors: nil)
|
48
|
+
super(message)
|
49
|
+
@suspend_until = suspend_until
|
50
|
+
@cursors = cursors
|
51
|
+
end
|
52
|
+
end
|
53
|
+
class ThrottlingError < SuspendedJobError; end
|
54
|
+
class TransientServerError < SuspendedJobError; end
|
55
|
+
class UnrecoverableServerError < StandardError; end
|
56
|
+
class TransientSubextractorError < StandardError; end
|
57
|
+
class JobDocumentLimitError < StandardError; end
|
58
|
+
class JobClaimingError < StandardError; end
|
59
|
+
|
60
|
+
# Raised by monitoring when a limit is exceeded; optionally remembers the
# error that tripped it and appends that error's class and message to its own.
class MonitoringError < StandardError
  attr_accessor :tripped_by

  # message    - optional description
  # tripped_by - optional error that caused the monitor to trip
  def initialize(message = nil, tripped_by: nil)
    trip_details = tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
    super("#{message}#{trip_details}")
    @tripped_by = tripped_by
  end
end
|
68
|
+
class MaxSuccessiveErrorsExceededError < MonitoringError; end
|
69
|
+
class MaxErrorsExceededError < MonitoringError; end
|
70
|
+
class MaxErrorsInWindowExceededError < MonitoringError; end
|
71
|
+
|
72
|
+
# Raised when a sync job cannot be scheduled yet.
class JobSyncNotPossibleYetError < StandardError
  attr_accessor :sync_will_be_possible_at

  # message                  - optional description, placed first in the final message
  # sync_will_be_possible_at - optional time at which scheduling becomes possible;
  #                            when given it is stored on the error and mentioned
  #                            in the message
  def initialize(message = nil, sync_will_be_possible_at: nil)
    # Keep the value so that the public reader actually returns it.
    @sync_will_be_possible_at = sync_will_be_possible_at

    human_readable_errors = []

    human_readable_errors.push(message) unless message.nil?
    human_readable_errors.push("Content source was created too recently to schedule jobs, next job scheduling is possible at #{sync_will_be_possible_at}.") unless sync_will_be_possible_at.nil?

    super(human_readable_errors.join(' '))
  end
end
|
84
|
+
class PlatinumLicenseRequiredError < StandardError; end
|
85
|
+
class JobInterruptedError < StandardError; end
|
86
|
+
class JobCannotBeUpdatedError < StandardError; end
|
87
|
+
class SecretInvalidError < StandardError; end
|
88
|
+
class InvalidIndexingConfigurationError < StandardError; end
|
89
|
+
class InvalidTokenError < StandardError; end
|
90
|
+
class TokenRefreshFailedError < StandardError; end
|
91
|
+
class ConnectorNotAvailableError < StandardError; end
|
92
|
+
|
93
|
+
# For when we want to explicitly set a #cause but can't
|
94
|
+
class ExplicitlyCausedError < StandardError
|
95
|
+
attr_reader :reason
|
96
|
+
|
97
|
+
def initialize(reason)
|
98
|
+
@reason = reason
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
class PublishingFailedError < ExplicitlyCausedError; end
|
103
|
+
|
104
|
+
# Plain value object describing an API error: a numeric status code, a
# machine readable code and a human readable message.
class Error
  attr_reader :status_code, :code, :message

  def initialize(status_code, code, message)
    @status_code, @code, @message = status_code, code, message
  end

  # Returns the code and message as a Hash with String keys; the status code
  # is not part of the Hash.
  def to_h
    { 'code' => code, 'message' => message }
  end
end
|
120
|
+
|
121
|
+
class HealthCheckFailedError < StandardError
|
122
|
+
def initialize(msg = nil)
|
123
|
+
super("Health check failed for 3rd-party service: #{msg}")
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
|
128
|
+
INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
|
129
|
+
UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
|
130
|
+
INVALID_ACCESS_TOKEN = Utility::Error.new(401, 'INVALID_ACCESS_TOKEN', 'Invalid/expired access token, please refresh the token')
|
131
|
+
TOKEN_REFRESH_ERROR = Utility::Error.new(401, 'TOKEN_REFRESH_ERROR', 'Failed to refresh token, please re-authenticate the application')
|
132
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'logger'
|
10
|
+
require 'elasticsearch'
|
11
|
+
|
12
|
+
module Utility
|
13
|
+
class EsClient < ::Elasticsearch::Client
|
14
|
+
class IndexingFailedError < StandardError
|
15
|
+
def initialize(message, error = nil)
|
16
|
+
super(message)
|
17
|
+
@cause = error
|
18
|
+
end
|
19
|
+
|
20
|
+
attr_reader :cause
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize(es_config)
|
24
|
+
super(connection_configs(es_config))
|
25
|
+
end
|
26
|
+
|
27
|
+
# Builds the options Hash handed to the Elasticsearch client constructor.
#
# es_config - settings readable with [] using the keys :api_key, :cloud_id,
#             :hosts, :retry_on_failure, :request_timeout, :log and :trace
#
# :cloud_id takes precedence over :hosts. Raises RuntimeError when neither is set.
def connection_configs(es_config)
  endpoint =
    if es_config[:cloud_id]
      { :cloud_id => es_config[:cloud_id] }
    elsif es_config[:hosts]
      { :hosts => es_config[:hosts] }
    else
      raise 'Either elasticsearch.cloud_id or elasticsearch.hosts should be configured.'
    end

  settings = es_config[:api_key] ? { :api_key => es_config[:api_key] } : {}
  settings.merge!(endpoint)
  settings[:retry_on_failure] = es_config[:retry_on_failure] || false
  settings[:request_timeout] = es_config[:request_timeout] || nil
  settings[:log] = es_config[:log] || false
  settings[:trace] = es_config[:trace] || false

  # if log or trace is activated, we use the application logger;
  # otherwise everything the client logs is sent to the null device (silence!)
  verbose = settings[:log] || settings[:trace]
  settings[:logger] = verbose ? Utility::Logger.logger : ::Logger.new(IO::NULL)
  settings
end
|
51
|
+
|
52
|
+
def bulk(arguments = {})
|
53
|
+
raise_if_necessary(super(arguments))
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Inspects a bulk API response and raises when Elasticsearch reported errors.
#
# response - parsed bulk response, read through the 'errors' and 'items' keys
#
# Returns the response untouched when 'errors' is falsy.
# Raises IndexingFailedError otherwise. The message quotes the first failed
# item (in response order) together with a trace id, and the same trace id is
# written to the log next to the full item so the two can be correlated.
def raise_if_necessary(response)
  return response unless response['errors']

  # Bulk response items are keyed by the action that produced them.
  operations = %w[index create update delete]
  first_error = Array(response['items']).find do |item|
    operations.any? { |op| item.has_key?(op) && item[op].has_key?('error') }
  end

  unless first_error
    raise IndexingFailedError.new('Failed to index documents into Elasticsearch due to unknown error. Try enabling tracing for Elasticsearch and checking the logs.')
  end

  trace_id = Utility::Logger.generate_trace_id
  error_json = first_error.to_json
  Utility::Logger.error("[#{trace_id}] Failed to index documents into Elasticsearch. First error in response is: #{error_json}")
  short_message = Utility::Logger.abbreviated_message(error_json)
  raise IndexingFailedError.new("Failed to index documents into Elasticsearch with an error '#{short_message}'. Look up the error ID [#{trace_id}] in the application logs to see the full error message.")
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'bson'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Utility
|
13
|
+
# Helpers for reporting messages and exceptions through Utility::Logger.
class ExceptionTracking
  # Logs a message together with its context at error level.
  #
  # When the method is called from a rescue block, our return value may leak
  # outside of its intended scope, so nil is returned explicitly to be safe.
  def self.capture_message(message, context = {})
    Utility::Logger.error("Error: #{message}. Context: #{context.inspect}")
    nil
  end

  # Logs the stack trace of the exception (causes included) and, when a
  # context is given, the context as well.
  def self.capture_exception(exception, context = {})
    Utility::Logger.error(generate_stack_trace(exception))
    Utility::Logger.error("Context: #{context.inspect}") if context
  end

  # Logs the optional message first, then the stack trace of the exception.
  def self.log_exception(exception, message = nil)
    Utility::Logger.error(message) if message
    Utility::Logger.error(generate_stack_trace(exception))
  end

  # Gives the exception an #id method backed by a lazily generated BSON object
  # id, unless the exception already responds to #id.
  def self.augment_exception(exception)
    return if exception.respond_to?(:id)

    exception.define_singleton_method(:id) do
      @error_id ||= BSON::ObjectId.new.to_s
    end
  end

  # Builds a one-line description from the parts that are present, joined
  # with ': '. The exception id, when available, is added to the context under
  # :message_id unless the context already defines that key.
  def self.generate_error_message(exception, message, context)
    if exception.respond_to?(:id)
      context = { :message_id => exception.id }.merge(context || {})
    end
    context_message = context && "Context: #{context.inspect}"

    parts = ['Exception', message, exception.class.to_s, exception.message, context_message].compact
    parts.map { |part| part.to_s.dup.force_encoding('UTF-8') }.join(': ')
  end

  # Returns the full message of the exception followed by the full message of
  # every exception in its cause chain, tagged as UTF-8.
  def self.generate_stack_trace(exception)
    trace = exception.full_message

    current = exception
    loop do
      parent = current.cause
      # stop on a self-referencing cause or at the end of the chain
      break if parent == current
      break unless parent

      current = parent
      trace << "Cause:\n#{current.full_message}"
    end

    trace.dup.force_encoding('UTF-8')
  end
end
|
64
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
class ExtensionMappingUtil
|
13
|
+
@extension_to_mime = {
|
14
|
+
:doc => %w[
|
15
|
+
application/x-tika-msoffice
|
16
|
+
application/msword
|
17
|
+
].freeze,
|
18
|
+
:docx => %w[
|
19
|
+
application/x-tika-ooxml
|
20
|
+
application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
21
|
+
application/vnd.openxmlformats-officedocument.wordprocessingml.template
|
22
|
+
application/vnd.ms-word.template.macroenabled.12
|
23
|
+
application/vnd.ms-word.document.macroenabled.12
|
24
|
+
].freeze,
|
25
|
+
:html => %w[
|
26
|
+
text/html
|
27
|
+
application/xhtml+xml
|
28
|
+
].freeze,
|
29
|
+
:odt => %w[
|
30
|
+
application/x-vnd.oasis.opendocument.graphics-template
|
31
|
+
application/vnd.sun.xml.writer application/x-vnd.oasis.opendocument.text
|
32
|
+
application/x-vnd.oasis.opendocument.text-web
|
33
|
+
application/x-vnd.oasis.opendocument.spreadsheet-template
|
34
|
+
application/vnd.oasis.opendocument.formula-template
|
35
|
+
application/vnd.oasis.opendocument.presentation
|
36
|
+
application/vnd.oasis.opendocument.image-template
|
37
|
+
application/x-vnd.oasis.opendocument.graphics
|
38
|
+
application/vnd.oasis.opendocument.chart-template
|
39
|
+
application/vnd.oasis.opendocument.presentation-template
|
40
|
+
application/x-vnd.oasis.opendocument.image-template
|
41
|
+
application/vnd.oasis.opendocument.formula
|
42
|
+
application/x-vnd.oasis.opendocument.image
|
43
|
+
application/vnd.oasis.opendocument.spreadsheet-template
|
44
|
+
application/x-vnd.oasis.opendocument.chart-template
|
45
|
+
application/x-vnd.oasis.opendocument.formula
|
46
|
+
application/vnd.oasis.opendocument.spreadsheet
|
47
|
+
application/vnd.oasis.opendocument.text-web
|
48
|
+
application/vnd.oasis.opendocument.text-template
|
49
|
+
application/vnd.oasis.opendocument.text
|
50
|
+
application/x-vnd.oasis.opendocument.formula-template
|
51
|
+
application/x-vnd.oasis.opendocument.spreadsheet
|
52
|
+
application/x-vnd.oasis.opendocument.chart
|
53
|
+
application/vnd.oasis.opendocument.text-master
|
54
|
+
application/x-vnd.oasis.opendocument.text-master
|
55
|
+
application/x-vnd.oasis.opendocument.text-template
|
56
|
+
application/vnd.oasis.opendocument.graphics
|
57
|
+
application/vnd.oasis.opendocument.graphics-template
|
58
|
+
application/x-vnd.oasis.opendocument.presentation
|
59
|
+
application/vnd.oasis.opendocument.image
|
60
|
+
application/x-vnd.oasis.opendocument.presentation-template
|
61
|
+
application/vnd.oasis.opendocument.chart
|
62
|
+
].freeze,
|
63
|
+
:one => %w[
|
64
|
+
application/onenote
|
65
|
+
application/msonenote
|
66
|
+
].freeze,
|
67
|
+
:pdf => %w[
|
68
|
+
application/pdf
|
69
|
+
].freeze,
|
70
|
+
:ppt => %w[
|
71
|
+
application/vnd.ms-powerpoint
|
72
|
+
].freeze,
|
73
|
+
:pptx => %w[
|
74
|
+
application/vnd.openxmlformats-officedocument.presentationml.presentation
|
75
|
+
application/vnd.ms-powerpoint.presentation.macroenabled.12
|
76
|
+
application/vnd.openxmlformats-officedocument.presentationml.template
|
77
|
+
application/vnd.ms-powerpoint.slideshow.macroenabled.12
|
78
|
+
application/vnd.ms-powerpoint.addin.macroenabled.12
|
79
|
+
application/vnd.openxmlformats-officedocument.presentationml.slideshow
|
80
|
+
].freeze,
|
81
|
+
:rtf => %w[
|
82
|
+
message/richtext
|
83
|
+
text/richtext
|
84
|
+
text/rtf
|
85
|
+
application/rtf
|
86
|
+
].freeze,
|
87
|
+
:txt => %w[
|
88
|
+
text/plain
|
89
|
+
].freeze,
|
90
|
+
:xls => %w[
|
91
|
+
application/x-tika-msoffice
|
92
|
+
application/vnd.ms-excel
|
93
|
+
application/vnd.ms-excel.sheet.3
|
94
|
+
application/vnd.ms-excel.sheet.2
|
95
|
+
application/vnd.ms-excel.workspace.3
|
96
|
+
application/vnd.ms-excel.workspace.4
|
97
|
+
application/vnd.ms-excel.sheet.4
|
98
|
+
].freeze,
|
99
|
+
:xlsx => %w[
|
100
|
+
application/x-tika-ooxml
|
101
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
102
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.template
|
103
|
+
application/vnd.ms-excel.addin.macroenabled.12
|
104
|
+
application/vnd.ms-excel.template.macroenabled.12
|
105
|
+
application/vnd.ms-excel.sheet.macroenabled.12
|
106
|
+
].freeze
|
107
|
+
}.with_indifferent_access.freeze
|
108
|
+
|
109
|
+
# Reverse lookup of @extension_to_mime: MIME type => extension (as a String).
# The table is built on first use, memoized and frozen.
# A MIME type listed under several extensions ends up mapped to the extension
# that is visited last while iterating @extension_to_mime.
def self.mime_to_extension
  @mime_to_extension ||= begin
    lookup = {}
    @extension_to_mime.each do |extension, mime_types|
      mime_types.each { |mime_type| lookup[mime_type] = extension.to_s }
    end
    lookup.with_indifferent_access.freeze
  end
end
|
114
|
+
|
115
|
+
def self.get_extension(mime_type)
|
116
|
+
mime_to_extension[mime_type.to_s.downcase]
|
117
|
+
end
|
118
|
+
|
119
|
+
def self.get_mime_types(extension)
|
120
|
+
@extension_to_mime[extension.to_s.downcase]
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
require 'securerandom'
require 'active_support/core_ext/module'
require 'active_support/core_ext/string/filters'
require 'ecs_logging/logger'
|
11
|
+
|
12
|
+
module Utility
|
13
|
+
# Process-wide logging facade. All methods are class methods and forward to a
# single lazily created logger instance.
class Logger
  SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
  MAX_SHORT_MESSAGE_LENGTH = 1000

  class << self
    # Formatter of the underlying logger.
    def formatter
      logger.formatter
    end

    def formatter=(value)
      logger.formatter = value
    end

    def level=(log_level)
      logger.level = log_level
    end

    # Returns the underlying logger, creating it on first use. An ECS logger
    # is used only when a global Settings object exists and enables
    # :ecs_logging; otherwise (including when Settings is not defined at all)
    # a plain stdlib logger writing to STDOUT is used.
    def logger
      @logger ||= ecs_logging_enabled? ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
    end

    # Defines one class method per supported level (fatal, error, warn, info,
    # debug). ECS loggers additionally receive the service name.
    SUPPORTED_LOG_LEVELS.each do |level|
      define_method(level) do |message|
        if logger.is_a?(EcsLogging::Logger)
          logger.public_send(level, message, service: { name: 'connectors-ruby' })
        else
          logger.public_send(level, message)
        end
      end
    end

    # Logs, at error level, the optional message (under prog_name) followed by
    # the exception message and its backtrace. The backtrace line is skipped
    # for exceptions that were never raised, since they have none.
    def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
      logger.error(prog_name) { message } if message
      return unless exception

      logger.error exception.message
      backtrace = exception.backtrace
      logger.error backtrace.join("\n") if backtrace
    end

    def new_line
      logger.info("\n")
    end

    # Returns a random UUID used to correlate user-facing errors with log lines.
    def generate_trace_id
      SecureRandom.uuid
    end

    # Collapses all whitespace runs into single spaces and shortens the result
    # to MAX_SHORT_MESSAGE_LENGTH characters.
    def abbreviated_message(message)
      message.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
    end

    private

    # True only when the host application defines ::Settings and turns
    # :ecs_logging on.
    def ecs_logging_enabled?
      Object.const_defined?(:Settings) && ::Settings[:ecs_logging]
    end
  end
end
|
58
|
+
end
|
data/lib/utility.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'utility/constants'
|
8
|
+
require 'utility/cron'
|
9
|
+
require 'utility/errors'
|
10
|
+
require 'utility/es_client'
|
11
|
+
require 'utility/environment'
|
12
|
+
require 'utility/exception_tracking'
|
13
|
+
require 'utility/extension_mapping_util'
|
14
|
+
require 'utility/logger'
|
15
|
+
require 'utility/elasticsearch/index/mappings'
|
16
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|