connectors_utility 8.4.0.1 → 8.5.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +32 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors_utility.rb +8 -2
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/scheduler.rb +160 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +84 -0
- data/lib/utility.rb +16 -0
- metadata +107 -8
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  # Central place for the string names shared across the connectors code:
  # index names, the service type of the crawler and the names of reserved
  # document fields.
  class Constants
    # Index names and the prefix used for content indices.
    CONNECTORS_INDEX = '.elastic-connectors'
    JOB_INDEX = '.elastic-connectors-sync-jobs'
    CONTENT_INDEX_PREFIX = 'search-'

    # Service type value identifying the Elastic crawler.
    CRAWLER_SERVICE_TYPE = 'elastic-crawler'

    # Names of the permission fields.
    ALLOW_FIELD = '_allow_permissions'
    DENY_FIELD = '_deny_permissions'

    # Names of the thumbnail fields and of the fields reserved for subextraction.
    THUMBNAIL_FIELDS = ['_thumbnail_80x100', '_thumbnail_310x430'].freeze
    SUBEXTRACTOR_RESERVED_FIELDS = ['_subextracted_as_of', '_subextracted_version'].freeze
  end
end
|
data/lib/utility/cron.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Utility
  # Regular expression describing a Quartz cron expression, taken from https://regex101.com/r/cU7zG2/1
  #
  # A previous version of this regexp allowed days of the week as [0-6], which is not correct because the
  # Kibana scheduler is using [1-7] for days of the week, aligned with the Quartz scheduler:
  # see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
  # Just replacing it with [1-7] would also be incorrect, since according to the Cron spec the days of the week
  # are 1-6 for Monday-Saturday, and 0 or 7 for Sunday, 7 being non-standard but still widely used.
  # So, we need to allow for 0-7.
  #
  # Capture groups read by Cron.quartz_to_crontab: 2 = seconds, 3 = minutes, 4 = hours, 5 = day of month,
  # 6 = month, 7 = day of week, 9 = year. The leading alternatives of group 1 (`$`, `#`, `\w+\s*=`) match
  # without setting any of those groups.
  CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/

  # see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
  module Cron
    # Month and day names accepted by CRON_REGEXP. They are removed before looking for the
    # unsupported modifiers because some of them contain the modifier letters (JUL, WED).
    NAMED_VALUES = /JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC|MON|TUE|WED|THU|FRI|SAT|SUN/

    # Quartz-only modifiers that cannot be expressed in a standard crontab field.
    UNSUPPORTED_MARKERS = %w[# L W].freeze

    # A day number inside a day-of-week field; numbers following a '/' are step values, not days.
    DAY_NUMBER = %r{(?<![/\d])\d+}

    # Ensures a single cron field does not use the `#`, `L` or `W` modifiers.
    #
    # @param expr [String] one field of a cron expression
    # @return [String] the field, unchanged
    # @raise [StandardError] when the field contains one of the unsupported modifiers
    def self.check(expr)
      without_names = expr.gsub(NAMED_VALUES, '')

      UNSUPPORTED_MARKERS.each do |marker|
        raise StandardError.new("Unsupported expression #{expr} with #{marker}") if without_names.include?(marker)
      end

      expr
    end

    # Converts a Quartz cron expression into a five-field crontab expression.
    #
    # Unix cron has five fields: minute, hour, day, month, and dayofweek.
    # Quartz adds seconds and year, which are dropped from the result.
    #
    # @param expression [String] Quartz cron expression
    # @return [String] "minutes hours day_of_month month day_of_week"
    # @raise [StandardError] when the expression does not describe a schedule or uses `#`, `L` or `W`
    def self.quartz_to_crontab(expression)
      # ? is not supported
      normalized = expression.tr('?', '*')

      fields = CRON_REGEXP.match(normalized)
      # CRON_REGEXP also matches blank lines, comments and assignments; in those cases the
      # schedule groups are nil, so the presence of the seconds group tells a real schedule apart.
      raise StandardError.new("Unknown format #{expression}") unless fields && fields[2]

      minutes = fields[3]
      hours = fields[4]
      day_of_month = check(fields[5])
      month = check(fields[6])
      day_of_week = scheduler_dow_to_crontab(check(fields[7])).to_s

      converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

      Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

      converted_expression
    end

    # As described above, Quartz uses 1-7 for days of the week, starting with Sunday,
    # while Unix cron uses 0-6, also starting with Sunday, plus 7 as an extra non-standard index for Sunday.
    # (see https://en.wikipedia.org/wiki/Cron for more details)
    # This means that we need to shift the Quartz days of the week that are between 1 and 7 by minus one, but we
    # also allow 0 in case it's not a quartz expression but already the cron standard: a field containing a 0
    # day is returned untouched.
    # See also the code in connectors-python that does the same thing: https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
    #
    # @param day [String, nil] day-of-week field (single day, range, list, step or day names)
    # @return [Integer] the shifted day when the field is a single non-zero number
    # @return [String, nil] the converted field otherwise; fields without digits are returned as given
    def self.scheduler_dow_to_crontab(day)
      return day unless /\d/.match?(day)

      day_numbers = day.scan(DAY_NUMBER).map(&:to_i)
      # a 0 is not a valid Quartz day, so the field is assumed to follow the cron standard already
      return day if day_numbers.empty? || day_numbers.any?(&:zero?)

      # a lone number keeps being returned as an Integer, as callers may rely on it
      return day.to_i - 1 if /\A\d+\z/.match?(day)

      # ranges, lists and steps: shift every day number, keeping separators and step values
      day.gsub(DAY_NUMBER) { |number| (number.to_i - 1).to_s }
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
|
8
|
+
require 'utility/logger'
|
9
|
+
require 'active_support/core_ext/module'
|
10
|
+
|
11
|
+
module Utility
  module Environment
    # Prepares the process environment and runs the given block in it.
    #
    # Sets the TZ environment variable to UTC, applies the configured log level and then
    # runs the block with Ruby warnings either silenced or enabled.
    #
    # @param config [Hash] reads :log_level and, optionally, :elasticsearch => { :disable_warnings => ... }
    # @yield the code to run once the environment is set up
    # @return whatever Kernel.silence_warnings / Kernel.enable_warnings return for the block
    def self.set_execution_environment(config, &block)
      # Set UTC as the timezone
      ENV['TZ'] = 'UTC'
      Logger.level = config[:log_level]

      es_config = config[:elasticsearch]
      # Warnings are disabled unless the configuration explicitly says otherwise;
      # a missing elasticsearch section is treated like a missing :disable_warnings key.
      disable_warnings = es_config&.has_key?(:disable_warnings) ? es_config[:disable_warnings] : true

      if disable_warnings
        Logger.info('Disabling warnings')
        Kernel.silence_warnings(&block)
      else
        Logger.info('Enabling warnings')
        Kernel.enable_warnings(&block)
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'active_support/core_ext/string'
|
8
|
+
|
9
|
+
module Utility
  # Plain value object describing an error (class, message, stack trace and id) that can be
  # turned into a Hash with string keys.
  class DocumentError
    # keywords must be < 32kb, UTF-8 chars can be up to 3 bytes, thus 32k/3 ~= 10k
    # See https://github.com/elastic/workplace-search-team/issues/1723
    MAX_STACK_TRACE_LENGTH = 10_000

    attr_accessor :error_class, :error_message, :stack_trace, :error_id

    # @param stack_trace [String, nil] truncated to MAX_STACK_TRACE_LENGTH characters; nil is kept as nil
    def initialize(error_class, error_message, stack_trace, error_id)
      @error_class = error_class
      @error_message = error_message
      @error_id = error_id
      @stack_trace = stack_trace&.truncate(MAX_STACK_TRACE_LENGTH)
    end

    # @return [Hash] the four attributes under string keys
    def to_h
      {
        'error_class' => error_class,
        'error_message' => error_message,
        'stack_trace' => stack_trace,
        'error_id' => error_id
      }
    end
  end

  class ClientError < StandardError; end
  class EvictionWithNoProgressError < StandardError; end

  # Error carrying the cursors passed at construction time.
  class EvictionError < StandardError
    attr_accessor :cursors

    def initialize(message = nil, cursors: nil)
      super(message)
      @cursors = cursors
    end
  end

  # Error carrying a mandatory suspend_until value and optional cursors.
  class SuspendedJobError < StandardError
    attr_accessor :suspend_until, :cursors

    def initialize(message = nil, suspend_until:, cursors: nil)
      super(message)
      @suspend_until = suspend_until
      @cursors = cursors
    end
  end
  class ThrottlingError < SuspendedJobError; end
  class TransientServerError < SuspendedJobError; end
  class UnrecoverableServerError < StandardError; end
  class TransientSubextractorError < StandardError; end
  class JobDocumentLimitError < StandardError; end
  class JobClaimingError < StandardError; end

  # Error whose message is extended with the class and message of the error that tripped it.
  class MonitoringError < StandardError
    attr_accessor :tripped_by

    def initialize(message = nil, tripped_by: nil)
      tripped_by_details = tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''
      super("#{message}#{tripped_by_details}")
      @tripped_by = tripped_by
    end
  end
  class MaxSuccessiveErrorsExceededError < MonitoringError; end
  class MaxErrorsExceededError < MonitoringError; end
  class MaxErrorsInWindowExceededError < MonitoringError; end

  # Error whose message combines the given message with a note about when the next job
  # scheduling is possible.
  class JobSyncNotPossibleYetError < StandardError
    attr_accessor :sync_will_be_possible_at

    def initialize(message = nil, sync_will_be_possible_at: nil)
      human_readable_errors = []

      human_readable_errors.push(message) unless message.nil?
      human_readable_errors.push("Content source was created too recently to schedule jobs, next job scheduling is possible at #{sync_will_be_possible_at}.") unless sync_will_be_possible_at.nil?

      super(human_readable_errors.join(' '))
      # keep the value so that the sync_will_be_possible_at accessor actually returns it
      @sync_will_be_possible_at = sync_will_be_possible_at
    end
  end
  class PlatinumLicenseRequiredError < StandardError; end
  class JobInterruptedError < StandardError; end
  class JobCannotBeUpdatedError < StandardError; end
  class SecretInvalidError < StandardError; end
  class InvalidIndexingConfigurationError < StandardError; end
  class InvalidTokenError < StandardError; end
  class TokenRefreshFailedError < StandardError; end
  class ConnectorNotAvailableError < StandardError; end

  # For when we want to explicitly set a #cause but can't
  class ExplicitlyCausedError < StandardError
    attr_reader :reason

    def initialize(reason)
      @reason = reason
    end
  end

  class PublishingFailedError < ExplicitlyCausedError; end

  # Value object pairing a status code with an error code and a message.
  class Error
    attr_reader :status_code, :code, :message

    def initialize(status_code, code, message)
      @status_code = status_code
      @code = code
      @message = message
    end

    # @return [Hash] 'code' and 'message' only; the status code is not part of the Hash
    def to_h
      {
        'code' => @code,
        'message' => @message
      }
    end
  end

  # Error whose message is prefixed with a fixed health check text.
  class HealthCheckFailedError < StandardError
    def initialize(msg = nil)
      super("Health check failed for 3rd-party service: #{msg}")
    end
  end

  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
  INVALID_ACCESS_TOKEN = Utility::Error.new(401, 'INVALID_ACCESS_TOKEN', 'Invalid/expired access token, please refresh the token')
  TOKEN_REFRESH_ERROR = Utility::Error.new(401, 'TOKEN_REFRESH_ERROR', 'Failed to refresh token, please re-authenticate the application')
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'logger'
|
10
|
+
require 'elasticsearch'
|
11
|
+
|
12
|
+
module Utility
  # Elasticsearch client that builds its connection settings from the application
  # configuration and raises when a bulk response reports failures.
  class EsClient < ::Elasticsearch::Client
    # Raised when a bulk response contains errors.
    class IndexingFailedError < StandardError
      # @param message [String]
      # @param error [Exception, nil] explicit cause of the failure
      def initialize(message, error = nil)
        super(message)
        @cause = error
      end

      # @return [Exception, nil] the explicitly given cause, otherwise the cause Ruby recorded
      #   when the error was raised
      def cause
        @cause || super
      end
    end

    # Keys under which a bulk response item reports the outcome of an operation.
    BULK_OPERATIONS = %w[index create update delete].freeze

    # @param es_config [Hash] see #connection_configs
    def initialize(es_config)
      super(connection_configs(es_config))
    end

    # Translates the application configuration into the options given to the client.
    #
    # @param es_config [Hash] reads :api_key, :cloud_id, :hosts, :retry_on_failure,
    #   :request_timeout, :log and :trace
    # @return [Hash] client options
    # @raise [RuntimeError] when neither :cloud_id nor :hosts is configured
    def connection_configs(es_config)
      configs = {}
      configs[:api_key] = es_config[:api_key] if es_config[:api_key]
      if es_config[:cloud_id]
        configs[:cloud_id] = es_config[:cloud_id]
      elsif es_config[:hosts]
        configs[:hosts] = es_config[:hosts]
      else
        raise 'Either elasticsearch.cloud_id or elasticsearch.hosts should be configured.'
      end
      configs[:retry_on_failure] = es_config[:retry_on_failure] || false
      configs[:request_timeout] = es_config[:request_timeout] || nil
      configs[:log] = es_config[:log] || false
      configs[:trace] = es_config[:trace] || false

      # if log or trace is activated, we use the application logger
      configs[:logger] = if configs[:log] || configs[:trace]
                           Utility::Logger.logger
                         else
                           # silence!
                           ::Logger.new(IO::NULL)
                         end
      configs
    end

    # Same as the parent's bulk, but fails loudly when the response reports errors.
    #
    # @raise [IndexingFailedError] when the response has its 'errors' flag set
    def bulk(arguments = {})
      raise_if_necessary(super(arguments))
    end

    private

    # @return the response when it does not report errors
    # @raise [IndexingFailedError] otherwise; the first failing item is logged together with
    #   the error ID mentioned in the raised message
    def raise_if_necessary(response)
      return response unless response['errors']

      first_error = (response['items'] || []).find do |item|
        BULK_OPERATIONS.any? { |operation| item[operation]&.key?('error') }
      end

      unless first_error
        raise IndexingFailedError.new('Failed to index documents into Elasticsearch due to unknown error. Try enabling tracing for Elasticsearch and checking the logs.')
      end

      trace_id = Utility::Logger.generate_trace_id
      # the error ID is part of the log line so that the ID from the raised message can be found in the logs
      Utility::Logger.error("Failed to index documents into Elasticsearch (error ID [#{trace_id}]). First error in response is: #{first_error.to_json}")
      short_message = Utility::Logger.abbreviated_message(first_error.to_json)
      raise IndexingFailedError.new("Failed to index documents into Elasticsearch with an error '#{short_message}'. Look up the error ID [#{trace_id}] in the application logs to see the full error message.")
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'bson'
|
10
|
+
require 'utility/logger'
|
11
|
+
|
12
|
+
module Utility
  # Helpers to log messages and exceptions through Utility::Logger.
  class ExceptionTracking
    class << self
      # Logs the message and its context as an error.
      #
      # @return [nil]
      def capture_message(message, context = {})
        Utility::Logger.error("Error: #{message}. Context: #{context.inspect}")

        # When the method is called from a rescue block, our return value may leak outside of its
        # intended scope, so let's explicitly return nil here to be safe.
        nil
      end

      # Logs the stack trace of the exception (including its causes), then the context unless it is nil.
      def capture_exception(exception, context = {})
        Utility::Logger.log_stacktrace(generate_stack_trace(exception))
        Utility::Logger.error("Context: #{context.inspect}") if context
      end

      # Logs the optional message, then the stack trace of the exception (including its causes).
      def log_exception(exception, message = nil)
        Utility::Logger.error(message) if message
        Utility::Logger.log_stacktrace(generate_stack_trace(exception))
      end

      # Adds an #id method to the exception, unless it already responds to one.
      # The id is generated on first use and memoized on the exception.
      def augment_exception(exception)
        unless exception.respond_to?(:id)
          exception.instance_eval do
            def id
              @error_id ||= BSON::ObjectId.new.to_s
            end
          end
        end
      end

      # Builds a one-line description: 'Exception', the message, the exception class, the
      # exception message and the context, joined with ': ' (nil parts are skipped).
      # When the exception responds to #id, the id is added to the context as :message_id.
      #
      # @return [String] UTF-8 encoded description
      def generate_error_message(exception, message, context)
        context = { :message_id => exception.id }.merge(context || {}) if exception.respond_to?(:id)
        context_message = context && "Context: #{context.inspect}"
        ['Exception', message, exception.class.to_s, exception.message, context_message]
          .compact
          .map { |part| part.to_s.dup.force_encoding('UTF-8') }
          .join(': ')
      end

      # Builds the text logged for an exception: its full message followed by a
      # "Cause:" section for every exception in its cause chain.
      #
      # @return [String] UTF-8 encoded text
      def generate_stack_trace(exception)
        # highlighting is switched off explicitly: by default it depends on whether stderr is a
        # terminal, which would put ANSI escape sequences into the log output
        full_message = exception.full_message(:highlight => false)

        cause = exception
        while cause.cause != cause && (cause = cause.cause)
          full_message << "Cause:\n#{cause.full_message(:highlight => false)}"
        end

        full_message.dup.force_encoding('UTF-8')
      end
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
|
11
|
+
module Utility
|
12
|
+
class ExtensionMappingUtil
|
13
|
+
@extension_to_mime = {
|
14
|
+
:doc => %w[
|
15
|
+
application/x-tika-msoffice
|
16
|
+
application/msword
|
17
|
+
].freeze,
|
18
|
+
:docx => %w[
|
19
|
+
application/x-tika-ooxml
|
20
|
+
application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
21
|
+
application/vnd.openxmlformats-officedocument.wordprocessingml.template
|
22
|
+
application/vnd.ms-word.template.macroenabled.12
|
23
|
+
application/vnd.ms-word.document.macroenabled.12
|
24
|
+
].freeze,
|
25
|
+
:html => %w[
|
26
|
+
text/html
|
27
|
+
application/xhtml+xml
|
28
|
+
].freeze,
|
29
|
+
:odt => %w[
|
30
|
+
application/x-vnd.oasis.opendocument.graphics-template
|
31
|
+
application/vnd.sun.xml.writer application/x-vnd.oasis.opendocument.text
|
32
|
+
application/x-vnd.oasis.opendocument.text-web
|
33
|
+
application/x-vnd.oasis.opendocument.spreadsheet-template
|
34
|
+
application/vnd.oasis.opendocument.formula-template
|
35
|
+
application/vnd.oasis.opendocument.presentation
|
36
|
+
application/vnd.oasis.opendocument.image-template
|
37
|
+
application/x-vnd.oasis.opendocument.graphics
|
38
|
+
application/vnd.oasis.opendocument.chart-template
|
39
|
+
application/vnd.oasis.opendocument.presentation-template
|
40
|
+
application/x-vnd.oasis.opendocument.image-template
|
41
|
+
application/vnd.oasis.opendocument.formula
|
42
|
+
application/x-vnd.oasis.opendocument.image
|
43
|
+
application/vnd.oasis.opendocument.spreadsheet-template
|
44
|
+
application/x-vnd.oasis.opendocument.chart-template
|
45
|
+
application/x-vnd.oasis.opendocument.formula
|
46
|
+
application/vnd.oasis.opendocument.spreadsheet
|
47
|
+
application/vnd.oasis.opendocument.text-web
|
48
|
+
application/vnd.oasis.opendocument.text-template
|
49
|
+
application/vnd.oasis.opendocument.text
|
50
|
+
application/x-vnd.oasis.opendocument.formula-template
|
51
|
+
application/x-vnd.oasis.opendocument.spreadsheet
|
52
|
+
application/x-vnd.oasis.opendocument.chart
|
53
|
+
application/vnd.oasis.opendocument.text-master
|
54
|
+
application/x-vnd.oasis.opendocument.text-master
|
55
|
+
application/x-vnd.oasis.opendocument.text-template
|
56
|
+
application/vnd.oasis.opendocument.graphics
|
57
|
+
application/vnd.oasis.opendocument.graphics-template
|
58
|
+
application/x-vnd.oasis.opendocument.presentation
|
59
|
+
application/vnd.oasis.opendocument.image
|
60
|
+
application/x-vnd.oasis.opendocument.presentation-template
|
61
|
+
application/vnd.oasis.opendocument.chart
|
62
|
+
].freeze,
|
63
|
+
:one => %w[
|
64
|
+
application/onenote
|
65
|
+
application/msonenote
|
66
|
+
].freeze,
|
67
|
+
:pdf => %w[
|
68
|
+
application/pdf
|
69
|
+
].freeze,
|
70
|
+
:ppt => %w[
|
71
|
+
application/vnd.ms-powerpoint
|
72
|
+
].freeze,
|
73
|
+
:pptx => %w[
|
74
|
+
application/vnd.openxmlformats-officedocument.presentationml.presentation
|
75
|
+
application/vnd.ms-powerpoint.presentation.macroenabled.12
|
76
|
+
application/vnd.openxmlformats-officedocument.presentationml.template
|
77
|
+
application/vnd.ms-powerpoint.slideshow.macroenabled.12
|
78
|
+
application/vnd.ms-powerpoint.addin.macroenabled.12
|
79
|
+
application/vnd.openxmlformats-officedocument.presentationml.slideshow
|
80
|
+
].freeze,
|
81
|
+
:rtf => %w[
|
82
|
+
message/richtext
|
83
|
+
text/richtext
|
84
|
+
text/rtf
|
85
|
+
application/rtf
|
86
|
+
].freeze,
|
87
|
+
:txt => %w[
|
88
|
+
text/plain
|
89
|
+
].freeze,
|
90
|
+
:xls => %w[
|
91
|
+
application/x-tika-msoffice
|
92
|
+
application/vnd.ms-excel
|
93
|
+
application/vnd.ms-excel.sheet.3
|
94
|
+
application/vnd.ms-excel.sheet.2
|
95
|
+
application/vnd.ms-excel.workspace.3
|
96
|
+
application/vnd.ms-excel.workspace.4
|
97
|
+
application/vnd.ms-excel.sheet.4
|
98
|
+
].freeze,
|
99
|
+
:xlsx => %w[
|
100
|
+
application/x-tika-ooxml
|
101
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
102
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.template
|
103
|
+
application/vnd.ms-excel.addin.macroenabled.12
|
104
|
+
application/vnd.ms-excel.template.macroenabled.12
|
105
|
+
application/vnd.ms-excel.sheet.macroenabled.12
|
106
|
+
].freeze
|
107
|
+
}.with_indifferent_access.freeze
|
108
|
+
|
109
|
+
# Reverse lookup table from MIME type to extension, built once from
# @extension_to_mime and then memoized.
#
# When a MIME type is listed under several extensions, the extension
# that comes last in @extension_to_mime wins.
#
# @return [Hash] frozen, indifferent-access Hash of MIME type => extension (String)
def self.mime_to_extension
  return @mime_to_extension if @mime_to_extension

  reverse_lookup = {}
  @extension_to_mime.each do |extension, mime_types|
    mime_types.each { |mime_type| reverse_lookup[mime_type] = extension.to_s }
  end

  @mime_to_extension = reverse_lookup.with_indifferent_access.freeze
end
|
114
|
+
|
115
|
+
# Looks up the extension registered for a MIME type, ignoring case.
#
# @param mime_type [#to_s]
# @return [String, nil] the extension, or nil when the MIME type is not in the table
def self.get_extension(mime_type)
  normalized_mime_type = mime_type.to_s.downcase
  mime_to_extension[normalized_mime_type]
end
|
118
|
+
|
119
|
+
# Looks up the MIME types registered for an extension, ignoring case.
#
# @param extension [#to_s]
# @return [Array<String>, nil] the MIME types, or nil when the extension is not in the table
def self.get_mime_types(extension)
  normalized_extension = extension.to_s.downcase
  @extension_to_mime[normalized_extension]
end
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
require 'securerandom'
require 'active_support/core_ext/module'
require 'active_support/core_ext/string/filters'
require 'ecs_logging/logger'
|
11
|
+
|
12
|
+
module Utility
  # Application-wide logging facade around a single, lazily created logger instance.
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
    MAX_SHORT_MESSAGE_LENGTH = 1000

    class << self
      # @return the formatter of the underlying logger
      def formatter
        logger.formatter
      end

      # Sets the formatter of the underlying logger.
      def formatter=(formatter)
        logger.formatter = formatter
      end

      # Sets the level of the underlying logger.
      def level=(log_level)
        logger.level = log_level
      end

      # The underlying logger, created on first use: an ECS logger when Settings[:ecs_logging]
      # is set, a standard library logger otherwise. Both write to STDOUT.
      def logger
        @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
      end

      # Defines fatal, error, warn, info and debug. With an ECS logger the extra ECS fields
      # are passed along with the message.
      SUPPORTED_LOG_LEVELS.each do |level|
        define_method(level) do |message|
          if logger.is_a?(EcsLogging::Logger)
            logger.public_send(level, message, extra_ecs_fields)
          else
            logger.public_send(level, message)
          end
        end
      end

      # Logs a stack trace as an error. With an ECS logger the trace is sent in the
      # error.stack_trace field instead of the message.
      def log_stacktrace(stacktrace)
        if logger.is_a?(EcsLogging::Logger)
          logger.error(nil, extra_ecs_fields.merge(:error => { :stack_trace => stacktrace }))
        else
          logger.error(stacktrace)
        end
      end

      # Logs, as errors, the message (if any), then the message and backtrace of the exception (if any).
      #
      # @param message [String, nil]
      # @param exception [Exception, nil]
      # @param prog_name [String, nil] program name used for the message entry
      def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
        logger.error(prog_name) { message } if message
        return unless exception

        logger.error exception.message
        # an exception that was never raised has no backtrace
        backtrace = exception.backtrace
        logger.error backtrace.join("\n") if backtrace
      end

      # Logs a line break at info level.
      def new_line
        logger.info("\n")
      end

      # @return [String] a random UUID
      def generate_trace_id
        SecureRandom.uuid
      end

      # Collapses all whitespace runs into single spaces, strips the result and truncates it
      # to MAX_SHORT_MESSAGE_LENGTH characters.
      #
      # @param message [#to_s] nil is treated as an empty message
      # @return [String]
      def abbreviated_message(message)
        message.to_s.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
      end

      private

      # Fields added to every entry written through an ECS logger.
      def extra_ecs_fields
        {
          :labels => { :index_date => Time.now.strftime('%Y.%m.%d') },
          :log => { :logger => logger.progname },
          :service => {
            :type => 'connectors-ruby',
            :version => Settings.version
          },
          :process => {
            :pid => Process.pid,
            :name => $PROGRAM_NAME,
            :thread => Thread.current.object_id
          }
        }
      end
    end
  end
end
|
data/lib/utility.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'utility/constants'
|
8
|
+
require 'utility/cron'
|
9
|
+
require 'utility/errors'
|
10
|
+
require 'utility/es_client'
|
11
|
+
require 'utility/environment'
|
12
|
+
require 'utility/exception_tracking'
|
13
|
+
require 'utility/extension_mapping_util'
|
14
|
+
require 'utility/logger'
|
15
|
+
require 'utility/elasticsearch/index/mappings'
|
16
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|