connectors_utility 8.4.0.0 → 8.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'utility/logger'
10
+
11
+ module Utility
12
+ # Taken from https://regex101.com/r/cU7zG2/1
13
+ # The previous regexp allowed [0-6] for days of the week, which is not correct because the Kibana scheduler
14
+ # uses [1-7] for days of the week, aligned with the Quartz scheduler: see http://www.quartz-scheduler.org/documentation/2.4.0-SNAPSHOT/tutorials/tutorial-lesson-06.html
15
+ # Simply replacing it with [1-7] would also be incorrect, since according to the Cron spec the days of the week
16
+ # are 1-6 for Monday-Saturday and 0 or 7 for Sunday (7 being non-standard but still widely used). So we need to
17
+ # allow 0-7.
18
# Capture groups: 1 = everything after leading whitespace, 2 = seconds, 3 = minutes, 4 = hours, 5 = day of month,
# 6 = month (numbers or JAN-DEC), 7 = day of week (0-7 or MON-SUN), 8 = optional whitespace before the year, 9 = year.
# The first alternative of group 1 also lets an empty string, a lone '#' or a 'name =' line match; in that case
# groups 2-9 are nil.
+ CRON_REGEXP = /^\s*($|#|\w+\s*=|(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?(?:,(?:[0-5]?\d)(?:(?:-|\/|,)(?:[0-5]?\d))?)*)\s+(\?|\*|(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?(?:,(?:[01]?\d|2[0-3])(?:(?:-|\/|,)(?:[01]?\d|2[0-3]))?)*)\s+(\?|\*|(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?(?:,(?:0?[1-9]|[12]\d|3[01])(?:(?:-|\/|,)(?:0?[1-9]|[12]\d|3[01]))?)*)\s+(\?|\*|(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?(?:,(?:[1-9]|1[012])(?:(?:-|\/|,)(?:[1-9]|1[012]))?(?:L|W)?)*|\?|\*|(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?(?:,(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(?:(?:-)(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC))?)*)\s+(\?|\*|(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?(?:,(?:[0-7])(?:(?:-|\/|,|#)(?:[0-7]))?(?:L)?)*|\?|\*|(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?(?:,(?:MON|TUE|WED|THU|FRI|SAT|SUN)(?:(?:-)(?:MON|TUE|WED|THU|FRI|SAT|SUN))?)*)(|\s)+(\?|\*|(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?(?:,(?:|\d{4})(?:(?:-|\/|,)(?:|\d{4}))?)*))$/
19
+
20
+ # see https://github.com/quartz-scheduler/quartz/blob/master/quartz-core/src/main/java/org/quartz/CronExpression.java
21
# Converts Quartz-style cron expressions into five-field Unix crontab expressions
# (minute, hour, day of month, month, day of week).
module Cron
  # Names accepted in the month / day-of-week fields that contain the letters 'L' or 'W'
  # without using the Quartz "L" / "W" modifiers.
  NAMES_WITH_SPECIAL_LETTERS = /JUL|WED/.freeze

  # A run of digits that is not the step following a '/' (and not the tail of a longer number).
  DAY_NUMBER = %r{(?<![/\d])\d+}.freeze

  # Rejects a single cron field that uses Quartz-only modifiers ('#', 'L', 'W').
  #
  # @param expr [String] one field of the expression
  # @return [String] the field, unchanged, when it is supported
  # @raise [StandardError] when the field contains '#', 'L' or 'W' as a modifier
  def self.check(expr)
    raise StandardError, "Unsupported expression #{expr} with #" if expr.include?('#')

    # Remove JUL / WED first so that their letters are not mistaken for modifiers.
    modifiers = expr.gsub(NAMES_WITH_SPECIAL_LETTERS, '')
    raise StandardError, "Unsupported expression #{expr} with L" if modifiers.include?('L')
    raise StandardError, "Unsupported expression #{expr} with W" if modifiers.include?('W')

    expr
  end

  # Converts a Quartz cron expression into a Unix crontab expression.
  #
  # @param expression [String] Quartz expression (seconds minutes hours day-of-month month day-of-week [year])
  # @return [String] "minutes hours day-of-month month day-of-week"
  # @raise [StandardError] when the expression does not match CRON_REGEXP, carries no schedule
  #   fields, or uses an unsupported modifier (see .check)
  def self.quartz_to_crontab(expression)
    # '?' is not supported by crontab; it is treated like '*'.
    normalized = expression.tr('?', '*')

    fields = normalized.match(CRON_REGEXP)
    # The regexp can match without capturing any schedule field (e.g. for an empty string),
    # so a successful match alone is not enough.
    raise StandardError, "Unknown format #{expression}" unless fields && fields[2]

    # Unix cron has five fields: minute, hour, day, month, and dayofweek.
    # Quartz adds seconds (group 2) and year (group 9), which are dropped.
    minutes = fields[3]
    hours = fields[4]
    day_of_month = check(fields[5])
    month = check(fields[6])
    day_of_week = scheduler_dow_to_crontab(check(fields[7])).to_s

    converted_expression = "#{minutes} #{hours} #{day_of_month} #{month} #{day_of_week}"

    Utility::Logger.debug("Converted Quartz Cron expression '#{expression}' to Standard Cron Expression '#{converted_expression}'")

    converted_expression
  end

  # Quartz numbers the days of the week 1-7 starting with Sunday, while Unix cron numbers them
  # 0-6 starting with Sunday, with 7 as an extra non-standard index for Sunday
  # (see https://en.wikipedia.org/wiki/Cron for more details).
  # Quartz day numbers are therefore shifted by minus one. A field that contains a 0 cannot be a
  # Quartz field, so it is assumed to be crontab-style already and is returned untouched.
  # See also the code in connectors-python that does the same thing: https://github.com/elastic/connectors-python/blob/main/connectors/quartz.py
  #
  # @param day [String] day-of-week field (number, list, range, step, name or '*')
  # @return [String, Integer] Integer for a single shifted day number; otherwise a String in which
  #   every day number (but not a step after '/') has been shifted
  def self.scheduler_dow_to_crontab(day)
    return day unless /\d/.match?(day)

    day_numbers = day.scan(DAY_NUMBER).map(&:to_i)
    return day if day_numbers.any?(&:zero?)

    # A lone number keeps the Integer return value callers have always received.
    return day.to_i - 1 if /\A\d+\z/.match?(day)

    day.gsub(DAY_NUMBER) { |number| (number.to_i - 1).to_s }
  end
end
81
+ end
@@ -12,63 +12,89 @@ module Utility
12
12
  module Mappings
13
13
  ENUM_IGNORE_ABOVE = 2048
14
14
 
15
- WORKPLACE_SEARCH_SUBEXTRACTION_STAMP_FIELD_MAPPINGS = {
16
- _subextracted_as_of: {
17
- type: 'date'
18
- },
19
- _subextracted_version: {
20
- type: 'keyword'
15
+ DATE_FIELD_MAPPING = {
16
+ type: 'date'
17
+ }
18
+
19
+ KEYWORD_FIELD_MAPPING = {
20
+ type: 'keyword'
21
+ }
22
+
23
+ TEXT_FIELD_MAPPING = {
24
+ type: 'text',
25
+ analyzer: 'iq_text_base',
26
+ index_options: 'freqs',
27
+ fields: {
28
+ 'stem': {
29
+ type: 'text',
30
+ analyzer: 'iq_text_stem'
31
+ },
32
+ 'prefix' => {
33
+ type: 'text',
34
+ analyzer: 'i_prefix',
35
+ search_analyzer: 'q_prefix',
36
+ index_options: 'docs'
37
+ },
38
+ 'delimiter' => {
39
+ type: 'text',
40
+ analyzer: 'iq_text_delimiter',
41
+ index_options: 'freqs'
42
+ },
43
+ 'joined': {
44
+ type: 'text',
45
+ analyzer: 'i_text_bigram',
46
+ search_analyzer: 'q_text_bigram',
47
+ index_options: 'freqs'
48
+ },
49
+ 'enum': {
50
+ type: 'keyword',
51
+ ignore_above: ENUM_IGNORE_ABOVE
52
+ }
21
53
  }
54
+ }
55
+
56
+ WORKPLACE_SEARCH_SUBEXTRACTION_STAMP_FIELD_MAPPINGS = {
57
+ _subextracted_as_of: DATE_FIELD_MAPPING,
58
+ _subextracted_version: KEYWORD_FIELD_MAPPING
59
+ }.freeze
60
+
61
+ CRAWLER_FIELD_MAPPINGS = {
62
+ additional_urls: KEYWORD_FIELD_MAPPING,
63
+ body_content: TEXT_FIELD_MAPPING,
64
+ domains: KEYWORD_FIELD_MAPPING,
65
+ headings: TEXT_FIELD_MAPPING,
66
+ last_crawled_at: DATE_FIELD_MAPPING,
67
+ links: KEYWORD_FIELD_MAPPING,
68
+ meta_description: TEXT_FIELD_MAPPING,
69
+ meta_keywords: KEYWORD_FIELD_MAPPING,
70
+ title: TEXT_FIELD_MAPPING,
71
+ url: KEYWORD_FIELD_MAPPING,
72
+ url_host: KEYWORD_FIELD_MAPPING,
73
+ url_path: KEYWORD_FIELD_MAPPING,
74
+ url_path_dir1: KEYWORD_FIELD_MAPPING,
75
+ url_path_dir2: KEYWORD_FIELD_MAPPING,
76
+ url_path_dir3: KEYWORD_FIELD_MAPPING,
77
+ url_port: KEYWORD_FIELD_MAPPING,
78
+ url_scheme: KEYWORD_FIELD_MAPPING
22
79
  }.freeze
23
80
 
24
# Builds the default index mappings: dynamic mapping enabled, a dynamic template that maps
# every string field with TEXT_FIELD_MAPPING, and an explicit keyword `id` property.
#
# @param connectors_index [Boolean] when truthy, the Workplace Search sub-extraction stamp
#   properties are added
# @param crawler_index [Boolean] when truthy, the crawler properties are added
# @return [Hash] a new mappings hash on every call
def self.default_text_fields_mappings(connectors_index:, crawler_index: false)
  # A fresh hash per call, so merging into it never touches the frozen constants.
  properties = { id: KEYWORD_FIELD_MAPPING }
  properties.merge!(WORKPLACE_SEARCH_SUBEXTRACTION_STAMP_FIELD_MAPPINGS) if connectors_index
  properties.merge!(CRAWLER_FIELD_MAPPINGS) if crawler_index

  {
    dynamic: true,
    dynamic_templates: [
      {
        data: {
          match_mapping_type: 'string',
          mapping: TEXT_FIELD_MAPPING
        }
      }
    ],
    properties: properties
  }
end
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'logger'
8
+ require 'utility/logger'
9
+ require 'active_support/core_ext/module'
10
+
11
module Utility
  module Environment
    # Prepares process-wide settings and then runs the given block.
    #
    # Sets the TZ environment variable to UTC, applies config[:log_level] to Logger, and runs
    # the block through Kernel.silence_warnings or Kernel.enable_warnings depending on
    # config[:elasticsearch][:disable_warnings].
    #
    # @param config [Hash] application configuration; reads :log_level and, optionally,
    #   :elasticsearch => { :disable_warnings => Boolean }
    # @yield the code to run with the chosen warning setting
    # @return whatever Kernel.silence_warnings / Kernel.enable_warnings returns for the block
    def self.set_execution_environment(config, &block)
      # Set UTC as the timezone
      ENV['TZ'] = 'UTC'
      Logger.level = config[:log_level]

      # A missing elasticsearch section is treated like an empty one instead of failing on nil.
      es_config = config[:elasticsearch] || {}
      # Warnings are disabled unless the key is explicitly present in the configuration.
      disable_warnings = es_config.fetch(:disable_warnings, true)

      if disable_warnings
        Logger.info('Disabling warnings')
        Kernel.silence_warnings(&block)
      else
        Logger.info('Enabling warnings')
        Kernel.enable_warnings(&block)
      end
    end
  end
end
@@ -0,0 +1,132 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'active_support/core_ext/string'
8
+
9
module Utility
  # Bundles the details of an error (class, message, stack trace, id) and exposes them as a
  # String-keyed Hash through #to_h.
  class DocumentError
    attr_accessor :error_class, :error_message, :stack_trace, :error_id

    # @param error_class [Object] stored as-is
    # @param error_message [Object] stored as-is
    # @param stack_trace [String, nil] truncated to 10_000 characters; nil is kept as nil
    # @param error_id [Object] stored as-is
    def initialize(error_class, error_message, stack_trace, error_id)
      @error_class = error_class
      @error_message = error_message
      @error_id = error_id

      # keywords must be < 32kb, UTF-8 chars can be up to 3 bytes, thus 32k/3 ~= 10k
      # See https://github.com/elastic/workplace-search-team/issues/1723
      # NOTE(review): String#truncate is not core Ruby; presumably provided by the
      # active_support string extensions this file requires.
      @stack_trace = stack_trace&.truncate(10_000)
    end

    def to_h
      {
        'error_class' => error_class,
        'error_message' => error_message,
        'stack_trace' => stack_trace,
        'error_id' => error_id
      }
    end
  end

  class ClientError < StandardError; end
  class EvictionWithNoProgressError < StandardError; end

  # Error that additionally carries the `cursors` value given at construction.
  class EvictionError < StandardError
    attr_accessor :cursors

    def initialize(message = nil, cursors: nil)
      super(message)
      @cursors = cursors
    end
  end

  # Error that carries a mandatory `suspend_until` value and optional `cursors`.
  class SuspendedJobError < StandardError
    attr_accessor :suspend_until, :cursors

    def initialize(message = nil, suspend_until:, cursors: nil)
      super(message)
      @suspend_until = suspend_until
      @cursors = cursors
    end
  end
  class ThrottlingError < SuspendedJobError; end
  class TransientServerError < SuspendedJobError; end
  class UnrecoverableServerError < StandardError; end
  class TransientSubextractorError < StandardError; end
  class JobDocumentLimitError < StandardError; end
  class JobClaimingError < StandardError; end

  # Error whose message is extended with the class and message of the error that tripped it.
  class MonitoringError < StandardError
    attr_accessor :tripped_by

    def initialize(message = nil, tripped_by: nil)
      super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
      @tripped_by = tripped_by
    end
  end
  class MaxSuccessiveErrorsExceededError < MonitoringError; end
  class MaxErrorsExceededError < MonitoringError; end
  class MaxErrorsInWindowExceededError < MonitoringError; end

  # Error whose message combines the given message with a sentence stating when the next job
  # scheduling becomes possible.
  class JobSyncNotPossibleYetError < StandardError
    attr_accessor :sync_will_be_possible_at

    def initialize(message = nil, sync_will_be_possible_at: nil)
      human_readable_errors = []

      human_readable_errors.push(message) unless message.nil?
      human_readable_errors.push("Content source was created too recently to schedule jobs, next job scheduling is possible at #{sync_will_be_possible_at}.") unless sync_will_be_possible_at.nil?

      super(human_readable_errors.join(' '))
      # Store the value so the public accessor actually returns what the caller passed in.
      @sync_will_be_possible_at = sync_will_be_possible_at
    end
  end
  class PlatinumLicenseRequiredError < StandardError; end
  class JobInterruptedError < StandardError; end
  class JobCannotBeUpdatedError < StandardError; end
  class SecretInvalidError < StandardError; end
  class InvalidIndexingConfigurationError < StandardError; end
  class InvalidTokenError < StandardError; end
  class TokenRefreshFailedError < StandardError; end
  class ConnectorNotAvailableError < StandardError; end

  # For when we want to explicitly set a #cause but can't
  class ExplicitlyCausedError < StandardError
    attr_reader :reason

    # Only stores the reason; no message is passed on, so #message falls back to the class name.
    def initialize(reason)
      @reason = reason
    end
  end

  class PublishingFailedError < ExplicitlyCausedError; end

  # Plain value object (not an exception) describing an error by status code, code and message.
  class Error
    attr_reader :status_code, :code, :message

    def initialize(status_code, code, message)
      @status_code = status_code
      @code = code
      @message = message
    end

    # status_code is not part of the hash.
    def to_h
      {
        'code' => @code,
        'message' => @message
      }
    end
  end

  class HealthCheckFailedError < StandardError
    def initialize(msg = nil)
      super("Health check failed for 3rd-party service: #{msg}")
    end
  end

  INTERNAL_SERVER_ERROR = Utility::Error.new(500, 'INTERNAL_SERVER_ERROR', 'Internal server error')
  INVALID_API_KEY = Utility::Error.new(401, 'INVALID_API_KEY', 'Invalid API key')
  UNSUPPORTED_AUTH_SCHEME = Utility::Error.new(401, 'UNSUPPORTED_AUTH_SCHEME', 'Unsupported authorization scheme')
  INVALID_ACCESS_TOKEN = Utility::Error.new(401, 'INVALID_ACCESS_TOKEN', 'Invalid/expired access token, please refresh the token')
  TOKEN_REFRESH_ERROR = Utility::Error.new(401, 'TOKEN_REFRESH_ERROR', 'Failed to refresh token, please re-authenticate the application')
end
@@ -0,0 +1,84 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'logger'
10
+ require 'elasticsearch'
11
+
12
+ module Utility
13
+ class EsClient < ::Elasticsearch::Client
14
# Raised when indexing documents fails. Exposes the optional underlying error through its own
# #cause reader, which is fed from the second constructor argument.
class IndexingFailedError < StandardError
  attr_reader :cause

  def initialize(message, error = nil)
    @cause = error
    super(message)
  end
end
22
+
23
# Creates the client: es_config is translated by connection_configs and the resulting options
# hash is handed to the parent class constructor.
+ def initialize(es_config)
24
+ super(connection_configs(es_config))
25
+ end
26
+
27
# Translates the application's Elasticsearch settings into a client options hash.
#
# Either :cloud_id or :hosts is required; :cloud_id wins when both are given. :api_key is only
# copied when present. :retry_on_failure, :log and :trace default to false, :request_timeout to nil.
#
# @param es_config [Hash] Elasticsearch settings
# @return [Hash] options including a :logger entry
# @raise [RuntimeError] when neither :cloud_id nor :hosts is configured
def connection_configs(es_config)
  endpoint_key = %i[cloud_id hosts].find { |key| es_config[key] }
  raise 'Either elasticsearch.cloud_id or elasticsearch.hosts should be configured.' unless endpoint_key

  settings = {}
  settings[:api_key] = es_config[:api_key] if es_config[:api_key]
  settings[endpoint_key] = es_config[endpoint_key]
  settings[:retry_on_failure] = es_config[:retry_on_failure] || false
  settings[:request_timeout] = es_config[:request_timeout] || nil
  settings[:log] = es_config[:log] || false
  settings[:trace] = es_config[:trace] || false

  # if log or trace is activated, we use the application logger; otherwise a logger writing
  # to the null device keeps the client silent
  verbose = settings[:log] || settings[:trace]
  settings[:logger] = verbose ? Utility::Logger.logger : ::Logger.new(IO::NULL)
  settings
end
51
+
52
# Sends the bulk request through the parent implementation and passes the response to
# raise_if_necessary, so a response flagged with errors raises instead of being returned.
+ def bulk(arguments = {})
53
+ raise_if_necessary(super(arguments))
54
+ end
55
+
56
+ private
57
+
58
# Inspects a bulk response and raises when it reports errors.
#
# @param response [Hash] bulk response; reads 'errors' and 'items'
# @return [Hash] the response, untouched, when 'errors' is falsy
# @raise [IndexingFailedError] when 'errors' is truthy; the message quotes the first failed
#   item when one can be found
def raise_if_necessary(response)
  return response unless response['errors']

  # Each item is keyed by the action that produced it. `find` stops at the first failing item,
  # so the error reported really is the first one in the response.
  first_error = response['items'].find do |item|
    %w[index create update delete].any? { |op| item.key?(op) && item[op].key?('error') }
  end

  unless first_error
    raise IndexingFailedError.new('Failed to index documents into Elasticsearch due to unknown error. Try enabling tracing for Elasticsearch and checking the logs.')
  end

  trace_id = Utility::Logger.generate_trace_id
  Utility::Logger.error("Failed to index documents into Elasticsearch. First error in response is: #{first_error.to_json}")
  short_message = Utility::Logger.abbreviated_message(first_error.to_json)
  raise IndexingFailedError.new("Failed to index documents into Elasticsearch with an error '#{short_message}'. Look up the error ID [#{trace_id}] in the application logs to see the full error message.")
end
83
+ end
84
+ end
@@ -0,0 +1,64 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'bson'
10
+ require 'utility/logger'
11
+
12
module Utility
  # Helpers that report messages and exceptions through Utility::Logger.
  class ExceptionTracking
    class << self
      # Logs the message together with the inspected context.
      #
      # @return [nil]
      def capture_message(message, context = {})
        Utility::Logger.error("Error: #{message}. Context: #{context.inspect}")

        # When the method is called from a rescue block, our return value may leak outside of its
        # intended scope, so let's explicitly return nil here to be safe.
        nil
      end

      # Logs the exception's stack trace and, unless context is nil or false, the context.
      def capture_exception(exception, context = {})
        Utility::Logger.error(generate_stack_trace(exception))
        Utility::Logger.error("Context: #{context.inspect}") if context
      end

      # Logs the optional message followed by the exception's stack trace.
      def log_exception(exception, message = nil)
        Utility::Logger.error(message) if message
        Utility::Logger.error(generate_stack_trace(exception))
      end

      # Gives the exception an #id method (a lazily generated, memoized BSON object id string)
      # unless it already responds to #id.
      def augment_exception(exception)
        unless exception.respond_to?(:id)
          exception.instance_eval do
            def id
              @error_id ||= BSON::ObjectId.new.to_s
            end
          end
        end
      end

      # Builds a one-line description: "Exception: <message>: <class>: <exception message>: Context: ...".
      # Nil parts are skipped; when the exception responds to #id it is added to the context
      # as :message_id.
      #
      # @return [String] UTF-8 tagged string
      def generate_error_message(exception, message, context)
        context = { :message_id => exception.id }.merge(context || {}) if exception.respond_to?(:id)
        context_message = context && "Context: #{context.inspect}"
        ['Exception', message, exception.class.to_s, exception.message, context_message]
          .compact
          .map { |part| part.to_s.dup.force_encoding('UTF-8') }
          .join(': ')
      end

      # Renders the exception's full message followed by the full message of every exception
      # in its cause chain, each introduced by "Cause:".
      #
      # @return [String] UTF-8 tagged string
      def generate_stack_trace(exception)
        full_message = readable_full_message(exception)

        cause = exception
        while cause.cause != cause && (cause = cause.cause)
          full_message << "Cause:\n#{readable_full_message(cause)}"
        end

        full_message.dup.force_encoding('UTF-8')
      end

      private

      # The text goes to the log rather than to a terminal, so the rendering must not depend on
      # whether stderr is a TTY: no highlighting, and always the same line order.
      def readable_full_message(exception)
        exception.full_message(highlight: false, order: :top)
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash'
10
+
11
+ module Utility
12
+ class ExtensionMappingUtil
13
# Class-level lookup table from file extension to the MIME types listed for it. The hash is
# converted with with_indifferent_access and frozen, and every MIME list is frozen too.
# Two MIME types are listed under more than one extension: application/x-tika-msoffice
# (doc and xls) and application/x-tika-ooxml (docx and xlsx). The reverse table built by
# mime_to_extension keeps only the extension that comes last in this hash (xls and xlsx).
+ @extension_to_mime = {
14
+ :doc => %w[
15
+ application/x-tika-msoffice
16
+ application/msword
17
+ ].freeze,
18
+ :docx => %w[
19
+ application/x-tika-ooxml
20
+ application/vnd.openxmlformats-officedocument.wordprocessingml.document
21
+ application/vnd.openxmlformats-officedocument.wordprocessingml.template
22
+ application/vnd.ms-word.template.macroenabled.12
23
+ application/vnd.ms-word.document.macroenabled.12
24
+ ].freeze,
25
+ :html => %w[
26
+ text/html
27
+ application/xhtml+xml
28
+ ].freeze,
29
+ :odt => %w[
30
+ application/x-vnd.oasis.opendocument.graphics-template
31
+ application/vnd.sun.xml.writer application/x-vnd.oasis.opendocument.text
32
+ application/x-vnd.oasis.opendocument.text-web
33
+ application/x-vnd.oasis.opendocument.spreadsheet-template
34
+ application/vnd.oasis.opendocument.formula-template
35
+ application/vnd.oasis.opendocument.presentation
36
+ application/vnd.oasis.opendocument.image-template
37
+ application/x-vnd.oasis.opendocument.graphics
38
+ application/vnd.oasis.opendocument.chart-template
39
+ application/vnd.oasis.opendocument.presentation-template
40
+ application/x-vnd.oasis.opendocument.image-template
41
+ application/vnd.oasis.opendocument.formula
42
+ application/x-vnd.oasis.opendocument.image
43
+ application/vnd.oasis.opendocument.spreadsheet-template
44
+ application/x-vnd.oasis.opendocument.chart-template
45
+ application/x-vnd.oasis.opendocument.formula
46
+ application/vnd.oasis.opendocument.spreadsheet
47
+ application/vnd.oasis.opendocument.text-web
48
+ application/vnd.oasis.opendocument.text-template
49
+ application/vnd.oasis.opendocument.text
50
+ application/x-vnd.oasis.opendocument.formula-template
51
+ application/x-vnd.oasis.opendocument.spreadsheet
52
+ application/x-vnd.oasis.opendocument.chart
53
+ application/vnd.oasis.opendocument.text-master
54
+ application/x-vnd.oasis.opendocument.text-master
55
+ application/x-vnd.oasis.opendocument.text-template
56
+ application/vnd.oasis.opendocument.graphics
57
+ application/vnd.oasis.opendocument.graphics-template
58
+ application/x-vnd.oasis.opendocument.presentation
59
+ application/vnd.oasis.opendocument.image
60
+ application/x-vnd.oasis.opendocument.presentation-template
61
+ application/vnd.oasis.opendocument.chart
62
+ ].freeze,
63
+ :one => %w[
64
+ application/onenote
65
+ application/msonenote
66
+ ].freeze,
67
+ :pdf => %w[
68
+ application/pdf
69
+ ].freeze,
70
+ :ppt => %w[
71
+ application/vnd.ms-powerpoint
72
+ ].freeze,
73
+ :pptx => %w[
74
+ application/vnd.openxmlformats-officedocument.presentationml.presentation
75
+ application/vnd.ms-powerpoint.presentation.macroenabled.12
76
+ application/vnd.openxmlformats-officedocument.presentationml.template
77
+ application/vnd.ms-powerpoint.slideshow.macroenabled.12
78
+ application/vnd.ms-powerpoint.addin.macroenabled.12
79
+ application/vnd.openxmlformats-officedocument.presentationml.slideshow
80
+ ].freeze,
81
+ :rtf => %w[
82
+ message/richtext
83
+ text/richtext
84
+ text/rtf
85
+ application/rtf
86
+ ].freeze,
87
+ :txt => %w[
88
+ text/plain
89
+ ].freeze,
90
+ :xls => %w[
91
+ application/x-tika-msoffice
92
+ application/vnd.ms-excel
93
+ application/vnd.ms-excel.sheet.3
94
+ application/vnd.ms-excel.sheet.2
95
+ application/vnd.ms-excel.workspace.3
96
+ application/vnd.ms-excel.workspace.4
97
+ application/vnd.ms-excel.sheet.4
98
+ ].freeze,
99
+ :xlsx => %w[
100
+ application/x-tika-ooxml
101
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
102
+ application/vnd.openxmlformats-officedocument.spreadsheetml.template
103
+ application/vnd.ms-excel.addin.macroenabled.12
104
+ application/vnd.ms-excel.template.macroenabled.12
105
+ application/vnd.ms-excel.sheet.macroenabled.12
106
+ ].freeze
107
+ }.with_indifferent_access.freeze
108
+
109
# Reverse lookup table (MIME type => extension as a String), derived from @extension_to_mime
# on first use and memoized. When a MIME type is listed under several extensions, the
# extension that comes last in @extension_to_mime wins.
#
# @return [Hash] frozen, indifferent-access hash
def self.mime_to_extension
  @mime_to_extension ||= begin
    reverse_table = {}
    @extension_to_mime.each do |extension, mime_types|
      mime_types.each { |mime_type| reverse_table[mime_type] = extension.to_s }
    end
    reverse_table.with_indifferent_access.freeze
  end
end
114
+
115
# Looks up the extension for a MIME type. The argument is converted to a String and
# downcased before the lookup.
#
# @param mime_type [#to_s] MIME type
# @return [String, nil] the extension, or nil when the MIME type is not in the table
def self.get_extension(mime_type)
  normalized_mime_type = mime_type.to_s.downcase
  mime_to_extension[normalized_mime_type]
end
118
+
119
# Looks up the MIME types listed for an extension. The argument is converted to a String and
# downcased before the lookup.
#
# @param extension [#to_s] file extension, without a leading dot
# @return [Array<String>, nil] the MIME types, or nil when the extension is not in the table
def self.get_mime_types(extension)
  normalized_extension = extension.to_s.downcase
  @extension_to_mime[normalized_extension]
end
122
+ end
123
+ end