connectors_sdk 8.3.0.0.pre.20220414T060419Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/base/adapter.rb +118 -0
- data/lib/connectors_sdk/base/config.rb +27 -0
- data/lib/connectors_sdk/base/custom_client.rb +112 -0
- data/lib/connectors_sdk/base/extractor.rb +256 -0
- data/lib/connectors_sdk/base/registry.rb +32 -0
- data/lib/connectors_sdk/office365/adapter.rb +153 -0
- data/lib/connectors_sdk/office365/config.rb +37 -0
- data/lib/connectors_sdk/office365/custom_client.rb +319 -0
- data/lib/connectors_sdk/office365/extractor.rb +230 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/adapter.rb +47 -0
- data/lib/connectors_sdk/share_point/authorization.rb +91 -0
- data/lib/connectors_sdk/share_point/extractor.rb +31 -0
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +117 -0
- data/lib/connectors_sdk.rb +16 -0
- data/lib/connectors_shared/constants.rb +14 -0
- data/lib/connectors_shared/errors.rb +126 -0
- data/lib/connectors_shared/exception_tracking.rb +39 -0
- data/lib/connectors_shared/extension_mapping_util.rb +123 -0
- data/lib/connectors_shared/logger.rb +33 -0
- data/lib/connectors_shared/monitor.rb +99 -0
- data/lib/connectors_shared.rb +12 -0
- metadata +114 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
|
10
|
+
require 'bson'
|
11
|
+
require 'connectors_shared/logger'
|
12
|
+
|
13
|
+
module ConnectorsShared
  # Thin facade over Swiftype::ExceptionTracking so connector code has a
  # single place to report messages and exceptions.
  class ExceptionTracking
    # Forwards +message+ and +context+ unchanged to
    # Swiftype::ExceptionTracking.capture_message.
    def self.capture_message(message, context = {})
      Swiftype::ExceptionTracking.capture_message(message, context)
    end

    # Hands +exception+ to Swiftype::ExceptionTracking.log_exception, with
    # +context+ wrapped under the :context key.
    #
    # NOTE(review): this calls log_exception rather than a capture_* method and
    # passes :context where #log_exception below passes a message - confirm
    # this matches the Swiftype::ExceptionTracking signature.
    def self.capture_exception(exception, context = {})
      Swiftype::ExceptionTracking.log_exception(exception, context: context)
    end

    # Logs +exception+ (with an optional +message+) through
    # Swiftype::ExceptionTracking, using the shared connectors logger.
    def self.log_exception(exception, message = nil)
      shared_logger = ConnectorsShared::Logger.logger
      Swiftype::ExceptionTracking.log_exception(exception, message, logger: shared_logger)
    end

    # Gives +exception+ an #id method when it does not already respond to one.
    # The id is a BSON::ObjectId string, generated on first call and memoized
    # on the exception in @error_id. Exceptions that already respond to :id
    # are left untouched.
    def self.augment_exception(exception)
      return if exception.respond_to?(:id)

      exception.define_singleton_method(:id) do
        @error_id ||= BSON::ObjectId.new.to_s
      end
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
|
11
|
+
module ConnectorsShared
  # Two-way lookup between file extensions and the MIME types treated as
  # belonging to them.
  class ExtensionMappingUtil
    # Extension => ordered list of MIME types. Some MIME types appear under
    # more than one extension (e.g. application/x-tika-msoffice under both doc
    # and xls); in the reverse lookup the extension listed last wins.
    @extension_to_mime = {
      doc: %w[
        application/x-tika-msoffice
        application/msword
      ].freeze,
      docx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.openxmlformats-officedocument.wordprocessingml.template
        application/vnd.ms-word.template.macroenabled.12
        application/vnd.ms-word.document.macroenabled.12
      ].freeze,
      html: %w[
        text/html
        application/xhtml+xml
      ].freeze,
      odt: %w[
        application/x-vnd.oasis.opendocument.graphics-template
        application/vnd.sun.xml.writer
        application/x-vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.text-web
        application/x-vnd.oasis.opendocument.spreadsheet-template
        application/vnd.oasis.opendocument.formula-template
        application/vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image-template
        application/x-vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.chart-template
        application/vnd.oasis.opendocument.presentation-template
        application/x-vnd.oasis.opendocument.image-template
        application/vnd.oasis.opendocument.formula
        application/x-vnd.oasis.opendocument.image
        application/vnd.oasis.opendocument.spreadsheet-template
        application/x-vnd.oasis.opendocument.chart-template
        application/x-vnd.oasis.opendocument.formula
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.text-web
        application/vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.formula-template
        application/x-vnd.oasis.opendocument.spreadsheet
        application/x-vnd.oasis.opendocument.chart
        application/vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.graphics-template
        application/x-vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image
        application/x-vnd.oasis.opendocument.presentation-template
        application/vnd.oasis.opendocument.chart
      ].freeze,
      one: %w[
        application/onenote
        application/msonenote
      ].freeze,
      pdf: %w[
        application/pdf
      ].freeze,
      ppt: %w[
        application/vnd.ms-powerpoint
      ].freeze,
      pptx: %w[
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.ms-powerpoint.presentation.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.template
        application/vnd.ms-powerpoint.slideshow.macroenabled.12
        application/vnd.ms-powerpoint.addin.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.slideshow
      ].freeze,
      rtf: %w[
        message/richtext
        text/richtext
        text/rtf
        application/rtf
      ].freeze,
      txt: %w[
        text/plain
      ].freeze,
      xls: %w[
        application/x-tika-msoffice
        application/vnd.ms-excel
        application/vnd.ms-excel.sheet.3
        application/vnd.ms-excel.sheet.2
        application/vnd.ms-excel.workspace.3
        application/vnd.ms-excel.workspace.4
        application/vnd.ms-excel.sheet.4
      ].freeze,
      xlsx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.openxmlformats-officedocument.spreadsheetml.template
        application/vnd.ms-excel.addin.macroenabled.12
        application/vnd.ms-excel.template.macroenabled.12
        application/vnd.ms-excel.sheet.macroenabled.12
      ].freeze
    }.with_indifferent_access.freeze

    class << self
      # Reverse table (MIME type => extension string), built once from
      # @extension_to_mime and memoized. When a MIME type is listed under
      # several extensions, the later entry overwrites the earlier one.
      def mime_to_extension
        @mime_to_extension ||= begin
          pairs = @extension_to_mime.flat_map do |extension, mime_types|
            mime_types.map { |mime_type| [mime_type, extension.to_s] }
          end
          pairs.to_h.with_indifferent_access.freeze
        end
      end

      # Returns the extension recorded for +mime_type+ (stringified and
      # lower-cased before lookup), or nil when it is not in the table.
      def get_extension(mime_type)
        normalized = mime_type.to_s.downcase
        mime_to_extension[normalized]
      end

      # Returns the list of MIME types recorded for +extension+ (stringified
      # and lower-cased before lookup), or nil when it is not in the table.
      def get_mime_types(extension)
        normalized = extension.to_s.downcase
        @extension_to_mime[normalized]
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'stubs/app_config' unless defined?(Rails)
|
8
|
+
require 'active_support/core_ext/module'
|
9
|
+
|
10
|
+
module ConnectorsShared
  # Class-level logging facade: holds one logger object and exposes a
  # class method per supported severity that forwards to it.
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze

    class << self
      # formatter / formatter= are passed straight through to the wrapped logger.
      delegate :formatter, :formatter=, to: :logger

      # Replaces the wrapped logger with +logger+.
      def setup!(logger)
        @logger = logger
      end

      # The wrapped logger; falls back to AppConfig.connectors_logger (and
      # remembers it) when none has been set.
      def logger
        @logger = AppConfig.connectors_logger unless @logger
        @logger
      end

      # Defines Logger.fatal, .error, .warn, .info and .debug, each taking a
      # single message and forwarding it to the same-named method on the
      # wrapped logger.
      SUPPORTED_LOG_LEVELS.each do |severity|
        define_method(severity) { |message| logger.public_send(severity, message) }
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors_shared/errors'
|
10
|
+
require 'stubs/app_config' unless defined?(Rails)
|
11
|
+
require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
|
12
|
+
|
13
|
+
module ConnectorsShared
  # Tracks per-document successes and failures during a sync and raises once
  # a configured error threshold is crossed.
  #
  # Thresholds checked after every recorded error (see #raise_if_necessary):
  # * more than +max_consecutive_errors+ errors in a row,
  # * more than +max_errors+ errors in total,
  # * an error ratio above +max_error_ratio+ within the last +window_size+
  #   recorded documents (skipped when +window_size+ is not positive).
  #
  # #finalize additionally checks the error ratio over all recorded documents.
  class Monitor
    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue

    # @param connector [#log_debug] receives a debug line for every recorded error
    # @param max_errors [Numeric] total errors tolerated before raising
    # @param max_consecutive_errors [Numeric] consecutive errors tolerated before raising
    # @param max_error_ratio [Numeric] error ratio tolerated in the window and in #finalize
    # @param window_size [Integer] number of most recent documents in the sliding window
    # @param error_queue_size [Integer] maximum number of DocumentError entries kept in #error_queue
    def initialize(
      connector:,
      max_errors: AppConfig.content_source_sync_max_errors,
      max_consecutive_errors: AppConfig.content_source_sync_max_consecutive_errors,
      max_error_ratio: AppConfig.content_source_sync_max_error_ratio,
      window_size: AppConfig.content_source_sync_error_ratio_window_size,
      error_queue_size: 20
    )
      @connector = connector
      @max_errors = max_errors
      @max_consecutive_errors = max_consecutive_errors
      @max_error_ratio = max_error_ratio
      @window_size = window_size
      @total_error_count = 0
      @success_count = 0
      @consecutive_error_count = 0
      # Ring buffer: one slot per recent document, true when it errored.
      @window_errors = Array.new(window_size) { false }
      @window_index = 0
      @last_error = nil
      @error_queue_size = error_queue_size
      @error_queue = []
    end

    # Records a successfully processed document and ends any error streak.
    def note_success
      @consecutive_error_count = 0
      @success_count += 1
      # Overwrite the slot so an old error stops counting once the window wraps.
      record_in_window(false)
    end

    # Records a failed document, logs it through the connector, remembers it in
    # the bounded error queue and raises if a threshold is now exceeded.
    #
    # @param error [Exception] the failure to record
    # @param id identifier stored with the error; defaults to the current epoch seconds
    # @raise [MaxSuccessiveErrorsExceededError, MaxErrorsExceededError,
    #   MaxErrorsInWindowExceededError, JobInterruptedError] see #raise_if_necessary
    def note_error(error, id: Time.now.to_i)
      stack_trace = Swiftype::ExceptionTracking.generate_stack_trace(error)
      error_message = Swiftype::ExceptionTracking.generate_error_message(error, nil, nil)
      @connector.log_debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
      @total_error_count += 1
      @consecutive_error_count += 1
      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
      # Keep only the newest @error_queue_size entries.
      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
      record_in_window(true)
      @last_error = error

      raise_if_necessary
    end

    # Final check: raises MaxErrorsInWindowExceededError when the share of
    # errors among all recorded documents exceeds +max_error_ratio+.
    # Does nothing when no documents were recorded.
    def finalize
      total_documents = @total_error_count + @success_count
      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
      end
    end

    private

    # Picks the first exceeded threshold (consecutive, total, window ratio) and
    # raises the matching error; a last error that is exactly a
    # JobInterruptedError is re-raised as-is. Returns nil when nothing tripped.
    def raise_if_necessary
      error =
        if @consecutive_error_count > @max_consecutive_errors
          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
        elsif @total_error_count > @max_errors
          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
        elsif @last_error.instance_of?(JobInterruptedError)
          @last_error
        end

      raise_with_last_cause(error) if error
    end

    # Number of error slots currently set in the window, as a Float so the
    # ratio computed from it is not truncated by integer division.
    def num_errors_in_window
      @window_errors.count(&:itself).to_f
    end

    # Writes the outcome of the current document into the ring buffer and
    # advances to the next slot. A non-positive window size disables the
    # window entirely, which also avoids a modulo-by-zero.
    def record_in_window(errored)
      return unless @window_size > 0

      @window_errors[@window_index] = errored
      @window_index = (@window_index + 1) % @window_size
    end

    # Raises +error+ with @last_error attached as its +cause+: raising the last
    # error and rescuing it makes Ruby record it as the cause of the error
    # raised from the rescue clause.
    # NOTE(review): only StandardError is rescued here, so a @last_error outside
    # that hierarchy would propagate itself instead of +error+.
    def raise_with_last_cause(error)
      raise @last_error
    rescue StandardError
      raise error
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'connectors_shared/constants'
|
8
|
+
require 'connectors_shared/errors'
|
9
|
+
require 'connectors_shared/exception_tracking'
|
10
|
+
require 'connectors_shared/extension_mapping_util'
|
11
|
+
require 'connectors_shared/logger'
|
12
|
+
require 'connectors_shared/monitor'
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: connectors_sdk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 8.3.0.0.pre.20220414T060419Z
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elastic
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-04-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bson
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: mime-types
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: ''
|
56
|
+
email: ent-search-dev@elastic.co
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- LICENSE
|
62
|
+
- NOTICE.txt
|
63
|
+
- lib/connectors_sdk.rb
|
64
|
+
- lib/connectors_sdk/base/.config.rb.un~
|
65
|
+
- lib/connectors_sdk/base/.connectors.rb.un~
|
66
|
+
- lib/connectors_sdk/base/.registry.rb.un~
|
67
|
+
- lib/connectors_sdk/base/adapter.rb
|
68
|
+
- lib/connectors_sdk/base/config.rb
|
69
|
+
- lib/connectors_sdk/base/custom_client.rb
|
70
|
+
- lib/connectors_sdk/base/extractor.rb
|
71
|
+
- lib/connectors_sdk/base/registry.rb
|
72
|
+
- lib/connectors_sdk/office365/adapter.rb
|
73
|
+
- lib/connectors_sdk/office365/config.rb
|
74
|
+
- lib/connectors_sdk/office365/custom_client.rb
|
75
|
+
- lib/connectors_sdk/office365/extractor.rb
|
76
|
+
- lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~
|
77
|
+
- lib/connectors_sdk/share_point/adapter.rb
|
78
|
+
- lib/connectors_sdk/share_point/authorization.rb
|
79
|
+
- lib/connectors_sdk/share_point/extractor.rb
|
80
|
+
- lib/connectors_sdk/share_point/http_call_wrapper.rb
|
81
|
+
- lib/connectors_shared.rb
|
82
|
+
- lib/connectors_shared/constants.rb
|
83
|
+
- lib/connectors_shared/errors.rb
|
84
|
+
- lib/connectors_shared/exception_tracking.rb
|
85
|
+
- lib/connectors_shared/extension_mapping_util.rb
|
86
|
+
- lib/connectors_shared/logger.rb
|
87
|
+
- lib/connectors_shared/monitor.rb
|
88
|
+
homepage: https://github.com/elastic/connectors
|
89
|
+
licenses:
|
90
|
+
- Elastic-2.0
|
91
|
+
metadata:
|
92
|
+
revision: 427fad57fc69a8107c127ecbf99c6a5e803ea90c
|
93
|
+
repository: https://github.com/elastic/connectors.git
|
94
|
+
post_install_message:
|
95
|
+
rdoc_options: []
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: 1.3.1
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 2.7.7
|
111
|
+
signing_key:
|
112
|
+
specification_version: 4
|
113
|
+
summary: Gem containing apis used by Enterprise Search and implementations of Connectors
|
114
|
+
test_files: []
|