connectors_sdk 8.3.0.0.pre.20220414T060419Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/base/adapter.rb +118 -0
- data/lib/connectors_sdk/base/config.rb +27 -0
- data/lib/connectors_sdk/base/custom_client.rb +112 -0
- data/lib/connectors_sdk/base/extractor.rb +256 -0
- data/lib/connectors_sdk/base/registry.rb +32 -0
- data/lib/connectors_sdk/office365/adapter.rb +153 -0
- data/lib/connectors_sdk/office365/config.rb +37 -0
- data/lib/connectors_sdk/office365/custom_client.rb +319 -0
- data/lib/connectors_sdk/office365/extractor.rb +230 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/adapter.rb +47 -0
- data/lib/connectors_sdk/share_point/authorization.rb +91 -0
- data/lib/connectors_sdk/share_point/extractor.rb +31 -0
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +117 -0
- data/lib/connectors_sdk.rb +16 -0
- data/lib/connectors_shared/constants.rb +14 -0
- data/lib/connectors_shared/errors.rb +126 -0
- data/lib/connectors_shared/exception_tracking.rb +39 -0
- data/lib/connectors_shared/extension_mapping_util.rb +123 -0
- data/lib/connectors_shared/logger.rb +33 -0
- data/lib/connectors_shared/monitor.rb +99 -0
- data/lib/connectors_shared.rb +12 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a44a80ba5fe5032b6254fb8a2db25516542df5818044d627b6ba31f652a7812f
|
4
|
+
data.tar.gz: 52d1245cc24ab6e1a42c00483ec4a8ece48ede4a6465c9b46b3bf5e18c979678
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7a4021e1d7b83ec7334913f1ef5e684e02296097f223e96610cb31eb5502c36f9b45b03eeaf80e876aac5bcc531f85d10445917459b86cedc30d6609b33f5351
|
7
|
+
data.tar.gz: ca04ab72f48ed6e76b3a0e0cc8c260f81779ea906325a782eb61456c9c5be231043d7a73ca9625df63459460de587c65aa352fb2e1150fc8c92a253536fcd634
|
data/LICENSE
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
Elastic License 2.0
|
2
|
+
|
3
|
+
URL: https://www.elastic.co/licensing/elastic-license
|
4
|
+
|
5
|
+
## Acceptance
|
6
|
+
|
7
|
+
By using the software, you agree to all of the terms and conditions below.
|
8
|
+
|
9
|
+
## Copyright License
|
10
|
+
|
11
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
12
|
+
non-sublicensable, non-transferable license to use, copy, distribute, make
|
13
|
+
available, and prepare derivative works of the software, in each case subject to
|
14
|
+
the limitations and conditions below.
|
15
|
+
|
16
|
+
## Limitations
|
17
|
+
|
18
|
+
You may not provide the software to third parties as a hosted or managed
|
19
|
+
service, where the service provides users with access to any substantial set of
|
20
|
+
the features or functionality of the software.
|
21
|
+
|
22
|
+
You may not move, change, disable, or circumvent the license key functionality
|
23
|
+
in the software, and you may not remove or obscure any functionality in the
|
24
|
+
software that is protected by the license key.
|
25
|
+
|
26
|
+
You may not alter, remove, or obscure any licensing, copyright, or other notices
|
27
|
+
of the licensor in the software. Any use of the licensor’s trademarks is subject
|
28
|
+
to applicable law.
|
29
|
+
|
30
|
+
## Patents
|
31
|
+
|
32
|
+
The licensor grants you a license, under any patent claims the licensor can
|
33
|
+
license, or becomes able to license, to make, have made, use, sell, offer for
|
34
|
+
sale, import and have imported the software, in each case subject to the
|
35
|
+
limitations and conditions in this license. This license does not cover any
|
36
|
+
patent claims that you cause to be infringed by modifications or additions to
|
37
|
+
the software. If you or your company make any written claim that the software
|
38
|
+
infringes or contributes to infringement of any patent, your patent license for
|
39
|
+
the software granted under these terms ends immediately. If your company makes
|
40
|
+
such a claim, your patent license ends immediately for work on behalf of your
|
41
|
+
company.
|
42
|
+
|
43
|
+
## Notices
|
44
|
+
|
45
|
+
You must ensure that anyone who gets a copy of any part of the software from you
|
46
|
+
also gets a copy of these terms.
|
47
|
+
|
48
|
+
If you modify the software, you must include in any modified copies of the
|
49
|
+
software prominent notices stating that you have modified the software.
|
50
|
+
|
51
|
+
## No Other Rights
|
52
|
+
|
53
|
+
These terms do not imply any licenses other than those expressly granted in
|
54
|
+
these terms.
|
55
|
+
|
56
|
+
## Termination
|
57
|
+
|
58
|
+
If you use the software in violation of these terms, such use is not licensed,
|
59
|
+
and your licenses will automatically terminate. If the licensor provides you
|
60
|
+
with a notice of your violation, and you cease all violation of this license no
|
61
|
+
later than 30 days after you receive that notice, your licenses will be
|
62
|
+
reinstated retroactively. However, if you violate these terms after such
|
63
|
+
reinstatement, any additional violation of these terms will cause your licenses
|
64
|
+
to terminate automatically and permanently.
|
65
|
+
|
66
|
+
## No Liability
|
67
|
+
|
68
|
+
*As far as the law allows, the software comes as is, without any warranty or
|
69
|
+
condition, and the licensor will not be liable to you for any damages arising
|
70
|
+
out of these terms or the use or nature of the software, under any kind of
|
71
|
+
legal claim.*
|
72
|
+
|
73
|
+
## Definitions
|
74
|
+
|
75
|
+
The **licensor** is the entity offering these terms, and the **software** is the
|
76
|
+
software the licensor makes available under these terms, including any portion
|
77
|
+
of it.
|
78
|
+
|
79
|
+
**you** refers to the individual or entity agreeing to these terms.
|
80
|
+
|
81
|
+
**your company** is any legal entity, sole proprietorship, or other kind of
|
82
|
+
organization that you work for, plus all organizations that have control over,
|
83
|
+
are under the control of, or are under common control with that
|
84
|
+
organization. **control** means ownership of substantially all the assets of an
|
85
|
+
entity, or the power to direct its management and policies by vote, contract, or
|
86
|
+
otherwise. Control can be direct or indirect.
|
87
|
+
|
88
|
+
**your licenses** are all the licenses granted to you for the software under
|
89
|
+
these terms.
|
90
|
+
|
91
|
+
**use** means anything you do with the software requiring one of your licenses.
|
92
|
+
|
93
|
+
**trademark** means trademarks, service marks, and similar rights.
|
data/NOTICE.txt
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'active_support/core_ext/array/wrap'
|
8
|
+
require 'active_support/core_ext/numeric/time'
|
9
|
+
require 'active_support/core_ext/object/deep_dup'
|
10
|
+
require 'connectors_shared'
|
11
|
+
require 'connectors_shared/extension_mapping_util'
|
12
|
+
require 'date'
|
13
|
+
require 'active_support/all'
|
14
|
+
require 'mime-types'
|
15
|
+
|
16
|
+
module ConnectorsSdk
|
17
|
+
module Base
|
18
|
+
class Adapter
|
19
|
+
# Field names to preserve: 'body' followed by the shared thumbnail field names
# and the subextractor-reserved field names. The list is built on first use,
# memoized on the class, and frozen (the array and every string in it).
def self.fields_to_preserve
  @fields_to_preserve ||= begin
    names = ['body']
    names.concat(ConnectorsShared::Constants::THUMBNAIL_FIELDS)
    names.concat(ConnectorsShared::Constants::SUBEXTRACTOR_RESERVED_FIELDS)
    names.map(&:freeze).freeze
  end
end
|
26
|
+
|
27
|
+
# Defines three class-level helpers for translating between a source's own ids
# and prefixed ("fp") ids:
#
#   <method_prefix>_id_to_fp_id(id)      => "<id_prefix>_<id>"
#   fp_id_is_<method_prefix>_id?(fp_id)  => true / false
#   fp_id_to_<method_prefix>_id(fp_id)   => the text following "<id_prefix>_"
#
# The last helper raises ArgumentError when fp_id does not contain the prefix.
def self.generate_id_helpers(method_prefix, id_prefix)
  # Escape the prefix so regex metacharacters in it are matched literally, and
  # compile the pattern once here rather than on every helper call.
  fp_id_pattern = /#{Regexp.escape(id_prefix.to_s)}_(.+)$/

  define_singleton_method("#{method_prefix}_id_to_fp_id") do |id|
    "#{id_prefix}_#{id}"
  end

  define_singleton_method("fp_id_is_#{method_prefix}_id?") do |fp_id|
    !fp_id_pattern.match(fp_id).nil?
  end

  define_singleton_method("fp_id_to_#{method_prefix}_id") do |fp_id|
    match = fp_id_pattern.match(fp_id)
    raise ArgumentError, "Invalid id #{fp_id} for source with method prefix #{method_prefix}." if match.nil?

    match[1]
  end
end
|
44
|
+
|
45
|
+
# Best-effort MIME type for file_name. The mime-types registry is consulted
# first; when it has no match, the shared extension mapping is used instead.
# Returns the first matching type, or nil when the mapping yields nothing.
def self.mime_type_for_file(file_name)
  detected = MIME::Types.type_for(file_name)
  if detected.present?
    detected.first.simplified
  else
    fallback = ConnectorsShared::ExtensionMappingUtil.get_mime_types(extension_for_file(file_name))
    fallback&.first
  end
end
|
51
|
+
|
52
|
+
# Lower-cased extension of file_name without its leading dot.
# Returns nil when the name has no extension.
def self.extension_for_file(file_name)
  dotted = File.extname(file_name.downcase)
  dotted.empty? ? nil : dotted.delete_prefix('.')
end
|
55
|
+
|
56
|
+
# Base name of file_name with its last extension (and any directory part)
# removed.
def self.strip_file_extension(file_name)
  suffix = File.extname(file_name)
  File.basename(file_name, suffix)
end
|
59
|
+
|
60
|
+
# Lower-cased string form of enum; nil stays nil.
def self.normalize_enum(enum)
  label = enum.nil? ? nil : enum.to_s
  label&.downcase
end
|
63
|
+
|
64
|
+
# Converts date to an RFC 3339 string.
#
# Blank input returns nil. Date/Time/DateTime/TimeWithZone values are converted
# directly. Anything else is parsed with Time.zone.parse; if that raises
# ArgumentError or TypeError the exception is reported through
# ExceptionTracking and nil is returned.
def self.normalize_date(date)
  return nil if date.blank?

  already_temporal =
    case date
    when Date, Time, DateTime, ActiveSupport::TimeWithZone then true
    else false
    end
  return date.to_datetime.rfc3339 if already_temporal

  begin
    Time.zone.parse(date).to_datetime.rfc3339
  rescue ArgumentError, TypeError => e
    ConnectorsShared::ExceptionTracking.capture_exception(e)
    nil
  end
end
|
79
|
+
|
80
|
+
# Ensures path starts with a '/'. Blank input returns nil.
def self.normalize_path(path)
  return nil if path.blank?

  path.start_with?('/') ? path : "/#{path}"
end
|
85
|
+
|
86
|
+
# Extracts the normalized path component from url.
# Returns nil for blank input, for URLs without a scheme, and for input that
# URI() rejects (URI::InvalidURIError or ArgumentError).
def self.url_to_path(url)
  return nil if url.blank?

  parsed = URI(url)
  parsed.scheme.blank? ? nil : normalize_path(parsed.path)
rescue URI::InvalidURIError, ArgumentError
  nil
end
|
94
|
+
|
95
|
+
# Builds a document hash from object according to the configured field
# mappings.
#
# object_type - stored, normalized, under :type.
# object      - source object; its as_json representation is read.
# fields      - list of hashes, each with :remote (source field name) and
#               :target (document key).
#
# For each mapping the remote name is first looked up as a literal key; when
# that value is blank the name is split on '.' and used as a nested path.
# Mappings whose value is nil are skipped. Returns the hash with symbol keys.
def self.swiftype_document_from_configured_object_base(object_type:, object:, fields:)
  source = object.as_json
  document = { :type => normalize_enum(object_type) }

  fields.each do |mapping|
    remote_key = mapping.fetch(:remote)

    found = source[remote_key]
    found = source.dig(*remote_key.split('.')) if found.blank?

    document[mapping.fetch(:target)] = found unless found.nil?
  end

  document.symbolize_keys
end
|
114
|
+
|
115
|
+
# Make the class-level normalizers callable on instances as well.
delegate :normalize_enum, :normalize_date, :normalize_path, :to => :class
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsSdk
|
8
|
+
module Base
|
9
|
+
# Connector configuration; at this level it only carries the sync cursors.
class Config
  attr_reader :cursors

  # cursors - cursor data; a nil/false value is replaced with an empty hash.
  def initialize(cursors:)
    @cursors = cursors
    @cursors ||= {}
  end

  # Hash representation of the configuration.
  def to_h
    serialized = {}
    serialized[:cursors] = cursors
    serialized
  end

  # Replaces the stored cursors with new_cursors as given (no nil defaulting).
  def overwrite_cursors!(new_cursors)
    @cursors = new_cursors
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'faraday'
|
8
|
+
require 'httpclient'
|
9
|
+
require 'active_support/core_ext/array/wrap'
|
10
|
+
require 'active_support/core_ext/numeric/time'
|
11
|
+
require 'active_support/core_ext/object/deep_dup'
|
12
|
+
require 'connectors_shared'
|
13
|
+
require 'date'
|
14
|
+
require 'active_support/all'
|
15
|
+
|
16
|
+
module ConnectorsSdk
|
17
|
+
module Base
|
18
|
+
class CustomClient
|
19
|
+
# Readers for the connection base URL, the assembled middleware list, and the
# optional callable invoked before each request to refresh authentication.
attr_reader :base_url, :middleware, :ensure_fresh_auth
|
20
|
+
|
21
|
+
# Value used for :max in retry_config.
MAX_RETRIES = 5
|
22
|
+
|
23
|
+
# base_url          - base URL handed to Faraday when the connection is built.
# ensure_fresh_auth - optional callable; when present it is called with this
#                     client before every HTTP verb call.
def initialize(base_url: nil, ensure_fresh_auth: nil)
  @base_url, @ensure_fresh_auth = base_url, ensure_fresh_auth
  middleware!
end
|
28
|
+
|
29
|
+
# (Re)builds @middleware: subclass-provided entries first, then the defaults,
# with nil entries removed.
def middleware!
  combined = Array.wrap(additional_middleware) + Array.wrap(default_middleware)
  @middleware = combined
  @middleware.compact!
end
|
34
|
+
|
35
|
+
# Hook for subclasses: extra middleware entries placed ahead of the defaults.
# The base implementation contributes none.
def additional_middleware
  []
end
|
38
|
+
|
39
|
+
# Middleware always installed: Faraday's retry middleware configured with
# retry_config. Each entry is an argument list for Faraday's `use`.
def default_middleware
  retry_entry = [Faraday::Request::Retry, retry_config]
  [retry_entry]
end
|
42
|
+
|
43
|
+
# Options passed to the retry middleware: retry only on HTTP 429, up to
# MAX_RETRIES times, starting at a 0.05 interval with a backoff factor of 2.
def retry_config
  settings = {}
  settings[:retry_statuses] = [429]
  settings[:backoff_factor] = 2
  settings[:max] = MAX_RETRIES
  settings[:interval] = 0.05
  settings
end
|
51
|
+
|
52
|
+
# Define one instance method per HTTP verb. Each one first invokes the
# ensure_fresh_auth callable (when present) with this client, then forwards
# all arguments and the block to the same-named method on http_client.
%i[delete get head options patch post put].each do |verb|
  define_method(verb) do |*args, &block|
    ensure_fresh_auth.call(self) if ensure_fresh_auth.present?
    http_client.public_send(verb, *args, &block)
  end
end
|
66
|
+
|
67
|
+
# Discards the memoized connection and returns a freshly built one.
def http_client!
  @http_client = nil
  http_client
end
|
71
|
+
|
72
|
+
# Memoized Faraday connection for base_url, with every configured middleware
# entry installed in order and the :httpclient adapter selected.
def http_client
  return @http_client if @http_client

  @http_client = Faraday.new(base_url) do |connection|
    middleware.each { |entry| connection.use(*entry) }

    connection.adapter(:httpclient)
  end
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# https://github.com/lostisland/faraday/blob/b09c6db31591dd1a58fffcc0979b0c7d96b5388b/lib/faraday/connection.rb#L171
|
85
|
+
# Verbs for which request_with_throttling forwards options[:body] rather than
# options[:params].
METHODS_WITH_BODY = [:post, :put, :patch].freeze
|
86
|
+
|
87
|
+
# True when method (String or Symbol) is one of METHODS_WITH_BODY.
def send_body?(method)
  verb = method.to_sym
  METHODS_WITH_BODY.include?(verb)
end
|
90
|
+
|
91
|
+
# Performs an HTTP request through this client's verb helpers and converts an
# HTTP 429 response into a ConnectorsShared::ThrottlingError.
#
# method  - HTTP verb (String or Symbol).
# url     - request URL or path.
# options - Hash; recognised keys:
#           :body / :params   - second argument to the verb helper (:body for
#                               verbs that send a body, :params otherwise)
#           :headers          - third argument to the verb helper
#           :retry_multiplier - factor applied to the Retry-After value; the
#                               historical misspelling :retry_mulitplier is
#                               still honoured when the correct key is absent
#           :cursors          - attached to the raised ThrottlingError
#
# Returns the response for any status other than 429.
# Raises ConnectorsShared::ThrottlingError on 429 with :suspend_until set to
# now plus the computed delay; a missing or non-positive delay becomes 60s.
def request_with_throttling(method, url, options = {})
  payload = send_body?(method) ? options[:body] : options[:params]
  response = public_send(method, url, payload, options[:headers])
  return response unless response.status == 429

  retry_after = response.headers['Retry-After']
  # Prefer the correctly spelled key; fall back to the legacy typo so existing
  # callers keep working.
  multiplier = options.fetch(:retry_multiplier) { options.fetch(:retry_mulitplier, 1) }
  retry_after_secs = (retry_after.is_a?(Array) ? retry_after.first.to_i : retry_after.to_i) * multiplier
  retry_after_secs = 60 if retry_after_secs <= 0
  ConnectorsShared::Logger.warn("Exceeded #{self.class} request limits. Going to sleep for #{retry_after_secs} seconds")
  raise ConnectorsShared::ThrottlingError.new(:suspend_until => DateTime.now + retry_after_secs.seconds, :cursors => options[:cursors])
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'faraday'
|
8
|
+
require 'httpclient'
|
9
|
+
require 'active_support/core_ext/array/wrap'
|
10
|
+
require 'active_support/core_ext/numeric/time'
|
11
|
+
require 'active_support/core_ext/object/deep_dup'
|
12
|
+
require 'connectors_shared'
|
13
|
+
require 'date'
|
14
|
+
require 'active_support/all'
|
15
|
+
require 'stubs/connectors/stats' unless defined?(Rails)
|
16
|
+
|
17
|
+
module ConnectorsSdk
|
18
|
+
module Base
|
19
|
+
class Extractor
|
20
|
+
# Number of attempts with_auth_tokens_and_retry makes before re-raising.
MAX_CONNECTION_ATTEMPTS = 3
|
21
|
+
# Default cursor key; not referenced within this class itself.
# NOTE(review): presumably used by subclasses or callers — confirm.
DEFAULT_CURSOR_KEY = 'all'.freeze
|
22
|
+
|
23
|
+
# Exception classes that transient_error? treats as temporary connectivity
# problems (connection failures, SSL errors, and timeouts).
TRANSIENT_SERVER_ERROR_CLASSES = Set[
  Faraday::ConnectionFailed,
  Faraday::SSLError,
  Faraday::TimeoutError,
  HTTPClient::ConnectTimeoutError,
  Net::OpenTimeout
]
|
32
|
+
|
33
|
+
# Read-only state captured at construction time (original_cursors is a deep
# copy of the config's cursors taken in initialize).
attr_reader :content_source_id, :config, :features, :original_cursors, :service_type, :completed
|
34
|
+
# Replaceable collaborators: the error/success monitor and the callable that
# builds the API client.
attr_accessor :monitor, :client_proc
|
35
|
+
|
36
|
+
# content_source_id / service_type - identify the source; used in log prefixes.
# config                  - configuration object exposing #cursors.
# features                - stored as given.
# client_proc             - callable that builds the API client (see #client).
# authorization_data_proc - callable returning authorization data.
# monitor                 - defaults to a new ConnectorsShared::Monitor bound
#                           to this extractor.
#
# A deep copy of the starting cursors is kept so later changes can be detected.
def initialize(content_source_id:,
               service_type:,
               config:,
               features:,
               client_proc:,
               authorization_data_proc:,
               monitor: ConnectorsShared::Monitor.new(:connector => self))
  @completed = false
  @monitor = monitor
  @content_source_id = content_source_id
  @service_type = service_type
  @features = features
  @client_proc = client_proc
  @authorization_data_proc = authorization_data_proc
  @config = config
  @original_cursors = config.cursors.deep_dup
end
|
53
|
+
|
54
|
+
# Drops the memoized authorization data and fetches it again.
def authorization_data!
  @authorization_data = nil
  authorization_data
end
|
58
|
+
|
59
|
+
# Authorization data obtained from the authorization_data_proc given at
# construction; memoized once a truthy value is returned.
def authorization_data
  return @authorization_data if @authorization_data

  @authorization_data = @authorization_data_proc.call
end
|
62
|
+
|
63
|
+
# Drops the memoized client and builds a new one.
def client!
  @client = nil
  client
end
|
67
|
+
|
68
|
+
# API client built by calling client_proc; memoized once truthy.
def client
  return @client if @client

  @client = client_proc.call
end
|
71
|
+
|
72
|
+
# Base implementation has no cursors to report and returns nil.
def retrieve_latest_cursors
  nil
end
|
75
|
+
|
76
|
+
# Runs the block inside the transient-error and rate-limit converters and
# retries it on unexpected failures.
#
# - Token refresh failures are logged and re-raised.
# - Publishing failures are logged and their #reason is raised instead.
# - Eviction, throttling, document-limit, monitoring, interruption,
#   invalid-secret and invalid-indexing-configuration errors are re-raised
#   without a retry.
# - Any other StandardError is retried until MAX_CONNECTION_ATTEMPTS attempts
#   have been made, then logged and re-raised.
def with_auth_tokens_and_retry(&block)
  attempts = 0

  begin
    convert_transient_server_errors { convert_rate_limit_errors(&block) }
  rescue ConnectorsShared::TokenRefreshFailedError => e
    log_error('Could not refresh token, aborting')
    raise e
  rescue ConnectorsShared::PublishingFailedError => e
    log_error('Could not publish, aborting')
    raise e.reason
  rescue ConnectorsShared::EvictionWithNoProgressError
    log_error('Aborting job because it did not make any progress and cannot be evicted')
    raise
  rescue ConnectorsShared::EvictionError,
         ConnectorsShared::ThrottlingError,
         ConnectorsShared::JobDocumentLimitError,
         ConnectorsShared::MonitoringError,
         ConnectorsShared::JobInterruptedError,
         ConnectorsShared::SecretInvalidError,
         ConnectorsShared::InvalidIndexingConfigurationError
    # These are deliberate stop signals; let them bubble out untouched.
    raise
  rescue StandardError => e
    ConnectorsShared::ExceptionTracking.augment_exception(e)
    attempts += 1

    if attempts >= MAX_CONNECTION_ATTEMPTS
      log_warn("Failed to connect in with_auth_tokens_and_retry Reason: #{e.class}: #{e.message} {:message_id => #{e.id}}")
      log_warn("Retries: #{attempts}/#{MAX_CONNECTION_ATTEMPTS}, giving up.")
      ConnectorsShared::ExceptionTracking.log_exception(e)
      raise e
    end

    log_warn("Failed to connect in with_auth_tokens_and_retry. Reason: #{e.class}: #{e.message} {:message_id => #{e.id}}")
    log_warn("Retries: #{attempts}/#{MAX_CONNECTION_ATTEMPTS}, trying again.")
    retry
  end
end
|
116
|
+
|
117
|
+
# Subclass hook. document_changes calls it with a block that receives
# (action, change, subextractors). The base implementation always raises
# NotImplementedError.
def yield_document_changes(modified_since: nil)
  raise NotImplementedError
end
|
120
|
+
|
121
|
+
# Wraps yield_document_changes in stats measurement and the auth/retry
# handling, exposing the changes as an Enumerator.
#
# When a block is given, every (action, change, subextractors) triple is passed
# to it immediately; a progress line is logged every 100 documents. The
# Enumerator is returned either way.
def document_changes(modified_since: nil, &block)
  changes = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.documents") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_documents") do
        extracted = 0
        changes = Enumerator.new do |sink|
          yield_document_changes(:modified_since => modified_since) do |action, change, subextractors|
            sink.yield(action, change, subextractors)
            extracted += 1
            log_info("Extracted #{extracted} documents so far") if (extracted % 100).zero?
          end
        end
        changes.each(&block) if block
      end
    end
  end
  changes
end
|
140
|
+
|
141
|
+
# Runs the block for a single document inside the transient-error and
# rate-limit converters, recording the outcome on the monitor.
#
# Success is noted on the monitor. Errors listed in fatal_exception_classes are
# logged and re-raised; any other StandardError is logged, noted on the monitor
# and swallowed.
def yield_single_document_change(identifier: nil, &block)
  begin
    log_debug("Extracting single document for #{identifier}") if identifier
    convert_transient_server_errors { convert_rate_limit_errors(&block) }
    monitor.note_success
  rescue *fatal_exception_classes => fatal
    ConnectorsShared::ExceptionTracking.augment_exception(fatal)
    log_error("Encountered a fall-through error during extraction#{identifying_error_message(identifier)}: #{fatal.class}: #{fatal.message} {:message_id => #{fatal.id}}")
    raise
  rescue StandardError => failure
    ConnectorsShared::ExceptionTracking.augment_exception(failure)
    log_warn("Encountered error during extraction#{identifying_error_message(identifier)}: #{failure.class}: #{failure.message} {:message_id => #{failure.id}}")
    monitor.note_error(failure, :id => failure.id)
  end
end
|
156
|
+
|
157
|
+
# Log-message fragment naming the document, or '' when there is no identifier.
def identifying_error_message(identifier)
  return '' unless identifier.present?

  " of '#{identifier}'"
end
|
160
|
+
|
161
|
+
# Subclass hook. deleted_ids calls it with the candidate ids and a block that
# receives each id. The base implementation always raises NotImplementedError.
def yield_deleted_ids(_ids)
  raise NotImplementedError
end
|
164
|
+
|
165
|
+
# Wraps yield_deleted_ids in stats measurement and the auth/retry handling,
# exposing the ids as an Enumerator.
#
# When a block is given, every id is passed to it immediately; a progress line
# is logged every 100 ids. The Enumerator is returned either way.
def deleted_ids(ids, &block)
  removed = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.deleted_ids") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_deleted_ids") do
        seen = 0
        removed = Enumerator.new do |sink|
          yield_deleted_ids(ids) do |id|
            sink.yield(id)
            seen += 1
            log_info("Deleted #{seen} documents so far") if (seen % 100).zero?
          end
        end
        removed.each(&block) if block
      end
    end
  end
  removed
end
|
184
|
+
|
185
|
+
# Subclass hook for document-level permissions. Intentionally does nothing for
# content sources without DLP support.
def yield_permissions(source_user_id)
  # no-op for content source without DLP support
end
|
188
|
+
|
189
|
+
# Wraps yield_permissions in stats measurement and the auth/retry handling.
# Each permissions collection produced for source_user_id is logged (by size)
# and passed to the block when one is given.
def permissions(source_user_id, &block)
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.permissions") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_permissions") do
        yield_permissions(source_user_id) do |extracted|
          log_info("Extracted #{extracted.size} permissions for source user #{source_user_id}")
          block&.call(extracted)
        end
      end
    end
  end
end
|
201
|
+
|
202
|
+
# Define log_<level> for every supported log level. String messages are
# prefixed with the content source id and service type; anything else is
# forwarded to the shared logger unchanged.
ConnectorsShared::Logger::SUPPORTED_LOG_LEVELS.each do |level|
  define_method(:"log_#{level}") do |message|
    message = "ContentSource[#{content_source_id}, #{service_type}]: #{message}" if message.kind_of?(String)
    ConnectorsShared::Logger.public_send(level, message)
  end
end
|
210
|
+
|
211
|
+
# Yields to the block; a StandardError recognised by transient_error? is
# replaced with a ConnectorsShared::TransientServerError carrying the current
# cursors and a suspend-until time taken from the
# 'transient_server_error_retry_delay_minutes' config value. Every other error
# is re-raised unchanged.
def convert_transient_server_errors
  yield
rescue StandardError => e
  if transient_error?(e)
    description = "Transient error #{e.class}: #{e.message}"
    resume_at = Connectors.config.fetch('transient_server_error_retry_delay_minutes').minutes.from_now
    raise ConnectorsShared::TransientServerError.new(
      description,
      :suspend_until => resume_at,
      :cursors => config.cursors
    )
  end

  raise
end
|
222
|
+
|
223
|
+
# True when error is an instance of any class in
# TRANSIENT_SERVER_ERROR_CLASSES (subclasses included).
def transient_error?(error)
  TRANSIENT_SERVER_ERROR_CLASSES.any? do |transient_class|
    error.kind_of?(transient_class)
  end
end
|
226
|
+
|
227
|
+
# The base extractor reports itself as not evictable.
def evictable?
  false
end
|
230
|
+
|
231
|
+
# True when the config's current cursors differ from the deep copy taken at
# construction.
def cursors_modified_since_start?
  current = config.cursors
  current != original_cursors
end
|
234
|
+
|
235
|
+
# Packs the download description and its block into a five-element array:
# [id, name, size, download_args, block]. The last element is nil when no block
# is given.
def download_args_and_proc(id:, name:, size:, download_args:, &block)
  packed = [id, name, size, download_args]
  packed << block
  packed
end
|
238
|
+
|
239
|
+
private
|
240
|
+
|
241
|
+
# Pass-through in the base class: simply yields to the block. Subclasses
# override this with source-specific rate-limit handling.
def convert_rate_limit_errors
  yield
end
|
244
|
+
|
245
|
+
# Error classes that yield_single_document_change logs and re-raises instead
# of recording on the monitor.
def fatal_exception_classes
  fatal = []
  fatal << ConnectorsShared::TokenRefreshFailedError
  fatal << ConnectorsShared::EvictionError
  fatal << ConnectorsShared::ThrottlingError
  fatal << ConnectorsShared::JobDocumentLimitError
  fatal << ConnectorsShared::MonitoringError
  fatal
end
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsSdk
|
8
|
+
module Base
|
9
|
+
# Keeps a name => class mapping of available connectors and instantiates them
# on demand.
class Factory
  # The registered name => class mapping.
  attr_reader :connectors

  def initialize
    @connectors = {}
  end

  # Registers klass under name, replacing any earlier registration.
  def register(name, klass)
    @connectors[name] = klass
  end

  # Returns a new instance (built with no arguments) of the class registered
  # under name.
  #
  # Raises ArgumentError when nothing is registered under that name; this case
  # previously surfaced as an opaque NoMethodError on nil.
  def connector(name)
    klass = @connectors[name]
    raise ArgumentError, "No connector registered for #{name.inspect}" if klass.nil?

    klass.new
  end
end
|
24
|
+
|
25
|
+
# Shared Factory instance that connectors are registered on.
REGISTRY = Factory.new
|
26
|
+
|
27
|
+
# loading plugins (might replace this with a directory scan and conventions on names)
|
28
|
+
require_relative '../share_point/http_call_wrapper'
|
29
|
+
|
30
|
+
# Register the SharePoint HTTP call wrapper under the SharePoint service type.
REGISTRY.register(ConnectorsSdk::SharePoint::SERVICE_TYPE, ConnectorsSdk::SharePoint::HttpCallWrapper)
|
31
|
+
end
|
32
|
+
end
|