connectors_sdk 8.3.0.0.pre.20220414T060419Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/base/adapter.rb +118 -0
- data/lib/connectors_sdk/base/config.rb +27 -0
- data/lib/connectors_sdk/base/custom_client.rb +112 -0
- data/lib/connectors_sdk/base/extractor.rb +256 -0
- data/lib/connectors_sdk/base/registry.rb +32 -0
- data/lib/connectors_sdk/office365/adapter.rb +153 -0
- data/lib/connectors_sdk/office365/config.rb +37 -0
- data/lib/connectors_sdk/office365/custom_client.rb +319 -0
- data/lib/connectors_sdk/office365/extractor.rb +230 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/adapter.rb +47 -0
- data/lib/connectors_sdk/share_point/authorization.rb +91 -0
- data/lib/connectors_sdk/share_point/extractor.rb +31 -0
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +117 -0
- data/lib/connectors_sdk.rb +16 -0
- data/lib/connectors_shared/constants.rb +14 -0
- data/lib/connectors_shared/errors.rb +126 -0
- data/lib/connectors_shared/exception_tracking.rb +39 -0
- data/lib/connectors_shared/extension_mapping_util.rb +123 -0
- data/lib/connectors_shared/logger.rb +33 -0
- data/lib/connectors_shared/monitor.rb +99 -0
- data/lib/connectors_shared.rb +12 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a44a80ba5fe5032b6254fb8a2db25516542df5818044d627b6ba31f652a7812f
|
4
|
+
data.tar.gz: 52d1245cc24ab6e1a42c00483ec4a8ece48ede4a6465c9b46b3bf5e18c979678
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7a4021e1d7b83ec7334913f1ef5e684e02296097f223e96610cb31eb5502c36f9b45b03eeaf80e876aac5bcc531f85d10445917459b86cedc30d6609b33f5351
|
7
|
+
data.tar.gz: ca04ab72f48ed6e76b3a0e0cc8c260f81779ea906325a782eb61456c9c5be231043d7a73ca9625df63459460de587c65aa352fb2e1150fc8c92a253536fcd634
|
data/LICENSE
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
Elastic License 2.0
|
2
|
+
|
3
|
+
URL: https://www.elastic.co/licensing/elastic-license
|
4
|
+
|
5
|
+
## Acceptance
|
6
|
+
|
7
|
+
By using the software, you agree to all of the terms and conditions below.
|
8
|
+
|
9
|
+
## Copyright License
|
10
|
+
|
11
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
12
|
+
non-sublicensable, non-transferable license to use, copy, distribute, make
|
13
|
+
available, and prepare derivative works of the software, in each case subject to
|
14
|
+
the limitations and conditions below.
|
15
|
+
|
16
|
+
## Limitations
|
17
|
+
|
18
|
+
You may not provide the software to third parties as a hosted or managed
|
19
|
+
service, where the service provides users with access to any substantial set of
|
20
|
+
the features or functionality of the software.
|
21
|
+
|
22
|
+
You may not move, change, disable, or circumvent the license key functionality
|
23
|
+
in the software, and you may not remove or obscure any functionality in the
|
24
|
+
software that is protected by the license key.
|
25
|
+
|
26
|
+
You may not alter, remove, or obscure any licensing, copyright, or other notices
|
27
|
+
of the licensor in the software. Any use of the licensor’s trademarks is subject
|
28
|
+
to applicable law.
|
29
|
+
|
30
|
+
## Patents
|
31
|
+
|
32
|
+
The licensor grants you a license, under any patent claims the licensor can
|
33
|
+
license, or becomes able to license, to make, have made, use, sell, offer for
|
34
|
+
sale, import and have imported the software, in each case subject to the
|
35
|
+
limitations and conditions in this license. This license does not cover any
|
36
|
+
patent claims that you cause to be infringed by modifications or additions to
|
37
|
+
the software. If you or your company make any written claim that the software
|
38
|
+
infringes or contributes to infringement of any patent, your patent license for
|
39
|
+
the software granted under these terms ends immediately. If your company makes
|
40
|
+
such a claim, your patent license ends immediately for work on behalf of your
|
41
|
+
company.
|
42
|
+
|
43
|
+
## Notices
|
44
|
+
|
45
|
+
You must ensure that anyone who gets a copy of any part of the software from you
|
46
|
+
also gets a copy of these terms.
|
47
|
+
|
48
|
+
If you modify the software, you must include in any modified copies of the
|
49
|
+
software prominent notices stating that you have modified the software.
|
50
|
+
|
51
|
+
## No Other Rights
|
52
|
+
|
53
|
+
These terms do not imply any licenses other than those expressly granted in
|
54
|
+
these terms.
|
55
|
+
|
56
|
+
## Termination
|
57
|
+
|
58
|
+
If you use the software in violation of these terms, such use is not licensed,
|
59
|
+
and your licenses will automatically terminate. If the licensor provides you
|
60
|
+
with a notice of your violation, and you cease all violation of this license no
|
61
|
+
later than 30 days after you receive that notice, your licenses will be
|
62
|
+
reinstated retroactively. However, if you violate these terms after such
|
63
|
+
reinstatement, any additional violation of these terms will cause your licenses
|
64
|
+
to terminate automatically and permanently.
|
65
|
+
|
66
|
+
## No Liability
|
67
|
+
|
68
|
+
*As far as the law allows, the software comes as is, without any warranty or
|
69
|
+
condition, and the licensor will not be liable to you for any damages arising
|
70
|
+
out of these terms or the use or nature of the software, under any kind of
|
71
|
+
legal claim.*
|
72
|
+
|
73
|
+
## Definitions
|
74
|
+
|
75
|
+
The **licensor** is the entity offering these terms, and the **software** is the
|
76
|
+
software the licensor makes available under these terms, including any portion
|
77
|
+
of it.
|
78
|
+
|
79
|
+
**you** refers to the individual or entity agreeing to these terms.
|
80
|
+
|
81
|
+
**your company** is any legal entity, sole proprietorship, or other kind of
|
82
|
+
organization that you work for, plus all organizations that have control over,
|
83
|
+
are under the control of, or are under common control with that
|
84
|
+
organization. **control** means ownership of substantially all the assets of an
|
85
|
+
entity, or the power to direct its management and policies by vote, contract, or
|
86
|
+
otherwise. Control can be direct or indirect.
|
87
|
+
|
88
|
+
**your licenses** are all the licenses granted to you for the software under
|
89
|
+
these terms.
|
90
|
+
|
91
|
+
**use** means anything you do with the software requiring one of your licenses.
|
92
|
+
|
93
|
+
**trademark** means trademarks, service marks, and similar rights.
|
data/NOTICE.txt
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'active_support/core_ext/array/wrap'
|
8
|
+
require 'active_support/core_ext/numeric/time'
|
9
|
+
require 'active_support/core_ext/object/deep_dup'
|
10
|
+
require 'connectors_shared'
|
11
|
+
require 'connectors_shared/extension_mapping_util'
|
12
|
+
require 'date'
|
13
|
+
require 'active_support/all'
|
14
|
+
require 'mime-types'
|
15
|
+
|
16
|
+
module ConnectorsSdk
|
17
|
+
module Base
|
18
|
+
class Adapter
|
19
|
+
# Returns the memoized, frozen list of field names: 'body' followed by
# ConnectorsShared::Constants::THUMBNAIL_FIELDS and
# ConnectorsShared::Constants::SUBEXTRACTOR_RESERVED_FIELDS.
# Every element is frozen as well.
def self.fields_to_preserve
  return @fields_to_preserve if @fields_to_preserve

  names = ['body'] +
          ConnectorsShared::Constants::THUMBNAIL_FIELDS +
          ConnectorsShared::Constants::SUBEXTRACTOR_RESERVED_FIELDS
  @fields_to_preserve = names.map(&:freeze).freeze
end
|
26
|
+
|
27
|
+
# Defines three singleton helper methods on the receiver for converting
# between a raw id and a prefixed id of the form "<id_prefix>_<id>":
#
#   <method_prefix>_id_to_fp_id(id)     -> "<id_prefix>_<id>"
#   fp_id_is_<method_prefix>_id?(fp_id) -> true/false
#   fp_id_to_<method_prefix>_id(fp_id)  -> the part after "<id_prefix>_",
#                                          raises ArgumentError when the
#                                          prefix is not found
#
# The prefix is escaped before being embedded in the regular expression so
# that metacharacters in it (for example '.') are matched literally, and the
# pattern is compiled once here instead of on every helper call.
def self.generate_id_helpers(method_prefix, id_prefix)
  id_pattern = /#{Regexp.escape(id_prefix.to_s)}_(.+)$/

  define_singleton_method("#{method_prefix}_id_to_fp_id") do |id|
    "#{id_prefix}_#{id}"
  end

  define_singleton_method("fp_id_is_#{method_prefix}_id?") do |fp_id|
    # The pattern has exactly one capture group, so a match is sufficient.
    id_pattern.match?(fp_id)
  end

  define_singleton_method("fp_id_to_#{method_prefix}_id") do |fp_id|
    regex_match = id_pattern.match(fp_id)

    raise ArgumentError, "Invalid id #{fp_id} for source with method prefix #{method_prefix}." if regex_match.nil?

    regex_match[1]
  end
end
|
44
|
+
|
45
|
+
# Determines a MIME type for +file_name+.
# Uses the first type reported by MIME::Types (its simplified form) when
# there is one; otherwise falls back to the first entry returned by
# ConnectorsShared::ExtensionMappingUtil.get_mime_types for the file's
# extension, which may be nil.
def self.mime_type_for_file(file_name)
  detected = MIME::Types.type_for(file_name)
  if detected.present?
    detected.first.simplified
  else
    ConnectorsShared::ExtensionMappingUtil.get_mime_types(extension_for_file(file_name))&.first
  end
end
|
51
|
+
|
52
|
+
# Returns the lower-cased extension of +file_name+ without the leading dot,
# or nil when the name has no extension.
def self.extension_for_file(file_name)
  dotted = File.extname(file_name.downcase)
  # File.extname yields either "" or a string beginning with "."
  dotted.start_with?('.') ? dotted[1..-1] : nil
end
|
55
|
+
|
56
|
+
# Returns the base name of +file_name+ (directory part removed) with its
# last extension stripped.
def self.strip_file_extension(file_name)
  suffix = File.extname(file_name)
  File.basename(file_name, suffix)
end
|
59
|
+
|
60
|
+
# Converts +enum+ to a lower-case String; nil stays nil.
def self.normalize_enum(enum)
  enum.nil? ? nil : enum.to_s.downcase
end
|
63
|
+
|
64
|
+
# Normalizes +date+ to an RFC 3339 timestamp String.
#
# * blank input            -> nil
# * Date / Time / DateTime / ActiveSupport::TimeWithZone
#                          -> converted via to_datetime.rfc3339
# * anything else          -> parsed with Time.zone.parse
#
# Returns nil when parsing fails. ArgumentError and TypeError raised while
# parsing are reported through ConnectorsShared::ExceptionTracking.
# Time.zone.parse can also return nil (no date components found); safe
# navigation turns that into a nil result rather than a NoMethodError.
def self.normalize_date(date)
  return nil if date.blank?

  case date
  when Date, Time, DateTime, ActiveSupport::TimeWithZone
    date.to_datetime.rfc3339
  else
    begin
      Time.zone.parse(date)&.to_datetime&.rfc3339
    rescue ArgumentError, TypeError => e
      ConnectorsShared::ExceptionTracking.capture_exception(e)
      nil
    end
  end
end
|
79
|
+
|
80
|
+
# Ensures +path+ starts with a single leading '/'.
# Blank input yields nil; a path already starting with '/' is returned as is.
def self.normalize_path(path)
  return nil if path.blank?

  path.start_with?('/') ? path : "/#{path}"
end
|
85
|
+
|
86
|
+
# Extracts the normalized path component of +url+.
# Returns nil when the url is blank, has no scheme, or cannot be parsed
# (URI::InvalidURIError / ArgumentError).
def self.url_to_path(url)
  return nil if url.blank?

  parsed = URI(url)
  parsed.scheme.blank? ? nil : normalize_path(parsed.path)
rescue URI::InvalidURIError, ArgumentError
  nil
end
|
94
|
+
|
95
|
+
# Builds a document Hash from +object+ according to +fields+.
#
# object_type - stored under :type after normalize_enum.
# object      - anything responding to as_json.
# fields      - enumerable of Hashes with :remote (source key, looked up
#               directly first and then as a '.'-separated dig path when the
#               direct value is blank) and :target (destination key).
#
# Fields whose value resolves to nil are skipped. Keys of the result are
# symbolized.
def self.swiftype_document_from_configured_object_base(object_type:, object:, fields:)
  json = object.as_json
  document = { :type => normalize_enum(object_type) }

  fields.each do |field|
    remote_name = field.fetch(:remote)

    value = json[remote_name]
    value = json.dig(*remote_name.split('.')) if value.blank?

    document[field.fetch(:target)] = value unless value.nil?
  end

  document.symbolize_keys
end
|
114
|
+
|
115
|
+
# Expose the class-level normalizers as instance methods that forward to the class.
delegate :normalize_enum, :normalize_date, :normalize_path, :to => :class
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsSdk
|
8
|
+
module Base
|
9
|
+
# Holds the cursors of a connector run.
# +cursors+ is never nil: nil is normalized to an empty Hash both at
# construction time and when the cursors are overwritten.
class Config
  attr_reader :cursors

  # cursors - initial cursors; nil is treated as {}.
  def initialize(cursors:)
    @cursors = cursors || {}
  end

  # Returns a Hash representation: { :cursors => cursors }.
  def to_h
    {
      :cursors => cursors
    }
  end

  # Replaces the current cursors with +new_cursors+ (nil is treated as {},
  # consistent with #initialize) and returns the stored value.
  def overwrite_cursors!(new_cursors)
    @cursors = new_cursors || {}
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'faraday'
|
8
|
+
require 'httpclient'
|
9
|
+
require 'active_support/core_ext/array/wrap'
|
10
|
+
require 'active_support/core_ext/numeric/time'
|
11
|
+
require 'active_support/core_ext/object/deep_dup'
|
12
|
+
require 'connectors_shared'
|
13
|
+
require 'date'
|
14
|
+
require 'active_support/all'
|
15
|
+
|
16
|
+
module ConnectorsSdk
|
17
|
+
module Base
|
18
|
+
class CustomClient
|
19
|
+
# base_url and ensure_fresh_auth are set in #initialize; middleware is built by #middleware!.
attr_reader :base_url, :middleware, :ensure_fresh_auth
|
20
|
+
|
21
|
+
# Value passed as :max in #retry_config for the Faraday retry middleware.
MAX_RETRIES = 5
|
22
|
+
|
23
|
+
# base_url          - passed to Faraday.new when the connection is built.
# ensure_fresh_auth - optional callable; when present it is invoked with
#                     this client before every HTTP verb call.
# Builds the middleware list as the last step.
def initialize(base_url: nil, ensure_fresh_auth: nil)
  @base_url, @ensure_fresh_auth = base_url, ensure_fresh_auth
  middleware!
end
|
28
|
+
|
29
|
+
# Rebuilds @middleware: additional middleware first, then the default
# middleware, with nil entries removed in place.
def middleware!
  stack = Array.wrap(additional_middleware)
  stack += Array.wrap(default_middleware)
  @middleware = stack
  @middleware.compact!
end
|
34
|
+
|
35
|
+
# Hook for subclasses: extra middleware entries placed before the defaults.
# The base implementation contributes none.
def additional_middleware
  [] # define as needed in subclass
end
|
38
|
+
|
39
|
+
# Middleware entries always installed: the Faraday retry middleware
# configured with #retry_config. Each entry is [middleware_class, options].
def default_middleware
  retry_entry = [Faraday::Request::Retry, retry_config]
  [retry_entry]
end
|
42
|
+
|
43
|
+
# Options handed to the Faraday retry middleware: retry on HTTP 429 only,
# at most MAX_RETRIES times, starting at a 0.05 interval with a backoff
# factor of 2.
def retry_config
  {
    retry_statuses: [429],
    backoff_factor: 2,
    max: MAX_RETRIES,
    interval: 0.05
  }
end
|
51
|
+
|
52
|
+
# Define one public instance method per HTTP verb. Each one first invokes
# ensure_fresh_auth (when present) with this client, then forwards all
# arguments and the block to the underlying http_client.
%i[delete get head options patch post put].each do |http_verb|
  define_method(http_verb) do |*args, &block|
    ensure_fresh_auth.call(self) if ensure_fresh_auth.present?
    http_client.public_send(http_verb, *args, &block)
  end
end
|
66
|
+
|
67
|
+
# Discards the memoized connection and returns a newly built one.
def http_client!
  @http_client = nil
  http_client
end
|
71
|
+
|
72
|
+
# Returns the memoized Faraday connection for base_url, building it on first
# use with every configured middleware entry and the :httpclient adapter.
def http_client
  return @http_client if @http_client

  @http_client = Faraday.new(base_url) do |connection|
    middleware.each { |entry| connection.use(*entry) }

    connection.adapter(:httpclient)
  end
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# https://github.com/lostisland/faraday/blob/b09c6db31591dd1a58fffcc0979b0c7d96b5388b/lib/faraday/connection.rb#L171
|
85
|
+
# HTTP verbs whose second positional argument is the request body (see #send_body?).
METHODS_WITH_BODY = %i[post put patch].freeze
|
86
|
+
|
87
|
+
# True when +method+ (String or Symbol) is one of METHODS_WITH_BODY.
def send_body?(method)
  verb = method.to_sym
  METHODS_WITH_BODY.include?(verb)
end
|
90
|
+
|
91
|
+
# Performs an HTTP request through this client's verb methods and converts
# an HTTP 429 response into a ConnectorsShared::ThrottlingError.
#
# method  - HTTP verb (String or Symbol).
# url     - request URL.
# options - Hash with optional keys:
#           :body / :params     - sent as the second argument depending on
#                                 whether the verb carries a body
#           :headers            - request headers
#           :retry_multiplier   - factor applied to the Retry-After value
#                                 (default 1); the historical misspelling
#                                 :retry_mulitplier is still honoured
#           :cursors            - attached to the ThrottlingError
#
# Returns the response for any status other than 429.
# Raises ConnectorsShared::ThrottlingError on 429, with :suspend_until set
# to now + Retry-After * multiplier seconds (60 seconds when that value is
# missing or not positive).
def request_with_throttling(method, url, options = {})
  response =
    if send_body?(method)
      public_send(method, url, options[:body], options[:headers])
    else
      public_send(method, url, options[:params], options[:headers])
    end

  return response unless response.status == 429

  retry_after = response.headers['Retry-After']
  # Prefer the correctly spelled key, fall back to the legacy one.
  multiplier = options.fetch(:retry_multiplier) { options.fetch(:retry_mulitplier, 1) }
  retry_after_secs = (retry_after.is_a?(Array) ? retry_after.first.to_i : retry_after.to_i) * multiplier
  retry_after_secs = 60 if retry_after_secs <= 0
  ConnectorsShared::Logger.warn("Exceeded #{self.class} request limits. Going to sleep for #{retry_after_secs} seconds")
  raise ConnectorsShared::ThrottlingError.new(:suspend_until => DateTime.now + retry_after_secs.seconds, :cursors => options[:cursors])
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'faraday'
|
8
|
+
require 'httpclient'
|
9
|
+
require 'active_support/core_ext/array/wrap'
|
10
|
+
require 'active_support/core_ext/numeric/time'
|
11
|
+
require 'active_support/core_ext/object/deep_dup'
|
12
|
+
require 'connectors_shared'
|
13
|
+
require 'date'
|
14
|
+
require 'active_support/all'
|
15
|
+
require 'stubs/connectors/stats' unless defined?(Rails)
|
16
|
+
|
17
|
+
module ConnectorsSdk
|
18
|
+
module Base
|
19
|
+
class Extractor
|
20
|
+
# Number of attempts #with_auth_tokens_and_retry makes before re-raising a retryable error.
MAX_CONNECTION_ATTEMPTS = 3
|
21
|
+
# NOTE(review): not referenced inside this class; presumably a cursor key used by subclasses — confirm.
DEFAULT_CURSOR_KEY = 'all'.freeze
|
22
|
+
|
23
|
+
# Exception classes that #transient_error? treats as transient; such errors
# are converted to ConnectorsShared::TransientServerError by
# #convert_transient_server_errors.
TRANSIENT_SERVER_ERROR_CLASSES = Set[
  Faraday::ConnectionFailed,
  Faraday::SSLError,
  Faraday::TimeoutError,
  HTTPClient::ConnectTimeoutError,
  Net::OpenTimeout
]
|
32
|
+
|
33
|
+
# Read-only state set in #initialize; original_cursors is a deep copy of config.cursors taken at construction.
attr_reader :content_source_id, :config, :features, :original_cursors, :service_type, :completed
|
34
|
+
# Replaceable collaborators: monitor records successes/errors, client_proc builds the client (see #client).
attr_accessor :monitor, :client_proc
|
35
|
+
|
36
|
+
# content_source_id, service_type - identify the source; used as a prefix
#                                   in log messages.
# config                  - object exposing #cursors; a deep copy of the
#                           cursors is kept as original_cursors.
# features                - stored as is.
# client_proc             - callable returning the client (see #client).
# authorization_data_proc - callable returning authorization data
#                           (see #authorization_data).
# monitor                 - defaults to a ConnectorsShared::Monitor bound to
#                           this extractor.
def initialize(content_source_id:,
               service_type:,
               config:,
               features:,
               client_proc:,
               authorization_data_proc:,
               monitor: ConnectorsShared::Monitor.new(:connector => self))
  @content_source_id = content_source_id
  @service_type = service_type
  @features = features

  @config = config
  @original_cursors = config.cursors.deep_dup

  @client_proc = client_proc
  @authorization_data_proc = authorization_data_proc

  @monitor = monitor
  @completed = false
end
|
53
|
+
|
54
|
+
# Drops the memoized authorization data and fetches it again.
def authorization_data!
  @authorization_data = nil
  authorization_data
end
|
58
|
+
|
59
|
+
# Returns the authorization data, calling the proc given at construction
# only while no truthy value has been memoized.
def authorization_data
  return @authorization_data if @authorization_data

  @authorization_data = @authorization_data_proc.call
end
|
62
|
+
|
63
|
+
# Drops the memoized client and builds a new one.
def client!
  @client = nil
  client
end
|
67
|
+
|
68
|
+
# Returns the client, invoking client_proc only while no truthy client has
# been memoized.
def client
  return @client if @client

  @client = client_proc.call
end
|
71
|
+
|
72
|
+
# Base implementation: there are no cursors to report, so nil is returned.
def retrieve_latest_cursors
  return nil
end
|
75
|
+
|
76
|
+
# Runs +block+ wrapped in rate-limit and transient-server-error conversion,
# retrying on unexpected StandardErrors up to MAX_CONNECTION_ATTEMPTS times.
#
# Not retried:
# * TokenRefreshFailedError            - logged and re-raised
# * PublishingFailedError              - logged, its #reason is raised
# * EvictionWithNoProgressError        - logged and re-raised
# * EvictionError, ThrottlingError, JobDocumentLimitError, MonitoringError,
#   JobInterruptedError, SecretInvalidError,
#   InvalidIndexingConfigurationError  - re-raised untouched
#
# Any other StandardError is augmented via ExceptionTracking, logged, and
# the block is run again; once the attempt limit is reached the exception is
# logged through ExceptionTracking.log_exception and re-raised.
def with_auth_tokens_and_retry(&block)
  connection_attempts = 0

  # The explicit begin block is required: `retry` restarts only this block,
  # so the attempt counter above survives between attempts.
  begin
    convert_transient_server_errors do
      convert_rate_limit_errors(&block)
    end
  rescue ConnectorsShared::TokenRefreshFailedError => e
    log_error('Could not refresh token, aborting')
    raise e
  rescue ConnectorsShared::PublishingFailedError => e
    log_error('Could not publish, aborting')
    raise e.reason
  rescue ConnectorsShared::EvictionWithNoProgressError
    log_error('Aborting job because it did not make any progress and cannot be evicted')
    raise
  rescue ConnectorsShared::EvictionError,
         ConnectorsShared::ThrottlingError,
         ConnectorsShared::JobDocumentLimitError,
         ConnectorsShared::MonitoringError,
         ConnectorsShared::JobInterruptedError,
         ConnectorsShared::SecretInvalidError,
         ConnectorsShared::InvalidIndexingConfigurationError
    # These errors are never retried; let them bubble out unchanged.
    raise
  rescue StandardError => e
    ConnectorsShared::ExceptionTracking.augment_exception(e)
    connection_attempts += 1

    if connection_attempts < MAX_CONNECTION_ATTEMPTS
      log_warn("Failed to connect in with_auth_tokens_and_retry. Reason: #{e.class}: #{e.message} {:message_id => #{e.id}}")
      log_warn("Retries: #{connection_attempts}/#{MAX_CONNECTION_ATTEMPTS}, trying again.")
      retry
    end

    log_warn("Failed to connect in with_auth_tokens_and_retry Reason: #{e.class}: #{e.message} {:message_id => #{e.id}}")
    log_warn("Retries: #{connection_attempts}/#{MAX_CONNECTION_ATTEMPTS}, giving up.")
    ConnectorsShared::ExceptionTracking.log_exception(e)
    raise e
  end
end
|
116
|
+
|
117
|
+
# Abstract hook used by #document_changes, which calls it with a block
# taking (action, change, subextractors). The base implementation always
# raises NotImplementedError.
def yield_document_changes(modified_since: nil)
  raise NotImplementedError
end
|
120
|
+
|
121
|
+
# Wraps #yield_document_changes in an Enumerator, inside stats measurement
# and #with_auth_tokens_and_retry.
#
# With a block, every (action, change, subextractors) triple is passed to it
# and progress is logged every 100 documents. Without a block the Enumerator
# is only created here; it produces documents when the caller iterates it.
#
# Returns the Enumerator.
def document_changes(modified_since: nil, &block)
  changes = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.documents") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_documents") do
        extracted = 0
        changes = Enumerator.new do |yielder|
          yield_document_changes(:modified_since => modified_since) do |action, change, subextractors|
            yielder.yield(action, change, subextractors)
            extracted += 1
            log_info("Extracted #{extracted} documents so far") if (extracted % 100).zero?
          end
        end
        changes.each(&block) if block_given?
      end
    end
  end
  changes
end
|
140
|
+
|
141
|
+
# Runs +block+ for a single document with rate-limit and transient-error
# conversion, then records a success on the monitor.
#
# Exceptions listed in #fatal_exception_classes are logged as errors and
# re-raised. Any other StandardError is logged as a warning and recorded on
# the monitor instead of being raised.
#
# identifier - optional value used only in log messages.
def yield_single_document_change(identifier: nil, &block)
  log_debug("Extracting single document for #{identifier}") if identifier
  convert_transient_server_errors do
    convert_rate_limit_errors(&block)
  end
  monitor.note_success
rescue *fatal_exception_classes => fatal
  ConnectorsShared::ExceptionTracking.augment_exception(fatal)
  log_error("Encountered a fall-through error during extraction#{identifying_error_message(identifier)}: #{fatal.class}: #{fatal.message} {:message_id => #{fatal.id}}")
  raise
rescue StandardError => error
  ConnectorsShared::ExceptionTracking.augment_exception(error)
  log_warn("Encountered error during extraction#{identifying_error_message(identifier)}: #{error.class}: #{error.message} {:message_id => #{error.id}}")
  monitor.note_error(error, :id => error.id)
end
|
156
|
+
|
157
|
+
# Log-message suffix naming +identifier+, or an empty String when the
# identifier is not present.
def identifying_error_message(identifier)
  return '' unless identifier.present?

  " of '#{identifier}'"
end
|
160
|
+
|
161
|
+
# Abstract hook used by #deleted_ids, which calls it with a block taking a
# single id. The base implementation always raises NotImplementedError.
def yield_deleted_ids(_ids)
  raise NotImplementedError
end
|
164
|
+
|
165
|
+
# Wraps #yield_deleted_ids in an Enumerator, inside stats measurement and
# #with_auth_tokens_and_retry.
#
# With a block, every yielded id is passed to it and progress is logged
# every 100 ids. Without a block the Enumerator is only created here; it
# produces ids when the caller iterates it.
#
# Returns the Enumerator.
def deleted_ids(ids, &block)
  removed = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.deleted_ids") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_deleted_ids") do
        seen = 0
        removed = Enumerator.new do |yielder|
          yield_deleted_ids(ids) do |id|
            yielder.yield(id)
            seen += 1
            log_info("Deleted #{seen} documents so far") if (seen % 100).zero?
          end
        end
        removed.each(&block) if block_given?
      end
    end
  end
  removed
end
|
184
|
+
|
185
|
+
# Hook used by #permissions. The base implementation does nothing and never
# calls the block.
def yield_permissions(source_user_id)
  # no-op for content source without DLP support
end
|
188
|
+
|
189
|
+
# Runs #yield_permissions for +source_user_id+ inside stats measurement and
# #with_auth_tokens_and_retry. Each yielded permissions collection is logged
# (by size) and forwarded to +block+ when one is given.
def permissions(source_user_id, &block)
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.permissions") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_permissions") do
        yield_permissions(source_user_id) do |user_permissions|
          log_info("Extracted #{user_permissions.size} permissions for source user #{source_user_id}")
          block&.call(user_permissions)
        end
      end
    end
  end
end
|
201
|
+
|
202
|
+
# Define log_<level> for every supported level. String messages are prefixed
# with the content source id and service type; other message objects are
# passed to the shared logger unchanged.
ConnectorsShared::Logger::SUPPORTED_LOG_LEVELS.each do |level|
  define_method(:"log_#{level}") do |message|
    message = "ContentSource[#{content_source_id}, #{service_type}]: #{message}" if message.is_a?(String)
    ConnectorsShared::Logger.public_send(level, message)
  end
end
|
210
|
+
|
211
|
+
# Yields to the block. A StandardError for which #transient_error? is true is
# replaced by a ConnectorsShared::TransientServerError carrying the current
# cursors and a suspend_until time derived from the
# 'transient_server_error_retry_delay_minutes' setting; every other error is
# re-raised unchanged.
def convert_transient_server_errors
  yield
rescue StandardError => error
  raise unless transient_error?(error)

  retry_delay = Connectors.config.fetch('transient_server_error_retry_delay_minutes').minutes
  raise ConnectorsShared::TransientServerError.new(
    "Transient error #{error.class}: #{error.message}",
    :suspend_until => retry_delay.from_now,
    :cursors => config.cursors
  )
end
|
222
|
+
|
223
|
+
# True when +error+ is an instance of any class in
# TRANSIENT_SERVER_ERROR_CLASSES (subclasses included).
def transient_error?(error)
  TRANSIENT_SERVER_ERROR_CLASSES.any? { |klass| error.is_a?(klass) }
end
|
226
|
+
|
227
|
+
# The base extractor reports itself as not evictable.
def evictable?
  return false
end
|
230
|
+
|
231
|
+
# True when the config's current cursors differ from the deep copy taken at
# construction time.
def cursors_modified_since_start?
  !(config.cursors == original_cursors)
end
|
234
|
+
|
235
|
+
# Packs the download description into a five-element Array:
# [id, name, size, download_args, block], where block is nil when no block
# was given.
def download_args_and_proc(id:, name:, size:, download_args:, &block)
  [id, name, size, download_args] << block
end
|
238
|
+
|
239
|
+
private
|
240
|
+
|
241
|
+
# Hook for source-specific rate-limit handling. The base implementation just
# runs the block and returns its result; subclasses override it.
def convert_rate_limit_errors
  yield
end
|
244
|
+
|
245
|
+
# Exception classes that #yield_single_document_change logs as errors and
# re-raises instead of recording them on the monitor.
def fatal_exception_classes
  [
    ConnectorsShared::TokenRefreshFailedError,
    ConnectorsShared::EvictionError,
    ConnectorsShared::ThrottlingError,
    ConnectorsShared::JobDocumentLimitError,
    ConnectorsShared::MonitoringError
  ]
end
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsSdk
|
8
|
+
module Base
|
9
|
+
# Name-to-class registry that instantiates connectors on demand.
class Factory
  # Hash of registered connector classes keyed by name.
  attr_reader :connectors

  def initialize
    @connectors = {}
  end

  # Associates +klass+ with +name+, replacing any previous registration.
  # Returns +klass+.
  def register(name, klass)
    @connectors.store(name, klass)
  end

  # Returns a new instance of the class registered under +name+
  # (constructed without arguments).
  def connector(name)
    registered = @connectors[name]
    registered.new
  end
end
|
24
|
+
|
25
|
+
# Shared Factory instance that the registration below adds to.
REGISTRY = Factory.new
|
26
|
+
|
27
|
+
# loading plugins (might replace this with a directory scan and conventions on names)
|
28
|
+
require_relative '../share_point/http_call_wrapper'
|
29
|
+
|
30
|
+
# Register the SharePoint connector under its service type (loaded by the require_relative above).
REGISTRY.register(ConnectorsSdk::SharePoint::SERVICE_TYPE, ConnectorsSdk::SharePoint::HttpCallWrapper)
|
31
|
+
end
|
32
|
+
end
|