connectors_sdk 8.3.0.0.pre.20220414T060419Z → 8.3.0.0.pre.20220510T144908Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors_sdk/atlassian/config.rb +27 -0
- data/lib/connectors_sdk/atlassian/custom_client.rb +87 -0
- data/lib/connectors_sdk/base/adapter.rb +7 -8
- data/lib/connectors_sdk/base/authorization.rb +89 -0
- data/lib/connectors_sdk/base/custom_client.rb +0 -1
- data/lib/connectors_sdk/base/extractor.rb +3 -2
- data/lib/connectors_sdk/base/http_call_wrapper.rb +135 -0
- data/lib/connectors_sdk/base/registry.rb +5 -3
- data/lib/connectors_sdk/confluence/adapter.rb +216 -0
- data/lib/connectors_sdk/confluence/custom_client.rb +143 -0
- data/lib/connectors_sdk/confluence/extractor.rb +270 -0
- data/lib/connectors_sdk/confluence_cloud/authorization.rb +64 -0
- data/lib/connectors_sdk/confluence_cloud/custom_client.rb +61 -0
- data/lib/connectors_sdk/confluence_cloud/extractor.rb +59 -0
- data/lib/connectors_sdk/confluence_cloud/http_call_wrapper.rb +59 -0
- data/lib/connectors_sdk/helpers/atlassian_time_formatter.rb +10 -0
- data/lib/connectors_sdk/office365/adapter.rb +7 -7
- data/lib/connectors_sdk/office365/config.rb +1 -0
- data/lib/connectors_sdk/office365/custom_client.rb +31 -9
- data/lib/connectors_sdk/office365/extractor.rb +8 -8
- data/lib/connectors_sdk/share_point/adapter.rb +12 -12
- data/lib/connectors_sdk/share_point/authorization.rb +14 -62
- data/lib/connectors_sdk/share_point/extractor.rb +2 -2
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +24 -83
- data/lib/connectors_shared/exception_tracking.rb +4 -4
- data/lib/connectors_shared/extraction_utils.rb +109 -0
- data/lib/connectors_shared/middleware/basic_auth.rb +27 -0
- data/lib/connectors_shared/middleware/bearer_auth.rb +27 -0
- data/lib/connectors_shared/middleware/restrict_hostnames.rb +73 -0
- data/lib/connectors_shared/monitor.rb +3 -3
- data/lib/stubs/enterprise_search/exception_tracking.rb +43 -0
- metadata +22 -10
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
@@ -11,11 +11,11 @@ require 'connectors_sdk/base/adapter'
|
|
11
11
|
module ConnectorsSdk
|
12
12
|
module Office365
|
13
13
|
class Adapter < ConnectorsSdk::Base::Adapter
|
14
|
-
def self.
|
14
|
+
def self.es_document_from_file(_file)
|
15
15
|
raise NotImplementedError
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.
|
18
|
+
def self.es_document_from_folder(_folder)
|
19
19
|
raise NotImplementedError
|
20
20
|
end
|
21
21
|
|
@@ -26,7 +26,7 @@ module ConnectorsSdk
|
|
26
26
|
@item = item
|
27
27
|
end
|
28
28
|
|
29
|
-
def self.
|
29
|
+
def self.convert_id_to_es_id(_id)
|
30
30
|
raise NotImplementedError
|
31
31
|
end
|
32
32
|
|
@@ -41,10 +41,10 @@ module ConnectorsSdk
|
|
41
41
|
ConnectorsSdk::Office365::Adapter.normalize_path("#{parent_folder_path}/#{item.name}")
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
44
|
+
def to_es_document
|
45
45
|
{
|
46
46
|
:_fields_to_preserve => ConnectorsSdk::Office365::Adapter.fields_to_preserve,
|
47
|
-
:id => self.class.
|
47
|
+
:id => self.class.convert_id_to_es_id(item.id),
|
48
48
|
:path => get_path(item),
|
49
49
|
:title => item.name,
|
50
50
|
:url => item.webUrl,
|
@@ -96,7 +96,7 @@ module ConnectorsSdk
|
|
96
96
|
end
|
97
97
|
|
98
98
|
class FileGraphItem < GraphItem
|
99
|
-
def self.
|
99
|
+
def self.convert_id_to_es_id(_id)
|
100
100
|
raise NotImplementedError
|
101
101
|
end
|
102
102
|
|
@@ -132,7 +132,7 @@ module ConnectorsSdk
|
|
132
132
|
end
|
133
133
|
|
134
134
|
class PackageGraphItem < GraphItem
|
135
|
-
def self.
|
135
|
+
def self.convert_id_to_es_id(id)
|
136
136
|
raise NotImplementedError
|
137
137
|
end
|
138
138
|
|
@@ -6,9 +6,11 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'hashie/mash'
|
10
|
+
|
9
11
|
require 'connectors_sdk/base/custom_client'
|
12
|
+
require 'connectors_sdk/office365/extractor'
|
10
13
|
require 'connectors_shared'
|
11
|
-
require 'hashie/mash'
|
12
14
|
|
13
15
|
module ConnectorsSdk
|
14
16
|
module Office365
|
@@ -125,7 +127,7 @@ module ConnectorsSdk
|
|
125
127
|
yielded = 0
|
126
128
|
while stack.any?
|
127
129
|
folder_id = stack.pop
|
128
|
-
item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
|
130
|
+
item_children(drive_id, folder_id, :fields => fields_with_id, :break_after_page => break_after_page) do |item|
|
129
131
|
if item.folder
|
130
132
|
stack << item.id
|
131
133
|
end
|
@@ -134,9 +136,14 @@ module ConnectorsSdk
|
|
134
136
|
yielded += 1
|
135
137
|
end
|
136
138
|
|
137
|
-
if break_after_page && yielded >= 100
|
138
|
-
cursors['
|
139
|
-
|
139
|
+
if break_after_page && yielded >= 100
|
140
|
+
if cursors['item_children_next_link'].present?
|
141
|
+
stack << folder_id
|
142
|
+
end
|
143
|
+
if stack.any?
|
144
|
+
cursors['page_cursor'] = stack.dup
|
145
|
+
break
|
146
|
+
end
|
140
147
|
end
|
141
148
|
end
|
142
149
|
end
|
@@ -256,15 +263,30 @@ module ConnectorsSdk
|
|
256
263
|
request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
|
257
264
|
end
|
258
265
|
|
259
|
-
def item_children(drive_id, item_id, fields: [], &block)
|
260
|
-
|
261
|
-
query_params = transform_fields_to_request_query_params(fields)
|
262
|
-
response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
266
|
+
def item_children(drive_id, item_id, fields: [], break_after_page: false, &block)
|
267
|
+
next_link = cursors.delete('item_children_next_link') if break_after_page
|
263
268
|
|
269
|
+
response = if next_link.present?
|
270
|
+
request_json(:url => next_link)
|
271
|
+
else
|
272
|
+
endpoint = "drives/#{drive_id}/items/#{item_id}/children"
|
273
|
+
query_params = transform_fields_to_request_query_params(fields)
|
274
|
+
request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
275
|
+
end
|
276
|
+
|
277
|
+
yielded = 0
|
264
278
|
loop do
|
265
279
|
response.value.each(&block)
|
266
280
|
next_link = response['@odata.nextLink']
|
281
|
+
|
267
282
|
break if next_link.nil?
|
283
|
+
|
284
|
+
yielded += response.value.size
|
285
|
+
if break_after_page && yielded >= 100
|
286
|
+
cursors['item_children_next_link'] = next_link
|
287
|
+
break
|
288
|
+
end
|
289
|
+
|
268
290
|
response = request_json(:url => next_link)
|
269
291
|
end
|
270
292
|
end
|
@@ -51,12 +51,12 @@ module ConnectorsSdk
|
|
51
51
|
capture_exception(e)
|
52
52
|
end
|
53
53
|
|
54
|
-
if break_after_page && config.cursors['page_cursor'].present?
|
54
|
+
if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
|
55
55
|
break
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
-
if break_after_page && config.cursors['page_cursor'].blank?
|
59
|
+
if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
|
60
60
|
@completed = true
|
61
61
|
config.overwrite_cursors!(retrieve_latest_cursors)
|
62
62
|
log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
|
@@ -132,7 +132,7 @@ module ConnectorsSdk
|
|
132
132
|
@existing_drive_item_ids ||= Set.new.tap do |ids|
|
133
133
|
drives_to_index.each do |drive|
|
134
134
|
client.list_items(drive.id) do |item|
|
135
|
-
ids <<
|
135
|
+
ids << convert_id_to_es_id(item.id)
|
136
136
|
end
|
137
137
|
end
|
138
138
|
end
|
@@ -142,7 +142,7 @@ module ConnectorsSdk
|
|
142
142
|
raise NotImplementedError
|
143
143
|
end
|
144
144
|
|
145
|
-
def
|
145
|
+
def convert_id_to_es_id(_id)
|
146
146
|
raise NotImplementedError
|
147
147
|
end
|
148
148
|
|
@@ -170,7 +170,7 @@ module ConnectorsSdk
|
|
170
170
|
if item.deleted.nil?
|
171
171
|
yield_create_or_update(drive_id, item, &block)
|
172
172
|
else
|
173
|
-
yield :delete,
|
173
|
+
yield :delete, convert_id_to_es_id(item.id)
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
@@ -210,11 +210,11 @@ module ConnectorsSdk
|
|
210
210
|
|
211
211
|
def generate_document(item)
|
212
212
|
if item.file
|
213
|
-
adapter.
|
213
|
+
adapter.es_document_from_file(item)
|
214
214
|
elsif item.folder
|
215
|
-
adapter.
|
215
|
+
adapter.es_document_from_folder(item)
|
216
216
|
elsif item.package
|
217
|
-
adapter.
|
217
|
+
adapter.es_document_from_package(item)
|
218
218
|
else
|
219
219
|
raise "Unexpected Office 365 item type for item #{item}"
|
220
220
|
end
|
@@ -13,33 +13,33 @@ module ConnectorsSdk
|
|
13
13
|
class Adapter < Office365::Adapter
|
14
14
|
generate_id_helpers :share_point, 'share_point'
|
15
15
|
|
16
|
-
def self.
|
17
|
-
FileGraphItem.new(file).
|
16
|
+
def self.es_document_from_file(file)
|
17
|
+
FileGraphItem.new(file).to_es_document
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
21
|
-
FolderGraphItem.new(folder).
|
20
|
+
def self.es_document_from_folder(folder)
|
21
|
+
FolderGraphItem.new(folder).to_es_document
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.
|
25
|
-
PackageGraphItem.new(package).
|
24
|
+
def self.es_document_from_package(package)
|
25
|
+
PackageGraphItem.new(package).to_es_document
|
26
26
|
end
|
27
27
|
|
28
28
|
class FileGraphItem < Office365::Adapter::FileGraphItem
|
29
|
-
def self.
|
30
|
-
ConnectorsSdk::SharePoint::Adapter.
|
29
|
+
def self.convert_id_to_es_id(id)
|
30
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
class FolderGraphItem < Office365::Adapter::FolderGraphItem
|
35
|
-
def self.
|
36
|
-
ConnectorsSdk::SharePoint::Adapter.
|
35
|
+
def self.convert_id_to_es_id(id)
|
36
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
40
|
class PackageGraphItem < Office365::Adapter::PackageGraphItem
|
41
|
-
def self.
|
42
|
-
ConnectorsSdk::SharePoint::Adapter.
|
41
|
+
def self.convert_id_to_es_id(id)
|
42
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
@@ -6,70 +6,12 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
10
|
-
require 'signet'
|
11
|
-
require 'signet/oauth_2'
|
12
|
-
require 'signet/oauth_2/client'
|
9
|
+
require 'connectors_sdk/base/authorization'
|
13
10
|
|
14
11
|
module ConnectorsSdk
|
15
12
|
module SharePoint
|
16
|
-
class Authorization
|
13
|
+
class Authorization < ConnectorsSdk::Base::Authorization
|
17
14
|
class << self
|
18
|
-
def authorization_url
|
19
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
20
|
-
end
|
21
|
-
|
22
|
-
def token_credential_uri
|
23
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
24
|
-
end
|
25
|
-
|
26
|
-
def authorization_uri(params)
|
27
|
-
missing = missing_fields(params, %w[client_id])
|
28
|
-
unless missing.blank?
|
29
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
30
|
-
end
|
31
|
-
|
32
|
-
params[:response_type] = 'code'
|
33
|
-
params[:additional_parameters] = { :prompt => 'consent' }
|
34
|
-
client = oauth_client(params)
|
35
|
-
client.authorization_uri.to_s
|
36
|
-
end
|
37
|
-
|
38
|
-
def access_token(params)
|
39
|
-
missing = missing_fields(params, %w[client_id client_secret code redirect_uri])
|
40
|
-
unless missing.blank?
|
41
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
42
|
-
end
|
43
|
-
|
44
|
-
params[:grant_type] = 'authorization_code'
|
45
|
-
client = oauth_client(params)
|
46
|
-
client.fetch_access_token
|
47
|
-
end
|
48
|
-
|
49
|
-
def refresh(params)
|
50
|
-
missing = missing_fields(params, %w[client_id client_secret refresh_token])
|
51
|
-
unless missing.blank?
|
52
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
53
|
-
end
|
54
|
-
|
55
|
-
params[:grant_type] = 'refresh_token'
|
56
|
-
client = oauth_client(params)
|
57
|
-
client.refresh!
|
58
|
-
rescue StandardError => e
|
59
|
-
ConnectorsShared::ExceptionTracking.log_exception(e)
|
60
|
-
raise ConnectorsShared::TokenRefreshFailedError
|
61
|
-
end
|
62
|
-
|
63
|
-
def oauth_client(params)
|
64
|
-
options = params.merge(
|
65
|
-
:authorization_uri => authorization_url,
|
66
|
-
:token_credential_uri => token_credential_uri,
|
67
|
-
:scope => oauth_scope
|
68
|
-
)
|
69
|
-
options[:state] = JSON.dump(options[:state]) if options[:state]
|
70
|
-
Signet::OAuth2::Client.new(options)
|
71
|
-
end
|
72
|
-
|
73
15
|
def oauth_scope
|
74
16
|
%w[
|
75
17
|
User.ReadBasic.All
|
@@ -82,8 +24,18 @@ module ConnectorsSdk
|
|
82
24
|
]
|
83
25
|
end
|
84
26
|
|
85
|
-
|
86
|
-
|
27
|
+
private
|
28
|
+
|
29
|
+
def authorization_url
|
30
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
31
|
+
end
|
32
|
+
|
33
|
+
def token_credential_uri
|
34
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
35
|
+
end
|
36
|
+
|
37
|
+
def additional_parameters
|
38
|
+
{ :prompt => 'consent' }
|
87
39
|
end
|
88
40
|
end
|
89
41
|
end
|
@@ -15,8 +15,8 @@ module ConnectorsSdk
|
|
15
15
|
|
16
16
|
private
|
17
17
|
|
18
|
-
def
|
19
|
-
ConnectorsSdk::SharePoint::Adapter.
|
18
|
+
def convert_id_to_es_id(id)
|
19
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
20
20
|
end
|
21
21
|
|
22
22
|
def adapter
|
@@ -9,108 +9,49 @@
|
|
9
9
|
require 'connectors_sdk/office365/config'
|
10
10
|
require 'connectors_sdk/share_point/extractor'
|
11
11
|
require 'connectors_sdk/share_point/authorization'
|
12
|
-
require '
|
12
|
+
require 'connectors_sdk/base/http_call_wrapper'
|
13
13
|
|
14
14
|
module ConnectorsSdk
|
15
15
|
module SharePoint
|
16
|
-
|
16
|
+
class HttpCallWrapper < ConnectorsSdk::Base::HttpCallWrapper
|
17
|
+
SERVICE_TYPE = 'share_point'
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
cursors = params.fetch(:cursors, {}) || {}
|
21
|
-
features = params.fetch(:features, {}) || {}
|
22
|
-
|
23
|
-
# XXX can we cache that class across calls?
|
24
|
-
ConnectorsSdk::SharePoint::Extractor.new(
|
25
|
-
content_source_id: BSON::ObjectId.new,
|
26
|
-
service_type: SERVICE_TYPE,
|
27
|
-
authorization_data_proc: proc { { access_token: params[:access_token] } },
|
28
|
-
client_proc: proc { ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => cursors) },
|
29
|
-
config: ConnectorsSdk::Office365::Config.new(:cursors => cursors, :drive_ids => 'all', :index_permissions => params[:index_permissions] || false),
|
30
|
-
features: features
|
31
|
-
)
|
32
|
-
end
|
33
|
-
|
34
|
-
def document_batch(params)
|
35
|
-
results = []
|
36
|
-
|
37
|
-
@extractor = extractor(params)
|
38
|
-
|
39
|
-
@extractor.yield_document_changes(:break_after_page => true, :modified_since => @extractor.config.cursors['modified_since']) do |action, doc, download_args_and_proc|
|
40
|
-
download_obj = nil
|
41
|
-
if download_args_and_proc
|
42
|
-
download_obj = {
|
43
|
-
id: download_args_and_proc[0],
|
44
|
-
name: download_args_and_proc[1],
|
45
|
-
size: download_args_and_proc[2],
|
46
|
-
download_args: download_args_and_proc[3]
|
47
|
-
}
|
48
|
-
end
|
49
|
-
|
50
|
-
results << {
|
51
|
-
:action => action,
|
52
|
-
:document => doc,
|
53
|
-
:download => download_obj
|
54
|
-
}
|
55
|
-
end
|
56
|
-
|
57
|
-
results
|
58
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
59
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
60
|
-
end
|
61
|
-
|
62
|
-
def cursors
|
63
|
-
@extractor.config.cursors
|
19
|
+
def name
|
20
|
+
'SharePoint'
|
64
21
|
end
|
65
22
|
|
66
|
-
def
|
67
|
-
|
23
|
+
def service_type
|
24
|
+
SERVICE_TYPE
|
68
25
|
end
|
69
26
|
|
70
|
-
|
71
|
-
results = []
|
72
|
-
extractor(params).yield_deleted_ids(params[:ids]) do |id|
|
73
|
-
results << id
|
74
|
-
end
|
75
|
-
results
|
76
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
77
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
78
|
-
end
|
27
|
+
private
|
79
28
|
|
80
|
-
def
|
81
|
-
|
82
|
-
return permissions
|
83
|
-
end
|
84
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
85
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
29
|
+
def extractor_class
|
30
|
+
ConnectorsSdk::SharePoint::Extractor
|
86
31
|
end
|
87
32
|
|
88
|
-
def
|
89
|
-
ConnectorsSdk::SharePoint::Authorization
|
33
|
+
def authorization
|
34
|
+
ConnectorsSdk::SharePoint::Authorization
|
90
35
|
end
|
91
36
|
|
92
|
-
def
|
93
|
-
ConnectorsSdk::
|
37
|
+
def client(params)
|
38
|
+
ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => params.fetch(:cursors, {}) || {})
|
94
39
|
end
|
95
40
|
|
96
|
-
def
|
97
|
-
ConnectorsSdk::
|
41
|
+
def custom_client_error
|
42
|
+
ConnectorsSdk::Office365::CustomClient::ClientError
|
98
43
|
end
|
99
44
|
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
45
|
+
def config(params)
|
46
|
+
ConnectorsSdk::Office365::Config.new(
|
47
|
+
:cursors => params.fetch(:cursors, {}) || {},
|
48
|
+
:drive_ids => 'all',
|
49
|
+
:index_permissions => params[:index_permissions] || false
|
50
|
+
)
|
106
51
|
end
|
107
52
|
|
108
|
-
def
|
109
|
-
client
|
110
|
-
client.me
|
111
|
-
{ :status => 'OK', :statusCode => 200, :message => 'Connected to SharePoint' }
|
112
|
-
rescue StandardError => e
|
113
|
-
{ :status => 'FAILURE', :statusCode => e.is_a?(ConnectorsSdk::Office365::CustomClient::ClientError) ? e.status_code : 500, :message => e.message }
|
53
|
+
def health_check(params)
|
54
|
+
client(params).me
|
114
55
|
end
|
115
56
|
end
|
116
57
|
end
|
@@ -6,23 +6,23 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
|
10
9
|
require 'bson'
|
11
10
|
require 'connectors_shared/logger'
|
11
|
+
require 'stubs/enterprise_search/exception_tracking'
|
12
12
|
|
13
13
|
module ConnectorsShared
|
14
14
|
class ExceptionTracking
|
15
15
|
class << self
|
16
16
|
def capture_message(message, context = {})
|
17
|
-
|
17
|
+
EnterpriseSearch::ExceptionTracking.capture_message(message, context)
|
18
18
|
end
|
19
19
|
|
20
20
|
def capture_exception(exception, context = {})
|
21
|
-
|
21
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, :context => context)
|
22
22
|
end
|
23
23
|
|
24
24
|
def log_exception(exception, message = nil)
|
25
|
-
|
25
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
|
26
26
|
end
|
27
27
|
|
28
28
|
def augment_exception(exception)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'set'
|
10
|
+
|
11
|
+
module ConnectorsShared
|
12
|
+
module ExtractionUtils
|
13
|
+
# A list of tags we want to remove before extracting content
|
14
|
+
NON_CONTENT_TAGS = Set.new(%w[
|
15
|
+
comment
|
16
|
+
object
|
17
|
+
script
|
18
|
+
style
|
19
|
+
svg
|
20
|
+
video
|
21
|
+
]).freeze
|
22
|
+
|
23
|
+
# Tags that generate a word/line break when rendered
|
24
|
+
BREAK_ELEMENTS = Set.new(%w[
|
25
|
+
br
|
26
|
+
hr
|
27
|
+
]).freeze
|
28
|
+
|
29
|
+
# The character used to signal that a string has been truncated
|
30
|
+
OMISSION = '…'
|
31
|
+
|
32
|
+
#-------------------------------------------------------------------------------------------------
|
33
|
+
# Expects a Nokogiri HTML node, returns textual content from the node and all of its children
|
34
|
+
def self.node_descendant_text(node)
|
35
|
+
return '' unless node&.present?
|
36
|
+
|
37
|
+
unless node.respond_to?(:children) && node.respond_to?(:name) && node.respond_to?(:text?)
|
38
|
+
raise ArgumentError, "Expecting something node-like but got a #{node.class}"
|
39
|
+
end
|
40
|
+
|
41
|
+
to_process_stack = [node]
|
42
|
+
text = []
|
43
|
+
|
44
|
+
loop do
|
45
|
+
# Get the next node to process
|
46
|
+
node = to_process_stack.pop
|
47
|
+
break unless node
|
48
|
+
|
49
|
+
# Base cases where we append content to the text buffer
|
50
|
+
if node.kind_of?(String)
|
51
|
+
text << node unless node == ' ' && text.last == ' '
|
52
|
+
next
|
53
|
+
end
|
54
|
+
|
55
|
+
# Remove tags that do not contain any text (and which sometimes are treated as CDATA, generating garbage text in jruby)
|
56
|
+
next if NON_CONTENT_TAGS.include?(node.name)
|
57
|
+
|
58
|
+
# Tags that need to be replaced by spaces according to the standards
|
59
|
+
if replace_with_whitespace?(node)
|
60
|
+
text << ' ' unless text.last == ' '
|
61
|
+
next
|
62
|
+
end
|
63
|
+
|
64
|
+
# Extract the text from all text nodes
|
65
|
+
if node.text?
|
66
|
+
content = node.content
|
67
|
+
text << content.squish if content
|
68
|
+
next
|
69
|
+
end
|
70
|
+
|
71
|
+
# Add spaces before all tags
|
72
|
+
to_process_stack << ' '
|
73
|
+
|
74
|
+
# Recursion by adding the node's children to the stack and looping
|
75
|
+
node.children.reverse_each { |child| to_process_stack << child }
|
76
|
+
|
77
|
+
# Add spaces after all tags
|
78
|
+
to_process_stack << ' '
|
79
|
+
end
|
80
|
+
|
81
|
+
# Remove any duplicate spaces and return the content
|
82
|
+
text.join.squish!
|
83
|
+
end
|
84
|
+
|
85
|
+
#-------------------------------------------------------------------------------------------------
|
86
|
+
# Returns true if the node should be replaced with a space when extracting text from a document
|
87
|
+
def self.replace_with_whitespace?(node)
|
88
|
+
BREAK_ELEMENTS.include?(node.name)
|
89
|
+
end
|
90
|
+
|
91
|
+
#-------------------------------------------------------------------------------------------------
|
92
|
+
# Limits the size of a given string value down to a given limit (in bytes)
|
93
|
+
# This is heavily inspired by https://github.com/rails/rails/pull/27319/files
|
94
|
+
def self.limit_bytesize(string, limit)
|
95
|
+
return string if string.nil? || string.bytesize <= limit
|
96
|
+
real_limit = limit - OMISSION.bytesize
|
97
|
+
(+'').tap do |cut|
|
98
|
+
string.scan(/\X/) do |grapheme|
|
99
|
+
if cut.bytesize + grapheme.bytesize <= real_limit
|
100
|
+
cut << grapheme
|
101
|
+
else
|
102
|
+
cut << OMISSION
|
103
|
+
break
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BasicAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :basic_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@basic_auth_token = options.fetch(:basic_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BearerAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :bearer_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@bearer_auth_token = options.fetch(:bearer_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|