connectors_sdk 8.3.0.0.pre.20220414T060419Z → 8.3.0.0.pre.20220510T144908Z
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors_sdk/atlassian/config.rb +27 -0
- data/lib/connectors_sdk/atlassian/custom_client.rb +87 -0
- data/lib/connectors_sdk/base/adapter.rb +7 -8
- data/lib/connectors_sdk/base/authorization.rb +89 -0
- data/lib/connectors_sdk/base/custom_client.rb +0 -1
- data/lib/connectors_sdk/base/extractor.rb +3 -2
- data/lib/connectors_sdk/base/http_call_wrapper.rb +135 -0
- data/lib/connectors_sdk/base/registry.rb +5 -3
- data/lib/connectors_sdk/confluence/adapter.rb +216 -0
- data/lib/connectors_sdk/confluence/custom_client.rb +143 -0
- data/lib/connectors_sdk/confluence/extractor.rb +270 -0
- data/lib/connectors_sdk/confluence_cloud/authorization.rb +64 -0
- data/lib/connectors_sdk/confluence_cloud/custom_client.rb +61 -0
- data/lib/connectors_sdk/confluence_cloud/extractor.rb +59 -0
- data/lib/connectors_sdk/confluence_cloud/http_call_wrapper.rb +59 -0
- data/lib/connectors_sdk/helpers/atlassian_time_formatter.rb +10 -0
- data/lib/connectors_sdk/office365/adapter.rb +7 -7
- data/lib/connectors_sdk/office365/config.rb +1 -0
- data/lib/connectors_sdk/office365/custom_client.rb +31 -9
- data/lib/connectors_sdk/office365/extractor.rb +8 -8
- data/lib/connectors_sdk/share_point/adapter.rb +12 -12
- data/lib/connectors_sdk/share_point/authorization.rb +14 -62
- data/lib/connectors_sdk/share_point/extractor.rb +2 -2
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +24 -83
- data/lib/connectors_shared/exception_tracking.rb +4 -4
- data/lib/connectors_shared/extraction_utils.rb +109 -0
- data/lib/connectors_shared/middleware/basic_auth.rb +27 -0
- data/lib/connectors_shared/middleware/bearer_auth.rb +27 -0
- data/lib/connectors_shared/middleware/restrict_hostnames.rb +73 -0
- data/lib/connectors_shared/monitor.rb +3 -3
- data/lib/stubs/enterprise_search/exception_tracking.rb +43 -0
- metadata +22 -10
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
@@ -11,11 +11,11 @@ require 'connectors_sdk/base/adapter'
|
|
11
11
|
module ConnectorsSdk
|
12
12
|
module Office365
|
13
13
|
class Adapter < ConnectorsSdk::Base::Adapter
|
14
|
-
def self.
|
14
|
+
def self.es_document_from_file(_file)
|
15
15
|
raise NotImplementedError
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.
|
18
|
+
def self.es_document_from_folder(_folder)
|
19
19
|
raise NotImplementedError
|
20
20
|
end
|
21
21
|
|
@@ -26,7 +26,7 @@ module ConnectorsSdk
|
|
26
26
|
@item = item
|
27
27
|
end
|
28
28
|
|
29
|
-
def self.
|
29
|
+
def self.convert_id_to_es_id(_id)
|
30
30
|
raise NotImplementedError
|
31
31
|
end
|
32
32
|
|
@@ -41,10 +41,10 @@ module ConnectorsSdk
|
|
41
41
|
ConnectorsSdk::Office365::Adapter.normalize_path("#{parent_folder_path}/#{item.name}")
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
44
|
+
def to_es_document
|
45
45
|
{
|
46
46
|
:_fields_to_preserve => ConnectorsSdk::Office365::Adapter.fields_to_preserve,
|
47
|
-
:id => self.class.
|
47
|
+
:id => self.class.convert_id_to_es_id(item.id),
|
48
48
|
:path => get_path(item),
|
49
49
|
:title => item.name,
|
50
50
|
:url => item.webUrl,
|
@@ -96,7 +96,7 @@ module ConnectorsSdk
|
|
96
96
|
end
|
97
97
|
|
98
98
|
class FileGraphItem < GraphItem
|
99
|
-
def self.
|
99
|
+
def self.convert_id_to_es_id(_id)
|
100
100
|
raise NotImplementedError
|
101
101
|
end
|
102
102
|
|
@@ -132,7 +132,7 @@ module ConnectorsSdk
|
|
132
132
|
end
|
133
133
|
|
134
134
|
class PackageGraphItem < GraphItem
|
135
|
-
def self.
|
135
|
+
def self.convert_id_to_es_id(id)
|
136
136
|
raise NotImplementedError
|
137
137
|
end
|
138
138
|
|
@@ -6,9 +6,11 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
+
require 'hashie/mash'
|
10
|
+
|
9
11
|
require 'connectors_sdk/base/custom_client'
|
12
|
+
require 'connectors_sdk/office365/extractor'
|
10
13
|
require 'connectors_shared'
|
11
|
-
require 'hashie/mash'
|
12
14
|
|
13
15
|
module ConnectorsSdk
|
14
16
|
module Office365
|
@@ -125,7 +127,7 @@ module ConnectorsSdk
|
|
125
127
|
yielded = 0
|
126
128
|
while stack.any?
|
127
129
|
folder_id = stack.pop
|
128
|
-
item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
|
130
|
+
item_children(drive_id, folder_id, :fields => fields_with_id, :break_after_page => break_after_page) do |item|
|
129
131
|
if item.folder
|
130
132
|
stack << item.id
|
131
133
|
end
|
@@ -134,9 +136,14 @@ module ConnectorsSdk
|
|
134
136
|
yielded += 1
|
135
137
|
end
|
136
138
|
|
137
|
-
if break_after_page && yielded >= 100
|
138
|
-
cursors['
|
139
|
-
|
139
|
+
if break_after_page && yielded >= 100
|
140
|
+
if cursors['item_children_next_link'].present?
|
141
|
+
stack << folder_id
|
142
|
+
end
|
143
|
+
if stack.any?
|
144
|
+
cursors['page_cursor'] = stack.dup
|
145
|
+
break
|
146
|
+
end
|
140
147
|
end
|
141
148
|
end
|
142
149
|
end
|
@@ -256,15 +263,30 @@ module ConnectorsSdk
|
|
256
263
|
request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
|
257
264
|
end
|
258
265
|
|
259
|
-
def item_children(drive_id, item_id, fields: [], &block)
|
260
|
-
|
261
|
-
query_params = transform_fields_to_request_query_params(fields)
|
262
|
-
response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
266
|
+
def item_children(drive_id, item_id, fields: [], break_after_page: false, &block)
|
267
|
+
next_link = cursors.delete('item_children_next_link') if break_after_page
|
263
268
|
|
269
|
+
response = if next_link.present?
|
270
|
+
request_json(:url => next_link)
|
271
|
+
else
|
272
|
+
endpoint = "drives/#{drive_id}/items/#{item_id}/children"
|
273
|
+
query_params = transform_fields_to_request_query_params(fields)
|
274
|
+
request_endpoint(:endpoint => endpoint, :query_params => query_params)
|
275
|
+
end
|
276
|
+
|
277
|
+
yielded = 0
|
264
278
|
loop do
|
265
279
|
response.value.each(&block)
|
266
280
|
next_link = response['@odata.nextLink']
|
281
|
+
|
267
282
|
break if next_link.nil?
|
283
|
+
|
284
|
+
yielded += response.value.size
|
285
|
+
if break_after_page && yielded >= 100
|
286
|
+
cursors['item_children_next_link'] = next_link
|
287
|
+
break
|
288
|
+
end
|
289
|
+
|
268
290
|
response = request_json(:url => next_link)
|
269
291
|
end
|
270
292
|
end
|
@@ -51,12 +51,12 @@ module ConnectorsSdk
|
|
51
51
|
capture_exception(e)
|
52
52
|
end
|
53
53
|
|
54
|
-
if break_after_page && config.cursors['page_cursor'].present?
|
54
|
+
if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
|
55
55
|
break
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
-
if break_after_page && config.cursors['page_cursor'].blank?
|
59
|
+
if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
|
60
60
|
@completed = true
|
61
61
|
config.overwrite_cursors!(retrieve_latest_cursors)
|
62
62
|
log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
|
@@ -132,7 +132,7 @@ module ConnectorsSdk
|
|
132
132
|
@existing_drive_item_ids ||= Set.new.tap do |ids|
|
133
133
|
drives_to_index.each do |drive|
|
134
134
|
client.list_items(drive.id) do |item|
|
135
|
-
ids <<
|
135
|
+
ids << convert_id_to_es_id(item.id)
|
136
136
|
end
|
137
137
|
end
|
138
138
|
end
|
@@ -142,7 +142,7 @@ module ConnectorsSdk
|
|
142
142
|
raise NotImplementedError
|
143
143
|
end
|
144
144
|
|
145
|
-
def
|
145
|
+
def convert_id_to_es_id(_id)
|
146
146
|
raise NotImplementedError
|
147
147
|
end
|
148
148
|
|
@@ -170,7 +170,7 @@ module ConnectorsSdk
|
|
170
170
|
if item.deleted.nil?
|
171
171
|
yield_create_or_update(drive_id, item, &block)
|
172
172
|
else
|
173
|
-
yield :delete,
|
173
|
+
yield :delete, convert_id_to_es_id(item.id)
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
@@ -210,11 +210,11 @@ module ConnectorsSdk
|
|
210
210
|
|
211
211
|
def generate_document(item)
|
212
212
|
if item.file
|
213
|
-
adapter.
|
213
|
+
adapter.es_document_from_file(item)
|
214
214
|
elsif item.folder
|
215
|
-
adapter.
|
215
|
+
adapter.es_document_from_folder(item)
|
216
216
|
elsif item.package
|
217
|
-
adapter.
|
217
|
+
adapter.es_document_from_package(item)
|
218
218
|
else
|
219
219
|
raise "Unexpected Office 365 item type for item #{item}"
|
220
220
|
end
|
@@ -13,33 +13,33 @@ module ConnectorsSdk
|
|
13
13
|
class Adapter < Office365::Adapter
|
14
14
|
generate_id_helpers :share_point, 'share_point'
|
15
15
|
|
16
|
-
def self.
|
17
|
-
FileGraphItem.new(file).
|
16
|
+
def self.es_document_from_file(file)
|
17
|
+
FileGraphItem.new(file).to_es_document
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.
|
21
|
-
FolderGraphItem.new(folder).
|
20
|
+
def self.es_document_from_folder(folder)
|
21
|
+
FolderGraphItem.new(folder).to_es_document
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.
|
25
|
-
PackageGraphItem.new(package).
|
24
|
+
def self.es_document_from_package(package)
|
25
|
+
PackageGraphItem.new(package).to_es_document
|
26
26
|
end
|
27
27
|
|
28
28
|
class FileGraphItem < Office365::Adapter::FileGraphItem
|
29
|
-
def self.
|
30
|
-
ConnectorsSdk::SharePoint::Adapter.
|
29
|
+
def self.convert_id_to_es_id(id)
|
30
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
class FolderGraphItem < Office365::Adapter::FolderGraphItem
|
35
|
-
def self.
|
36
|
-
ConnectorsSdk::SharePoint::Adapter.
|
35
|
+
def self.convert_id_to_es_id(id)
|
36
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
40
|
class PackageGraphItem < Office365::Adapter::PackageGraphItem
|
41
|
-
def self.
|
42
|
-
ConnectorsSdk::SharePoint::Adapter.
|
41
|
+
def self.convert_id_to_es_id(id)
|
42
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
@@ -6,70 +6,12 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
10
|
-
require 'signet'
|
11
|
-
require 'signet/oauth_2'
|
12
|
-
require 'signet/oauth_2/client'
|
9
|
+
require 'connectors_sdk/base/authorization'
|
13
10
|
|
14
11
|
module ConnectorsSdk
|
15
12
|
module SharePoint
|
16
|
-
class Authorization
|
13
|
+
class Authorization < ConnectorsSdk::Base::Authorization
|
17
14
|
class << self
|
18
|
-
def authorization_url
|
19
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
20
|
-
end
|
21
|
-
|
22
|
-
def token_credential_uri
|
23
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
24
|
-
end
|
25
|
-
|
26
|
-
def authorization_uri(params)
|
27
|
-
missing = missing_fields(params, %w[client_id])
|
28
|
-
unless missing.blank?
|
29
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
30
|
-
end
|
31
|
-
|
32
|
-
params[:response_type] = 'code'
|
33
|
-
params[:additional_parameters] = { :prompt => 'consent' }
|
34
|
-
client = oauth_client(params)
|
35
|
-
client.authorization_uri.to_s
|
36
|
-
end
|
37
|
-
|
38
|
-
def access_token(params)
|
39
|
-
missing = missing_fields(params, %w[client_id client_secret code redirect_uri])
|
40
|
-
unless missing.blank?
|
41
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
42
|
-
end
|
43
|
-
|
44
|
-
params[:grant_type] = 'authorization_code'
|
45
|
-
client = oauth_client(params)
|
46
|
-
client.fetch_access_token
|
47
|
-
end
|
48
|
-
|
49
|
-
def refresh(params)
|
50
|
-
missing = missing_fields(params, %w[client_id client_secret refresh_token])
|
51
|
-
unless missing.blank?
|
52
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
53
|
-
end
|
54
|
-
|
55
|
-
params[:grant_type] = 'refresh_token'
|
56
|
-
client = oauth_client(params)
|
57
|
-
client.refresh!
|
58
|
-
rescue StandardError => e
|
59
|
-
ConnectorsShared::ExceptionTracking.log_exception(e)
|
60
|
-
raise ConnectorsShared::TokenRefreshFailedError
|
61
|
-
end
|
62
|
-
|
63
|
-
def oauth_client(params)
|
64
|
-
options = params.merge(
|
65
|
-
:authorization_uri => authorization_url,
|
66
|
-
:token_credential_uri => token_credential_uri,
|
67
|
-
:scope => oauth_scope
|
68
|
-
)
|
69
|
-
options[:state] = JSON.dump(options[:state]) if options[:state]
|
70
|
-
Signet::OAuth2::Client.new(options)
|
71
|
-
end
|
72
|
-
|
73
15
|
def oauth_scope
|
74
16
|
%w[
|
75
17
|
User.ReadBasic.All
|
@@ -82,8 +24,18 @@ module ConnectorsSdk
|
|
82
24
|
]
|
83
25
|
end
|
84
26
|
|
85
|
-
|
86
|
-
|
27
|
+
private
|
28
|
+
|
29
|
+
def authorization_url
|
30
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
31
|
+
end
|
32
|
+
|
33
|
+
def token_credential_uri
|
34
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
35
|
+
end
|
36
|
+
|
37
|
+
def additional_parameters
|
38
|
+
{ :prompt => 'consent' }
|
87
39
|
end
|
88
40
|
end
|
89
41
|
end
|
@@ -15,8 +15,8 @@ module ConnectorsSdk
|
|
15
15
|
|
16
16
|
private
|
17
17
|
|
18
|
-
def
|
19
|
-
ConnectorsSdk::SharePoint::Adapter.
|
18
|
+
def convert_id_to_es_id(id)
|
19
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
20
20
|
end
|
21
21
|
|
22
22
|
def adapter
|
@@ -9,108 +9,49 @@
|
|
9
9
|
require 'connectors_sdk/office365/config'
|
10
10
|
require 'connectors_sdk/share_point/extractor'
|
11
11
|
require 'connectors_sdk/share_point/authorization'
|
12
|
-
require '
|
12
|
+
require 'connectors_sdk/base/http_call_wrapper'
|
13
13
|
|
14
14
|
module ConnectorsSdk
|
15
15
|
module SharePoint
|
16
|
-
|
16
|
+
class HttpCallWrapper < ConnectorsSdk::Base::HttpCallWrapper
|
17
|
+
SERVICE_TYPE = 'share_point'
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
cursors = params.fetch(:cursors, {}) || {}
|
21
|
-
features = params.fetch(:features, {}) || {}
|
22
|
-
|
23
|
-
# XXX can we cache that class across calls?
|
24
|
-
ConnectorsSdk::SharePoint::Extractor.new(
|
25
|
-
content_source_id: BSON::ObjectId.new,
|
26
|
-
service_type: SERVICE_TYPE,
|
27
|
-
authorization_data_proc: proc { { access_token: params[:access_token] } },
|
28
|
-
client_proc: proc { ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => cursors) },
|
29
|
-
config: ConnectorsSdk::Office365::Config.new(:cursors => cursors, :drive_ids => 'all', :index_permissions => params[:index_permissions] || false),
|
30
|
-
features: features
|
31
|
-
)
|
32
|
-
end
|
33
|
-
|
34
|
-
def document_batch(params)
|
35
|
-
results = []
|
36
|
-
|
37
|
-
@extractor = extractor(params)
|
38
|
-
|
39
|
-
@extractor.yield_document_changes(:break_after_page => true, :modified_since => @extractor.config.cursors['modified_since']) do |action, doc, download_args_and_proc|
|
40
|
-
download_obj = nil
|
41
|
-
if download_args_and_proc
|
42
|
-
download_obj = {
|
43
|
-
id: download_args_and_proc[0],
|
44
|
-
name: download_args_and_proc[1],
|
45
|
-
size: download_args_and_proc[2],
|
46
|
-
download_args: download_args_and_proc[3]
|
47
|
-
}
|
48
|
-
end
|
49
|
-
|
50
|
-
results << {
|
51
|
-
:action => action,
|
52
|
-
:document => doc,
|
53
|
-
:download => download_obj
|
54
|
-
}
|
55
|
-
end
|
56
|
-
|
57
|
-
results
|
58
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
59
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
60
|
-
end
|
61
|
-
|
62
|
-
def cursors
|
63
|
-
@extractor.config.cursors
|
19
|
+
def name
|
20
|
+
'SharePoint'
|
64
21
|
end
|
65
22
|
|
66
|
-
def
|
67
|
-
|
23
|
+
def service_type
|
24
|
+
SERVICE_TYPE
|
68
25
|
end
|
69
26
|
|
70
|
-
|
71
|
-
results = []
|
72
|
-
extractor(params).yield_deleted_ids(params[:ids]) do |id|
|
73
|
-
results << id
|
74
|
-
end
|
75
|
-
results
|
76
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
77
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
78
|
-
end
|
27
|
+
private
|
79
28
|
|
80
|
-
def
|
81
|
-
|
82
|
-
return permissions
|
83
|
-
end
|
84
|
-
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
|
85
|
-
raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
|
29
|
+
def extractor_class
|
30
|
+
ConnectorsSdk::SharePoint::Extractor
|
86
31
|
end
|
87
32
|
|
88
|
-
def
|
89
|
-
ConnectorsSdk::SharePoint::Authorization
|
33
|
+
def authorization
|
34
|
+
ConnectorsSdk::SharePoint::Authorization
|
90
35
|
end
|
91
36
|
|
92
|
-
def
|
93
|
-
ConnectorsSdk::
|
37
|
+
def client(params)
|
38
|
+
ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => params.fetch(:cursors, {}) || {})
|
94
39
|
end
|
95
40
|
|
96
|
-
def
|
97
|
-
ConnectorsSdk::
|
41
|
+
def custom_client_error
|
42
|
+
ConnectorsSdk::Office365::CustomClient::ClientError
|
98
43
|
end
|
99
44
|
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
45
|
+
def config(params)
|
46
|
+
ConnectorsSdk::Office365::Config.new(
|
47
|
+
:cursors => params.fetch(:cursors, {}) || {},
|
48
|
+
:drive_ids => 'all',
|
49
|
+
:index_permissions => params[:index_permissions] || false
|
50
|
+
)
|
106
51
|
end
|
107
52
|
|
108
|
-
def
|
109
|
-
client
|
110
|
-
client.me
|
111
|
-
{ :status => 'OK', :statusCode => 200, :message => 'Connected to SharePoint' }
|
112
|
-
rescue StandardError => e
|
113
|
-
{ :status => 'FAILURE', :statusCode => e.is_a?(ConnectorsSdk::Office365::CustomClient::ClientError) ? e.status_code : 500, :message => e.message }
|
53
|
+
def health_check(params)
|
54
|
+
client(params).me
|
114
55
|
end
|
115
56
|
end
|
116
57
|
end
|
@@ -6,23 +6,23 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
|
10
9
|
require 'bson'
|
11
10
|
require 'connectors_shared/logger'
|
11
|
+
require 'stubs/enterprise_search/exception_tracking'
|
12
12
|
|
13
13
|
module ConnectorsShared
|
14
14
|
class ExceptionTracking
|
15
15
|
class << self
|
16
16
|
def capture_message(message, context = {})
|
17
|
-
|
17
|
+
EnterpriseSearch::ExceptionTracking.capture_message(message, context)
|
18
18
|
end
|
19
19
|
|
20
20
|
def capture_exception(exception, context = {})
|
21
|
-
|
21
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, :context => context)
|
22
22
|
end
|
23
23
|
|
24
24
|
def log_exception(exception, message = nil)
|
25
|
-
|
25
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
|
26
26
|
end
|
27
27
|
|
28
28
|
def augment_exception(exception)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'set'
|
10
|
+
|
11
|
+
module ConnectorsShared
|
12
|
+
module ExtractionUtils
|
13
|
+
# A list of tags we want to remove before extracting content
|
14
|
+
NON_CONTENT_TAGS = Set.new(%w[
|
15
|
+
comment
|
16
|
+
object
|
17
|
+
script
|
18
|
+
style
|
19
|
+
svg
|
20
|
+
video
|
21
|
+
]).freeze
|
22
|
+
|
23
|
+
# Tags that generate a word/line break when rendered
|
24
|
+
BREAK_ELEMENTS = Set.new(%w[
|
25
|
+
br
|
26
|
+
hr
|
27
|
+
]).freeze
|
28
|
+
|
29
|
+
# The character used to signal that a string has been truncated
|
30
|
+
OMISSION = '…'
|
31
|
+
|
32
|
+
#-------------------------------------------------------------------------------------------------
|
33
|
+
# Expects a Nokogiri HTML node, returns textual content from the node and all of its children
|
34
|
+
def self.node_descendant_text(node)
|
35
|
+
return '' unless node&.present?
|
36
|
+
|
37
|
+
unless node.respond_to?(:children) && node.respond_to?(:name) && node.respond_to?(:text?)
|
38
|
+
raise ArgumentError, "Expecting something node-like but got a #{node.class}"
|
39
|
+
end
|
40
|
+
|
41
|
+
to_process_stack = [node]
|
42
|
+
text = []
|
43
|
+
|
44
|
+
loop do
|
45
|
+
# Get the next node to process
|
46
|
+
node = to_process_stack.pop
|
47
|
+
break unless node
|
48
|
+
|
49
|
+
# Base cases where we append content to the text buffer
|
50
|
+
if node.kind_of?(String)
|
51
|
+
text << node unless node == ' ' && text.last == ' '
|
52
|
+
next
|
53
|
+
end
|
54
|
+
|
55
|
+
# Remove tags that do not contain any text (and which sometimes are treated as CDATA, generating garbage text in jruby)
|
56
|
+
next if NON_CONTENT_TAGS.include?(node.name)
|
57
|
+
|
58
|
+
# Tags that need to be replaced by spaces according to the standards
|
59
|
+
if replace_with_whitespace?(node)
|
60
|
+
text << ' ' unless text.last == ' '
|
61
|
+
next
|
62
|
+
end
|
63
|
+
|
64
|
+
# Extract the text from all text nodes
|
65
|
+
if node.text?
|
66
|
+
content = node.content
|
67
|
+
text << content.squish if content
|
68
|
+
next
|
69
|
+
end
|
70
|
+
|
71
|
+
# Add spaces before all tags
|
72
|
+
to_process_stack << ' '
|
73
|
+
|
74
|
+
# Recursion by adding the node's children to the stack and looping
|
75
|
+
node.children.reverse_each { |child| to_process_stack << child }
|
76
|
+
|
77
|
+
# Add spaces after all tags
|
78
|
+
to_process_stack << ' '
|
79
|
+
end
|
80
|
+
|
81
|
+
# Remove any duplicate spaces and return the content
|
82
|
+
text.join.squish!
|
83
|
+
end
|
84
|
+
|
85
|
+
#-------------------------------------------------------------------------------------------------
|
86
|
+
# Returns true if the node should be replaced with a space when extracting text from a document
|
87
|
+
def self.replace_with_whitespace?(node)
|
88
|
+
BREAK_ELEMENTS.include?(node.name)
|
89
|
+
end
|
90
|
+
|
91
|
+
#-------------------------------------------------------------------------------------------------
|
92
|
+
# Limits the size of a given string value down to a given limit (in bytes)
|
93
|
+
# This is heavily inspired by https://github.com/rails/rails/pull/27319/files
|
94
|
+
def self.limit_bytesize(string, limit)
|
95
|
+
return string if string.nil? || string.bytesize <= limit
|
96
|
+
real_limit = limit - OMISSION.bytesize
|
97
|
+
(+'').tap do |cut|
|
98
|
+
string.scan(/\X/) do |grapheme|
|
99
|
+
if cut.bytesize + grapheme.bytesize <= real_limit
|
100
|
+
cut << grapheme
|
101
|
+
else
|
102
|
+
cut << OMISSION
|
103
|
+
break
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BasicAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :basic_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@basic_auth_token = options.fetch(:basic_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BearerAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :bearer_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@bearer_auth_token = options.fetch(:bearer_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|