connectors_sdk 8.3.0.0.pre.20220414T060419Z → 8.3.0.0.pre.20220510T144908Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. checksums.yaml +4 -4
  2. data/lib/connectors_sdk/atlassian/config.rb +27 -0
  3. data/lib/connectors_sdk/atlassian/custom_client.rb +87 -0
  4. data/lib/connectors_sdk/base/adapter.rb +7 -8
  5. data/lib/connectors_sdk/base/authorization.rb +89 -0
  6. data/lib/connectors_sdk/base/custom_client.rb +0 -1
  7. data/lib/connectors_sdk/base/extractor.rb +3 -2
  8. data/lib/connectors_sdk/base/http_call_wrapper.rb +135 -0
  9. data/lib/connectors_sdk/base/registry.rb +5 -3
  10. data/lib/connectors_sdk/confluence/adapter.rb +216 -0
  11. data/lib/connectors_sdk/confluence/custom_client.rb +143 -0
  12. data/lib/connectors_sdk/confluence/extractor.rb +270 -0
  13. data/lib/connectors_sdk/confluence_cloud/authorization.rb +64 -0
  14. data/lib/connectors_sdk/confluence_cloud/custom_client.rb +61 -0
  15. data/lib/connectors_sdk/confluence_cloud/extractor.rb +59 -0
  16. data/lib/connectors_sdk/confluence_cloud/http_call_wrapper.rb +59 -0
  17. data/lib/connectors_sdk/helpers/atlassian_time_formatter.rb +10 -0
  18. data/lib/connectors_sdk/office365/adapter.rb +7 -7
  19. data/lib/connectors_sdk/office365/config.rb +1 -0
  20. data/lib/connectors_sdk/office365/custom_client.rb +31 -9
  21. data/lib/connectors_sdk/office365/extractor.rb +8 -8
  22. data/lib/connectors_sdk/share_point/adapter.rb +12 -12
  23. data/lib/connectors_sdk/share_point/authorization.rb +14 -62
  24. data/lib/connectors_sdk/share_point/extractor.rb +2 -2
  25. data/lib/connectors_sdk/share_point/http_call_wrapper.rb +24 -83
  26. data/lib/connectors_shared/exception_tracking.rb +4 -4
  27. data/lib/connectors_shared/extraction_utils.rb +109 -0
  28. data/lib/connectors_shared/middleware/basic_auth.rb +27 -0
  29. data/lib/connectors_shared/middleware/bearer_auth.rb +27 -0
  30. data/lib/connectors_shared/middleware/restrict_hostnames.rb +73 -0
  31. data/lib/connectors_shared/monitor.rb +3 -3
  32. data/lib/stubs/enterprise_search/exception_tracking.rb +43 -0
  33. metadata +22 -10
  34. data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
  35. data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
  36. data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
  37. data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
@@ -11,11 +11,11 @@ require 'connectors_sdk/base/adapter'
11
11
  module ConnectorsSdk
12
12
  module Office365
13
13
  class Adapter < ConnectorsSdk::Base::Adapter
14
- def self.swiftype_document_from_file(_file)
14
+ def self.es_document_from_file(_file)
15
15
  raise NotImplementedError
16
16
  end
17
17
 
18
- def self.swiftype_document_from_folder(_folder)
18
+ def self.es_document_from_folder(_folder)
19
19
  raise NotImplementedError
20
20
  end
21
21
 
@@ -26,7 +26,7 @@ module ConnectorsSdk
26
26
  @item = item
27
27
  end
28
28
 
29
- def self.convert_id_to_fp_id(_id)
29
+ def self.convert_id_to_es_id(_id)
30
30
  raise NotImplementedError
31
31
  end
32
32
 
@@ -41,10 +41,10 @@ module ConnectorsSdk
41
41
  ConnectorsSdk::Office365::Adapter.normalize_path("#{parent_folder_path}/#{item.name}")
42
42
  end
43
43
 
44
- def to_swiftype_document
44
+ def to_es_document
45
45
  {
46
46
  :_fields_to_preserve => ConnectorsSdk::Office365::Adapter.fields_to_preserve,
47
- :id => self.class.convert_id_to_fp_id(item.id),
47
+ :id => self.class.convert_id_to_es_id(item.id),
48
48
  :path => get_path(item),
49
49
  :title => item.name,
50
50
  :url => item.webUrl,
@@ -96,7 +96,7 @@ module ConnectorsSdk
96
96
  end
97
97
 
98
98
  class FileGraphItem < GraphItem
99
- def self.convert_id_to_fp_id(_id)
99
+ def self.convert_id_to_es_id(_id)
100
100
  raise NotImplementedError
101
101
  end
102
102
 
@@ -132,7 +132,7 @@ module ConnectorsSdk
132
132
  end
133
133
 
134
134
  class PackageGraphItem < GraphItem
135
- def self.convert_id_to_fp_id(id)
135
+ def self.convert_id_to_es_id(id)
136
136
  raise NotImplementedError
137
137
  end
138
138
 
@@ -7,6 +7,7 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors_sdk/base/config'
10
+ require 'connectors_sdk/office365/extractor'
10
11
 
11
12
  module ConnectorsSdk
12
13
  module Office365
@@ -6,9 +6,11 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
+ require 'hashie/mash'
10
+
9
11
  require 'connectors_sdk/base/custom_client'
12
+ require 'connectors_sdk/office365/extractor'
10
13
  require 'connectors_shared'
11
- require 'hashie/mash'
12
14
 
13
15
  module ConnectorsSdk
14
16
  module Office365
@@ -125,7 +127,7 @@ module ConnectorsSdk
125
127
  yielded = 0
126
128
  while stack.any?
127
129
  folder_id = stack.pop
128
- item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
130
+ item_children(drive_id, folder_id, :fields => fields_with_id, :break_after_page => break_after_page) do |item|
129
131
  if item.folder
130
132
  stack << item.id
131
133
  end
@@ -134,9 +136,14 @@ module ConnectorsSdk
134
136
  yielded += 1
135
137
  end
136
138
 
137
- if break_after_page && yielded >= 100 && stack.any?
138
- cursors['page_cursor'] = stack.dup
139
- break
139
+ if break_after_page && yielded >= 100
140
+ if cursors['item_children_next_link'].present?
141
+ stack << folder_id
142
+ end
143
+ if stack.any?
144
+ cursors['page_cursor'] = stack.dup
145
+ break
146
+ end
140
147
  end
141
148
  end
142
149
  end
@@ -256,15 +263,30 @@ module ConnectorsSdk
256
263
  request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
257
264
  end
258
265
 
259
- def item_children(drive_id, item_id, fields: [], &block)
260
- endpoint = "drives/#{drive_id}/items/#{item_id}/children"
261
- query_params = transform_fields_to_request_query_params(fields)
262
- response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
266
+ def item_children(drive_id, item_id, fields: [], break_after_page: false, &block)
267
+ next_link = cursors.delete('item_children_next_link') if break_after_page
263
268
 
269
+ response = if next_link.present?
270
+ request_json(:url => next_link)
271
+ else
272
+ endpoint = "drives/#{drive_id}/items/#{item_id}/children"
273
+ query_params = transform_fields_to_request_query_params(fields)
274
+ request_endpoint(:endpoint => endpoint, :query_params => query_params)
275
+ end
276
+
277
+ yielded = 0
264
278
  loop do
265
279
  response.value.each(&block)
266
280
  next_link = response['@odata.nextLink']
281
+
267
282
  break if next_link.nil?
283
+
284
+ yielded += response.value.size
285
+ if break_after_page && yielded >= 100
286
+ cursors['item_children_next_link'] = next_link
287
+ break
288
+ end
289
+
268
290
  response = request_json(:url => next_link)
269
291
  end
270
292
  end
@@ -51,12 +51,12 @@ module ConnectorsSdk
51
51
  capture_exception(e)
52
52
  end
53
53
 
54
- if break_after_page && config.cursors['page_cursor'].present?
54
+ if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
55
55
  break
56
56
  end
57
57
  end
58
58
 
59
- if break_after_page && config.cursors['page_cursor'].blank?
59
+ if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
60
60
  @completed = true
61
61
  config.overwrite_cursors!(retrieve_latest_cursors)
62
62
  log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
@@ -132,7 +132,7 @@ module ConnectorsSdk
132
132
  @existing_drive_item_ids ||= Set.new.tap do |ids|
133
133
  drives_to_index.each do |drive|
134
134
  client.list_items(drive.id) do |item|
135
- ids << convert_id_to_fp_id(item.id)
135
+ ids << convert_id_to_es_id(item.id)
136
136
  end
137
137
  end
138
138
  end
@@ -142,7 +142,7 @@ module ConnectorsSdk
142
142
  raise NotImplementedError
143
143
  end
144
144
 
145
- def convert_id_to_fp_id(_id)
145
+ def convert_id_to_es_id(_id)
146
146
  raise NotImplementedError
147
147
  end
148
148
 
@@ -170,7 +170,7 @@ module ConnectorsSdk
170
170
  if item.deleted.nil?
171
171
  yield_create_or_update(drive_id, item, &block)
172
172
  else
173
- yield :delete, convert_id_to_fp_id(item.id)
173
+ yield :delete, convert_id_to_es_id(item.id)
174
174
  end
175
175
  end
176
176
 
@@ -210,11 +210,11 @@ module ConnectorsSdk
210
210
 
211
211
  def generate_document(item)
212
212
  if item.file
213
- adapter.swiftype_document_from_file(item)
213
+ adapter.es_document_from_file(item)
214
214
  elsif item.folder
215
- adapter.swiftype_document_from_folder(item)
215
+ adapter.es_document_from_folder(item)
216
216
  elsif item.package
217
- adapter.swiftype_document_from_package(item)
217
+ adapter.es_document_from_package(item)
218
218
  else
219
219
  raise "Unexpected Office 365 item type for item #{item}"
220
220
  end
@@ -13,33 +13,33 @@ module ConnectorsSdk
13
13
  class Adapter < Office365::Adapter
14
14
  generate_id_helpers :share_point, 'share_point'
15
15
 
16
- def self.swiftype_document_from_file(file)
17
- FileGraphItem.new(file).to_swiftype_document
16
+ def self.es_document_from_file(file)
17
+ FileGraphItem.new(file).to_es_document
18
18
  end
19
19
 
20
- def self.swiftype_document_from_folder(folder)
21
- FolderGraphItem.new(folder).to_swiftype_document
20
+ def self.es_document_from_folder(folder)
21
+ FolderGraphItem.new(folder).to_es_document
22
22
  end
23
23
 
24
- def self.swiftype_document_from_package(package)
25
- PackageGraphItem.new(package).to_swiftype_document
24
+ def self.es_document_from_package(package)
25
+ PackageGraphItem.new(package).to_es_document
26
26
  end
27
27
 
28
28
  class FileGraphItem < Office365::Adapter::FileGraphItem
29
- def self.convert_id_to_fp_id(id)
30
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
29
+ def self.convert_id_to_es_id(id)
30
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
31
31
  end
32
32
  end
33
33
 
34
34
  class FolderGraphItem < Office365::Adapter::FolderGraphItem
35
- def self.convert_id_to_fp_id(id)
36
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
35
+ def self.convert_id_to_es_id(id)
36
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
37
37
  end
38
38
  end
39
39
 
40
40
  class PackageGraphItem < Office365::Adapter::PackageGraphItem
41
- def self.convert_id_to_fp_id(id)
42
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
41
+ def self.convert_id_to_es_id(id)
42
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
43
43
  end
44
44
  end
45
45
  end
@@ -6,70 +6,12 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors_shared'
10
- require 'signet'
11
- require 'signet/oauth_2'
12
- require 'signet/oauth_2/client'
9
+ require 'connectors_sdk/base/authorization'
13
10
 
14
11
  module ConnectorsSdk
15
12
  module SharePoint
16
- class Authorization
13
+ class Authorization < ConnectorsSdk::Base::Authorization
17
14
  class << self
18
- def authorization_url
19
- 'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
20
- end
21
-
22
- def token_credential_uri
23
- 'https://login.microsoftonline.com/common/oauth2/v2.0/token'
24
- end
25
-
26
- def authorization_uri(params)
27
- missing = missing_fields(params, %w[client_id])
28
- unless missing.blank?
29
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
30
- end
31
-
32
- params[:response_type] = 'code'
33
- params[:additional_parameters] = { :prompt => 'consent' }
34
- client = oauth_client(params)
35
- client.authorization_uri.to_s
36
- end
37
-
38
- def access_token(params)
39
- missing = missing_fields(params, %w[client_id client_secret code redirect_uri])
40
- unless missing.blank?
41
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
42
- end
43
-
44
- params[:grant_type] = 'authorization_code'
45
- client = oauth_client(params)
46
- client.fetch_access_token
47
- end
48
-
49
- def refresh(params)
50
- missing = missing_fields(params, %w[client_id client_secret refresh_token])
51
- unless missing.blank?
52
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
53
- end
54
-
55
- params[:grant_type] = 'refresh_token'
56
- client = oauth_client(params)
57
- client.refresh!
58
- rescue StandardError => e
59
- ConnectorsShared::ExceptionTracking.log_exception(e)
60
- raise ConnectorsShared::TokenRefreshFailedError
61
- end
62
-
63
- def oauth_client(params)
64
- options = params.merge(
65
- :authorization_uri => authorization_url,
66
- :token_credential_uri => token_credential_uri,
67
- :scope => oauth_scope
68
- )
69
- options[:state] = JSON.dump(options[:state]) if options[:state]
70
- Signet::OAuth2::Client.new(options)
71
- end
72
-
73
15
  def oauth_scope
74
16
  %w[
75
17
  User.ReadBasic.All
@@ -82,8 +24,18 @@ module ConnectorsSdk
82
24
  ]
83
25
  end
84
26
 
85
- def missing_fields(params, required = [])
86
- Array.wrap(required).select { |field| params[field.to_sym].nil? }
27
+ private
28
+
29
+ def authorization_url
30
+ 'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
31
+ end
32
+
33
+ def token_credential_uri
34
+ 'https://login.microsoftonline.com/common/oauth2/v2.0/token'
35
+ end
36
+
37
+ def additional_parameters
38
+ { :prompt => 'consent' }
87
39
  end
88
40
  end
89
41
  end
@@ -15,8 +15,8 @@ module ConnectorsSdk
15
15
 
16
16
  private
17
17
 
18
- def convert_id_to_fp_id(id)
19
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
18
+ def convert_id_to_es_id(id)
19
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
20
20
  end
21
21
 
22
22
  def adapter
@@ -9,108 +9,49 @@
9
9
  require 'connectors_sdk/office365/config'
10
10
  require 'connectors_sdk/share_point/extractor'
11
11
  require 'connectors_sdk/share_point/authorization'
12
- require 'bson'
12
+ require 'connectors_sdk/base/http_call_wrapper'
13
13
 
14
14
  module ConnectorsSdk
15
15
  module SharePoint
16
- SERVICE_TYPE = 'share_point'
16
+ class HttpCallWrapper < ConnectorsSdk::Base::HttpCallWrapper
17
+ SERVICE_TYPE = 'share_point'
17
18
 
18
- class HttpCallWrapper
19
- def extractor(params)
20
- cursors = params.fetch(:cursors, {}) || {}
21
- features = params.fetch(:features, {}) || {}
22
-
23
- # XXX can we cache that class across calls?
24
- ConnectorsSdk::SharePoint::Extractor.new(
25
- content_source_id: BSON::ObjectId.new,
26
- service_type: SERVICE_TYPE,
27
- authorization_data_proc: proc { { access_token: params[:access_token] } },
28
- client_proc: proc { ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => cursors) },
29
- config: ConnectorsSdk::Office365::Config.new(:cursors => cursors, :drive_ids => 'all', :index_permissions => params[:index_permissions] || false),
30
- features: features
31
- )
32
- end
33
-
34
- def document_batch(params)
35
- results = []
36
-
37
- @extractor = extractor(params)
38
-
39
- @extractor.yield_document_changes(:break_after_page => true, :modified_since => @extractor.config.cursors['modified_since']) do |action, doc, download_args_and_proc|
40
- download_obj = nil
41
- if download_args_and_proc
42
- download_obj = {
43
- id: download_args_and_proc[0],
44
- name: download_args_and_proc[1],
45
- size: download_args_and_proc[2],
46
- download_args: download_args_and_proc[3]
47
- }
48
- end
49
-
50
- results << {
51
- :action => action,
52
- :document => doc,
53
- :download => download_obj
54
- }
55
- end
56
-
57
- results
58
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
59
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
60
- end
61
-
62
- def cursors
63
- @extractor.config.cursors
19
+ def name
20
+ 'SharePoint'
64
21
  end
65
22
 
66
- def completed?
67
- @extractor.completed
23
+ def service_type
24
+ SERVICE_TYPE
68
25
  end
69
26
 
70
- def deleted(params)
71
- results = []
72
- extractor(params).yield_deleted_ids(params[:ids]) do |id|
73
- results << id
74
- end
75
- results
76
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
77
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
78
- end
27
+ private
79
28
 
80
- def permissions(params)
81
- extractor(params).yield_permissions(params[:user_id]) do |permissions|
82
- return permissions
83
- end
84
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
85
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
29
+ def extractor_class
30
+ ConnectorsSdk::SharePoint::Extractor
86
31
  end
87
32
 
88
- def authorization_uri(body)
89
- ConnectorsSdk::SharePoint::Authorization.authorization_uri(body)
33
+ def authorization
34
+ ConnectorsSdk::SharePoint::Authorization
90
35
  end
91
36
 
92
- def access_token(params)
93
- ConnectorsSdk::SharePoint::Authorization.access_token(params)
37
+ def client(params)
38
+ ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => params.fetch(:cursors, {}) || {})
94
39
  end
95
40
 
96
- def refresh(params)
97
- ConnectorsSdk::SharePoint::Authorization.refresh(params)
41
+ def custom_client_error
42
+ ConnectorsSdk::Office365::CustomClient::ClientError
98
43
  end
99
44
 
100
- def download(params)
101
- extractor(params).download(params[:meta])
102
- end
103
-
104
- def name
105
- 'SharePoint'
45
+ def config(params)
46
+ ConnectorsSdk::Office365::Config.new(
47
+ :cursors => params.fetch(:cursors, {}) || {},
48
+ :drive_ids => 'all',
49
+ :index_permissions => params[:index_permissions] || false
50
+ )
106
51
  end
107
52
 
108
- def source_status(params)
109
- client = ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token])
110
- client.me
111
- { :status => 'OK', :statusCode => 200, :message => 'Connected to SharePoint' }
112
- rescue StandardError => e
113
- { :status => 'FAILURE', :statusCode => e.is_a?(ConnectorsSdk::Office365::CustomClient::ClientError) ? e.status_code : 500, :message => e.message }
53
+ def health_check(params)
54
+ client(params).me
114
55
  end
115
56
  end
116
57
  end
@@ -6,23 +6,23 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
10
9
  require 'bson'
11
10
  require 'connectors_shared/logger'
11
+ require 'stubs/enterprise_search/exception_tracking'
12
12
 
13
13
  module ConnectorsShared
14
14
  class ExceptionTracking
15
15
  class << self
16
16
  def capture_message(message, context = {})
17
- Swiftype::ExceptionTracking.capture_message(message, context)
17
+ EnterpriseSearch::ExceptionTracking.capture_message(message, context)
18
18
  end
19
19
 
20
20
  def capture_exception(exception, context = {})
21
- Swiftype::ExceptionTracking.log_exception(exception, :context => context)
21
+ EnterpriseSearch::ExceptionTracking.log_exception(exception, :context => context)
22
22
  end
23
23
 
24
24
  def log_exception(exception, message = nil)
25
- Swiftype::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
25
+ EnterpriseSearch::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
26
26
  end
27
27
 
28
28
  def augment_exception(exception)
@@ -0,0 +1,109 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'set'
10
+
11
module ConnectorsShared
  # Stateless helpers for extracting plain text from HTML node trees and for
  # capping the byte size of strings.
  module ExtractionUtils
    # A list of tags we want to remove before extracting content
    NON_CONTENT_TAGS = Set.new(%w[
      comment
      object
      script
      style
      svg
      video
    ]).freeze

    # Tags that generate a word/line break when rendered
    BREAK_ELEMENTS = Set.new(%w[
      br
      hr
    ]).freeze

    # The character used to signal that a string has been truncated
    OMISSION = '…'

    #-------------------------------------------------------------------------------------------------
    # Expects a Nokogiri HTML node, returns textual content from the node and all of its children.
    #
    # The tree is walked iteratively with an explicit stack. Whitespace is collapsed so that the
    # result never contains leading, trailing or repeated spaces.
    #
    # Returns '' when the node is nil or not `present?`.
    # Raises ArgumentError when the argument does not respond to :children, :name and :text?.
    def self.node_descendant_text(node)
      return '' unless node&.present?

      unless node.respond_to?(:children) && node.respond_to?(:name) && node.respond_to?(:text?)
        raise ArgumentError, "Expecting something node-like but got a #{node.class}"
      end

      to_process_stack = [node]
      text = []

      # The stack holds nodes still to visit, plus literal ' ' markers standing for tag boundaries
      while (current = to_process_stack.pop)
        # Base case: a boundary marker; never emit two spaces in a row
        if current.kind_of?(String)
          text << current unless current == ' ' && text.last == ' '
          next
        end

        # Remove tags that do not contain any text (and which sometimes are treated as CDATA,
        # generating garbage text in jruby)
        next if NON_CONTENT_TAGS.include?(current.name)

        # Tags that need to be replaced by spaces according to the standards
        if replace_with_whitespace?(current)
          text << ' ' unless text.last == ' '
          next
        end

        # Extract the text from all text nodes
        if current.text?
          content = current.content
          text << content.squish if content
          next
        end

        # Pushed first, so popped last: a space after the tag's content
        to_process_stack << ' '

        # Depth-first descent: children are pushed in reverse so they pop in document order
        current.children.reverse_each { |child| to_process_stack << child }

        # Pushed last, so popped first: a space before the tag's content
        to_process_stack << ' '
      end

      # Remove any duplicate spaces and return the content
      text.join.squish
    end

    #-------------------------------------------------------------------------------------------------
    # Returns true if the node should be replaced with a space when extracting text from a document
    def self.replace_with_whitespace?(node)
      BREAK_ELEMENTS.include?(node.name)
    end

    #-------------------------------------------------------------------------------------------------
    # Limits the size of a given string value down to a given limit (in bytes).
    # This is heavily inspired by https://github.com/rails/rails/pull/27319/files
    #
    # The string is cut on grapheme-cluster boundaries, so multi-byte characters are never split.
    # OMISSION is appended to a truncated string only when it fits within the limit; for smaller
    # limits the string is cut without a marker so the result never exceeds the limit.
    #
    # Returns the original object (nil included) when no truncation is needed.
    def self.limit_bytesize(string, limit)
      return string if string.nil? || string.bytesize <= limit

      # Only reserve room for the marker when the limit can actually hold it
      omission = OMISSION.bytesize <= limit ? OMISSION : ''
      real_limit = limit - omission.bytesize

      cut = +''
      string.scan(/\X/) do |grapheme|
        break if cut.bytesize + grapheme.bytesize > real_limit

        cut << grapheme
      end
      cut << omission
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module ConnectorsShared
10
+ module Middleware
11
+ class BasicAuth
12
+ AUTHORIZATION = 'Authorization'
13
+
14
+ attr_reader :basic_auth_token
15
+
16
+ def initialize(app = nil, options = {})
17
+ @app = app
18
+ @basic_auth_token = options.fetch(:basic_auth_token)
19
+ end
20
+
21
+ def call(env)
22
+ env.request_headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
23
+ @app.call(env)
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module ConnectorsShared
10
+ module Middleware
11
+ class BearerAuth
12
+ AUTHORIZATION = 'Authorization'
13
+
14
+ attr_reader :bearer_auth_token
15
+
16
+ def initialize(app = nil, options = {})
17
+ @app = app
18
+ @bearer_auth_token = options.fetch(:bearer_auth_token)
19
+ end
20
+
21
+ def call(env)
22
+ env.request_headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"
23
+ @app.call(env)
24
+ end
25
+ end
26
+ end
27
+ end