connectors_sdk 8.3.0.0.pre.20220414T060419Z → 8.3.0.0.pre.20220510T144908Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/lib/connectors_sdk/atlassian/config.rb +27 -0
  3. data/lib/connectors_sdk/atlassian/custom_client.rb +87 -0
  4. data/lib/connectors_sdk/base/adapter.rb +7 -8
  5. data/lib/connectors_sdk/base/authorization.rb +89 -0
  6. data/lib/connectors_sdk/base/custom_client.rb +0 -1
  7. data/lib/connectors_sdk/base/extractor.rb +3 -2
  8. data/lib/connectors_sdk/base/http_call_wrapper.rb +135 -0
  9. data/lib/connectors_sdk/base/registry.rb +5 -3
  10. data/lib/connectors_sdk/confluence/adapter.rb +216 -0
  11. data/lib/connectors_sdk/confluence/custom_client.rb +143 -0
  12. data/lib/connectors_sdk/confluence/extractor.rb +270 -0
  13. data/lib/connectors_sdk/confluence_cloud/authorization.rb +64 -0
  14. data/lib/connectors_sdk/confluence_cloud/custom_client.rb +61 -0
  15. data/lib/connectors_sdk/confluence_cloud/extractor.rb +59 -0
  16. data/lib/connectors_sdk/confluence_cloud/http_call_wrapper.rb +59 -0
  17. data/lib/connectors_sdk/helpers/atlassian_time_formatter.rb +10 -0
  18. data/lib/connectors_sdk/office365/adapter.rb +7 -7
  19. data/lib/connectors_sdk/office365/config.rb +1 -0
  20. data/lib/connectors_sdk/office365/custom_client.rb +31 -9
  21. data/lib/connectors_sdk/office365/extractor.rb +8 -8
  22. data/lib/connectors_sdk/share_point/adapter.rb +12 -12
  23. data/lib/connectors_sdk/share_point/authorization.rb +14 -62
  24. data/lib/connectors_sdk/share_point/extractor.rb +2 -2
  25. data/lib/connectors_sdk/share_point/http_call_wrapper.rb +24 -83
  26. data/lib/connectors_shared/exception_tracking.rb +4 -4
  27. data/lib/connectors_shared/extraction_utils.rb +109 -0
  28. data/lib/connectors_shared/middleware/basic_auth.rb +27 -0
  29. data/lib/connectors_shared/middleware/bearer_auth.rb +27 -0
  30. data/lib/connectors_shared/middleware/restrict_hostnames.rb +73 -0
  31. data/lib/connectors_shared/monitor.rb +3 -3
  32. data/lib/stubs/enterprise_search/exception_tracking.rb +43 -0
  33. metadata +22 -10
  34. data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
  35. data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
  36. data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
  37. data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
@@ -11,11 +11,11 @@ require 'connectors_sdk/base/adapter'
11
11
  module ConnectorsSdk
12
12
  module Office365
13
13
  class Adapter < ConnectorsSdk::Base::Adapter
14
- def self.swiftype_document_from_file(_file)
14
+ def self.es_document_from_file(_file)
15
15
  raise NotImplementedError
16
16
  end
17
17
 
18
- def self.swiftype_document_from_folder(_folder)
18
+ def self.es_document_from_folder(_folder)
19
19
  raise NotImplementedError
20
20
  end
21
21
 
@@ -26,7 +26,7 @@ module ConnectorsSdk
26
26
  @item = item
27
27
  end
28
28
 
29
- def self.convert_id_to_fp_id(_id)
29
+ def self.convert_id_to_es_id(_id)
30
30
  raise NotImplementedError
31
31
  end
32
32
 
@@ -41,10 +41,10 @@ module ConnectorsSdk
41
41
  ConnectorsSdk::Office365::Adapter.normalize_path("#{parent_folder_path}/#{item.name}")
42
42
  end
43
43
 
44
- def to_swiftype_document
44
+ def to_es_document
45
45
  {
46
46
  :_fields_to_preserve => ConnectorsSdk::Office365::Adapter.fields_to_preserve,
47
- :id => self.class.convert_id_to_fp_id(item.id),
47
+ :id => self.class.convert_id_to_es_id(item.id),
48
48
  :path => get_path(item),
49
49
  :title => item.name,
50
50
  :url => item.webUrl,
@@ -96,7 +96,7 @@ module ConnectorsSdk
96
96
  end
97
97
 
98
98
  class FileGraphItem < GraphItem
99
- def self.convert_id_to_fp_id(_id)
99
+ def self.convert_id_to_es_id(_id)
100
100
  raise NotImplementedError
101
101
  end
102
102
 
@@ -132,7 +132,7 @@ module ConnectorsSdk
132
132
  end
133
133
 
134
134
  class PackageGraphItem < GraphItem
135
- def self.convert_id_to_fp_id(id)
135
+ def self.convert_id_to_es_id(id)
136
136
  raise NotImplementedError
137
137
  end
138
138
 
@@ -7,6 +7,7 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors_sdk/base/config'
10
+ require 'connectors_sdk/office365/extractor'
10
11
 
11
12
  module ConnectorsSdk
12
13
  module Office365
@@ -6,9 +6,11 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
+ require 'hashie/mash'
10
+
9
11
  require 'connectors_sdk/base/custom_client'
12
+ require 'connectors_sdk/office365/extractor'
10
13
  require 'connectors_shared'
11
- require 'hashie/mash'
12
14
 
13
15
  module ConnectorsSdk
14
16
  module Office365
@@ -125,7 +127,7 @@ module ConnectorsSdk
125
127
  yielded = 0
126
128
  while stack.any?
127
129
  folder_id = stack.pop
128
- item_children(drive_id, folder_id, :fields => fields_with_id) do |item|
130
+ item_children(drive_id, folder_id, :fields => fields_with_id, :break_after_page => break_after_page) do |item|
129
131
  if item.folder
130
132
  stack << item.id
131
133
  end
@@ -134,9 +136,14 @@ module ConnectorsSdk
134
136
  yielded += 1
135
137
  end
136
138
 
137
- if break_after_page && yielded >= 100 && stack.any?
138
- cursors['page_cursor'] = stack.dup
139
- break
139
+ if break_after_page && yielded >= 100
140
+ if cursors['item_children_next_link'].present?
141
+ stack << folder_id
142
+ end
143
+ if stack.any?
144
+ cursors['page_cursor'] = stack.dup
145
+ break
146
+ end
140
147
  end
141
148
  end
142
149
  end
@@ -256,15 +263,30 @@ module ConnectorsSdk
256
263
  request_endpoint(:endpoint => "drives/#{drive_id}/root", :query_params => query_params)
257
264
  end
258
265
 
259
- def item_children(drive_id, item_id, fields: [], &block)
260
- endpoint = "drives/#{drive_id}/items/#{item_id}/children"
261
- query_params = transform_fields_to_request_query_params(fields)
262
- response = request_endpoint(:endpoint => endpoint, :query_params => query_params)
266
+ def item_children(drive_id, item_id, fields: [], break_after_page: false, &block)
267
+ next_link = cursors.delete('item_children_next_link') if break_after_page
263
268
 
269
+ response = if next_link.present?
270
+ request_json(:url => next_link)
271
+ else
272
+ endpoint = "drives/#{drive_id}/items/#{item_id}/children"
273
+ query_params = transform_fields_to_request_query_params(fields)
274
+ request_endpoint(:endpoint => endpoint, :query_params => query_params)
275
+ end
276
+
277
+ yielded = 0
264
278
  loop do
265
279
  response.value.each(&block)
266
280
  next_link = response['@odata.nextLink']
281
+
267
282
  break if next_link.nil?
283
+
284
+ yielded += response.value.size
285
+ if break_after_page && yielded >= 100
286
+ cursors['item_children_next_link'] = next_link
287
+ break
288
+ end
289
+
268
290
  response = request_json(:url => next_link)
269
291
  end
270
292
  end
@@ -51,12 +51,12 @@ module ConnectorsSdk
51
51
  capture_exception(e)
52
52
  end
53
53
 
54
- if break_after_page && config.cursors['page_cursor'].present?
54
+ if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
55
55
  break
56
56
  end
57
57
  end
58
58
 
59
- if break_after_page && config.cursors['page_cursor'].blank?
59
+ if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
60
60
  @completed = true
61
61
  config.overwrite_cursors!(retrieve_latest_cursors)
62
62
  log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
@@ -132,7 +132,7 @@ module ConnectorsSdk
132
132
  @existing_drive_item_ids ||= Set.new.tap do |ids|
133
133
  drives_to_index.each do |drive|
134
134
  client.list_items(drive.id) do |item|
135
- ids << convert_id_to_fp_id(item.id)
135
+ ids << convert_id_to_es_id(item.id)
136
136
  end
137
137
  end
138
138
  end
@@ -142,7 +142,7 @@ module ConnectorsSdk
142
142
  raise NotImplementedError
143
143
  end
144
144
 
145
- def convert_id_to_fp_id(_id)
145
+ def convert_id_to_es_id(_id)
146
146
  raise NotImplementedError
147
147
  end
148
148
 
@@ -170,7 +170,7 @@ module ConnectorsSdk
170
170
  if item.deleted.nil?
171
171
  yield_create_or_update(drive_id, item, &block)
172
172
  else
173
- yield :delete, convert_id_to_fp_id(item.id)
173
+ yield :delete, convert_id_to_es_id(item.id)
174
174
  end
175
175
  end
176
176
 
@@ -210,11 +210,11 @@ module ConnectorsSdk
210
210
 
211
211
  def generate_document(item)
212
212
  if item.file
213
- adapter.swiftype_document_from_file(item)
213
+ adapter.es_document_from_file(item)
214
214
  elsif item.folder
215
- adapter.swiftype_document_from_folder(item)
215
+ adapter.es_document_from_folder(item)
216
216
  elsif item.package
217
- adapter.swiftype_document_from_package(item)
217
+ adapter.es_document_from_package(item)
218
218
  else
219
219
  raise "Unexpected Office 365 item type for item #{item}"
220
220
  end
@@ -13,33 +13,33 @@ module ConnectorsSdk
13
13
  class Adapter < Office365::Adapter
14
14
  generate_id_helpers :share_point, 'share_point'
15
15
 
16
- def self.swiftype_document_from_file(file)
17
- FileGraphItem.new(file).to_swiftype_document
16
+ def self.es_document_from_file(file)
17
+ FileGraphItem.new(file).to_es_document
18
18
  end
19
19
 
20
- def self.swiftype_document_from_folder(folder)
21
- FolderGraphItem.new(folder).to_swiftype_document
20
+ def self.es_document_from_folder(folder)
21
+ FolderGraphItem.new(folder).to_es_document
22
22
  end
23
23
 
24
- def self.swiftype_document_from_package(package)
25
- PackageGraphItem.new(package).to_swiftype_document
24
+ def self.es_document_from_package(package)
25
+ PackageGraphItem.new(package).to_es_document
26
26
  end
27
27
 
28
28
  class FileGraphItem < Office365::Adapter::FileGraphItem
29
- def self.convert_id_to_fp_id(id)
30
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
29
+ def self.convert_id_to_es_id(id)
30
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
31
31
  end
32
32
  end
33
33
 
34
34
  class FolderGraphItem < Office365::Adapter::FolderGraphItem
35
- def self.convert_id_to_fp_id(id)
36
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
35
+ def self.convert_id_to_es_id(id)
36
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
37
37
  end
38
38
  end
39
39
 
40
40
  class PackageGraphItem < Office365::Adapter::PackageGraphItem
41
- def self.convert_id_to_fp_id(id)
42
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
41
+ def self.convert_id_to_es_id(id)
42
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
43
43
  end
44
44
  end
45
45
  end
@@ -6,70 +6,12 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'connectors_shared'
10
- require 'signet'
11
- require 'signet/oauth_2'
12
- require 'signet/oauth_2/client'
9
+ require 'connectors_sdk/base/authorization'
13
10
 
14
11
  module ConnectorsSdk
15
12
  module SharePoint
16
- class Authorization
13
+ class Authorization < ConnectorsSdk::Base::Authorization
17
14
  class << self
18
- def authorization_url
19
- 'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
20
- end
21
-
22
- def token_credential_uri
23
- 'https://login.microsoftonline.com/common/oauth2/v2.0/token'
24
- end
25
-
26
- def authorization_uri(params)
27
- missing = missing_fields(params, %w[client_id])
28
- unless missing.blank?
29
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
30
- end
31
-
32
- params[:response_type] = 'code'
33
- params[:additional_parameters] = { :prompt => 'consent' }
34
- client = oauth_client(params)
35
- client.authorization_uri.to_s
36
- end
37
-
38
- def access_token(params)
39
- missing = missing_fields(params, %w[client_id client_secret code redirect_uri])
40
- unless missing.blank?
41
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
42
- end
43
-
44
- params[:grant_type] = 'authorization_code'
45
- client = oauth_client(params)
46
- client.fetch_access_token
47
- end
48
-
49
- def refresh(params)
50
- missing = missing_fields(params, %w[client_id client_secret refresh_token])
51
- unless missing.blank?
52
- raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
53
- end
54
-
55
- params[:grant_type] = 'refresh_token'
56
- client = oauth_client(params)
57
- client.refresh!
58
- rescue StandardError => e
59
- ConnectorsShared::ExceptionTracking.log_exception(e)
60
- raise ConnectorsShared::TokenRefreshFailedError
61
- end
62
-
63
- def oauth_client(params)
64
- options = params.merge(
65
- :authorization_uri => authorization_url,
66
- :token_credential_uri => token_credential_uri,
67
- :scope => oauth_scope
68
- )
69
- options[:state] = JSON.dump(options[:state]) if options[:state]
70
- Signet::OAuth2::Client.new(options)
71
- end
72
-
73
15
  def oauth_scope
74
16
  %w[
75
17
  User.ReadBasic.All
@@ -82,8 +24,18 @@ module ConnectorsSdk
82
24
  ]
83
25
  end
84
26
 
85
- def missing_fields(params, required = [])
86
- Array.wrap(required).select { |field| params[field.to_sym].nil? }
27
+ private
28
+
29
+ def authorization_url
30
+ 'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
31
+ end
32
+
33
+ def token_credential_uri
34
+ 'https://login.microsoftonline.com/common/oauth2/v2.0/token'
35
+ end
36
+
37
+ def additional_parameters
38
+ { :prompt => 'consent' }
87
39
  end
88
40
  end
89
41
  end
@@ -15,8 +15,8 @@ module ConnectorsSdk
15
15
 
16
16
  private
17
17
 
18
- def convert_id_to_fp_id(id)
19
- ConnectorsSdk::SharePoint::Adapter.share_point_id_to_fp_id(id)
18
+ def convert_id_to_es_id(id)
19
+ ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
20
20
  end
21
21
 
22
22
  def adapter
@@ -9,108 +9,49 @@
9
9
  require 'connectors_sdk/office365/config'
10
10
  require 'connectors_sdk/share_point/extractor'
11
11
  require 'connectors_sdk/share_point/authorization'
12
- require 'bson'
12
+ require 'connectors_sdk/base/http_call_wrapper'
13
13
 
14
14
  module ConnectorsSdk
15
15
  module SharePoint
16
- SERVICE_TYPE = 'share_point'
16
+ class HttpCallWrapper < ConnectorsSdk::Base::HttpCallWrapper
17
+ SERVICE_TYPE = 'share_point'
17
18
 
18
- class HttpCallWrapper
19
- def extractor(params)
20
- cursors = params.fetch(:cursors, {}) || {}
21
- features = params.fetch(:features, {}) || {}
22
-
23
- # XXX can we cache that class across calls?
24
- ConnectorsSdk::SharePoint::Extractor.new(
25
- content_source_id: BSON::ObjectId.new,
26
- service_type: SERVICE_TYPE,
27
- authorization_data_proc: proc { { access_token: params[:access_token] } },
28
- client_proc: proc { ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => cursors) },
29
- config: ConnectorsSdk::Office365::Config.new(:cursors => cursors, :drive_ids => 'all', :index_permissions => params[:index_permissions] || false),
30
- features: features
31
- )
32
- end
33
-
34
- def document_batch(params)
35
- results = []
36
-
37
- @extractor = extractor(params)
38
-
39
- @extractor.yield_document_changes(:break_after_page => true, :modified_since => @extractor.config.cursors['modified_since']) do |action, doc, download_args_and_proc|
40
- download_obj = nil
41
- if download_args_and_proc
42
- download_obj = {
43
- id: download_args_and_proc[0],
44
- name: download_args_and_proc[1],
45
- size: download_args_and_proc[2],
46
- download_args: download_args_and_proc[3]
47
- }
48
- end
49
-
50
- results << {
51
- :action => action,
52
- :document => doc,
53
- :download => download_obj
54
- }
55
- end
56
-
57
- results
58
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
59
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
60
- end
61
-
62
- def cursors
63
- @extractor.config.cursors
19
+ def name
20
+ 'SharePoint'
64
21
  end
65
22
 
66
- def completed?
67
- @extractor.completed
23
+ def service_type
24
+ SERVICE_TYPE
68
25
  end
69
26
 
70
- def deleted(params)
71
- results = []
72
- extractor(params).yield_deleted_ids(params[:ids]) do |id|
73
- results << id
74
- end
75
- results
76
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
77
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
78
- end
27
+ private
79
28
 
80
- def permissions(params)
81
- extractor(params).yield_permissions(params[:user_id]) do |permissions|
82
- return permissions
83
- end
84
- rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
85
- raise e.status_code == 401 ? ConnectorsShared::InvalidTokenError : e
29
+ def extractor_class
30
+ ConnectorsSdk::SharePoint::Extractor
86
31
  end
87
32
 
88
- def authorization_uri(body)
89
- ConnectorsSdk::SharePoint::Authorization.authorization_uri(body)
33
+ def authorization
34
+ ConnectorsSdk::SharePoint::Authorization
90
35
  end
91
36
 
92
- def access_token(params)
93
- ConnectorsSdk::SharePoint::Authorization.access_token(params)
37
+ def client(params)
38
+ ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => params.fetch(:cursors, {}) || {})
94
39
  end
95
40
 
96
- def refresh(params)
97
- ConnectorsSdk::SharePoint::Authorization.refresh(params)
41
+ def custom_client_error
42
+ ConnectorsSdk::Office365::CustomClient::ClientError
98
43
  end
99
44
 
100
- def download(params)
101
- extractor(params).download(params[:meta])
102
- end
103
-
104
- def name
105
- 'SharePoint'
45
+ def config(params)
46
+ ConnectorsSdk::Office365::Config.new(
47
+ :cursors => params.fetch(:cursors, {}) || {},
48
+ :drive_ids => 'all',
49
+ :index_permissions => params[:index_permissions] || false
50
+ )
106
51
  end
107
52
 
108
- def source_status(params)
109
- client = ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token])
110
- client.me
111
- { :status => 'OK', :statusCode => 200, :message => 'Connected to SharePoint' }
112
- rescue StandardError => e
113
- { :status => 'FAILURE', :statusCode => e.is_a?(ConnectorsSdk::Office365::CustomClient::ClientError) ? e.status_code : 500, :message => e.message }
53
+ def health_check(params)
54
+ client(params).me
114
55
  end
115
56
  end
116
57
  end
@@ -6,23 +6,23 @@
6
6
 
7
7
  # frozen_string_literal: true
8
8
 
9
- require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
10
9
  require 'bson'
11
10
  require 'connectors_shared/logger'
11
+ require 'stubs/enterprise_search/exception_tracking'
12
12
 
13
13
  module ConnectorsShared
14
14
  class ExceptionTracking
15
15
  class << self
16
16
  def capture_message(message, context = {})
17
- Swiftype::ExceptionTracking.capture_message(message, context)
17
+ EnterpriseSearch::ExceptionTracking.capture_message(message, context)
18
18
  end
19
19
 
20
20
  def capture_exception(exception, context = {})
21
- Swiftype::ExceptionTracking.log_exception(exception, :context => context)
21
+ EnterpriseSearch::ExceptionTracking.log_exception(exception, :context => context)
22
22
  end
23
23
 
24
24
  def log_exception(exception, message = nil)
25
- Swiftype::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
25
+ EnterpriseSearch::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
26
26
  end
27
27
 
28
28
  def augment_exception(exception)
@@ -0,0 +1,109 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'set'
10
+
11
+ module ConnectorsShared
12
+ module ExtractionUtils
13
+ # A list of tags we want to remove before extracting content
14
+ NON_CONTENT_TAGS = Set.new(%w[
15
+ comment
16
+ object
17
+ script
18
+ style
19
+ svg
20
+ video
21
+ ]).freeze
22
+
23
+ # Tags that generate a word/line break when rendered
24
+ BREAK_ELEMENTS = Set.new(%w[
25
+ br
26
+ hr
27
+ ]).freeze
28
+
29
+ # The character used to signal that a string has been truncated
30
+ OMISSION = '…'
31
+
32
+ #-------------------------------------------------------------------------------------------------
33
+ # Expects a Nokogiri HTML node, returns textual content from the node and all of its children
34
+ def self.node_descendant_text(node)
35
+ return '' unless node&.present?
36
+
37
+ unless node.respond_to?(:children) && node.respond_to?(:name) && node.respond_to?(:text?)
38
+ raise ArgumentError, "Expecting something node-like but got a #{node.class}"
39
+ end
40
+
41
+ to_process_stack = [node]
42
+ text = []
43
+
44
+ loop do
45
+ # Get the next node to process
46
+ node = to_process_stack.pop
47
+ break unless node
48
+
49
+ # Base cases where we append content to the text buffer
50
+ if node.kind_of?(String)
51
+ text << node unless node == ' ' && text.last == ' '
52
+ next
53
+ end
54
+
55
+ # Remove tags that do not contain any text (and which sometimes are treated as CDATA, generating garbage text in jruby)
56
+ next if NON_CONTENT_TAGS.include?(node.name)
57
+
58
+ # Tags that need to be replaced by spaces according to the standards
59
+ if replace_with_whitespace?(node)
60
+ text << ' ' unless text.last == ' '
61
+ next
62
+ end
63
+
64
+ # Extract the text from all text nodes
65
+ if node.text?
66
+ content = node.content
67
+ text << content.squish if content
68
+ next
69
+ end
70
+
71
+ # Add spaces before all tags
72
+ to_process_stack << ' '
73
+
74
+ # Recursion by adding the node's children to the stack and looping
75
+ node.children.reverse_each { |child| to_process_stack << child }
76
+
77
+ # Add spaces after all tags
78
+ to_process_stack << ' '
79
+ end
80
+
81
+ # Remove any duplicate spaces and return the content
82
+ text.join.squish!
83
+ end
84
+
85
+ #-------------------------------------------------------------------------------------------------
86
+ # Returns true if the node should be replaced with a space when extracting text from a document
87
+ def self.replace_with_whitespace?(node)
88
+ BREAK_ELEMENTS.include?(node.name)
89
+ end
90
+
91
+ #-------------------------------------------------------------------------------------------------
92
+ # Limits the size of a given string value down to a given limit (in bytes)
93
+ # This is heavily inspired by https://github.com/rails/rails/pull/27319/files
94
+ def self.limit_bytesize(string, limit)
95
+ return string if string.nil? || string.bytesize <= limit
96
+ real_limit = limit - OMISSION.bytesize
97
+ (+'').tap do |cut|
98
+ string.scan(/\X/) do |grapheme|
99
+ if cut.bytesize + grapheme.bytesize <= real_limit
100
+ cut << grapheme
101
+ else
102
+ cut << OMISSION
103
+ break
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module ConnectorsShared
10
+ module Middleware
11
+ class BasicAuth
12
+ AUTHORIZATION = 'Authorization'
13
+
14
+ attr_reader :basic_auth_token
15
+
16
+ def initialize(app = nil, options = {})
17
+ @app = app
18
+ @basic_auth_token = options.fetch(:basic_auth_token)
19
+ end
20
+
21
+ def call(env)
22
+ env.request_headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
23
+ @app.call(env)
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ module ConnectorsShared
10
+ module Middleware
11
+ class BearerAuth
12
+ AUTHORIZATION = 'Authorization'
13
+
14
+ attr_reader :bearer_auth_token
15
+
16
+ def initialize(app = nil, options = {})
17
+ @app = app
18
+ @bearer_auth_token = options.fetch(:bearer_auth_token)
19
+ end
20
+
21
+ def call(env)
22
+ env.request_headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"
23
+ @app.call(env)
24
+ end
25
+ end
26
+ end
27
+ end