connectors_sdk 8.3.0.0.pre.20220414T060419Z → 8.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors_sdk/atlassian/config.rb +27 -0
- data/lib/connectors_sdk/atlassian/custom_client.rb +87 -0
- data/lib/connectors_sdk/base/adapter.rb +7 -8
- data/lib/connectors_sdk/base/authorization.rb +89 -0
- data/lib/connectors_sdk/base/connector.rb +166 -0
- data/lib/connectors_sdk/base/custom_client.rb +1 -2
- data/lib/connectors_sdk/base/extractor.rb +6 -2
- data/lib/connectors_sdk/base/registry.rb +9 -4
- data/lib/connectors_sdk/confluence/adapter.rb +216 -0
- data/lib/connectors_sdk/confluence/custom_client.rb +143 -0
- data/lib/connectors_sdk/confluence/extractor.rb +265 -0
- data/lib/connectors_sdk/confluence_cloud/authorization.rb +64 -0
- data/lib/connectors_sdk/confluence_cloud/connector.rb +84 -0
- data/lib/connectors_sdk/confluence_cloud/custom_client.rb +61 -0
- data/lib/connectors_sdk/confluence_cloud/extractor.rb +59 -0
- data/lib/connectors_sdk/helpers/atlassian_time_formatter.rb +10 -0
- data/lib/connectors_sdk/office365/adapter.rb +7 -7
- data/lib/connectors_sdk/office365/config.rb +1 -0
- data/lib/connectors_sdk/office365/custom_client.rb +20 -39
- data/lib/connectors_sdk/office365/extractor.rb +18 -34
- data/lib/connectors_sdk/share_point/adapter.rb +24 -12
- data/lib/connectors_sdk/share_point/authorization.rb +14 -62
- data/lib/connectors_sdk/share_point/connector.rb +82 -0
- data/lib/connectors_sdk/share_point/extractor.rb +2 -2
- data/lib/connectors_sdk/stub_connector/connector.rb +62 -0
- data/lib/connectors_shared/constants.rb +12 -0
- data/lib/connectors_shared/exception_tracking.rb +4 -4
- data/lib/connectors_shared/extraction_utils.rb +109 -0
- data/lib/connectors_shared/job_status.rb +18 -0
- data/lib/connectors_shared/middleware/basic_auth.rb +27 -0
- data/lib/connectors_shared/middleware/bearer_auth.rb +27 -0
- data/lib/connectors_shared/middleware/restrict_hostnames.rb +73 -0
- data/lib/connectors_shared/monitor.rb +3 -3
- data/lib/connectors_shared.rb +1 -0
- data/lib/stubs/enterprise_search/exception_tracking.rb +43 -0
- metadata +30 -16
- data/lib/connectors_sdk/base/.config.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.connectors.rb.un~ +0 -0
- data/lib/connectors_sdk/base/.registry.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/.http_call_wrapper.rb.un~ +0 -0
- data/lib/connectors_sdk/share_point/http_call_wrapper.rb +0 -117
@@ -6,70 +6,12 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require '
|
10
|
-
require 'signet'
|
11
|
-
require 'signet/oauth_2'
|
12
|
-
require 'signet/oauth_2/client'
|
9
|
+
require 'connectors_sdk/base/authorization'
|
13
10
|
|
14
11
|
module ConnectorsSdk
|
15
12
|
module SharePoint
|
16
|
-
class Authorization
|
13
|
+
class Authorization < ConnectorsSdk::Base::Authorization
|
17
14
|
class << self
|
18
|
-
def authorization_url
|
19
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
20
|
-
end
|
21
|
-
|
22
|
-
def token_credential_uri
|
23
|
-
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
24
|
-
end
|
25
|
-
|
26
|
-
def authorization_uri(params)
|
27
|
-
missing = missing_fields(params, %w[client_id])
|
28
|
-
unless missing.blank?
|
29
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
30
|
-
end
|
31
|
-
|
32
|
-
params[:response_type] = 'code'
|
33
|
-
params[:additional_parameters] = { :prompt => 'consent' }
|
34
|
-
client = oauth_client(params)
|
35
|
-
client.authorization_uri.to_s
|
36
|
-
end
|
37
|
-
|
38
|
-
def access_token(params)
|
39
|
-
missing = missing_fields(params, %w[client_id client_secret code redirect_uri])
|
40
|
-
unless missing.blank?
|
41
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
42
|
-
end
|
43
|
-
|
44
|
-
params[:grant_type] = 'authorization_code'
|
45
|
-
client = oauth_client(params)
|
46
|
-
client.fetch_access_token
|
47
|
-
end
|
48
|
-
|
49
|
-
def refresh(params)
|
50
|
-
missing = missing_fields(params, %w[client_id client_secret refresh_token])
|
51
|
-
unless missing.blank?
|
52
|
-
raise ConnectorsShared::ClientError.new("Missing required fields: #{missing.join(', ')}")
|
53
|
-
end
|
54
|
-
|
55
|
-
params[:grant_type] = 'refresh_token'
|
56
|
-
client = oauth_client(params)
|
57
|
-
client.refresh!
|
58
|
-
rescue StandardError => e
|
59
|
-
ConnectorsShared::ExceptionTracking.log_exception(e)
|
60
|
-
raise ConnectorsShared::TokenRefreshFailedError
|
61
|
-
end
|
62
|
-
|
63
|
-
def oauth_client(params)
|
64
|
-
options = params.merge(
|
65
|
-
:authorization_uri => authorization_url,
|
66
|
-
:token_credential_uri => token_credential_uri,
|
67
|
-
:scope => oauth_scope
|
68
|
-
)
|
69
|
-
options[:state] = JSON.dump(options[:state]) if options[:state]
|
70
|
-
Signet::OAuth2::Client.new(options)
|
71
|
-
end
|
72
|
-
|
73
15
|
def oauth_scope
|
74
16
|
%w[
|
75
17
|
User.ReadBasic.All
|
@@ -82,8 +24,18 @@ module ConnectorsSdk
|
|
82
24
|
]
|
83
25
|
end
|
84
26
|
|
85
|
-
|
86
|
-
|
27
|
+
private
|
28
|
+
|
29
|
+
def authorization_url
|
30
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/authorize'
|
31
|
+
end
|
32
|
+
|
33
|
+
def token_credential_uri
|
34
|
+
'https://login.microsoftonline.com/common/oauth2/v2.0/token'
|
35
|
+
end
|
36
|
+
|
37
|
+
def additional_parameters
|
38
|
+
{ :prompt => 'consent' }
|
87
39
|
end
|
88
40
|
end
|
89
41
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors_sdk/base/connector'
|
10
|
+
require 'connectors_sdk/office365/config'
|
11
|
+
require 'connectors_sdk/share_point/extractor'
|
12
|
+
require 'connectors_sdk/share_point/authorization'
|
13
|
+
|
14
|
+
module ConnectorsSdk
|
15
|
+
module SharePoint
|
16
|
+
class Connector < ConnectorsSdk::Base::Connector
|
17
|
+
SERVICE_TYPE = 'share_point'
|
18
|
+
|
19
|
+
def compare_secrets(params)
|
20
|
+
missing_secrets?(params)
|
21
|
+
|
22
|
+
previous_user = client(:access_token => params[:other_secret][:access_token]).me
|
23
|
+
equivalent = previous_user.nil? ? false : previous_user.id == client(:access_token => params[:secret][:access_token]).me&.id
|
24
|
+
|
25
|
+
{
|
26
|
+
:equivalent => equivalent
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
def display_name
|
31
|
+
'SharePoint Online'
|
32
|
+
end
|
33
|
+
|
34
|
+
def connection_requires_redirect
|
35
|
+
true
|
36
|
+
end
|
37
|
+
|
38
|
+
def configurable_fields
|
39
|
+
[
|
40
|
+
{
|
41
|
+
'key' => 'client_id',
|
42
|
+
'label' => 'Client ID'
|
43
|
+
},
|
44
|
+
{
|
45
|
+
'key' => 'client_secret',
|
46
|
+
'label' => 'Client Secret'
|
47
|
+
},
|
48
|
+
]
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def extractor_class
|
54
|
+
ConnectorsSdk::SharePoint::Extractor
|
55
|
+
end
|
56
|
+
|
57
|
+
def authorization
|
58
|
+
ConnectorsSdk::SharePoint::Authorization
|
59
|
+
end
|
60
|
+
|
61
|
+
def client(params)
|
62
|
+
ConnectorsSdk::Office365::CustomClient.new(:access_token => params[:access_token], :cursors => params.fetch(:cursors, {}) || {})
|
63
|
+
end
|
64
|
+
|
65
|
+
def custom_client_error
|
66
|
+
ConnectorsSdk::Office365::CustomClient::ClientError
|
67
|
+
end
|
68
|
+
|
69
|
+
def config(params)
|
70
|
+
ConnectorsSdk::Office365::Config.new(
|
71
|
+
:cursors => params.fetch(:cursors, {}) || {},
|
72
|
+
:drive_ids => 'all',
|
73
|
+
:index_permissions => params[:index_permissions] || false
|
74
|
+
)
|
75
|
+
end
|
76
|
+
|
77
|
+
def health_check(params)
|
78
|
+
client(params).me
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -15,8 +15,8 @@ module ConnectorsSdk
|
|
15
15
|
|
16
16
|
private
|
17
17
|
|
18
|
-
def
|
19
|
-
ConnectorsSdk::SharePoint::Adapter.
|
18
|
+
def convert_id_to_es_id(id)
|
19
|
+
ConnectorsSdk::SharePoint::Adapter.share_point_id_to_es_id(id)
|
20
20
|
end
|
21
21
|
|
22
22
|
def adapter
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'connectors_sdk/base/connector'
|
10
|
+
|
11
|
+
module ConnectorsSdk
|
12
|
+
module StubConnector
|
13
|
+
class Connector < ConnectorsSdk::Base::Connector
|
14
|
+
SERVICE_TYPE = 'stub_connector'
|
15
|
+
|
16
|
+
def display_name
|
17
|
+
'Stub Connector'
|
18
|
+
end
|
19
|
+
|
20
|
+
def configurable_fields
|
21
|
+
[
|
22
|
+
{
|
23
|
+
'key' => 'third_party_url',
|
24
|
+
'label' => 'Third Party URL'
|
25
|
+
},
|
26
|
+
{
|
27
|
+
'key' => 'third_party_api_key',
|
28
|
+
'label' => 'Third Party API Key'
|
29
|
+
}
|
30
|
+
]
|
31
|
+
end
|
32
|
+
|
33
|
+
def health_check(_params)
|
34
|
+
true
|
35
|
+
end
|
36
|
+
|
37
|
+
def document_batch(_params)
|
38
|
+
results = 30.times.map do |i|
|
39
|
+
{
|
40
|
+
:action => :create_or_update,
|
41
|
+
:document => {
|
42
|
+
:id => "document_#{i}",
|
43
|
+
:type => 'document',
|
44
|
+
:body => "contents for document number: #{i}"
|
45
|
+
},
|
46
|
+
:download => nil
|
47
|
+
}
|
48
|
+
end
|
49
|
+
|
50
|
+
[results, {}, true]
|
51
|
+
end
|
52
|
+
|
53
|
+
def deleted(_params)
|
54
|
+
[]
|
55
|
+
end
|
56
|
+
|
57
|
+
def permissions(_params)
|
58
|
+
[]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -10,5 +10,17 @@ module ConnectorsShared
|
|
10
10
|
SUBEXTRACTOR_RESERVED_FIELDS = %w[_subextracted_as_of _subextracted_version].freeze
|
11
11
|
ALLOW_FIELD = '_allow_permissions'.freeze
|
12
12
|
DENY_FIELD = '_deny_permissions'.freeze
|
13
|
+
|
14
|
+
# The following section reads as follows:
|
15
|
+
# The job will extract documents until the job queue size reaches
|
16
|
+
# JOB_QUEUE_SIZE_IDLE_THRESHOLD items. After that, the job will attempt to sleep
|
17
|
+
# for IDLE_SLEEP_TIME seconds and check the queue size again. If the queue is still
|
18
|
+
# full, it will sleep up to MAX_IDLE_ATTEMPTS times, and if the queue is still
|
19
|
+
# full, the job will be terminated.
|
20
|
+
JOB_QUEUE_SIZE_IDLE_THRESHOLD = 500 # How many documents the job queue stores until it sleeps
|
21
|
+
IDLE_SLEEP_TIME = 10 # For how long job queue will sleep before checking the queue size again
|
22
|
+
MAX_IDLE_ATTEMPTS = 30 # How many consecutive times job will try to sleep until it's destroyed
|
23
|
+
|
24
|
+
STALE_JOB_TIMEOUT = 60 * 30 # Time in seconds after which the job will be cleaned up if the job is considered stuck
|
13
25
|
end
|
14
26
|
end
|
@@ -6,23 +6,23 @@
|
|
6
6
|
|
7
7
|
# frozen_string_literal: true
|
8
8
|
|
9
|
-
require 'stubs/swiftype/exception_tracking' unless defined?(Rails)
|
10
9
|
require 'bson'
|
11
10
|
require 'connectors_shared/logger'
|
11
|
+
require 'stubs/enterprise_search/exception_tracking'
|
12
12
|
|
13
13
|
module ConnectorsShared
|
14
14
|
class ExceptionTracking
|
15
15
|
class << self
|
16
16
|
def capture_message(message, context = {})
|
17
|
-
|
17
|
+
EnterpriseSearch::ExceptionTracking.capture_message(message, context)
|
18
18
|
end
|
19
19
|
|
20
20
|
def capture_exception(exception, context = {})
|
21
|
-
|
21
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, :context => context)
|
22
22
|
end
|
23
23
|
|
24
24
|
def log_exception(exception, message = nil)
|
25
|
-
|
25
|
+
EnterpriseSearch::ExceptionTracking.log_exception(exception, message, :logger => ConnectorsShared::Logger.logger)
|
26
26
|
end
|
27
27
|
|
28
28
|
def augment_exception(exception)
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'set'
|
10
|
+
|
11
|
+
module ConnectorsShared
|
12
|
+
module ExtractionUtils
|
13
|
+
# A list of tags we want to remove before extracting content
|
14
|
+
NON_CONTENT_TAGS = Set.new(%w[
|
15
|
+
comment
|
16
|
+
object
|
17
|
+
script
|
18
|
+
style
|
19
|
+
svg
|
20
|
+
video
|
21
|
+
]).freeze
|
22
|
+
|
23
|
+
# Tags that generate a word/line break when rendered
|
24
|
+
BREAK_ELEMENTS = Set.new(%w[
|
25
|
+
br
|
26
|
+
hr
|
27
|
+
]).freeze
|
28
|
+
|
29
|
+
# The character used to signal that a string has been truncated
|
30
|
+
OMISSION = '…'
|
31
|
+
|
32
|
+
#-------------------------------------------------------------------------------------------------
|
33
|
+
# Expects a Nokogiri HTML node, returns textual content from the node and all of its children
|
34
|
+
def self.node_descendant_text(node)
|
35
|
+
return '' unless node&.present?
|
36
|
+
|
37
|
+
unless node.respond_to?(:children) && node.respond_to?(:name) && node.respond_to?(:text?)
|
38
|
+
raise ArgumentError, "Expecting something node-like but got a #{node.class}"
|
39
|
+
end
|
40
|
+
|
41
|
+
to_process_stack = [node]
|
42
|
+
text = []
|
43
|
+
|
44
|
+
loop do
|
45
|
+
# Get the next node to process
|
46
|
+
node = to_process_stack.pop
|
47
|
+
break unless node
|
48
|
+
|
49
|
+
# Base cases where we append content to the text buffer
|
50
|
+
if node.kind_of?(String)
|
51
|
+
text << node unless node == ' ' && text.last == ' '
|
52
|
+
next
|
53
|
+
end
|
54
|
+
|
55
|
+
# Remove tags that do not contain any text (and which sometimes are treated as CDATA, generating garbage text in jruby)
|
56
|
+
next if NON_CONTENT_TAGS.include?(node.name)
|
57
|
+
|
58
|
+
# Tags that need to be replaced by spaces according to the standards
|
59
|
+
if replace_with_whitespace?(node)
|
60
|
+
text << ' ' unless text.last == ' '
|
61
|
+
next
|
62
|
+
end
|
63
|
+
|
64
|
+
# Extract the text from all text nodes
|
65
|
+
if node.text?
|
66
|
+
content = node.content
|
67
|
+
text << content.squish if content
|
68
|
+
next
|
69
|
+
end
|
70
|
+
|
71
|
+
# Add spaces before all tags
|
72
|
+
to_process_stack << ' '
|
73
|
+
|
74
|
+
# Recursion by adding the node's children to the stack and looping
|
75
|
+
node.children.reverse_each { |child| to_process_stack << child }
|
76
|
+
|
77
|
+
# Add spaces after all tags
|
78
|
+
to_process_stack << ' '
|
79
|
+
end
|
80
|
+
|
81
|
+
# Remove any duplicate spaces and return the content
|
82
|
+
text.join.squish!
|
83
|
+
end
|
84
|
+
|
85
|
+
#-------------------------------------------------------------------------------------------------
|
86
|
+
# Returns true if the node should be replaced with a space when extracting text from a document
|
87
|
+
def self.replace_with_whitespace?(node)
|
88
|
+
BREAK_ELEMENTS.include?(node.name)
|
89
|
+
end
|
90
|
+
|
91
|
+
#-------------------------------------------------------------------------------------------------
|
92
|
+
# Limits the size of a given string value down to a given limit (in bytes)
|
93
|
+
# This is heavily inspired by https://github.com/rails/rails/pull/27319/files
|
94
|
+
def self.limit_bytesize(string, limit)
|
95
|
+
return string if string.nil? || string.bytesize <= limit
|
96
|
+
real_limit = limit - OMISSION.bytesize
|
97
|
+
(+'').tap do |cut|
|
98
|
+
string.scan(/\X/) do |grapheme|
|
99
|
+
if cut.bytesize + grapheme.bytesize <= real_limit
|
100
|
+
cut << grapheme
|
101
|
+
else
|
102
|
+
cut << OMISSION
|
103
|
+
break
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module ConnectorsShared
|
8
|
+
class JobStatus
|
9
|
+
CREATED = 'created'
|
10
|
+
RUNNING = 'running'
|
11
|
+
FINISHED = 'finished'
|
12
|
+
FAILED = 'failed'
|
13
|
+
|
14
|
+
def self.is_valid?(status)
|
15
|
+
[CREATED, RUNNING, FINISHED, FAILED].include? status
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BasicAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :basic_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@basic_auth_token = options.fetch(:basic_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Basic #{basic_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ConnectorsShared
|
10
|
+
module Middleware
|
11
|
+
class BearerAuth
|
12
|
+
AUTHORIZATION = 'Authorization'
|
13
|
+
|
14
|
+
attr_reader :bearer_auth_token
|
15
|
+
|
16
|
+
def initialize(app = nil, options = {})
|
17
|
+
@app = app
|
18
|
+
@bearer_auth_token = options.fetch(:bearer_auth_token)
|
19
|
+
end
|
20
|
+
|
21
|
+
def call(env)
|
22
|
+
env.request_headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"
|
23
|
+
@app.call(env)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'faraday/middleware'
|
10
|
+
require 'resolv'
|
11
|
+
|
12
|
+
require 'connectors_shared/errors'
|
13
|
+
require 'connectors_shared/logger'
|
14
|
+
|
15
|
+
module ConnectorsShared
|
16
|
+
module Middleware
|
17
|
+
class RestrictHostnames < Faraday::Middleware
|
18
|
+
class AddressNotAllowed < ConnectorsShared::ClientError; end
|
19
|
+
URL_PATTERN = /\Ahttp/
|
20
|
+
|
21
|
+
attr_reader :allowed_hosts, :allowed_ips
|
22
|
+
|
23
|
+
def initialize(app = nil, options = {})
|
24
|
+
super(app)
|
25
|
+
@allowed_hosts = options[:allowed_hosts]
|
26
|
+
@allowed_ips = ips_from_hosts(@allowed_hosts)
|
27
|
+
end
|
28
|
+
|
29
|
+
def call(env)
|
30
|
+
raise AddressNotAllowed.new("Address not allowed for #{env[:url]}") if denied?(env)
|
31
|
+
@app.call(env)
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def ips_from_hosts(hosts)
|
37
|
+
hosts&.flat_map do |host|
|
38
|
+
if URL_PATTERN.match(host)
|
39
|
+
lookup_ips(Addressable::URI.parse(host).hostname)
|
40
|
+
elsif Resolv::IPv4::Regex.match(host) || Resolv::IPv6::Regex.match(host)
|
41
|
+
IPAddr.new(host)
|
42
|
+
else
|
43
|
+
lookup_ips(host)
|
44
|
+
end
|
45
|
+
end || []
|
46
|
+
end
|
47
|
+
|
48
|
+
def denied?(env)
|
49
|
+
requested_ips = lookup_ips(env[:url].hostname)
|
50
|
+
no_match = requested_ips.all? { |ip| !@allowed_ips.include?(ip) }
|
51
|
+
return false unless no_match
|
52
|
+
ConnectorsShared::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
|
53
|
+
"allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
|
54
|
+
@allowed_ips = ips_from_hosts(@allowed_hosts) # maybe the IP has changed for an allowed host. Re-do allowed_hosts DNS lookup
|
55
|
+
no_match = requested_ips.all? { |ip| !@allowed_ips.include?(ip) }
|
56
|
+
ConnectorsShared::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
|
57
|
+
"allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}") if no_match
|
58
|
+
no_match
|
59
|
+
end
|
60
|
+
|
61
|
+
def lookup_ips(hostname)
|
62
|
+
addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
|
63
|
+
end
|
64
|
+
|
65
|
+
def addr_infos(hostname)
|
66
|
+
Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
|
67
|
+
rescue SocketError
|
68
|
+
# In case of invalid hostname, return an empty list of addresses
|
69
|
+
[]
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -8,7 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'connectors_shared/errors'
|
10
10
|
require 'stubs/app_config' unless defined?(Rails)
|
11
|
-
require 'stubs/
|
11
|
+
require 'stubs/enterprise_search/exception_tracking' unless defined?(Rails)
|
12
12
|
|
13
13
|
module ConnectorsShared
|
14
14
|
class Monitor
|
@@ -44,8 +44,8 @@ module ConnectorsShared
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def note_error(error, id: Time.now.to_i)
|
47
|
-
stack_trace =
|
48
|
-
error_message =
|
47
|
+
stack_trace = EnterpriseSearch::ExceptionTracking.generate_stack_trace(error)
|
48
|
+
error_message = EnterpriseSearch::ExceptionTracking.generate_error_message(error, nil, nil)
|
49
49
|
@connector.log_debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
50
50
|
@total_error_count += 1
|
51
51
|
@consecutive_error_count += 1
|
data/lib/connectors_shared.rb
CHANGED
@@ -8,5 +8,6 @@ require 'connectors_shared/constants'
|
|
8
8
|
require 'connectors_shared/errors'
|
9
9
|
require 'connectors_shared/exception_tracking'
|
10
10
|
require 'connectors_shared/extension_mapping_util'
|
11
|
+
require 'connectors_shared/job_status'
|
11
12
|
require 'connectors_shared/logger'
|
12
13
|
require 'connectors_shared/monitor'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
module EnterpriseSearch
|
8
|
+
class ExceptionTracking
|
9
|
+
def self.capture_message(message, context = {})
|
10
|
+
AppConfig.connectors_logger.error { "Error: #{message}. Context: #{context.inspect}" }
|
11
|
+
|
12
|
+
# When the method is called from a rescue block, our return value may leak outside of its
|
13
|
+
# intended scope, so let's explicitly return nil here to be safe.
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.log_exception(exception, message = nil, context: nil, logger: AppConfig.connectors_logger)
|
18
|
+
logger.error { message } if message
|
19
|
+
logger.error { generate_stack_trace(exception) }
|
20
|
+
logger.error { "Context: #{context.inspect}" } if context
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.generate_error_message(exception, message, context)
|
24
|
+
context = { :message_id => exception.id }.merge(context || {}) if exception.respond_to?(:id)
|
25
|
+
context_message = context && "Context: #{context.inspect}"
|
26
|
+
['Exception', message, exception.class.to_s, exception.message, context_message]
|
27
|
+
.compact
|
28
|
+
.map { |part| part.to_s.dup.force_encoding('UTF-8') }
|
29
|
+
.join(': ')
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.generate_stack_trace(exception)
|
33
|
+
full_message = exception.full_message
|
34
|
+
|
35
|
+
cause = exception
|
36
|
+
while cause.cause != cause && (cause = cause.cause)
|
37
|
+
full_message << "Cause:\n#{cause.full_message}"
|
38
|
+
end
|
39
|
+
|
40
|
+
full_message.dup.force_encoding('UTF-8')
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|