connectors_service 8.5.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/bin/connectors_service +4 -0
- data/bin/list_connectors +4 -0
- data/config/connectors.yml +25 -0
- data/lib/app/app.rb +25 -0
- data/lib/app/config.rb +132 -0
- data/lib/app/console_app.rb +278 -0
- data/lib/app/dispatcher.rb +121 -0
- data/lib/app/menu.rb +104 -0
- data/lib/app/preflight_check.rb +134 -0
- data/lib/app/version.rb +10 -0
- data/lib/connectors/base/adapter.rb +119 -0
- data/lib/connectors/base/connector.rb +57 -0
- data/lib/connectors/base/custom_client.rb +111 -0
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +32 -0
- data/lib/connectors/example/connector.rb +57 -0
- data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
- data/lib/connectors/gitlab/adapter.rb +50 -0
- data/lib/connectors/gitlab/connector.rb +67 -0
- data/lib/connectors/gitlab/custom_client.rb +44 -0
- data/lib/connectors/gitlab/extractor.rb +69 -0
- data/lib/connectors/mongodb/connector.rb +138 -0
- data/lib/connectors/registry.rb +52 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors.rb +16 -0
- data/lib/connectors_app/// +13 -0
- data/lib/connectors_service.rb +24 -0
- data/lib/connectors_utility.rb +16 -0
- data/lib/core/configuration.rb +48 -0
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/heartbeat.rb +32 -0
- data/lib/core/native_scheduler.rb +24 -0
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/output_sink.rb +13 -0
- data/lib/core/scheduler.rb +158 -0
- data/lib/core/single_scheduler.rb +29 -0
- data/lib/core/sync_job_runner.rb +111 -0
- data/lib/core.rb +16 -0
- data/lib/list_connectors.rb +22 -0
- data/lib/stubs/app_config.rb +35 -0
- data/lib/stubs/connectors/stats.rb +35 -0
- data/lib/stubs/service_type.rb +13 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/elasticsearch/index/language_data.yml +111 -0
- data/lib/utility/elasticsearch/index/mappings.rb +104 -0
- data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +84 -0
- data/lib/utility/middleware/basic_auth.rb +27 -0
- data/lib/utility/middleware/bearer_auth.rb +27 -0
- data/lib/utility/middleware/restrict_hostnames.rb +73 -0
- data/lib/utility.rb +16 -0
- metadata +487 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
|
11
|
+
module Utility
  # Bidirectional lookup between file extensions and the MIME types that are
  # treated as belonging to them.
  class ExtensionMappingUtil
    # Extension => list of MIME types. Converted to a hash with indifferent
    # access, so both 'pdf' and :pdf work as keys.
    @extension_to_mime = {
      doc: %w[
        application/x-tika-msoffice
        application/msword
      ].freeze,
      docx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.openxmlformats-officedocument.wordprocessingml.template
        application/vnd.ms-word.template.macroenabled.12
        application/vnd.ms-word.document.macroenabled.12
      ].freeze,
      html: %w[
        text/html
        application/xhtml+xml
      ].freeze,
      odt: %w[
        application/x-vnd.oasis.opendocument.graphics-template
        application/vnd.sun.xml.writer
        application/x-vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.text-web
        application/x-vnd.oasis.opendocument.spreadsheet-template
        application/vnd.oasis.opendocument.formula-template
        application/vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image-template
        application/x-vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.chart-template
        application/vnd.oasis.opendocument.presentation-template
        application/x-vnd.oasis.opendocument.image-template
        application/vnd.oasis.opendocument.formula
        application/x-vnd.oasis.opendocument.image
        application/vnd.oasis.opendocument.spreadsheet-template
        application/x-vnd.oasis.opendocument.chart-template
        application/x-vnd.oasis.opendocument.formula
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.text-web
        application/vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.formula-template
        application/x-vnd.oasis.opendocument.spreadsheet
        application/x-vnd.oasis.opendocument.chart
        application/vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.graphics-template
        application/x-vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image
        application/x-vnd.oasis.opendocument.presentation-template
        application/vnd.oasis.opendocument.chart
      ].freeze,
      one: %w[
        application/onenote
        application/msonenote
      ].freeze,
      pdf: %w[
        application/pdf
      ].freeze,
      ppt: %w[
        application/vnd.ms-powerpoint
      ].freeze,
      pptx: %w[
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.ms-powerpoint.presentation.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.template
        application/vnd.ms-powerpoint.slideshow.macroenabled.12
        application/vnd.ms-powerpoint.addin.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.slideshow
      ].freeze,
      rtf: %w[
        message/richtext
        text/richtext
        text/rtf
        application/rtf
      ].freeze,
      txt: %w[
        text/plain
      ].freeze,
      xls: %w[
        application/x-tika-msoffice
        application/vnd.ms-excel
        application/vnd.ms-excel.sheet.3
        application/vnd.ms-excel.sheet.2
        application/vnd.ms-excel.workspace.3
        application/vnd.ms-excel.workspace.4
        application/vnd.ms-excel.sheet.4
      ].freeze,
      xlsx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.openxmlformats-officedocument.spreadsheetml.template
        application/vnd.ms-excel.addin.macroenabled.12
        application/vnd.ms-excel.template.macroenabled.12
        application/vnd.ms-excel.sheet.macroenabled.12
      ].freeze
    }.with_indifferent_access.freeze

    class << self
      # Inverse of the extension table: MIME type => extension (as a String).
      # Built once and memoized. A MIME type listed under several extensions
      # maps to the extension that appears last in the table.
      def mime_to_extension
        @mime_to_extension ||= begin
          pairs = @extension_to_mime.flat_map do |extension, mime_types|
            mime_types.map { |mime_type| [mime_type, extension.to_s] }
          end
          pairs.to_h.with_indifferent_access.freeze
        end
      end

      # Returns the extension registered for +mime_type+ (compared in lower
      # case), or nil when the MIME type is unknown.
      def get_extension(mime_type)
        normalized = mime_type.to_s.downcase
        mime_to_extension[normalized]
      end

      # Returns the list of MIME types registered for +extension+ (compared in
      # lower case), or nil when the extension is unknown.
      def get_mime_types(extension)
        normalized = extension.to_s.downcase
        @extension_to_mime[normalized]
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
require 'securerandom'
require 'active_support/core_ext/module'
require 'active_support/core_ext/string/filters'
require 'ecs_logging/logger'
|
11
|
+
|
12
|
+
module Utility
  # Process-wide logging facade. All methods are class-level and forward to a
  # single memoized logger instance writing to STDOUT.
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
    MAX_SHORT_MESSAGE_LENGTH = 1000

    class << self
      # Returns the formatter of the underlying logger.
      def formatter
        logger.formatter
      end

      # Sets the formatter of the underlying logger.
      def formatter=(formatter)
        logger.formatter = formatter
      end

      # Sets the severity threshold of the underlying logger.
      def level=(log_level)
        logger.level = log_level
      end

      # The underlying logger, created on first use: an EcsLogging::Logger when
      # Settings[:ecs_logging] is truthy, otherwise a stdlib ::Logger.
      # NOTE(review): Settings is defined outside this file.
      def logger
        @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
      end

      # Defines one class method per supported level (fatal, error, warn, info,
      # debug). With an ECS logger the extra ECS fields are attached to each entry.
      SUPPORTED_LOG_LEVELS.each do |level|
        define_method(level) do |message|
          if logger.is_a?(EcsLogging::Logger)
            logger.public_send(level, message, extra_ecs_fields)
          else
            logger.public_send(level, message)
          end
        end
      end

      # Logs +stacktrace+ at error level. With an ECS logger it is sent as the
      # error.stack_trace field instead of the message.
      def log_stacktrace(stacktrace)
        if logger.is_a?(EcsLogging::Logger)
          logger.error(nil, extra_ecs_fields.merge(:error => { :stack_trace => stacktrace }))
        else
          logger.error(stacktrace)
        end
      end

      # Logs, at error level, the optional +message+ (under +prog_name+) followed
      # by the exception's message and backtrace when +exception+ is given.
      def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
        logger.error(prog_name) { message } if message
        return unless exception

        logger.error exception.message
        # An exception that was built but never raised has a nil backtrace.
        backtrace = exception.backtrace
        logger.error backtrace.join("\n") if backtrace
      end

      # Emits an info entry consisting of a single newline.
      def new_line
        logger.info("\n")
      end

      # Returns a random UUID string.
      def generate_trace_id
        SecureRandom.uuid
      end

      # Collapses every whitespace run in +message+ to one space, strips the
      # ends and truncates to MAX_SHORT_MESSAGE_LENGTH characters. Non-string
      # input (including nil) is converted with #to_s first.
      def abbreviated_message(message)
        message.to_s.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
      end

      private

      # Additional fields attached to every entry written through an ECS logger.
      def extra_ecs_fields
        {
          :labels => { :index_date => Time.now.strftime('%Y.%m.%d') },
          :log => { :logger => logger.progname },
          :service => {
            :type => 'connectors-ruby',
            :version => Settings.version
          },
          :process => {
            :pid => Process.pid,
            :name => $PROGRAM_NAME,
            :thread => Thread.current.object_id
          }
        }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  module Middleware
    # Middleware that stamps an HTTP Basic "Authorization" header onto each
    # request before handing it to the next app in the stack.
    class BasicAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :basic_auth_token

      # +options+ must contain :basic_auth_token; a missing key raises KeyError.
      def initialize(app = nil, options = {})
        @basic_auth_token = options.fetch(:basic_auth_token)
        @app = app
      end

      # Sets the Authorization header on +env+ and returns whatever the wrapped
      # app returns.
      def call(env)
        header_value = "Basic #{basic_auth_token}"
        env.request_headers[AUTHORIZATION] = header_value
        @app.call(env)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  module Middleware
    # Middleware that stamps an HTTP Bearer "Authorization" header onto each
    # request before handing it to the next app in the stack.
    class BearerAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :bearer_auth_token

      # +options+ must contain :bearer_auth_token; a missing key raises KeyError.
      def initialize(app = nil, options = {})
        @bearer_auth_token = options.fetch(:bearer_auth_token)
        @app = app
      end

      # Sets the Authorization header on +env+ and returns whatever the wrapped
      # app returns.
      def call(env)
        header_value = "Bearer #{bearer_auth_token}"
        env.request_headers[AUTHORIZATION] = header_value
        @app.call(env)
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'faraday/middleware'
|
10
|
+
require 'resolv'
|
11
|
+
|
12
|
+
require 'utility/errors'
|
13
|
+
require 'utility/logger'
|
14
|
+
|
15
|
+
module Utility
  module Middleware
    # Faraday middleware that lets a request through only when the requested
    # host resolves to at least one IP address that is also among the IP
    # addresses resolved from the configured allowed hosts.
    #
    # NOTE(review): Addressable::URI and IPAddr are used below but neither is
    # required among this file's visible requires - presumably loaded elsewhere; confirm.
    class RestrictHostnames < Faraday::Middleware
      class AddressNotAllowed < Utility::ClientError; end

      # Matches allowed-host entries written as full http(s) URLs. The "://"
      # separator is required so that bare hostnames which merely begin with
      # "http" (e.g. "httpbin.org") are resolved as hostnames rather than being
      # parsed as URLs, which would yield no hostname and therefore no IPs.
      URL_PATTERN = %r{\Ahttps?://}i

      attr_reader :allowed_hosts, :allowed_ips

      # +options[:allowed_hosts]+ is a list of URLs, hostnames and/or literal
      # IP addresses; it is resolved to IP addresses immediately.
      def initialize(app = nil, options = {})
        super(app)
        @allowed_hosts = options[:allowed_hosts]
        @allowed_ips = ips_from_hosts(@allowed_hosts)
      end

      # Raises AddressNotAllowed when the request URL is denied; otherwise
      # passes +env+ on to the next app.
      def call(env)
        raise AddressNotAllowed, "Address not allowed for #{env[:url]}" if denied?(env)

        @app.call(env)
      end

      private

      # Resolves each entry of +hosts+ to IPAddr objects and returns them as a
      # flat list. Returns [] when +hosts+ is nil.
      def ips_from_hosts(hosts)
        hosts&.flat_map do |host|
          if URL_PATTERN.match?(host)
            lookup_ips(Addressable::URI.parse(host).hostname)
          elsif Resolv::IPv4::Regex.match?(host) || Resolv::IPv6::Regex.match?(host)
            IPAddr.new(host)
          else
            lookup_ips(host)
          end
        end || []
      end

      # True when none of the IPs the request URL resolves to is allowed. On a
      # first mismatch the allowed hosts are resolved again (their IPs may have
      # changed) before the request is finally denied.
      def denied?(env)
        requested_ips = lookup_ips(env[:url].hostname)
        return false unless none_allowed?(requested_ips)

        Utility::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
          "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
        @allowed_ips = ips_from_hosts(@allowed_hosts) # maybe the IP has changed for an allowed host. Re-do allowed_hosts DNS lookup
        still_denied = none_allowed?(requested_ips)
        if still_denied
          Utility::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
            "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}")
        end
        still_denied
      end

      # True when no element of +ips+ is in the allowed list (also true for an
      # empty +ips+, i.e. a host that could not be resolved).
      def none_allowed?(ips)
        ips.none? { |ip| @allowed_ips.include?(ip) }
      end

      # Resolves +hostname+ to a list of IPAddr objects.
      def lookup_ips(hostname)
        addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
      end

      # Address-info lookup for +hostname+; returns [] when resolution fails.
      def addr_infos(hostname)
        Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
      rescue SocketError
        # In case of invalid hostname, return an empty list of addresses
        []
      end
    end
  end
end
|
data/lib/utility.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'utility/constants'
|
8
|
+
require 'utility/cron'
|
9
|
+
require 'utility/errors'
|
10
|
+
require 'utility/es_client'
|
11
|
+
require 'utility/environment'
|
12
|
+
require 'utility/exception_tracking'
|
13
|
+
require 'utility/extension_mapping_util'
|
14
|
+
require 'utility/logger'
|
15
|
+
require 'utility/elasticsearch/index/mappings'
|
16
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|