connectors_service 8.5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +93 -0
- data/NOTICE.txt +2 -0
- data/bin/connectors_service +4 -0
- data/bin/list_connectors +4 -0
- data/config/connectors.yml +25 -0
- data/lib/app/app.rb +25 -0
- data/lib/app/config.rb +132 -0
- data/lib/app/console_app.rb +278 -0
- data/lib/app/dispatcher.rb +121 -0
- data/lib/app/menu.rb +104 -0
- data/lib/app/preflight_check.rb +134 -0
- data/lib/app/version.rb +10 -0
- data/lib/connectors/base/adapter.rb +119 -0
- data/lib/connectors/base/connector.rb +57 -0
- data/lib/connectors/base/custom_client.rb +111 -0
- data/lib/connectors/connector_status.rb +31 -0
- data/lib/connectors/crawler/scheduler.rb +32 -0
- data/lib/connectors/example/connector.rb +57 -0
- data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
- data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
- data/lib/connectors/gitlab/adapter.rb +50 -0
- data/lib/connectors/gitlab/connector.rb +67 -0
- data/lib/connectors/gitlab/custom_client.rb +44 -0
- data/lib/connectors/gitlab/extractor.rb +69 -0
- data/lib/connectors/mongodb/connector.rb +138 -0
- data/lib/connectors/registry.rb +52 -0
- data/lib/connectors/sync_status.rb +21 -0
- data/lib/connectors.rb +16 -0
- data/lib/connectors_app/// +13 -0
- data/lib/connectors_service.rb +24 -0
- data/lib/connectors_utility.rb +16 -0
- data/lib/core/configuration.rb +48 -0
- data/lib/core/connector_settings.rb +142 -0
- data/lib/core/elastic_connector_actions.rb +269 -0
- data/lib/core/heartbeat.rb +32 -0
- data/lib/core/native_scheduler.rb +24 -0
- data/lib/core/output_sink/base_sink.rb +33 -0
- data/lib/core/output_sink/combined_sink.rb +38 -0
- data/lib/core/output_sink/console_sink.rb +51 -0
- data/lib/core/output_sink/es_sink.rb +74 -0
- data/lib/core/output_sink.rb +13 -0
- data/lib/core/scheduler.rb +158 -0
- data/lib/core/single_scheduler.rb +29 -0
- data/lib/core/sync_job_runner.rb +111 -0
- data/lib/core.rb +16 -0
- data/lib/list_connectors.rb +22 -0
- data/lib/stubs/app_config.rb +35 -0
- data/lib/stubs/connectors/stats.rb +35 -0
- data/lib/stubs/service_type.rb +13 -0
- data/lib/utility/constants.rb +20 -0
- data/lib/utility/cron.rb +81 -0
- data/lib/utility/elasticsearch/index/language_data.yml +111 -0
- data/lib/utility/elasticsearch/index/mappings.rb +104 -0
- data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
- data/lib/utility/environment.rb +33 -0
- data/lib/utility/errors.rb +132 -0
- data/lib/utility/es_client.rb +84 -0
- data/lib/utility/exception_tracking.rb +64 -0
- data/lib/utility/extension_mapping_util.rb +123 -0
- data/lib/utility/logger.rb +84 -0
- data/lib/utility/middleware/basic_auth.rb +27 -0
- data/lib/utility/middleware/bearer_auth.rb +27 -0
- data/lib/utility/middleware/restrict_hostnames.rb +73 -0
- data/lib/utility.rb +16 -0
- metadata +487 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'active_support/core_ext/hash'
|
10
|
+
|
11
|
+
module Utility
  # Bidirectional lookup between file extensions and MIME types.
  #
  # The forward table (extension => MIME types) is declared once at class load
  # time; the reverse table (MIME type => extension) is derived from it lazily.
  # Both tables use indifferent access, so String and Symbol keys work alike.
  class ExtensionMappingUtil
    # Forward table. Declaration order matters: a MIME type that is listed under
    # more than one extension (application/x-tika-msoffice,
    # application/x-tika-ooxml) resolves, in the reverse table, to the extension
    # declared last.
    @extension_to_mime = {
      doc: %w[
        application/x-tika-msoffice
        application/msword
      ].freeze,
      docx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.openxmlformats-officedocument.wordprocessingml.template
        application/vnd.ms-word.template.macroenabled.12
        application/vnd.ms-word.document.macroenabled.12
      ].freeze,
      html: %w[
        text/html
        application/xhtml+xml
      ].freeze,
      odt: %w[
        application/x-vnd.oasis.opendocument.graphics-template
        application/vnd.sun.xml.writer
        application/x-vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.text-web
        application/x-vnd.oasis.opendocument.spreadsheet-template
        application/vnd.oasis.opendocument.formula-template
        application/vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image-template
        application/x-vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.chart-template
        application/vnd.oasis.opendocument.presentation-template
        application/x-vnd.oasis.opendocument.image-template
        application/vnd.oasis.opendocument.formula
        application/x-vnd.oasis.opendocument.image
        application/vnd.oasis.opendocument.spreadsheet-template
        application/x-vnd.oasis.opendocument.chart-template
        application/x-vnd.oasis.opendocument.formula
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.text-web
        application/vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.formula-template
        application/x-vnd.oasis.opendocument.spreadsheet
        application/x-vnd.oasis.opendocument.chart
        application/vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.graphics-template
        application/x-vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image
        application/x-vnd.oasis.opendocument.presentation-template
        application/vnd.oasis.opendocument.chart
      ].freeze,
      one: %w[
        application/onenote
        application/msonenote
      ].freeze,
      pdf: %w[
        application/pdf
      ].freeze,
      ppt: %w[
        application/vnd.ms-powerpoint
      ].freeze,
      pptx: %w[
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.ms-powerpoint.presentation.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.template
        application/vnd.ms-powerpoint.slideshow.macroenabled.12
        application/vnd.ms-powerpoint.addin.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.slideshow
      ].freeze,
      rtf: %w[
        message/richtext
        text/richtext
        text/rtf
        application/rtf
      ].freeze,
      txt: %w[
        text/plain
      ].freeze,
      xls: %w[
        application/x-tika-msoffice
        application/vnd.ms-excel
        application/vnd.ms-excel.sheet.3
        application/vnd.ms-excel.sheet.2
        application/vnd.ms-excel.workspace.3
        application/vnd.ms-excel.workspace.4
        application/vnd.ms-excel.sheet.4
      ].freeze,
      xlsx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.openxmlformats-officedocument.spreadsheetml.template
        application/vnd.ms-excel.addin.macroenabled.12
        application/vnd.ms-excel.template.macroenabled.12
        application/vnd.ms-excel.sheet.macroenabled.12
      ].freeze
    }.with_indifferent_access.freeze

    class << self
      # Reverse table, built on first use and cached afterwards.
      #
      # @return [ActiveSupport::HashWithIndifferentAccess] frozen map of
      #   MIME type => extension name (String)
      def mime_to_extension
        @mime_to_extension ||= begin
          reverse_lookup = {}
          @extension_to_mime.each do |extension, mime_types|
            mime_types.each do |mime_type|
              # Later extensions overwrite earlier ones for shared MIME types.
              reverse_lookup[mime_type] = extension.to_s
            end
          end
          reverse_lookup.with_indifferent_access.freeze
        end
      end

      # Looks up the extension registered for a MIME type. The argument is
      # stringified and lower-cased before the lookup.
      #
      # @param mime_type [#to_s]
      # @return [String, nil] extension name, or nil when the type is unknown
      def get_extension(mime_type)
        normalized = mime_type.to_s.downcase
        mime_to_extension[normalized]
      end

      # Looks up the MIME types registered for an extension. The argument is
      # stringified and lower-cased before the lookup.
      #
      # @param extension [#to_s]
      # @return [Array<String>, nil] frozen list of MIME types, or nil when the
      #   extension is unknown
      def get_mime_types(extension)
        normalized = extension.to_s.downcase
        @extension_to_mime[normalized]
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'logger'
|
8
|
+
require 'active_support/core_ext/module'
|
9
|
+
require 'active_support/core_ext/string/filters'
|
10
|
+
require 'ecs_logging/logger'
|
11
|
+
|
12
|
+
module Utility
  # Class-level logging facade shared by the whole service.
  #
  # All methods live on the singleton class, so callers use
  # `Utility::Logger.info('...')` directly. The backing logger is created on
  # first use: an EcsLogging::Logger when `Settings[:ecs_logging]` is truthy,
  # otherwise a standard library ::Logger. Both write to STDOUT.
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
    # Integers are immutable, so no explicit freeze is needed.
    MAX_SHORT_MESSAGE_LENGTH = 1000

    class << self

      delegate :formatter, :formatter=, :to => :logger

      # Sets the severity threshold on the backing logger.
      def level=(log_level)
        logger.level = log_level
      end

      # @return [EcsLogging::Logger, ::Logger] the memoized backing logger
      def logger
        @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
      end

      # Defines one class-level method per supported level (fatal, error, warn,
      # info, debug). With the ECS logger the common ECS fields are attached to
      # every entry; the plain logger only receives the message.
      SUPPORTED_LOG_LEVELS.each do |level|
        define_method(level) do |message|
          if logger.is_a?(EcsLogging::Logger)
            logger.public_send(level, message, extra_ecs_fields)
          else
            logger.public_send(level, message)
          end
        end
      end

      # Logs a stack trace at error level. With the ECS logger the trace is
      # emitted in the `error.stack_trace` field with a nil message; otherwise
      # it is logged as the message itself.
      def log_stacktrace(stacktrace)
        if logger.is_a?(EcsLogging::Logger)
          logger.error(nil, extra_ecs_fields.merge(:error => { :stack_trace => stacktrace }))
        else
          logger.error(stacktrace)
        end
      end

      # Logs an optional message followed by the exception's message and
      # backtrace, each as a separate error entry on the backing logger.
      #
      # @param message [String, nil] free-form text, logged under +prog_name+
      # @param exception [Exception, nil] exception to report
      # @param prog_name [String, nil] program name used for the message entry
      def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
        logger.error(prog_name) { message } if message
        return unless exception

        logger.error(exception.message)
        # Exception#backtrace is nil for an exception that was never raised;
        # skip the backtrace entry instead of failing on nil.join.
        backtrace = exception.backtrace
        logger.error(backtrace.join("\n")) if backtrace
      end

      # Emits a line break as an info-level entry.
      def new_line
        logger.info("\n")
      end

      # @return [String] a freshly generated UUID
      def generate_trace_id
        SecureRandom.uuid
      end

      # Collapses every whitespace run into a single space, strips the ends and
      # truncates the result to MAX_SHORT_MESSAGE_LENGTH characters.
      #
      # @param message [#to_s] text to shorten; nil yields an empty string
      # @return [String]
      def abbreviated_message(message)
        message.to_s.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
      end

      private

      # Common fields attached to every ECS log entry: a date label, the
      # logger's progname, service type/version and process details.
      def extra_ecs_fields
        {
          :labels => { :index_date => Time.now.strftime('%Y.%m.%d') },
          :log => { :logger => logger.progname },
          :service => {
            :type => 'connectors-ruby',
            :version => Settings.version
          },
          :process => {
            :pid => Process.pid,
            :name => $PROGRAM_NAME,
            :thread => Thread.current.object_id
          }
        }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  module Middleware
    # Request middleware that writes an HTTP Basic `Authorization` header onto
    # the request and then hands the request to the next handler.
    class BasicAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :basic_auth_token

      # @param app [#call, nil] next handler in the middleware stack
      # @param options [Hash] must contain the :basic_auth_token key
      # @raise [KeyError] when :basic_auth_token is missing from +options+
      def initialize(app = nil, options = {})
        @basic_auth_token = options.fetch(:basic_auth_token)
        @app = app
      end

      # Sets the Authorization header on +env+ and delegates to the next handler.
      #
      # @param env [#request_headers] request environment
      # @return whatever the next handler returns
      def call(env)
        headers = env.request_headers
        headers[AUTHORIZATION] = header_value
        @app.call(env)
      end

      private

      # Builds the header value from the configured token.
      def header_value
        format('Basic %s', basic_auth_token)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Utility
  module Middleware
    # Request middleware that writes an HTTP Bearer `Authorization` header onto
    # the request and then hands the request to the next handler.
    class BearerAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :bearer_auth_token

      # @param app [#call, nil] next handler in the middleware stack
      # @param options [Hash] must contain the :bearer_auth_token key
      # @raise [KeyError] when :bearer_auth_token is missing from +options+
      def initialize(app = nil, options = {})
        @bearer_auth_token = options.fetch(:bearer_auth_token)
        @app = app
      end

      # Sets the Authorization header on +env+ and delegates to the next handler.
      #
      # @param env [#request_headers] request environment
      # @return whatever the next handler returns
      def call(env)
        headers = env.request_headers
        headers[AUTHORIZATION] = header_value
        @app.call(env)
      end

      private

      # Builds the header value from the configured token.
      def header_value
        format('Bearer %s', bearer_auth_token)
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'faraday/middleware'
|
10
|
+
require 'resolv'
|
11
|
+
|
12
|
+
require 'utility/errors'
|
13
|
+
require 'utility/logger'
|
14
|
+
|
15
|
+
module Utility
  module Middleware
    # Faraday middleware that only lets a request through when at least one IP
    # address of the requested host is also an IP address of a configured
    # allowed host. Otherwise AddressNotAllowed is raised before the request
    # reaches the next handler.
    class RestrictHostnames < Faraday::Middleware
      class AddressNotAllowed < Utility::ClientError; end

      # Recognizes allowed-host entries given as full http(s) URLs. The `://`
      # separator is required so that bare hostnames which merely begin with
      # "http" (e.g. "httpbin.org") are resolved as hostnames instead of being
      # parsed as URLs.
      URL_PATTERN = %r{\Ahttps?://}i

      attr_reader :allowed_hosts, :allowed_ips

      # @param app [#call, nil] next handler in the Faraday stack
      # @param options [Hash] reads :allowed_hosts, a list whose entries may be
      #   http(s) URLs, IPv4/IPv6 literals or hostnames; nil allows nothing
      def initialize(app = nil, options = {})
        super(app)
        @allowed_hosts = options[:allowed_hosts]
        @allowed_ips = ips_from_hosts(@allowed_hosts)
      end

      # @param env [Faraday::Env] request environment; env[:url] is inspected
      # @raise [AddressNotAllowed] when the target host is not allowed
      def call(env)
        raise AddressNotAllowed, "Address not allowed for #{env[:url]}" if denied?(env)
        @app.call(env)
      end

      private

      # Resolves every allowed-host entry to IPAddr objects.
      #
      # @param hosts [Array<String>, nil]
      # @return [Array<IPAddr>] empty when +hosts+ is nil
      def ips_from_hosts(hosts)
        hosts&.flat_map do |host|
          if URL_PATTERN.match?(host)
            lookup_ips(Addressable::URI.parse(host).hostname)
          elsif Resolv::IPv4::Regex.match(host) || Resolv::IPv6::Regex.match(host)
            IPAddr.new(host)
          else
            lookup_ips(host)
          end
        end || []
      end

      # Decides whether the request must be rejected. On a first mismatch the
      # allowed hosts are resolved again (their IPs may have changed) and the
      # check is repeated once before giving up.
      #
      # @return [Boolean] true when none of the requested IPs is allowed; this
      #   includes the case where the requested host resolves to no address
      def denied?(env)
        requested_ips = lookup_ips(env[:url].hostname)
        return false unless none_allowed?(requested_ips)

        Utility::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
                             "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
        @allowed_ips = ips_from_hosts(@allowed_hosts) # maybe the IP has changed for an allowed host. Re-do allowed_hosts DNS lookup
        no_match = none_allowed?(requested_ips)
        if no_match
          Utility::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
                                "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}")
        end
        no_match
      end

      # True when no address in +ips+ is currently in the allow list. An empty
      # +ips+ list yields true.
      def none_allowed?(ips)
        ips.all? { |ip| !@allowed_ips.include?(ip) }
      end

      # @return [Array<IPAddr>] every address the hostname resolves to
      def lookup_ips(hostname)
        addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
      end

      def addr_infos(hostname)
        Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
      rescue SocketError
        # In case of invalid hostname, return an empty list of addresses
        []
      end
    end
  end
end
|
data/lib/utility.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'utility/constants'
|
8
|
+
require 'utility/cron'
|
9
|
+
require 'utility/errors'
|
10
|
+
require 'utility/es_client'
|
11
|
+
require 'utility/environment'
|
12
|
+
require 'utility/exception_tracking'
|
13
|
+
require 'utility/extension_mapping_util'
|
14
|
+
require 'utility/logger'
|
15
|
+
require 'utility/elasticsearch/index/mappings'
|
16
|
+
require 'utility/elasticsearch/index/text_analysis_settings'
|