connectors_service 8.5.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,123 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash'
10
+
11
module Utility
  # Two-way lookup between file extensions and MIME types.
  #
  # The source of truth is a table keyed by extension; the reverse index
  # (MIME type -> extension) is derived from it lazily on first use.
  #
  # NOTE: two MIME types are listed under more than one extension
  # (application/x-tika-msoffice under doc and xls, application/x-tika-ooxml
  # under docx and xlsx). The reverse index is built in table order and later
  # entries overwrite earlier ones, so these resolve to 'xls' and 'xlsx'.
  class ExtensionMappingUtil
    @extension_to_mime = {
      :doc => %w[
        application/x-tika-msoffice
        application/msword
      ].freeze,
      :docx => %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.openxmlformats-officedocument.wordprocessingml.template
        application/vnd.ms-word.template.macroenabled.12
        application/vnd.ms-word.document.macroenabled.12
      ].freeze,
      :html => %w[
        text/html
        application/xhtml+xml
      ].freeze,
      :odt => %w[
        application/x-vnd.oasis.opendocument.graphics-template
        application/vnd.sun.xml.writer application/x-vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.text-web
        application/x-vnd.oasis.opendocument.spreadsheet-template
        application/vnd.oasis.opendocument.formula-template
        application/vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image-template
        application/x-vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.chart-template
        application/vnd.oasis.opendocument.presentation-template
        application/x-vnd.oasis.opendocument.image-template
        application/vnd.oasis.opendocument.formula
        application/x-vnd.oasis.opendocument.image
        application/vnd.oasis.opendocument.spreadsheet-template
        application/x-vnd.oasis.opendocument.chart-template
        application/x-vnd.oasis.opendocument.formula
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.text-web
        application/vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.formula-template
        application/x-vnd.oasis.opendocument.spreadsheet
        application/x-vnd.oasis.opendocument.chart
        application/vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.graphics-template
        application/x-vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image
        application/x-vnd.oasis.opendocument.presentation-template
        application/vnd.oasis.opendocument.chart
      ].freeze,
      :one => %w[
        application/onenote
        application/msonenote
      ].freeze,
      :pdf => %w[
        application/pdf
      ].freeze,
      :ppt => %w[
        application/vnd.ms-powerpoint
      ].freeze,
      :pptx => %w[
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.ms-powerpoint.presentation.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.template
        application/vnd.ms-powerpoint.slideshow.macroenabled.12
        application/vnd.ms-powerpoint.addin.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.slideshow
      ].freeze,
      :rtf => %w[
        message/richtext
        text/richtext
        text/rtf
        application/rtf
      ].freeze,
      :txt => %w[
        text/plain
      ].freeze,
      :xls => %w[
        application/x-tika-msoffice
        application/vnd.ms-excel
        application/vnd.ms-excel.sheet.3
        application/vnd.ms-excel.sheet.2
        application/vnd.ms-excel.workspace.3
        application/vnd.ms-excel.workspace.4
        application/vnd.ms-excel.sheet.4
      ].freeze,
      :xlsx => %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.openxmlformats-officedocument.spreadsheetml.template
        application/vnd.ms-excel.addin.macroenabled.12
        application/vnd.ms-excel.template.macroenabled.12
        application/vnd.ms-excel.sheet.macroenabled.12
      ].freeze
    }.with_indifferent_access.freeze

    # Reverse index of the extension table, built once and memoized.
    #
    # @return [ActiveSupport::HashWithIndifferentAccess] frozen map of
    #   MIME type => extension (extension as a String)
    def self.mime_to_extension
      @mime_to_extension ||= @extension_to_mime.each_with_object({}) do |(key, values), memo|
        values.each { |value| memo[value] = key.to_s }
      end.with_indifferent_access.freeze
    end

    # Looks up the file extension for a MIME type.
    #
    # The lookup is case-insensitive and ignores media-type parameters and
    # surrounding whitespace, so "Text/HTML; charset=utf-8" resolves the same
    # way as "text/html".
    #
    # @param mime_type [#to_s] MIME type, optionally followed by parameters
    # @return [String, nil] the extension, or nil when the type is unknown
    def self.get_extension(mime_type)
      # Keep only the part before the first ';' (the bare media type).
      media_type = mime_type.to_s.split(';', 2).first.to_s.strip.downcase
      mime_to_extension[media_type]
    end

    # Looks up the MIME types registered for a file extension.
    #
    # @param extension [#to_s] extension without a leading dot; case-insensitive
    # @return [Array<String>, nil] frozen list of MIME types, or nil when the
    #   extension is unknown
    def self.get_mime_types(extension)
      @extension_to_mime[extension.to_s.downcase]
    end
  end
end
@@ -0,0 +1,84 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
require 'logger'
require 'securerandom'
require 'active_support/core_ext/module'
require 'active_support/core_ext/string/filters'
require 'ecs_logging/logger'
11
+
12
module Utility
  # Class-level logging facade shared by the whole process.
  #
  # All methods live on the singleton class. The underlying logger is created
  # lazily: an EcsLogging::Logger when Settings[:ecs_logging] is truthy,
  # otherwise a plain ::Logger; both write to STDOUT.
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
    # Integers are immutable, so no explicit freeze is needed.
    MAX_SHORT_MESSAGE_LENGTH = 1000

    class << self
      delegate :formatter, :formatter=, :to => :logger

      # Sets the severity threshold on the underlying logger.
      def level=(log_level)
        logger.level = log_level
      end

      # @return [::Logger, EcsLogging::Logger] the memoized underlying logger
      def logger
        @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
      end

      # Defines fatal/error/warn/info/debug. With an ECS logger the extra ECS
      # fields are passed along with every message.
      SUPPORTED_LOG_LEVELS.each do |level|
        define_method(level) do |message|
          if logger.is_a?(EcsLogging::Logger)
            logger.public_send(level, message, extra_ecs_fields)
          else
            logger.public_send(level, message)
          end
        end
      end

      # Logs a stack trace at error level. With an ECS logger the trace is sent
      # as the error.stack_trace field with no message.
      def log_stacktrace(stacktrace)
        if logger.is_a?(EcsLogging::Logger)
          logger.error(nil, extra_ecs_fields.merge(:error => { :stack_trace => stacktrace }))
        else
          logger.error(stacktrace)
        end
      end

      # Logs an optional message followed by an exception's message and
      # backtrace, all at error level.
      #
      # @param message [String, nil] logged (with prog_name) when present
      # @param exception [Exception, nil] its message and backtrace are logged
      #   when present
      # @param prog_name [String, nil] program name used for the message entry
      def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
        logger.error(prog_name) { message } if message
        return unless exception

        logger.error exception.message
        # An exception that was never raised has no backtrace (nil).
        backtrace = exception.backtrace
        logger.error backtrace.join("\n") if backtrace
      end

      # Emits a newline as an info-level entry.
      def new_line
        logger.info("\n")
      end

      # @return [String] a random UUID
      def generate_trace_id
        SecureRandom.uuid
      end

      # Collapses every whitespace run into a single space, trims the ends and
      # truncates the result to MAX_SHORT_MESSAGE_LENGTH characters.
      #
      # @param message [#to_s] text to shorten; nil yields an empty string
      # @return [String]
      def abbreviated_message(message)
        message.to_s.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
      end

      private

      # Fields attached to every entry written through an ECS logger.
      def extra_ecs_fields
        {
          :labels => { :index_date => Time.now.strftime('%Y.%m.%d') },
          :log => { :logger => logger.progname },
          :service => {
            :type => 'connectors-ruby',
            :version => Settings.version
          },
          :process => {
            :pid => Process.pid,
            :name => $PROGRAM_NAME,
            :thread => Thread.current.object_id
          }
        }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  module Middleware
    # Request middleware that stamps an HTTP Basic "Authorization" header onto
    # every request before handing it to the next app in the stack.
    class BasicAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :basic_auth_token

      # @param app [#call, nil] next element of the middleware stack
      # @param options [Hash] must contain :basic_auth_token
      # @raise [KeyError] when :basic_auth_token is missing from options
      def initialize(app = nil, options = {})
        @basic_auth_token = options.fetch(:basic_auth_token)
        @app = app
      end

      # Sets the header on env.request_headers, then delegates to the wrapped
      # app and returns whatever it returns. The token is used as given.
      def call(env)
        header_value = "Basic #{basic_auth_token}"
        headers = env.request_headers
        headers[AUTHORIZATION] = header_value
        @app.call(env)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  module Middleware
    # Request middleware that stamps an HTTP Bearer "Authorization" header onto
    # every request before handing it to the next app in the stack.
    class BearerAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :bearer_auth_token

      # @param app [#call, nil] next element of the middleware stack
      # @param options [Hash] must contain :bearer_auth_token
      # @raise [KeyError] when :bearer_auth_token is missing from options
      def initialize(app = nil, options = {})
        @bearer_auth_token = options.fetch(:bearer_auth_token)
        @app = app
      end

      # Sets the header on env.request_headers, then delegates to the wrapped
      # app and returns whatever it returns.
      def call(env)
        header_value = "Bearer #{bearer_auth_token}"
        headers = env.request_headers
        headers[AUTHORIZATION] = header_value
        @app.call(env)
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'faraday/middleware'
10
+ require 'resolv'
11
+
12
+ require 'utility/errors'
13
+ require 'utility/logger'
14
+
15
module Utility
  module Middleware
    # Faraday middleware that only lets a request through when at least one IP
    # address the request host resolves to is also an IP address of one of
    # the configured allowed hosts.
    class RestrictHostnames < Faraday::Middleware
      class AddressNotAllowed < Utility::ClientError; end

      # Matches allowed-host entries written as full http(s) URLs. The "://"
      # is required so that a bare hostname that merely starts with "http"
      # (e.g. "httpbin.org") is resolved as a hostname instead of being parsed
      # as a URL.
      URL_PATTERN = %r{\Ahttps?://}i

      attr_reader :allowed_hosts, :allowed_ips

      # @param app [#call, nil] next element of the middleware stack
      # @param options [Hash] :allowed_hosts is a list of URLs, hostnames or
      #   literal IP addresses; nil results in an empty allow-list
      def initialize(app = nil, options = {})
        super(app)
        @allowed_hosts = options[:allowed_hosts]
        @allowed_ips = ips_from_hosts(@allowed_hosts)
      end

      # @raise [AddressNotAllowed] when none of the request host's IPs is allowed
      def call(env)
        raise AddressNotAllowed.new("Address not allowed for #{env[:url]}") if denied?(env)
        @app.call(env)
      end

      private

      # Expands the allowed-host entries into a flat list of IPAddr objects.
      # URLs and hostnames are resolved via DNS; literal IPs are used as given.
      def ips_from_hosts(hosts)
        hosts&.flat_map do |host|
          if URL_PATTERN.match(host)
            lookup_ips(Addressable::URI.parse(host).hostname)
          elsif Resolv::IPv4::Regex.match(host) || Resolv::IPv6::Regex.match(host)
            IPAddr.new(host)
          else
            lookup_ips(host)
          end
        end || []
      end

      # Decides whether the request must be rejected. On a first mismatch the
      # allowed hosts are resolved again, since their IPs may have changed,
      # and the check is repeated before giving up.
      def denied?(env)
        requested_ips = lookup_ips(env[:url].hostname)
        return false unless none_allowed?(requested_ips)

        Utility::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
          "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
        @allowed_ips = ips_from_hosts(@allowed_hosts)

        still_denied = none_allowed?(requested_ips)
        if still_denied
          Utility::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
            "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}")
        end
        still_denied
      end

      # True when no given IP is in the current allow-list; an empty list of
      # IPs (e.g. an unresolvable host) therefore counts as not allowed.
      def none_allowed?(ips)
        ips.none? { |ip| @allowed_ips.include?(ip) }
      end

      # Resolves a hostname into IPAddr objects.
      def lookup_ips(hostname)
        addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
      end

      def addr_infos(hostname)
        Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
      rescue SocketError
        # In case of invalid hostname, return an empty list of addresses
        []
      end
    end
  end
end
data/lib/utility.rb ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'utility/constants'
8
+ require 'utility/cron'
9
+ require 'utility/errors'
10
+ require 'utility/es_client'
11
+ require 'utility/environment'
12
+ require 'utility/exception_tracking'
13
+ require 'utility/extension_mapping_util'
14
+ require 'utility/logger'
15
+ require 'utility/elasticsearch/index/mappings'
16
+ require 'utility/elasticsearch/index/text_analysis_settings'