connectors_service 8.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +93 -0
  3. data/NOTICE.txt +2 -0
  4. data/bin/connectors_service +4 -0
  5. data/bin/list_connectors +4 -0
  6. data/config/connectors.yml +25 -0
  7. data/lib/app/app.rb +25 -0
  8. data/lib/app/config.rb +132 -0
  9. data/lib/app/console_app.rb +278 -0
  10. data/lib/app/dispatcher.rb +121 -0
  11. data/lib/app/menu.rb +104 -0
  12. data/lib/app/preflight_check.rb +134 -0
  13. data/lib/app/version.rb +10 -0
  14. data/lib/connectors/base/adapter.rb +119 -0
  15. data/lib/connectors/base/connector.rb +57 -0
  16. data/lib/connectors/base/custom_client.rb +111 -0
  17. data/lib/connectors/connector_status.rb +31 -0
  18. data/lib/connectors/crawler/scheduler.rb +32 -0
  19. data/lib/connectors/example/connector.rb +57 -0
  20. data/lib/connectors/example/example_attachments/first_attachment.txt +1 -0
  21. data/lib/connectors/example/example_attachments/second_attachment.txt +1 -0
  22. data/lib/connectors/example/example_attachments/third_attachment.txt +1 -0
  23. data/lib/connectors/gitlab/adapter.rb +50 -0
  24. data/lib/connectors/gitlab/connector.rb +67 -0
  25. data/lib/connectors/gitlab/custom_client.rb +44 -0
  26. data/lib/connectors/gitlab/extractor.rb +69 -0
  27. data/lib/connectors/mongodb/connector.rb +138 -0
  28. data/lib/connectors/registry.rb +52 -0
  29. data/lib/connectors/sync_status.rb +21 -0
  30. data/lib/connectors.rb +16 -0
  31. data/lib/connectors_app/// +13 -0
  32. data/lib/connectors_service.rb +24 -0
  33. data/lib/connectors_utility.rb +16 -0
  34. data/lib/core/configuration.rb +48 -0
  35. data/lib/core/connector_settings.rb +142 -0
  36. data/lib/core/elastic_connector_actions.rb +269 -0
  37. data/lib/core/heartbeat.rb +32 -0
  38. data/lib/core/native_scheduler.rb +24 -0
  39. data/lib/core/output_sink/base_sink.rb +33 -0
  40. data/lib/core/output_sink/combined_sink.rb +38 -0
  41. data/lib/core/output_sink/console_sink.rb +51 -0
  42. data/lib/core/output_sink/es_sink.rb +74 -0
  43. data/lib/core/output_sink.rb +13 -0
  44. data/lib/core/scheduler.rb +158 -0
  45. data/lib/core/single_scheduler.rb +29 -0
  46. data/lib/core/sync_job_runner.rb +111 -0
  47. data/lib/core.rb +16 -0
  48. data/lib/list_connectors.rb +22 -0
  49. data/lib/stubs/app_config.rb +35 -0
  50. data/lib/stubs/connectors/stats.rb +35 -0
  51. data/lib/stubs/service_type.rb +13 -0
  52. data/lib/utility/constants.rb +20 -0
  53. data/lib/utility/cron.rb +81 -0
  54. data/lib/utility/elasticsearch/index/language_data.yml +111 -0
  55. data/lib/utility/elasticsearch/index/mappings.rb +104 -0
  56. data/lib/utility/elasticsearch/index/text_analysis_settings.rb +226 -0
  57. data/lib/utility/environment.rb +33 -0
  58. data/lib/utility/errors.rb +132 -0
  59. data/lib/utility/es_client.rb +84 -0
  60. data/lib/utility/exception_tracking.rb +64 -0
  61. data/lib/utility/extension_mapping_util.rb +123 -0
  62. data/lib/utility/logger.rb +84 -0
  63. data/lib/utility/middleware/basic_auth.rb +27 -0
  64. data/lib/utility/middleware/bearer_auth.rb +27 -0
  65. data/lib/utility/middleware/restrict_hostnames.rb +73 -0
  66. data/lib/utility.rb +16 -0
  67. metadata +487 -0
@@ -0,0 +1,123 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash'
10
+
11
module Utility
  # Two-way lookup between file extensions and the MIME types associated with
  # them.
  #
  # The forward table (extension -> MIME types) is declared literally below;
  # the reverse table (MIME type -> extension) is derived from it on first use.
  # Both tables are indifferent-access hashes, and the public lookup helpers
  # stringify and downcase their argument before the lookup.
  class ExtensionMappingUtil
    # NOTE: a few MIME types are listed under more than one extension
    # (application/x-tika-msoffice under doc and xls, application/x-tika-ooxml
    # under docx and xlsx). The reverse table keeps the LAST extension that
    # lists a given MIME type, so the order of the entries below matters.
    @extension_to_mime = {
      doc: %w[
        application/x-tika-msoffice
        application/msword
      ].freeze,
      docx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.openxmlformats-officedocument.wordprocessingml.template
        application/vnd.ms-word.template.macroenabled.12
        application/vnd.ms-word.document.macroenabled.12
      ].freeze,
      html: %w[
        text/html
        application/xhtml+xml
      ].freeze,
      odt: %w[
        application/x-vnd.oasis.opendocument.graphics-template
        application/vnd.sun.xml.writer
        application/x-vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.text-web
        application/x-vnd.oasis.opendocument.spreadsheet-template
        application/vnd.oasis.opendocument.formula-template
        application/vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image-template
        application/x-vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.chart-template
        application/vnd.oasis.opendocument.presentation-template
        application/x-vnd.oasis.opendocument.image-template
        application/vnd.oasis.opendocument.formula
        application/x-vnd.oasis.opendocument.image
        application/vnd.oasis.opendocument.spreadsheet-template
        application/x-vnd.oasis.opendocument.chart-template
        application/x-vnd.oasis.opendocument.formula
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.text-web
        application/vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.text
        application/x-vnd.oasis.opendocument.formula-template
        application/x-vnd.oasis.opendocument.spreadsheet
        application/x-vnd.oasis.opendocument.chart
        application/vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-master
        application/x-vnd.oasis.opendocument.text-template
        application/vnd.oasis.opendocument.graphics
        application/vnd.oasis.opendocument.graphics-template
        application/x-vnd.oasis.opendocument.presentation
        application/vnd.oasis.opendocument.image
        application/x-vnd.oasis.opendocument.presentation-template
        application/vnd.oasis.opendocument.chart
      ].freeze,
      one: %w[
        application/onenote
        application/msonenote
      ].freeze,
      pdf: %w[
        application/pdf
      ].freeze,
      ppt: %w[
        application/vnd.ms-powerpoint
      ].freeze,
      pptx: %w[
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.ms-powerpoint.presentation.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.template
        application/vnd.ms-powerpoint.slideshow.macroenabled.12
        application/vnd.ms-powerpoint.addin.macroenabled.12
        application/vnd.openxmlformats-officedocument.presentationml.slideshow
      ].freeze,
      rtf: %w[
        message/richtext
        text/richtext
        text/rtf
        application/rtf
      ].freeze,
      txt: %w[
        text/plain
      ].freeze,
      xls: %w[
        application/x-tika-msoffice
        application/vnd.ms-excel
        application/vnd.ms-excel.sheet.3
        application/vnd.ms-excel.sheet.2
        application/vnd.ms-excel.workspace.3
        application/vnd.ms-excel.workspace.4
        application/vnd.ms-excel.sheet.4
      ].freeze,
      xlsx: %w[
        application/x-tika-ooxml
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.openxmlformats-officedocument.spreadsheetml.template
        application/vnd.ms-excel.addin.macroenabled.12
        application/vnd.ms-excel.template.macroenabled.12
        application/vnd.ms-excel.sheet.macroenabled.12
      ].freeze
    }.with_indifferent_access.freeze

    class << self
      # Reverse table: MIME type => extension (as a String).
      #
      # Built once from the forward table and memoized. When a MIME type is
      # listed under several extensions, the last extension in declaration
      # order is the one kept.
      #
      # @return [Hash] frozen indifferent-access hash of MIME type to extension
      def mime_to_extension
        @mime_to_extension ||= begin
          pairs = @extension_to_mime.flat_map do |extension, mime_types|
            mime_types.map { |mime_type| [mime_type, extension.to_s] }
          end
          pairs.to_h.with_indifferent_access.freeze
        end
      end

      # Looks up the extension registered for a MIME type.
      #
      # @param mime_type [#to_s] MIME type; stringified and downcased before lookup
      # @return [String, nil] the extension, or nil when the MIME type is unknown
      def get_extension(mime_type)
        normalized = mime_type.to_s.downcase
        mime_to_extension[normalized]
      end

      # Looks up the MIME types registered for an extension.
      #
      # @param extension [#to_s] extension without a leading dot; stringified
      #   and downcased before lookup
      # @return [Array<String>, nil] frozen list of MIME types, or nil when the
      #   extension is unknown
      def get_mime_types(extension)
        normalized = extension.to_s.downcase
        @extension_to_mime[normalized]
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'logger'
8
+ require 'active_support/core_ext/module'
9
+ require 'active_support/core_ext/string/filters'
10
+ require 'ecs_logging/logger'
11
+
12
# SecureRandom is used by generate_trace_id; require it explicitly instead of
# relying on another library having loaded it.
require 'securerandom'

module Utility
  # Process-wide logging facade exposed through class-level methods.
  #
  # The underlying logger is created lazily on first use: an
  # EcsLogging::Logger when Settings[:ecs_logging] is truthy, otherwise a
  # stdlib ::Logger. Both write to STDOUT. (Settings is defined outside this
  # file.)
  class Logger
    SUPPORTED_LOG_LEVELS = %i[fatal error warn info debug].freeze
    # Integers are always frozen, so no explicit freeze is needed.
    MAX_SHORT_MESSAGE_LENGTH = 1000

    class << self
      delegate :formatter, :formatter=, :to => :logger

      # Sets the severity threshold on the underlying logger.
      #
      # @param log_level [Object] any value accepted by the logger's #level=
      def level=(log_level)
        logger.level = log_level
      end

      # @return [EcsLogging::Logger, ::Logger] the memoized underlying logger
      def logger
        @logger ||= Settings[:ecs_logging] ? EcsLogging::Logger.new(STDOUT) : ::Logger.new(STDOUT)
      end

      # Defines Utility::Logger.fatal / .error / .warn / .info / .debug.
      # With an ECS logger, the extra ECS fields are passed along with the
      # message; the plain logger only receives the message.
      SUPPORTED_LOG_LEVELS.each do |level|
        define_method(level) do |message|
          if ecs_logger?
            logger.public_send(level, message, extra_ecs_fields)
          else
            logger.public_send(level, message)
          end
        end
      end

      # Logs a stack trace at error level. With an ECS logger the trace is
      # sent as the error.stack_trace field with a nil message; otherwise it
      # is logged as the message itself.
      #
      # @param stacktrace [String] the stack trace text
      def log_stacktrace(stacktrace)
        if ecs_logger?
          logger.error(nil, extra_ecs_fields.merge(:error => { :stack_trace => stacktrace }))
        else
          logger.error(stacktrace)
        end
      end

      # Logs an optional message followed by the exception's message and
      # backtrace, each as a separate error entry on the underlying logger.
      #
      # @param message [String, nil] free-form message; skipped when nil
      # @param exception [Exception, nil] exception to report; skipped when nil
      # @param prog_name [String, nil] program name attached to the message entry
      def error_with_backtrace(message: nil, exception: nil, prog_name: nil)
        logger.error(prog_name) { message } if message
        return unless exception

        logger.error exception.message
        # Exception#backtrace is nil for an exception that was never raised;
        # skip the backtrace entry in that case instead of failing on nil.join.
        backtrace = exception.backtrace
        logger.error backtrace.join("\n") if backtrace
      end

      # Emits an info entry whose message is a single newline.
      def new_line
        logger.info("\n")
      end

      # @return [String] a random UUID
      def generate_trace_id
        SecureRandom.uuid
      end

      # Collapses every run of whitespace into a single space, strips the
      # ends and truncates the result to MAX_SHORT_MESSAGE_LENGTH characters.
      #
      # @param message [#to_s] text to abbreviate; nil yields an empty string
      # @return [String] the abbreviated message
      def abbreviated_message(message)
        message.to_s.gsub(/\s+/, ' ').strip.truncate(MAX_SHORT_MESSAGE_LENGTH)
      end

      private

      # True when the underlying logger is the ECS implementation.
      def ecs_logger?
        logger.is_a?(EcsLogging::Logger)
      end

      # Extra fields attached to every ECS log entry.
      def extra_ecs_fields
        {
          :labels => { :index_date => Time.now.strftime('%Y.%m.%d') },
          :log => { :logger => logger.progname },
          :service => {
            :type => 'connectors-ruby',
            :version => Settings.version
          },
          :process => {
            :pid => Process.pid,
            :name => $PROGRAM_NAME,
            :thread => Thread.current.object_id
          }
        }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  module Middleware
    # Request middleware that sets the Authorization header to
    # "Basic <token>" and then hands the request to the next app in the stack.
    #
    # The token is inserted into the header exactly as supplied.
    class BasicAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :basic_auth_token

      # @param app [#call, nil] next middleware / adapter in the stack
      # @param options [Hash] must contain :basic_auth_token
      # @raise [KeyError] when :basic_auth_token is missing from options
      def initialize(app = nil, options = {})
        @basic_auth_token = options.fetch(:basic_auth_token)
        @app = app
      end

      # Adds the header to the outgoing request and delegates downstream.
      #
      # @param env [#request_headers] request environment
      # @return [Object] whatever the downstream app returns
      def call(env)
        env.request_headers[AUTHORIZATION] = authorization_value
        @app.call(env)
      end

      private

      # Full value of the Authorization header.
      def authorization_value
        "Basic #{basic_auth_token}"
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Utility
  module Middleware
    # Request middleware that sets the Authorization header to
    # "Bearer <token>" before passing the request on to the next app.
    class BearerAuth
      AUTHORIZATION = 'Authorization'

      attr_reader :bearer_auth_token

      # @param app [#call, nil] next middleware / adapter in the stack
      # @param options [Hash] must contain :bearer_auth_token
      # @raise [KeyError] when :bearer_auth_token is missing from options
      def initialize(app = nil, options = {})
        @app = app
        token = options.fetch(:bearer_auth_token)
        @bearer_auth_token = token
      end

      # Writes the bearer header into the request and delegates downstream.
      #
      # @param env [#request_headers] request environment
      # @return [Object] whatever the downstream app returns
      def call(env)
        headers = env.request_headers
        headers[AUTHORIZATION] = "Bearer #{bearer_auth_token}"

        @app.call(env)
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'faraday/middleware'
10
+ require 'resolv'
11
+
12
+ require 'utility/errors'
13
+ require 'utility/logger'
14
+
15
# IPAddr and Addrinfo are used below; require their libraries explicitly
# rather than depending on another library having loaded them.
require 'ipaddr'
require 'socket'

module Utility
  module Middleware
    # Faraday middleware that only lets a request through when at least one IP
    # address the request's hostname resolves to is also an IP address of one
    # of the configured allowed hosts.
    #
    # Allowed hosts are resolved when the middleware is built, and resolved
    # again once on a mismatch in case DNS records for an allowed host changed.
    class RestrictHostnames < Faraday::Middleware
      class AddressNotAllowed < Utility::ClientError; end

      # Matches only real http(s) URLs. The scheme separator is required so
      # that a bare hostname which merely starts with "http" (for example
      # "httpbin.org") is resolved as a hostname instead of being parsed as a
      # URL, which would yield no hostname and therefore no addresses.
      URL_PATTERN = %r{\Ahttps?://}i

      attr_reader :allowed_hosts, :allowed_ips

      # @param app [#call, nil] next middleware / adapter in the stack
      # @param options [Hash] :allowed_hosts is a list of URLs, hostnames or IP
      #   literals; when it is nil, no address is allowed
      def initialize(app = nil, options = {})
        super(app)
        @allowed_hosts = options[:allowed_hosts]
        @allowed_ips = ips_from_hosts(@allowed_hosts)
      end

      # @param env [Faraday::Env] request environment; env[:url] is the target
      # @raise [AddressNotAllowed] when the target resolves to no allowed address
      def call(env)
        raise AddressNotAllowed, "Address not allowed for #{env[:url]}" if denied?(env)

        @app.call(env)
      end

      private

      # Turns the configured hosts into a flat list of IPAddr objects.
      # URLs contribute the addresses of their hostname, IP literals are used
      # as-is, and anything else is resolved as a hostname.
      def ips_from_hosts(hosts)
        hosts&.flat_map do |host|
          if URL_PATTERN.match?(host)
            lookup_ips(Addressable::URI.parse(host).hostname)
          elsif Resolv::IPv4::Regex.match?(host) || Resolv::IPv6::Regex.match?(host)
            IPAddr.new(host)
          else
            lookup_ips(host)
          end
        end || []
      end

      # Returns true when the request must be rejected. A hostname that
      # resolves to no address at all is rejected as well.
      def denied?(env)
        requested_ips = lookup_ips(env[:url].hostname)
        return false if any_allowed?(requested_ips)

        Utility::Logger.warn("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
          "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}. Retrying.")
        # Maybe the IP has changed for an allowed host: redo the DNS lookup
        # for the allowed hosts and check once more.
        @allowed_ips = ips_from_hosts(@allowed_hosts)
        return false if any_allowed?(requested_ips)

        Utility::Logger.error("Requested url #{env[:url]} with resolved ip addresses #{requested_ips} does not match " \
          "allowed hosts #{@allowed_hosts} with resolved ip addresses #{@allowed_ips}")
        true
      end

      # True when at least one of the given addresses is in the allow-list.
      def any_allowed?(ips)
        ips.any? { |ip| @allowed_ips.include?(ip) }
      end

      # Resolves a hostname into IPAddr objects (empty when resolution fails).
      def lookup_ips(hostname)
        addr_infos(hostname).map { |a| IPAddr.new(a.ip_address) }
      end

      def addr_infos(hostname)
        Addrinfo.getaddrinfo(hostname, nil, :UNSPEC, :STREAM)
      rescue SocketError
        # In case of invalid hostname, return an empty list of addresses
        []
      end
    end
  end
end
data/lib/utility.rb ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'utility/constants'
8
+ require 'utility/cron'
9
+ require 'utility/errors'
10
+ require 'utility/es_client'
11
+ require 'utility/environment'
12
+ require 'utility/exception_tracking'
13
+ require 'utility/extension_mapping_util'
14
+ require 'utility/logger'
15
+ require 'utility/elasticsearch/index/mappings'
16
+ require 'utility/elasticsearch/index/text_analysis_settings'