connectors_service 8.6.0.3 → 8.6.0.4.pre.20221114T233727Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -10
  3. data/lib/app/config.rb +2 -0
  4. data/lib/app/dispatcher.rb +17 -1
  5. data/lib/app/preflight_check.rb +15 -0
  6. data/lib/connectors/base/connector.rb +37 -4
  7. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  8. data/lib/connectors/connector_status.rb +4 -4
  9. data/lib/connectors/example/{example_attachments → attachments}/first_attachment.txt +0 -0
  10. data/lib/connectors/example/{example_attachments → attachments}/second_attachment.txt +0 -0
  11. data/lib/connectors/example/{example_attachments → attachments}/third_attachment.txt +0 -0
  12. data/lib/connectors/example/connector.rb +43 -4
  13. data/lib/connectors/gitlab/connector.rb +16 -2
  14. data/lib/connectors/mongodb/connector.rb +173 -50
  15. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  16. data/lib/connectors/registry.rb +2 -2
  17. data/lib/connectors/sync_status.rb +23 -4
  18. data/lib/core/configuration.rb +4 -2
  19. data/lib/core/connector_job.rb +137 -0
  20. data/lib/core/connector_settings.rb +29 -18
  21. data/lib/core/elastic_connector_actions.rb +331 -32
  22. data/lib/core/filtering/post_process_engine.rb +39 -0
  23. data/lib/core/filtering/post_process_result.rb +27 -0
  24. data/lib/core/filtering/simple_rule.rb +141 -0
  25. data/lib/core/filtering/validation_job_runner.rb +53 -0
  26. data/lib/{connectors_app/// → core/filtering/validation_status.rb} +9 -5
  27. data/lib/core/filtering.rb +17 -0
  28. data/lib/core/ingestion/es_sink.rb +59 -0
  29. data/lib/core/ingestion/ingester.rb +90 -0
  30. data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
  31. data/lib/core/native_scheduler.rb +3 -0
  32. data/lib/core/scheduler.rb +43 -10
  33. data/lib/core/single_scheduler.rb +3 -0
  34. data/lib/core/sync_job_runner.rb +78 -18
  35. data/lib/core.rb +2 -0
  36. data/lib/utility/bulk_queue.rb +85 -0
  37. data/lib/utility/common.rb +20 -0
  38. data/lib/utility/constants.rb +2 -0
  39. data/lib/utility/errors.rb +5 -0
  40. data/lib/utility/es_client.rb +6 -2
  41. data/lib/utility/filtering.rb +22 -0
  42. data/lib/utility/logger.rb +2 -1
  43. data/lib/utility.rb +5 -3
  44. metadata +27 -18
  45. data/lib/core/output_sink/base_sink.rb +0 -33
  46. data/lib/core/output_sink/combined_sink.rb +0 -38
  47. data/lib/core/output_sink/console_sink.rb +0 -51
  48. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/object'
10
+ require 'connectors/base/simple_rules_parser'
11
+ require 'core/filtering/simple_rule'
12
+
13
module Connectors
  module MongoDB
    # Translates simple filtering rules into MongoDB query fragments.
    #
    # Each rule becomes a single-key hash keyed by the rule's field; several
    # fragments are combined under '$and' by #merge_rules.
    class MongoRulesParser < Connectors::Base::SimpleRulesParser
      # Builds the query fragment for one rule.
      #
      # Raises a RuntimeError when the rule's value or field is blank, or when
      # its operator is not one of EQUALS, GREATER_THAN, LESS_THAN or REGEX.
      def parse_rule(rule)
        field = rule.field
        value = rule.value
        raise "value is required for field: #{field}" unless value.present?
        raise "field is required for rule: #{rule}" unless field.present?

        operator = rule.rule
        known = Core::Filtering::SimpleRule::Rule
        case operator
        when known::EQUALS then parse_equals(rule)
        when known::GREATER_THAN then parse_greater_than(rule)
        when known::LESS_THAN then parse_less_than(rule)
        when known::REGEX then parse_regex(rule)
        else raise "Unknown operator: #{operator}"
        end
      end

      # Combines already-parsed fragments: no fragments give an empty hash, a
      # single fragment is returned untouched, anything more is wrapped in '$and'.
      def merge_rules(rules)
        return {} if rules.empty?

        rules.size == 1 ? rules[0] : { '$and' => rules }
      end

      private

      # Include: direct equality match. Exclude: '$ne'.
      def parse_equals(rule)
        return { rule.field => rule.value } if rule.is_include?

        { rule.field => { '$ne' => rule.value } }
      end

      # Include: '$gt'. Exclude: the complementary '$lte'.
      def parse_greater_than(rule)
        comparison = rule.is_include? ? '$gt' : '$lte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: '$lt'. Exclude: the complementary '$gte'.
      def parse_less_than(rule)
        comparison = rule.is_include? ? '$lt' : '$gte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: match the value as a regular expression. Exclude: '$not' of it.
      def parse_regex(rule)
        return { rule.field => /#{rule.value}/ } if rule.is_include?

        { rule.field => { '$not' => /#{rule.value}/ } }
      end
    end
  end
end
@@ -24,10 +24,10 @@ module Connectors
24
24
  @connectors[name]
25
25
  end
26
26
 
27
- def connector(name, configuration)
27
+ def connector(name, configuration, job_description: {})
28
28
  klass = connector_class(name)
29
29
  if klass.present?
30
- return klass.new(configuration: configuration)
30
+ return klass.new(configuration: configuration, job_description: job_description)
31
31
  end
32
32
  raise "Connector #{name} is not yet registered. You need to register it before use"
33
33
  end
@@ -8,14 +8,33 @@
8
8
 
9
9
  module Connectors
10
10
  class SyncStatus
11
- COMPLETED = 'completed'
11
+ PENDING = 'pending'
12
12
  IN_PROGRESS = 'in_progress'
13
- FAILED = 'failed'
13
+ CANCELING = 'canceling'
14
+ CANCELED = 'canceled'
15
+ SUSPENDED = 'suspended'
16
+ COMPLETED = 'completed'
17
+ ERROR = 'error'
14
18
 
15
19
  STATUSES = [
16
- COMPLETED,
20
+ PENDING,
17
21
  IN_PROGRESS,
18
- FAILED
22
+ CANCELING,
23
+ CANCELED,
24
+ SUSPENDED,
25
+ COMPLETED,
26
+ ERROR
27
+ ]
28
+
29
+ PENDING_STATUES = [
30
+ PENDING,
31
+ SUSPENDED
32
+ ]
33
+
34
+ TERMINAL_STATUSES = [
35
+ CANCELED,
36
+ COMPLETED,
37
+ ERROR
19
38
  ]
20
39
  end
21
40
  end
@@ -23,9 +23,11 @@ module Core
23
23
  Utility::Logger.error("Couldn't find connector for service type #{connector_settings.service_type || service_type}")
24
24
  return
25
25
  end
26
- configuration = connector_class.configurable_fields
26
+ configuration = connector_class.configurable_fields_indifferent_access
27
+ features = connector_class.kibana_features.each_with_object({}) { |feature, hsh| hsh[feature] = true }
27
28
  doc = {
28
- :configuration => configuration
29
+ :configuration => configuration,
30
+ :features => features
29
31
  }
30
32
 
31
33
  doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?
@@ -0,0 +1,137 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/hash/indifferent_access'
10
+ require 'connectors/sync_status'
11
+ require 'core/elastic_connector_actions'
12
+ require 'utility'
13
+
14
module Core
  # Wrapper around a single connector sync job document as returned by
  # ElasticConnectorActions. Exposes the job's status and the connector
  # snapshot stored on the job (under the :connector property).
  class ConnectorJob
    DEFAULT_PAGE_SIZE = 100

    # Raised when a job lookup reports that the document was not found.
    class ConnectorJobNotFoundError < StandardError; end

    # Fetches one job by id.
    #
    # Raises ConnectorJobNotFoundError when the response's :found flag is falsy.
    def self.fetch_by_id(job_id)
      es_response = ElasticConnectorActions.get_job(job_id)

      raise ConnectorJobNotFoundError, "Connector job with id=#{job_id} was not found." unless es_response[:found]

      new(es_response)
    end

    # Returns every job whose status is listed in
    # Connectors::SyncStatus::PENDING_STATUES (constant name as declared there).
    def self.pending_jobs(page_size = DEFAULT_PAGE_SIZE)
      query = { terms: { status: Connectors::SyncStatus::PENDING_STATUES } }
      fetch_jobs_by_query(query, page_size)
    end

    # Placeholder: currently always returns an empty list.
    def self.orphaned_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently always returns an empty list.
    def self.stuck_jobs(_page_size = DEFAULT_PAGE_SIZE)
      []
    end

    # Placeholder: currently does nothing and returns nil.
    def self.enqueue(_connector_id)
      nil
    end

    # Pages through ElasticConnectorActions.search_jobs and wraps every hit in
    # a ConnectorJob.
    #
    # NOTE: this method is public. It was originally declared below `private`,
    # which has no effect on `def self.` methods, so its visibility is unchanged.
    def self.fetch_jobs_by_query(query, page_size)
      results = []
      offset = 0
      loop do
        response = ElasticConnectorActions.search_jobs(query, page_size, offset)

        hits = response.dig('hits', 'hits') || []
        total = response.dig('hits', 'total', 'value') || 0
        results.concat(hits.map { |hit| new(hit) })

        # An empty page cannot advance the offset; stop instead of re-issuing
        # the same request forever when the reported total is never reached.
        break if hits.empty? || results.size >= total

        offset += hits.size
      end

      results
    end

    # Document id of the job.
    def id
      @elasticsearch_response[:_id]
    end

    # Reads a property from the job document's _source.
    def [](property_name)
      @elasticsearch_response[:_source][property_name]
    end

    def status
      self[:status]
    end

    def in_progress?
      status == Connectors::SyncStatus::IN_PROGRESS
    end

    def canceling?
      status == Connectors::SyncStatus::CANCELING
    end

    # Connector data stored on the job document under :connector.
    def connector_snapshot
      self[:connector]
    end

    def connector_id
      connector_snapshot[:id]
    end

    # Index name recorded in the connector snapshot.
    # (Previously returned the snapshot's :configuration by mistake.)
    def index_name
      connector_snapshot[:index_name]
    end

    def language
      connector_snapshot[:language]
    end

    def service_type
      connector_snapshot[:service_type]
    end

    def configuration
      connector_snapshot[:configuration]
    end

    # Filtering from the snapshot, passed through Utility::Filtering.extract_filter.
    def filtering
      Utility::Filtering.extract_filter(connector_snapshot[:filtering])
    end

    def pipeline
      connector_snapshot[:pipeline]
    end

    # Connector settings for this job's connector, fetched on first use and memoized.
    def connector
      @connector ||= ConnectorSettings.fetch_by_id(connector_id)
    end

    # Drops the memoized connector settings and fetches them again.
    def reload_connector!
      @connector = nil
      connector
    end

    # Re-reads the job document and clears the memoized connector settings.
    #
    # Raises ConnectorJobNotFoundError when the job no longer exists.
    def reload
      es_response = ElasticConnectorActions.get_job(id)
      raise ConnectorJobNotFoundError, "Connector job with id=#{id} was not found." unless es_response[:found]

      # TODO: remove the usage of with_indifferent_access. get_job method is expected to return a hash
      @elasticsearch_response = es_response.with_indifferent_access
      @connector = nil
    end

    private

    def initialize(es_response)
      # TODO: remove the usage of with_indifferent_access. The initialize method should expect a hash argument
      @elasticsearch_response = es_response.with_indifferent_access
    end
  end
end
@@ -8,6 +8,7 @@
8
8
 
9
9
  require 'active_support/core_ext/hash/indifferent_access'
10
10
  require 'connectors/connector_status'
11
+ require 'connectors/registry'
11
12
  require 'core/elastic_connector_actions'
12
13
  require 'utility'
13
14
 
@@ -19,6 +20,8 @@ module Core
19
20
  DEFAULT_REDUCE_WHITESPACE = true
20
21
  DEFAULT_RUN_ML_INFERENCE = true
21
22
 
23
+ DEFAULT_FILTERING = {}
24
+
22
25
  DEFAULT_PAGE_SIZE = 100
23
26
 
24
27
  # Error Classes
@@ -32,13 +35,15 @@ module Core
32
35
  new(es_response, connectors_meta)
33
36
  end
34
37
 
35
- def initialize(es_response, connectors_meta)
36
- @elasticsearch_response = es_response.with_indifferent_access
37
- @connectors_meta = connectors_meta.with_indifferent_access
38
- end
39
-
40
38
  def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
41
- query = { term: { is_native: true } }
39
+ query = {
40
+ bool: {
41
+ filter: [
42
+ { term: { is_native: true } },
43
+ { terms: { service_type: Connectors::REGISTRY.registered_connectors } }
44
+ ]
45
+ }
46
+ }
42
47
  fetch_connectors_by_query(query, page_size)
43
48
  end
44
49
 
@@ -80,20 +85,27 @@ module Core
80
85
  self[:scheduling]
81
86
  end
82
87
 
88
+ def filtering
89
+ # assume for now, that first object in filtering array or a filter object itself is the only filtering object
90
+ filtering = @elasticsearch_response.dig(:_source, :filtering)
91
+
92
+ Utility::Filtering.extract_filter(filtering)
93
+ end
94
+
83
95
  def request_pipeline
84
- return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
96
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
85
97
  end
86
98
 
87
99
  def extract_binary_content?
88
- return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
100
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
89
101
  end
90
102
 
91
103
  def reduce_whitespace?
92
- return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
104
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
93
105
  end
94
106
 
95
107
  def run_ml_inference?
96
- return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
108
+ Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
97
109
  end
98
110
 
99
111
  def formatted
@@ -112,6 +124,11 @@ module Core
112
124
 
113
125
  private
114
126
 
127
+ def initialize(es_response, connectors_meta)
128
+ @elasticsearch_response = es_response.with_indifferent_access
129
+ @connectors_meta = connectors_meta.with_indifferent_access
130
+ end
131
+
115
132
  def self.fetch_connectors_by_query(query, page_size)
116
133
  connectors_meta = ElasticConnectorActions.connectors_meta
117
134
 
@@ -120,8 +137,8 @@ module Core
120
137
  loop do
121
138
  response = ElasticConnectorActions.search_connectors(query, page_size, offset)
122
139
 
123
- hits = response['hits']['hits']
124
- total = response['hits']['total']['value']
140
+ hits = response.dig('hits', 'hits') || []
141
+ total = response.dig('hits', 'total', 'value') || 0
125
142
  results += hits.map do |hit|
126
143
  Core::ConnectorSettings.new(hit, connectors_meta)
127
144
  end
@@ -132,11 +149,5 @@ module Core
132
149
  results
133
150
  end
134
151
 
135
- def return_if_present(*args)
136
- args.each do |arg|
137
- return arg unless arg.nil?
138
- end
139
- nil
140
- end
141
152
  end
142
153
  end