connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221116T024501Z

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/dispatcher.rb +42 -17
  5. data/lib/app/preflight_check.rb +11 -0
  6. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  7. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  8. data/lib/connectors/base/connector.rb +43 -14
  9. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  10. data/lib/connectors/example/connector.rb +6 -0
  11. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  12. data/lib/connectors/gitlab/connector.rb +6 -1
  13. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  14. data/lib/connectors/mongodb/connector.rb +47 -43
  15. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  17. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  18. data/lib/connectors/sync_status.rb +6 -1
  19. data/lib/connectors/tolerable_error_helper.rb +43 -0
  20. data/lib/core/configuration.rb +3 -1
  21. data/lib/core/connector_job.rb +210 -0
  22. data/lib/core/connector_settings.rb +52 -16
  23. data/lib/core/elastic_connector_actions.rb +320 -59
  24. data/lib/core/filtering/post_process_engine.rb +39 -0
  25. data/lib/core/filtering/post_process_result.rb +27 -0
  26. data/lib/core/filtering/simple_rule.rb +141 -0
  27. data/lib/core/filtering/validation_job_runner.rb +53 -0
  28. data/lib/core/filtering/validation_status.rb +17 -0
  29. data/lib/core/filtering.rb +17 -0
  30. data/lib/core/ingestion/es_sink.rb +118 -0
  31. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  32. data/lib/core/jobs/consumer.rb +114 -0
  33. data/lib/core/jobs/producer.rb +26 -0
  34. data/lib/core/scheduler.rb +40 -10
  35. data/lib/core/single_scheduler.rb +1 -1
  36. data/lib/core/sync_job_runner.rb +72 -16
  37. data/lib/core.rb +4 -0
  38. data/lib/utility/bulk_queue.rb +85 -0
  39. data/lib/utility/constants.rb +2 -0
  40. data/lib/utility/error_monitor.rb +108 -0
  41. data/lib/utility/errors.rb +0 -12
  42. data/lib/utility/filtering.rb +22 -0
  43. data/lib/utility/logger.rb +1 -1
  44. data/lib/utility.rb +11 -4
  45. metadata +25 -7
  46. data/lib/core/output_sink/base_sink.rb +0 -33
  47. data/lib/core/output_sink/combined_sink.rb +0 -38
  48. data/lib/core/output_sink/console_sink.rb +0 -51
  49. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -7,6 +7,9 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
11
+ require 'connectors/mongodb/mongo_rules_parser'
12
+ require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
10
13
  require 'mongo'
11
14
  require 'utility'
12
15
 
@@ -28,27 +31,31 @@ module Connectors
28
31
 
29
32
  def self.configurable_fields
30
33
  {
31
- :host => {
32
- :label => 'Server Hostname'
33
- },
34
- :user => {
35
- :label => 'Username'
36
- },
37
- :password => {
38
- :label => 'Password'
39
- },
40
- :database => {
41
- :label => 'Database'
42
- },
43
- :collection => {
44
- :label => 'Collection'
45
- },
46
- :direct_connection => {
47
- :label => 'Direct connection? (true/false)'
48
- }
34
+ :host => {
35
+ :label => 'Server Hostname'
36
+ },
37
+ :user => {
38
+ :label => 'Username'
39
+ },
40
+ :password => {
41
+ :label => 'Password'
42
+ },
43
+ :database => {
44
+ :label => 'Database'
45
+ },
46
+ :collection => {
47
+ :label => 'Collection'
48
+ },
49
+ :direct_connection => {
50
+ :label => 'Direct connection? (true/false)'
51
+ }
49
52
  }
50
53
  end
51
54
 
55
+ def self.advanced_snippet_validator
56
+ MongoAdvancedSnippetAgainstSchemaValidator
57
+ end
58
+
52
59
  def initialize(configuration: {}, job_description: {})
53
60
  super
54
61
 
@@ -61,8 +68,6 @@ module Connectors
61
68
  end
62
69
 
63
70
  def yield_documents
64
- check_filtering
65
-
66
71
  with_client do |client|
67
72
  # We do paging using skip().limit() here to make Ruby recycle the memory for each page pulled from the server after it's not needed any more.
68
73
  # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
@@ -89,15 +94,15 @@ module Connectors
89
94
  loop do
90
95
  found_in_page = 0
91
96
 
97
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
92
98
  view = cursor.skip(skip).limit(PAGE_SIZE)
93
99
  view.each do |document|
94
- yield serialize(document)
95
-
96
- found_in_page += 1
97
- found_overall += 1
98
-
99
- overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
100
-
100
+ yield_with_handling_tolerable_errors do
101
+ yield serialize(document)
102
+ found_in_page += 1
103
+ found_overall += 1
104
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
105
+ end
101
106
  break if overall_limit_reached
102
107
  end
103
108
 
@@ -117,20 +122,9 @@ module Connectors
117
122
 
118
123
  return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
119
124
 
120
- collection.find
121
- end
122
-
123
- def check_filtering
124
- return unless filtering_present?
125
+ return create_simple_rules_cursor(collection) if @rules.present?
125
126
 
126
- check_find_and_aggregate
127
- end
128
-
129
- def check_find_and_aggregate
130
- if @advanced_filter_config.keys.size != 1
131
- invalid_keys_msg = "Only one of #{ALLOWED_TOP_LEVEL_FILTER_KEYS} is allowed in the filtering object. Keys present: '#{@advanced_filter_config.keys}'."
132
- raise Utility::InvalidFilterConfigError.new(invalid_keys_msg)
133
- end
127
+ collection.find
134
128
  end
135
129
 
136
130
  def create_aggregate_cursor(collection)
@@ -159,6 +153,16 @@ module Connectors
159
153
  [collection.find(filter, options), options]
160
154
  end
161
155
 
156
# Builds the cursor used when simple filtering rules are configured.
#
# The rules held in @rules are translated into a MongoDB filter document by
# MongoRulesParser; when no rules are present (or the parsed filter is blank)
# the whole collection is queried instead.
#
# @param collection the collection object to call `find` on
# @return whatever `collection.find` returns
def create_simple_rules_cursor(collection)
  filter = @rules.present? ? MongoRulesParser.new(@rules).parse : {}
  Utility::Logger.info("Filtering with simple rules filter: #{filter}")

  # A blank filter means "no restriction": fall back to the unfiltered find.
  return collection.find unless filter.present?

  collection.find(filter)
end
165
+
162
166
  def extract_options(mongodb_function)
163
167
  mongodb_function[:options].present? ? mongodb_function[:options] : {}
164
168
  end
@@ -173,9 +177,9 @@ module Connectors
173
177
  raise "Invalid value for 'Direct connection' : #{@direct_connection}." unless %w[true false].include?(@direct_connection.to_s.strip.downcase)
174
178
 
175
179
  args = {
176
- database: @database,
177
- direct_connection: to_boolean(@direct_connection)
178
- }
180
+ database: @database,
181
+ direct_connection: to_boolean(@direct_connection)
182
+ }
179
183
 
180
184
  if @user.present? || @password.present?
181
185
  args[:user] = @user
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/base/advanced_snippet_against_schema_validator'
10
+ require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
+
12
module Connectors
  module MongoDB
    # Advanced-snippet validator for the MongoDB connector.
    #
    # It adds no behaviour of its own: it only pre-selects the MongoDB schema
    # (Connectors::MongoDB::AdvancedSnippet::SCHEMA) for the generic
    # schema-based validator it inherits from.
    class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
      # @param advanced_snippet the advanced filtering snippet to validate
      # @param schema the schema to validate against; defaults to the MongoDB schema
      def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
        super(advanced_snippet, schema)
      end
    end
  end
end
@@ -0,0 +1,292 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Connectors
  module MongoDB
    # Declarative schema describing which keys and value types are accepted in
    # the MongoDB advanced filtering snippet.
    #
    # Every field description is a Hash with a :name, a :type (either a Ruby
    # class or a lambda predicate returning true/false for a candidate value),
    # an optional :optional flag and, for nested documents, a :fields list.
    # NOTE(review): how these entries are interpreted is defined by the
    # schema validator, which is not part of this file - confirm there.
    module AdvancedSnippet
      # Pipeline stages: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
      ALLOWED_PIPELINE_STAGES = %w[
        $addFields $bucket $bucketAuto $changeStream $collStats $count $densify
        $documents $facet $fill $geoNear $graphLookup $group $indexStats $limit
        $listSessions $lookup $match $merge $out $planCacheStats $project $redact
        $replaceRoot $replaceWith $sample $search $searchMeta $set $setWindowFields
        $skip $sort $sortByCount $unionWith $unset $unwind
      ].freeze

      # All except the $out, $merge, $geoNear, and $changeStream stages can appear multiple times in a pipeline.
      # Source: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
      PIPELINE_STAGES_ALLOWED_ONCE = %w[$out $merge $geoNear $changeStream].freeze

      NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
      READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
      STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
      # At most one of the top-level filter keys may be present.
      MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }

      # Validates an aggregation pipeline: it must be an array of stage
      # documents (hashes), every stage name must be a known pipeline stage and
      # the stages listed in PIPELINE_STAGES_ALLOWED_ONCE may appear at most once.
      # Malformed input yields false instead of raising, so callers always get
      # a plain boolean verdict.
      AGGREGATION_PIPELINE = lambda { |pipeline|
        return false unless pipeline.is_a?(Array) && pipeline.all? { |stage| stage.is_a?(Hash) }

        stage_names = pipeline.flat_map(&:keys)
        return false unless stage_names.all? { |name| ALLOWED_PIPELINE_STAGES.include?(name) }

        PIPELINE_STAGES_ALLOWED_ONCE.all? { |name| stage_names.count(name) <= 1 }
      }

      # Ruby has no 'Boolean' class
      BOOLEAN = ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) }

      # Collation document, shared by the aggregate and find option schemas.
      COLLATION = {
        :name => 'collation',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'locale',
            :type => String,
            :optional => true
          },
          {
            :name => 'caseLevel',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'caseFirst',
            :type => String,
            :optional => true
          },
          {
            :name => 'strength',
            :type => Integer,
            :optional => true
          },
          {
            :name => 'numericOrdering',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'alternate',
            :type => String,
            :optional => true
          },
          {
            :name => 'maxVariable',
            :type => String,
            :optional => true
          },
          {
            :name => 'backwards',
            :type => BOOLEAN,
            :optional => true
          }
        ]
      }

      # Only the symbols :tailable and :tailable_await are accepted here.
      CURSOR_TYPE = ->(cursor) { [:tailable, :tailable_await].include?(cursor) }

      # Aggregate options: https://www.mongodb.com/docs/manual/reference/method/db.collection.aggregate/
      AGGREGATE_OPTIONS = {
        :name => 'options',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'explain',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'allowDiskUse',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'cursor',
            :type => Hash,
            :optional => true,
            :fields => [
              {
                :name => 'batchSize',
                :type => NON_NEGATIVE_INTEGER
              }
            ]
          },
          {
            :name => 'maxTimeMS',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'bypassDocumentValidation',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'readConcern',
            :type => Hash,
            :optional => true,
            :fields => [
              {
                :name => 'level',
                :type => READ_CONCERN_LEVEL
              }
            ]
          },
          COLLATION,
          {
            :name => 'hint',
            :type => STRING_OR_DOCUMENT,
            :optional => true
          },
          {
            :name => 'comment',
            :type => String,
            :optional => true
          },
          {
            :name => 'writeConcern',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'let',
            :type => Hash,
            :optional => true
          }
        ]
      }

      AGGREGATE_PIPELINE = {
        :name => 'pipeline',
        :type => AGGREGATION_PIPELINE,
        :optional => true
      }

      AGGREGATE = {
        :name => 'aggregate',
        :type => Hash,
        :optional => true,
        :fields => [
          AGGREGATE_PIPELINE,
          AGGREGATE_OPTIONS
        ]
      }

      FIND_OPTIONS = {
        :name => 'options',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'allowDiskUse',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'allowPartialResults',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'batchSize',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          COLLATION,
          {
            :name => 'cursorType',
            :type => CURSOR_TYPE,
            :optional => true
          },
          {
            :name => 'limit',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'maxTimeMS',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'modifiers',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'noCursorTimeout',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'oplogReplay',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'projection',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'skip',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'sort',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'let',
            :type => Hash,
            :optional => true
          }
        ]
      }

      # TODO: return true for now. Will be more involved (basically needs full query parsing or "dummy" execution against a running instance)
      FILTER = ->(_filter) { true }

      FIND_FILTER = {
        :name => 'filter',
        :type => FILTER
      }

      FIND = {
        :name => 'find',
        :type => Hash,
        :optional => true,
        :fields => [
          FIND_OPTIONS,
          FIND_FILTER
        ]
      }

      # Top-level schema: either 'aggregate' or 'find', constrained by
      # MUTUAL_EXCLUSIVE_FILTER so that both cannot be given together.
      SCHEMA = {
        :fields => {
          :constraints => MUTUAL_EXCLUSIVE_FILTER,
          :values => [
            AGGREGATE,
            FIND
          ]
        }
      }
    end
  end
end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/object'
10
+ require 'connectors/base/simple_rules_parser'
11
+ require 'core/filtering/simple_rule'
12
+
13
module Connectors
  module MongoDB
    # Turns simple filtering rules into MongoDB query documents.
    class MongoRulesParser < Connectors::Base::SimpleRulesParser
      # Converts a single rule into a MongoDB filter hash.
      #
      # The rule must expose `field`, `value`, `rule` (the operator) and
      # `is_include?`. Raises a RuntimeError when the value or the field is
      # blank, or when the operator is not one of the supported ones.
      def parse_rule(rule)
        field = rule.field
        value = rule.value
        raise "value is required for field: #{field}" unless value.present?
        raise "field is required for rule: #{rule}" unless field.present?

        operator = rule.rule
        case operator
        when Core::Filtering::SimpleRule::Rule::EQUALS then parse_equals(rule)
        when Core::Filtering::SimpleRule::Rule::GREATER_THAN then parse_greater_than(rule)
        when Core::Filtering::SimpleRule::Rule::LESS_THAN then parse_less_than(rule)
        when Core::Filtering::SimpleRule::Rule::REGEX then parse_regex(rule)
        else raise "Unknown operator: #{operator}"
        end
      end

      # Combines already parsed rules into one filter: nothing for no rules,
      # the rule itself for a single rule, an '$and' of all rules otherwise.
      def merge_rules(rules)
        case rules.size
        when 0 then {}
        when 1 then rules[0]
        else { '$and' => rules }
        end
      end

      private

      # Include: exact match on the field. Exclude: '$ne' on the same value.
      def parse_equals(rule)
        return { rule.field => rule.value } if rule.is_include?

        { rule.field => { '$ne' => rule.value } }
      end

      # Include: '$gt'. Exclude: the complementary '$lte'.
      def parse_greater_than(rule)
        comparison = rule.is_include? ? '$gt' : '$lte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: '$lt'. Exclude: the complementary '$gte'.
      def parse_less_than(rule)
        comparison = rule.is_include? ? '$lt' : '$gte'
        { rule.field => { comparison => rule.value } }
      end

      # Include: the field must match the rule value, used as a regular
      # expression. Exclude: the same expression wrapped in '$not'.
      def parse_regex(rule)
        return { rule.field => /#{rule.value}/ } if rule.is_include?

        { rule.field => { '$not' => /#{rule.value}/ } }
      end
    end
  end
end
@@ -26,11 +26,16 @@ module Connectors
26
26
  ERROR
27
27
  ]
28
28
 
29
- PENDING_STATUES = [
29
+ PENDING_STATUSES = [
30
30
  PENDING,
31
31
  SUSPENDED
32
32
  ]
33
33
 
34
+ ACTIVE_STATUSES = [
35
+ IN_PROGRESS,
36
+ CANCELING
37
+ ]
38
+
34
39
  TERMINAL_STATUSES = [
35
40
  CANCELED,
36
41
  COMPLETED,
@@ -0,0 +1,43 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'utility/logger'
8
+ require 'utility/exception_tracking'
9
+ require 'utility/error_monitor'
10
+
11
module Connectors
  # Runs the extraction of one document and reports the outcome to an error
  # monitor, so that individual failures can be tolerated.
  class TolerableErrorHelper
    # @param error_monitor object receiving `note_success` / `note_error` calls
    def initialize(error_monitor)
      @error_monitor = error_monitor
    end

    # Yields to the given block and records the result on the error monitor.
    #
    # On success `note_success` is called. A StandardError raised by the block
    # is logged as a warning and handed to `note_error` instead of propagating.
    # Errors listed in #fatal_exception_classes are logged and re-raised.
    #
    # @param identifier optional value naming the document, used in log messages only
    def yield_single_document(identifier: nil)
      begin
        if identifier
          Utility::Logger.debug("Extracting single document for #{identifier}")
        end
        yield
        @error_monitor.note_success
      rescue *fatal_exception_classes => fatal
        # The monitor itself gave up: log and let the error fall through.
        Utility::ExceptionTracking.augment_exception(fatal)
        Utility::Logger.error("Encountered a fall-through error during extraction#{identifying_error_message(identifier)}: #{fatal.class}: #{fatal.message} {:message_id => #{fatal.id}}")
        raise
      rescue StandardError => tolerated
        Utility::ExceptionTracking.augment_exception(tolerated)
        Utility::Logger.warn("Encountered error during extraction#{identifying_error_message(identifier)}: #{tolerated.class}: #{tolerated.message} {:message_id => #{tolerated.id}}")
        @error_monitor.note_error(tolerated, :id => tolerated.id)
      end
    end

    private

    # Log-message fragment naming the document, or an empty string when the
    # identifier is blank.
    def identifying_error_message(identifier)
      return '' unless identifier.present?

      " of '#{identifier}'"
    end

    # Exception classes that must never be swallowed as tolerable errors.
    def fatal_exception_classes
      [Utility::ErrorMonitor::MonitoringError]
    end
  end
end
@@ -24,8 +24,10 @@ module Core
24
24
  return
25
25
  end
26
26
  configuration = connector_class.configurable_fields_indifferent_access
27
+ features = connector_class.kibana_features.each_with_object({}) { |feature, hsh| hsh[feature] = true }
27
28
  doc = {
28
- :configuration => configuration
29
+ :configuration => configuration,
30
+ :features => features
29
31
  }
30
32
 
31
33
  doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?