connectors_service 8.6.0.4 → 8.7.0.0.pre.20221117T004928Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +9 -8
  3. data/lib/app/app.rb +4 -0
  4. data/lib/app/config.rb +3 -0
  5. data/lib/app/dispatcher.rb +44 -17
  6. data/lib/app/preflight_check.rb +11 -0
  7. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
  8. data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
  9. data/lib/connectors/base/connector.rb +43 -14
  10. data/lib/connectors/base/simple_rules_parser.rb +42 -0
  11. data/lib/connectors/example/connector.rb +6 -0
  12. data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
  13. data/lib/connectors/gitlab/connector.rb +6 -1
  14. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
  15. data/lib/connectors/mongodb/connector.rb +47 -43
  16. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
  17. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
  18. data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
  19. data/lib/connectors/sync_status.rb +6 -1
  20. data/lib/connectors/tolerable_error_helper.rb +43 -0
  21. data/lib/connectors_app/// +13 -0
  22. data/lib/core/configuration.rb +3 -1
  23. data/lib/core/connector_job.rb +210 -0
  24. data/lib/core/connector_settings.rb +52 -16
  25. data/lib/core/elastic_connector_actions.rb +320 -59
  26. data/lib/core/filtering/post_process_engine.rb +39 -0
  27. data/lib/core/filtering/post_process_result.rb +27 -0
  28. data/lib/core/filtering/simple_rule.rb +141 -0
  29. data/lib/core/filtering/validation_job_runner.rb +53 -0
  30. data/lib/core/filtering/validation_status.rb +17 -0
  31. data/lib/core/filtering.rb +17 -0
  32. data/lib/core/ingestion/es_sink.rb +118 -0
  33. data/lib/core/{output_sink.rb → ingestion.rb} +1 -5
  34. data/lib/core/jobs/consumer.rb +132 -0
  35. data/lib/core/jobs/producer.rb +26 -0
  36. data/lib/core/scheduler.rb +40 -10
  37. data/lib/core/single_scheduler.rb +1 -1
  38. data/lib/core/sync_job_runner.rb +80 -16
  39. data/lib/core.rb +4 -0
  40. data/lib/utility/bulk_queue.rb +87 -0
  41. data/lib/utility/constants.rb +7 -0
  42. data/lib/utility/error_monitor.rb +108 -0
  43. data/lib/utility/errors.rb +0 -12
  44. data/lib/utility/filtering.rb +22 -0
  45. data/lib/utility/logger.rb +1 -1
  46. data/lib/utility.rb +11 -4
  47. metadata +31 -12
  48. data/lib/core/output_sink/base_sink.rb +0 -33
  49. data/lib/core/output_sink/combined_sink.rb +0 -38
  50. data/lib/core/output_sink/console_sink.rb +0 -51
  51. data/lib/core/output_sink/es_sink.rb +0 -74
@@ -7,6 +7,9 @@
7
7
  # frozen_string_literal: true
8
8
 
9
9
  require 'connectors/base/connector'
10
+ require 'core/filtering/validation_status'
11
+ require 'connectors/mongodb/mongo_rules_parser'
12
+ require 'connectors/mongodb/mongo_advanced_snippet_against_schema_validator'
10
13
  require 'mongo'
11
14
  require 'utility'
12
15
 
@@ -28,27 +31,31 @@ module Connectors
28
31
 
29
32
  def self.configurable_fields
30
33
  {
31
- :host => {
32
- :label => 'Server Hostname'
33
- },
34
- :user => {
35
- :label => 'Username'
36
- },
37
- :password => {
38
- :label => 'Password'
39
- },
40
- :database => {
41
- :label => 'Database'
42
- },
43
- :collection => {
44
- :label => 'Collection'
45
- },
46
- :direct_connection => {
47
- :label => 'Direct connection? (true/false)'
48
- }
34
+ :host => {
35
+ :label => 'Server Hostname'
36
+ },
37
+ :user => {
38
+ :label => 'Username'
39
+ },
40
+ :password => {
41
+ :label => 'Password'
42
+ },
43
+ :database => {
44
+ :label => 'Database'
45
+ },
46
+ :collection => {
47
+ :label => 'Collection'
48
+ },
49
+ :direct_connection => {
50
+ :label => 'Direct connection? (true/false)'
51
+ }
49
52
  }
50
53
  end
51
54
 
55
# Returns the advanced-snippet validator class associated with this connector.
def self.advanced_snippet_validator
  Connectors::MongoDB::MongoAdvancedSnippetAgainstSchemaValidator
end
58
+
52
59
  def initialize(configuration: {}, job_description: {})
53
60
  super
54
61
 
@@ -61,8 +68,6 @@ module Connectors
61
68
  end
62
69
 
63
70
  def yield_documents
64
- check_filtering
65
-
66
71
  with_client do |client|
67
72
  # We do paging using skip().limit() here to make Ruby recycle the memory for each page pulled from the server after it's not needed any more.
68
73
  # This gives us more control on the usage of the memory (we can adjust PAGE_SIZE constant for that to decrease max memory consumption).
@@ -89,15 +94,15 @@ module Connectors
89
94
  loop do
90
95
  found_in_page = 0
91
96
 
97
+ Utility::Logger.info("Requesting #{PAGE_SIZE} documents from MongoDB (Starting at #{skip})")
92
98
  view = cursor.skip(skip).limit(PAGE_SIZE)
93
99
  view.each do |document|
94
- yield serialize(document)
95
-
96
- found_in_page += 1
97
- found_overall += 1
98
-
99
- overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
100
-
100
+ yield_with_handling_tolerable_errors do
101
+ yield serialize(document)
102
+ found_in_page += 1
103
+ found_overall += 1
104
+ overall_limit_reached = found_overall >= overall_limit && overall_limit != Float::INFINITY
105
+ end
101
106
  break if overall_limit_reached
102
107
  end
103
108
 
@@ -117,20 +122,9 @@ module Connectors
117
122
 
118
123
  return create_aggregate_cursor(collection) if @advanced_filter_config[:aggregate].present?
119
124
 
120
- collection.find
121
- end
122
-
123
- def check_filtering
124
- return unless filtering_present?
125
+ return create_simple_rules_cursor(collection) if @rules.present?
125
126
 
126
- check_find_and_aggregate
127
- end
128
-
129
- def check_find_and_aggregate
130
- if @advanced_filter_config.keys.size != 1
131
- invalid_keys_msg = "Only one of #{ALLOWED_TOP_LEVEL_FILTER_KEYS} is allowed in the filtering object. Keys present: '#{@advanced_filter_config.keys}'."
132
- raise Utility::InvalidFilterConfigError.new(invalid_keys_msg)
133
- end
127
+ collection.find
134
128
  end
135
129
 
136
130
  def create_aggregate_cursor(collection)
@@ -159,6 +153,16 @@ module Connectors
159
153
  [collection.find(filter, options), options]
160
154
  end
161
155
 
156
# Builds a find cursor on +collection+ restricted by the simple filtering rules.
#
# When @rules is present it is translated into a MongoDB filter document via
# MongoRulesParser; otherwise (or when the parsed filter is blank) an
# unfiltered find is issued. The effective filter is logged either way.
def create_simple_rules_cursor(collection)
  filter = @rules.present? ? MongoRulesParser.new(@rules).parse : {}
  Utility::Logger.info("Filtering with simple rules filter: #{filter}")
  return collection.find if filter.blank?

  collection.find(filter)
end
165
+
162
166
  def extract_options(mongodb_function)
163
167
  mongodb_function[:options].present? ? mongodb_function[:options] : {}
164
168
  end
@@ -173,9 +177,9 @@ module Connectors
173
177
  raise "Invalid value for 'Direct connection' : #{@direct_connection}." unless %w[true false].include?(@direct_connection.to_s.strip.downcase)
174
178
 
175
179
  args = {
176
- database: @database,
177
- direct_connection: to_boolean(@direct_connection)
178
- }
180
+ database: @database,
181
+ direct_connection: to_boolean(@direct_connection)
182
+ }
179
183
 
180
184
  if @user.present? || @password.present?
181
185
  args[:user] = @user
@@ -0,0 +1,22 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'connectors/base/advanced_snippet_against_schema_validator'
10
+ require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
+
12
module Connectors
  module MongoDB
    # Schema-based advanced snippet validator whose schema defaults to the
    # MongoDB advanced snippet schema (Connectors::MongoDB::AdvancedSnippet::SCHEMA).
    class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
      # advanced_snippet - the snippet to validate (forwarded to the base class).
      # schema           - schema to validate against; defaults to the MongoDB schema.
      def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
        super(advanced_snippet, schema)
      end
    end
  end
end
@@ -0,0 +1,292 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
module Connectors
  module MongoDB
    # Declarative schema for the MongoDB advanced filtering snippet.
    #
    # SCHEMA allows at most one of the top-level fields 'aggregate' and 'find'
    # (see MUTUAL_EXCLUSIVE_FILTER). Each field description is a Hash with
    # :name, :type, an optional :optional flag and, for nested documents,
    # :fields. A :type is either a Ruby class or one of the predicate lambdas
    # defined below (presumably called by the schema validator with the field
    # value -- confirm against the validator implementation).
    module AdvancedSnippet
      # Pipeline stages: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
      ALLOWED_PIPELINE_STAGES = %w[
        $addFields $bucket $bucketAuto $changeStream $collStats $count $densify
        $documents $facet $fill $geoNear $graphLookup $group $indexStats $limit
        $listSessions $lookup $match $merge $out $planCacheStats $project $redact
        $replaceRoot $replaceWith $sample $search $searchMeta $set $setWindowFields
        $skip $sort $sortByCount $unionWith $unset $unwind
      ].freeze

      # All except the $out, $merge, $geoNear, and $changeStream stages can appear multiple times in a pipeline.
      # Source: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
      PIPELINE_STAGES_ALLOWED_ONCE = %w[$out $merge $geoNear $changeStream].freeze

      NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
      READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
      STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
      # True when at most one of the given top-level fields is present.
      MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }

      # Validates an aggregation pipeline: it must be an Array of stage
      # documents (Hashes) whose keys are all known stage names, and the
      # stages listed in PIPELINE_STAGES_ALLOWED_ONCE may appear at most once.
      # Any malformed input yields false rather than raising.
      AGGREGATION_PIPELINE = lambda { |pipeline|
        return false unless pipeline.is_a?(Array)
        # A stage that is not a document cannot be inspected for stage names.
        return false unless pipeline.all? { |stage| stage.is_a?(Hash) }

        stage_names = pipeline.flat_map(&:keys)
        return false unless stage_names.all? { |name| ALLOWED_PIPELINE_STAGES.include?(name) }

        PIPELINE_STAGES_ALLOWED_ONCE.none? { |name| stage_names.count(name) > 1 }
      }

      # Ruby has no 'Boolean' class
      BOOLEAN = ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) }

      COLLATION = {
        :name => 'collation',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'locale',
            :type => String,
            :optional => true
          },
          {
            :name => 'caseLevel',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'caseFirst',
            :type => String,
            :optional => true
          },
          {
            :name => 'strength',
            :type => Integer,
            :optional => true
          },
          {
            :name => 'numericOrdering',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'alternate',
            :type => String,
            :optional => true
          },
          {
            :name => 'maxVariable',
            :type => String,
            :optional => true
          },
          {
            :name => 'backwards',
            :type => BOOLEAN,
            :optional => true
          }
        ]
      }

      # NOTE(review): only Symbols are accepted here; if the snippet is parsed
      # from JSON the value would arrive as a String and never match -- confirm
      # how 'cursorType' reaches this validator before relaxing the check.
      CURSOR_TYPE = ->(cursor) { [:tailable, :tailable_await].include?(cursor) }

      # Aggregate options: https://www.mongodb.com/docs/manual/reference/method/db.collection.aggregate/
      AGGREGATE_OPTIONS = {
        :name => 'options',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'explain',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'allowDiskUse',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'cursor',
            :type => Hash,
            :optional => true,
            :fields => [
              {
                :name => 'batchSize',
                :type => NON_NEGATIVE_INTEGER
              }
            ]
          },
          {
            :name => 'maxTimeMS',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'bypassDocumentValidation',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'readConcern',
            :type => Hash,
            :optional => true,
            :fields => [
              {
                :name => 'level',
                :type => READ_CONCERN_LEVEL
              }
            ]
          },
          COLLATION,
          {
            :name => 'hint',
            :type => STRING_OR_DOCUMENT,
            :optional => true
          },
          {
            :name => 'comment',
            :type => String,
            :optional => true
          },
          {
            :name => 'writeConcern',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'let',
            :type => Hash,
            :optional => true
          }
        ]
      }

      AGGREGATE_PIPELINE = {
        :name => 'pipeline',
        :type => AGGREGATION_PIPELINE,
        :optional => true
      }

      AGGREGATE = {
        :name => 'aggregate',
        :type => Hash,
        :optional => true,
        :fields => [
          AGGREGATE_PIPELINE,
          AGGREGATE_OPTIONS
        ]
      }

      FIND_OPTIONS = {
        :name => 'options',
        :type => Hash,
        :optional => true,
        :fields => [
          {
            :name => 'allowDiskUse',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'allowPartialResults',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'batchSize',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          COLLATION,
          {
            :name => 'cursorType',
            :type => CURSOR_TYPE,
            :optional => true
          },
          {
            :name => 'limit',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'maxTimeMS',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'modifiers',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'noCursorTimeout',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'oplogReplay',
            :type => BOOLEAN,
            :optional => true
          },
          {
            :name => 'projection',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'skip',
            :type => NON_NEGATIVE_INTEGER,
            :optional => true
          },
          {
            :name => 'sort',
            :type => Hash,
            :optional => true
          },
          {
            :name => 'let',
            :type => Hash,
            :optional => true
          }
        ]
      }

      # TODO: return true for now. Will be more involved (basically needs full query parsing or "dummy" execution against a running instance)
      FILTER = ->(_filter) { true }

      FIND_FILTER = {
        :name => 'filter',
        :type => FILTER
      }

      FIND = {
        :name => 'find',
        :type => Hash,
        :optional => true,
        :fields => [
          FIND_OPTIONS,
          FIND_FILTER
        ]
      }

      SCHEMA = {
        :fields => {
          :constraints => MUTUAL_EXCLUSIVE_FILTER,
          :values => [
            AGGREGATE,
            FIND
          ]
        }
      }
    end
  end
end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ # frozen_string_literal: true
8
+
9
+ require 'active_support/core_ext/object'
10
+ require 'connectors/base/simple_rules_parser'
11
+ require 'core/filtering/simple_rule'
12
+
13
module Connectors
  module MongoDB
    # Translates simple filtering rules into MongoDB filter documents.
    class MongoRulesParser < Connectors::Base::SimpleRulesParser
      # Converts a single rule into a MongoDB filter Hash.
      #
      # Raises a RuntimeError when the rule has no value, no field, or an
      # operator that is not one of EQUALS, GREATER_THAN, LESS_THAN or REGEX.
      def parse_rule(rule)
        field = rule.field
        value = rule.value
        raise "value is required for field: #{field}" if value.blank?
        raise "field is required for rule: #{rule}" if field.blank?

        operator = rule.rule
        case operator
        when Core::Filtering::SimpleRule::Rule::EQUALS then parse_equals(rule)
        when Core::Filtering::SimpleRule::Rule::GREATER_THAN then parse_greater_than(rule)
        when Core::Filtering::SimpleRule::Rule::LESS_THAN then parse_less_than(rule)
        when Core::Filtering::SimpleRule::Rule::REGEX then parse_regex(rule)
        else raise "Unknown operator: #{operator}"
        end
      end

      # Combines already-parsed rule filters: no rules give an empty filter,
      # a single rule is returned as is, several rules are joined with '$and'.
      def merge_rules(rules)
        return {} if rules.empty?

        rules.size == 1 ? rules[0] : { '$and' => rules }
      end

      private

      # Equality for include rules, '$ne' for exclude rules.
      def parse_equals(rule)
        return { rule.field => rule.value } if rule.is_include?

        condition(rule, '$ne')
      end

      # '$gt' for include rules; the complement '$lte' for exclude rules.
      def parse_greater_than(rule)
        condition(rule, rule.is_include? ? '$gt' : '$lte')
      end

      # '$lt' for include rules; the complement '$gte' for exclude rules.
      def parse_less_than(rule)
        condition(rule, rule.is_include? ? '$lt' : '$gte')
      end

      # Regular-expression match for include rules, '$not' of it for exclude rules.
      def parse_regex(rule)
        return { rule.field => /#{rule.value}/ } if rule.is_include?

        { rule.field => { '$not' => /#{rule.value}/ } }
      end

      # Builds { field => { mongo_operator => value } } for the given rule.
      def condition(rule, mongo_operator)
        { rule.field => { mongo_operator => rule.value } }
      end
    end
  end
end
@@ -26,11 +26,16 @@ module Connectors
26
26
  ERROR
27
27
  ]
28
28
 
29
- PENDING_STATUES = [
29
+ PENDING_STATUSES = [
30
30
  PENDING,
31
31
  SUSPENDED
32
32
  ]
33
33
 
34
+ ACTIVE_STATUSES = [
35
+ IN_PROGRESS,
36
+ CANCELING
37
+ ]
38
+
34
39
  TERMINAL_STATUSES = [
35
40
  CANCELED,
36
41
  COMPLETED,
@@ -0,0 +1,43 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
+ require 'utility/logger'
8
+ require 'utility/exception_tracking'
9
+ require 'utility/error_monitor'
10
+
11
module Connectors
  # Wraps the extraction of a single document so that ordinary errors are
  # logged and reported to the error monitor instead of propagating, while
  # the monitor's own MonitoringError is logged and re-raised.
  class TolerableErrorHelper
    # error_monitor - object receiving note_success / note_error calls.
    def initialize(error_monitor)
      @error_monitor = error_monitor
    end

    # Yields to the given block and records the outcome on the error monitor.
    #
    # identifier - optional value used only in log messages.
    #
    # Re-raises any exception listed in #fatal_exception_classes; every other
    # StandardError is logged as a warning and passed to note_error.
    def yield_single_document(identifier: nil)
      Utility::Logger.debug("Extracting single document for #{identifier}") if identifier
      yield
      @error_monitor.note_success
    rescue *fatal_exception_classes => fatal
      Utility::ExceptionTracking.augment_exception(fatal)
      Utility::Logger.error("Encountered a fall-through error during extraction#{failure_details(fatal, identifier)}")
      raise
    rescue StandardError => tolerable
      Utility::ExceptionTracking.augment_exception(tolerable)
      Utility::Logger.warn("Encountered error during extraction#{failure_details(tolerable, identifier)}")
      @error_monitor.note_error(tolerable, :id => tolerable.id)
    end

    private

    # Shared tail of both log messages: optional identifier, error class,
    # message and the error's id.
    def failure_details(error, identifier)
      "#{identifying_error_message(identifier)}: #{error.class}: #{error.message} {:message_id => #{error.id}}"
    end

    # Log fragment naming the identifier, or an empty string when it is blank.
    def identifying_error_message(identifier)
      return '' if identifier.blank?

      " of '#{identifier}'"
    end

    # Exception classes that must never be tolerated.
    def fatal_exception_classes
      [Utility::ErrorMonitor::MonitoringError]
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #
2
+ # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
+ # or more contributor license agreements. Licensed under the Elastic License;
4
+ # you may not use this file except in compliance with the Elastic License.
5
+ #
6
+
7
module ConnectorsApp
  # String error codes defined for ConnectorsApp.
  #
  # The values are frozen explicitly so these shared constants cannot be
  # mutated in place by a caller.
  module Errors
    INVALID_API_KEY = 'INVALID_API_KEY'.freeze
    UNSUPPORTED_AUTH_SCHEME = 'UNSUPPORTED_AUTH_SCHEME'.freeze
    INTERNAL_SERVER_ERROR = 'INTERNAL_SERVER_ERROR'.freeze
  end
end
@@ -24,8 +24,10 @@ module Core
24
24
  return
25
25
  end
26
26
  configuration = connector_class.configurable_fields_indifferent_access
27
+ features = connector_class.kibana_features.each_with_object({}) { |feature, hsh| hsh[feature] = true }
27
28
  doc = {
28
- :configuration => configuration
29
+ :configuration => configuration,
30
+ :features => features
29
31
  }
30
32
 
31
33
  doc[:service_type] = service_type if service_type && connector_settings.needs_service_type?