connectors_service 8.6.0.4.pre.20221116T024501Z → 8.6.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/config/connectors.yml +6 -6
  3. data/lib/app/app.rb +0 -4
  4. data/lib/app/dispatcher.rb +17 -42
  5. data/lib/app/preflight_check.rb +0 -11
  6. data/lib/connectors/base/connector.rb +14 -43
  7. data/lib/connectors/example/connector.rb +0 -6
  8. data/lib/connectors/gitlab/connector.rb +1 -6
  9. data/lib/connectors/mongodb/connector.rb +43 -47
  10. data/lib/connectors/sync_status.rb +1 -6
  11. data/lib/core/configuration.rb +1 -3
  12. data/lib/core/connector_settings.rb +16 -52
  13. data/lib/core/elastic_connector_actions.rb +59 -320
  14. data/lib/core/output_sink/base_sink.rb +33 -0
  15. data/lib/core/output_sink/combined_sink.rb +38 -0
  16. data/lib/core/output_sink/console_sink.rb +51 -0
  17. data/lib/core/output_sink/es_sink.rb +74 -0
  18. data/lib/core/{ingestion.rb → output_sink.rb} +5 -1
  19. data/lib/core/scheduler.rb +10 -40
  20. data/lib/core/single_scheduler.rb +1 -1
  21. data/lib/core/sync_job_runner.rb +16 -72
  22. data/lib/core.rb +0 -4
  23. data/lib/utility/constants.rb +0 -2
  24. data/lib/utility/errors.rb +12 -0
  25. data/lib/utility/logger.rb +1 -1
  26. data/lib/utility.rb +4 -11
  27. metadata +9 -27
  28. data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +0 -173
  29. data/lib/connectors/base/advanced_snippet_validator.rb +0 -34
  30. data/lib/connectors/base/simple_rules_parser.rb +0 -42
  31. data/lib/connectors/example/example_advanced_snippet_validator.rb +0 -35
  32. data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +0 -35
  33. data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +0 -22
  34. data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +0 -292
  35. data/lib/connectors/mongodb/mongo_rules_parser.rb +0 -81
  36. data/lib/connectors/tolerable_error_helper.rb +0 -43
  37. data/lib/core/connector_job.rb +0 -210
  38. data/lib/core/filtering/post_process_engine.rb +0 -39
  39. data/lib/core/filtering/post_process_result.rb +0 -27
  40. data/lib/core/filtering/simple_rule.rb +0 -141
  41. data/lib/core/filtering/validation_job_runner.rb +0 -53
  42. data/lib/core/filtering/validation_status.rb +0 -17
  43. data/lib/core/filtering.rb +0 -17
  44. data/lib/core/ingestion/es_sink.rb +0 -118
  45. data/lib/core/jobs/consumer.rb +0 -114
  46. data/lib/core/jobs/producer.rb +0 -26
  47. data/lib/utility/bulk_queue.rb +0 -85
  48. data/lib/utility/error_monitor.rb +0 -108
  49. data/lib/utility/filtering.rb +0 -22
@@ -1,42 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
- # frozen_string_literal: true
7
-
8
- require 'active_support/core_ext/hash/indifferent_access'
9
- require 'active_support/core_ext/object/blank'
10
- require 'core/filtering/simple_rule'
11
-
12
- module Connectors
13
- module Base
14
- class SimpleRulesParser
15
- def initialize(rules)
16
- @rules = (rules || []).map(&:with_indifferent_access).filter { |r| r[:id] != 'DEFAULT' }.sort_by { |r| r[:order] }
17
- end
18
-
19
- def parse
20
- merge_rules(@rules.map do |rule_hash|
21
- rule = Core::Filtering::SimpleRule.new(rule_hash)
22
- unless rule.is_include? || rule.is_exclude?
23
- raise "Unknown policy: #{rule.policy}"
24
- end
25
- parse_rule(rule)
26
- end)
27
- end
28
-
29
- private
30
-
31
- # merge all rules into a filter object or array
32
- # in a base case, does no transformations
33
- def merge_rules(rules)
34
- rules || []
35
- end
36
-
37
- def parse_rule(_rule)
38
- raise 'Not implemented'
39
- end
40
- end
41
- end
42
- end
@@ -1,35 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_validator'
10
-
11
- module Connectors
12
- module Example
13
- class ExampleAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
-
15
- def is_snippet_valid?
16
- # TODO: real filtering validation will follow later
17
- errors = [
18
- {
19
- :ids => ['missing-implementation'],
20
- :messages => ['Filtering is not implemented yet for the example connector']
21
- }
22
- ]
23
-
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
31
- end
32
-
33
- end
34
- end
35
- end
@@ -1,35 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_validator'
10
-
11
- module Connectors
12
- module GitLab
13
- class GitLabAdvancedSnippetValidator < Connectors::Base::AdvancedSnippetValidator
14
-
15
- def is_snippet_valid?
16
- # TODO: real filtering validation will follow later
17
- errors = [
18
- {
19
- :ids => ['missing-implementation'],
20
- :messages => ['Filtering is not implemented yet for the GitLab connector']
21
- }
22
- ]
23
-
24
- validation_result = if @advanced_snippet.present? && !@advanced_snippet.empty?
25
- { :state => Core::Filtering::ValidationStatus::INVALID, :errors => errors }
26
- else
27
- { :state => Core::Filtering::ValidationStatus::VALID, :errors => [] }
28
- end
29
- log_validation_result(validation_result)
30
- validation_result
31
- end
32
-
33
- end
34
- end
35
- end
@@ -1,22 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'connectors/base/advanced_snippet_against_schema_validator'
10
- require 'connectors/mongodb/mongo_advanced_snippet_schema'
11
-
12
- module Connectors
13
- module MongoDB
14
- class MongoAdvancedSnippetAgainstSchemaValidator < Connectors::Base::AdvancedSnippetAgainstSchemaValidator
15
-
16
- def initialize(advanced_snippet, schema = Connectors::MongoDB::AdvancedSnippet::SCHEMA)
17
- super
18
- end
19
-
20
- end
21
- end
22
- end
@@ -1,292 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- module Connectors
10
- module MongoDB
11
- module AdvancedSnippet
12
- # Pipeline stages: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
13
- ALLOWED_PIPELINE_STAGES = %w[
14
- $addFields $bucket $bucketAuto $changeStream $collStats $count $densify
15
- $documents $facet $fill $geoNear $graphLookup $group $indexStats $limit
16
- $listSessions $lookup $match $merge $out $planCacheStats $project $redact
17
- $replaceRoot $replaceWith $sample $search $searchMeta $set $setWindowFields
18
- $skip $sort $sortByCount $unionWith $unset $unwind
19
- ]
20
-
21
- # All except the $out, $merge, $geoNear, and $changeStream stages can appear multiple times in a pipeline.
22
- # Source: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
23
- PIPELINE_STAGES_ALLOWED_ONCE = %w[$out $merge $geoNear $changeStream]
24
-
25
- NON_NEGATIVE_INTEGER = ->(value) { value.is_a?(Integer) && value >= 0 }
26
- READ_CONCERN_LEVEL = ->(level) { %w[local available majority linearizable].include?(level) }
27
- STRING_OR_DOCUMENT = ->(value) { value.is_a?(Hash) || value.is_a?(String) }
28
- MUTUAL_EXCLUSIVE_FILTER = ->(fields) { fields.size <= 1 }
29
-
30
- AGGREGATION_PIPELINE = lambda { |pipeline|
31
- return false unless pipeline.is_a?(Array)
32
-
33
- allowed_once_appearances = Set.new
34
-
35
- pipeline.flat_map(&:keys).each do |key|
36
- return false unless ALLOWED_PIPELINE_STAGES.include?(key)
37
-
38
- if PIPELINE_STAGES_ALLOWED_ONCE.include?(key)
39
- return false if allowed_once_appearances.include?(key)
40
-
41
- allowed_once_appearances.add(key)
42
- end
43
- end
44
-
45
- true
46
- }
47
-
48
- # Ruby has no 'Boolean' class
49
- BOOLEAN = ->(value) { value.is_a?(TrueClass) || value.is_a?(FalseClass) }
50
-
51
- COLLATION = {
52
- :name => 'collation',
53
- :type => Hash,
54
- :optional => true,
55
- :fields => [
56
- {
57
- :name => 'locale',
58
- :type => String,
59
- :optional => true
60
- },
61
- {
62
- :name => 'caseLevel',
63
- :type => BOOLEAN,
64
- :optional => true
65
- },
66
- {
67
- :name => 'caseFirst',
68
- :type => String,
69
- :optional => true
70
- },
71
- {
72
- :name => 'strength',
73
- :type => Integer,
74
- :optional => true
75
- },
76
- {
77
- :name => 'numericOrdering',
78
- :type => BOOLEAN,
79
- :optional => true
80
- },
81
- {
82
- :name => 'alternate',
83
- :type => String,
84
- :optional => true
85
- },
86
- {
87
- :name => 'maxVariable',
88
- :type => String,
89
- :optional => true
90
- },
91
- {
92
- :name => 'backwards',
93
- :type => BOOLEAN,
94
- :optional => true
95
- },
96
- ]
97
- }
98
-
99
- CURSOR_TYPE = ->(cursor) { [:tailable, :tailable_await].include?(cursor) }
100
-
101
- # Aggregate options: https://www.mongodb.com/docs/manual/reference/method/db.collection.aggregate/
102
- AGGREGATE_OPTIONS = {
103
- :name => 'options',
104
- :type => Hash,
105
- :optional => true,
106
- :fields => [
107
- {
108
- :name => 'explain',
109
- :type => BOOLEAN,
110
- :optional => true
111
- },
112
- {
113
- :name => 'allowDiskUse',
114
- :type => BOOLEAN,
115
- :optional => true
116
- },
117
- {
118
- :name => 'cursor',
119
- :type => Hash,
120
- :optional => true,
121
- :fields => [
122
- {
123
- :name => 'batchSize',
124
- :type => NON_NEGATIVE_INTEGER
125
- }
126
- ]
127
- },
128
- {
129
- :name => 'maxTimeMS',
130
- :type => NON_NEGATIVE_INTEGER,
131
- :optional => true
132
- },
133
- {
134
- :name => 'bypassDocumentValidation',
135
- :type => BOOLEAN,
136
- :optional => true
137
- },
138
- {
139
- :name => 'readConcern',
140
- :type => Hash,
141
- :optional => true,
142
- :fields => [
143
- {
144
- :name => 'level',
145
- :type => READ_CONCERN_LEVEL
146
- }
147
- ]
148
- },
149
- COLLATION,
150
- {
151
- :name => 'hint',
152
- :type => STRING_OR_DOCUMENT,
153
- :optional => true
154
- },
155
- {
156
- :name => 'comment',
157
- :type => String,
158
- :optional => true
159
- },
160
- {
161
- :name => 'writeConcern',
162
- :type => Hash,
163
- :optional => true
164
- },
165
- {
166
- :name => 'let',
167
- :type => Hash,
168
- :optional => true
169
- }
170
- ]
171
- }
172
-
173
- AGGREGATE_PIPELINE = {
174
- :name => 'pipeline',
175
- :type => AGGREGATION_PIPELINE,
176
- :optional => true,
177
- }
178
-
179
- AGGREGATE = {
180
- :name => 'aggregate',
181
- :type => Hash,
182
- :optional => true,
183
- :fields => [
184
- AGGREGATE_PIPELINE,
185
- AGGREGATE_OPTIONS
186
- ]
187
- }
188
-
189
- FIND_OPTIONS = {
190
- :name => 'options',
191
- :type => Hash,
192
- :optional => true,
193
- :fields => [
194
- {
195
- :name => 'allowDiskUse',
196
- :type => BOOLEAN,
197
- :optional => true
198
- },
199
- {
200
- :name => 'allowPartialResults',
201
- :type => BOOLEAN,
202
- :optional => true
203
- },
204
- {
205
- :name => 'batchSize',
206
- :type => NON_NEGATIVE_INTEGER,
207
- :optional => true
208
- },
209
- COLLATION,
210
- {
211
- :name => 'cursorType',
212
- :type => CURSOR_TYPE,
213
- :optional => true
214
- },
215
- {
216
- :name => 'limit',
217
- :type => NON_NEGATIVE_INTEGER,
218
- :optional => true
219
- },
220
- {
221
- :name => 'maxTimeMS',
222
- :type => NON_NEGATIVE_INTEGER,
223
- :optional => true
224
- },
225
- {
226
- :name => 'modifiers',
227
- :type => Hash,
228
- :optional => true
229
- },
230
- {
231
- :name => 'noCursorTimeout',
232
- :type => BOOLEAN,
233
- :optional => true
234
- },
235
- {
236
- :name => 'oplogReplay',
237
- :type => BOOLEAN,
238
- :optional => true
239
- },
240
- {
241
- :name => 'projection',
242
- :type => Hash,
243
- :optional => true
244
- },
245
- {
246
- :name => 'skip',
247
- :type => NON_NEGATIVE_INTEGER,
248
- :optional => true
249
- },
250
- {
251
- :name => 'sort',
252
- :type => Hash,
253
- :optional => true
254
- },
255
- {
256
- :name => 'let',
257
- :type => Hash,
258
- :optional => true
259
- }
260
- ]
261
- }
262
-
263
- # TODO: return true for now. Will be more involved (basically needs full query parsing or "dummy" execution against a running instance)
264
- FILTER = ->(_filter) { true }
265
-
266
- FIND_FILTER = {
267
- :name => 'filter',
268
- :type => FILTER
269
- }
270
-
271
- FIND = {
272
- :name => 'find',
273
- :type => Hash,
274
- :optional => true,
275
- :fields => [
276
- FIND_OPTIONS,
277
- FIND_FILTER
278
- ]
279
- }
280
-
281
- SCHEMA = {
282
- :fields => {
283
- :constraints => MUTUAL_EXCLUSIVE_FILTER,
284
- :values => [
285
- AGGREGATE,
286
- FIND
287
- ]
288
- }
289
- }
290
- end
291
- end
292
- end
@@ -1,81 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- # frozen_string_literal: true
8
-
9
- require 'active_support/core_ext/object'
10
- require 'connectors/base/simple_rules_parser'
11
- require 'core/filtering/simple_rule'
12
-
13
- module Connectors
14
- module MongoDB
15
- class MongoRulesParser < Connectors::Base::SimpleRulesParser
16
- def parse_rule(rule)
17
- field = rule.field
18
- value = rule.value
19
- unless value.present?
20
- raise "value is required for field: #{field}"
21
- end
22
- unless field.present?
23
- raise "field is required for rule: #{rule}"
24
- end
25
- op = rule.rule
26
- case op
27
- when Core::Filtering::SimpleRule::Rule::EQUALS
28
- parse_equals(rule)
29
- when Core::Filtering::SimpleRule::Rule::GREATER_THAN
30
- parse_greater_than(rule)
31
- when Core::Filtering::SimpleRule::Rule::LESS_THAN
32
- parse_less_than(rule)
33
- when Core::Filtering::SimpleRule::Rule::REGEX
34
- parse_regex(rule)
35
- else
36
- raise "Unknown operator: #{op}"
37
- end
38
- end
39
-
40
- def merge_rules(rules)
41
- return {} if rules.empty?
42
- return rules[0] if rules.size == 1
43
- { '$and' => rules }
44
- end
45
-
46
- private
47
-
48
- def parse_equals(rule)
49
- if rule.is_include?
50
- { rule.field => rule.value }
51
- else
52
- { rule.field => { '$ne' => rule.value } }
53
- end
54
- end
55
-
56
- def parse_greater_than(rule)
57
- if rule.is_include?
58
- { rule.field => { '$gt' => rule.value } }
59
- else
60
- { rule.field => { '$lte' => rule.value } }
61
- end
62
- end
63
-
64
- def parse_less_than(rule)
65
- if rule.is_include?
66
- { rule.field => { '$lt' => rule.value } }
67
- else
68
- { rule.field => { '$gte' => rule.value } }
69
- end
70
- end
71
-
72
- def parse_regex(rule)
73
- if rule.is_include?
74
- { rule.field => /#{rule.value}/ }
75
- else
76
- { rule.field => { '$not' => /#{rule.value}/ } }
77
- end
78
- end
79
- end
80
- end
81
- end
@@ -1,43 +0,0 @@
1
- #
2
- # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3
- # or more contributor license agreements. Licensed under the Elastic License;
4
- # you may not use this file except in compliance with the Elastic License.
5
- #
6
-
7
- require 'utility/logger'
8
- require 'utility/exception_tracking'
9
- require 'utility/error_monitor'
10
-
11
- module Connectors
12
- class TolerableErrorHelper
13
- def initialize(error_monitor)
14
- @error_monitor = error_monitor
15
- end
16
-
17
- def yield_single_document(identifier: nil)
18
- Utility::Logger.debug("Extracting single document for #{identifier}") if identifier
19
- yield
20
- @error_monitor.note_success
21
- rescue *fatal_exception_classes => e
22
- Utility::ExceptionTracking.augment_exception(e)
23
- Utility::Logger.error("Encountered a fall-through error during extraction#{identifying_error_message(identifier)}: #{e.class}: #{e.message} {:message_id => #{e.id}}")
24
- raise
25
- rescue StandardError => e
26
- Utility::ExceptionTracking.augment_exception(e)
27
- Utility::Logger.warn("Encountered error during extraction#{identifying_error_message(identifier)}: #{e.class}: #{e.message} {:message_id => #{e.id}}")
28
- @error_monitor.note_error(e, :id => e.id)
29
- end
30
-
31
- private
32
-
33
- def identifying_error_message(identifier)
34
- identifier.present? ? " of '#{identifier}'" : ''
35
- end
36
-
37
- def fatal_exception_classes
38
- [
39
- Utility::ErrorMonitor::MonitoringError
40
- ]
41
- end
42
- end
43
- end